From ea9917d9bd921f9fc14028a8dbf9e3f0f2579120 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 1 May 2023 00:46:28 -0400 Subject: [PATCH 01/81] debug: support loading elf debug info from external files Some distributions (i.e. Ubuntu) have their libc debug info in separate files. This change allows the stack walking code to read that debug info. - add support for reading compressed ELF sections - support reading the build-id from the elf headers in order to look up external debug info - support reading the .gnu_debuglink section to look up external debug info --- lib/std/debug.zig | 234 ++++++++++++++++++++++++++++++++++++---------- lib/std/elf.zig | 8 ++ 2 files changed, 191 insertions(+), 51 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 44f6ce136759..083b2afb6c37 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -684,12 +684,16 @@ fn printUnknownSource(debug_info: *DebugInfo, out_stream: anytype, address: usiz pub fn printSourceAtAddress(debug_info: *DebugInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), - else => return err, + else => { + return err; + }, }; const symbol_info = module.getSymbolAtAddress(debug_info.allocator, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), - else => return err, + else => { + return err; + }, }; defer symbol_info.deinit(debug_info.allocator); @@ -877,13 +881,29 @@ fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 return ptr[start..end]; } -/// This takes ownership of elf_file: users of this function should not close -/// it themselves, even on error. -/// TODO it's weird to take ownership even on error, rework this code. 
-pub fn readElfDebugInfo(allocator: mem.Allocator, elf_file: File) !ModuleDebugInfo { +pub fn readElfDebugInfo( + allocator: mem.Allocator, + elf_filename: ?[]const u8, + build_id: ?[]const u8, + expected_crc: ?u32, +) !ModuleDebugInfo { nosuspend { + + // TODO https://github.com/ziglang/zig/issues/5525 + const elf_file = (if (elf_filename) |filename| blk: { + break :blk if (fs.path.isAbsolute(filename)) + fs.openFileAbsolute(filename, .{ .intended_io_mode = .blocking }) + else + fs.cwd().openFile(filename, .{ .intended_io_mode = .blocking }); + } else fs.openSelfExe(.{ .intended_io_mode = .blocking })) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + const mapped_mem = try mapWholeFile(elf_file); - const hdr = @as(*const elf.Ehdr, @ptrCast(&mapped_mem[0])); + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32SmallWithPoly(.IEEE).hash(mapped_mem)) return error.MissingDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; @@ -918,44 +938,147 @@ pub fn readElfDebugInfo(allocator: mem.Allocator, elf_file: File) !ModuleDebugIn var opt_debug_names: ?[]const u8 = null; var opt_debug_frame: ?[]const u8 = null; + var owned_sections: [ModuleDebugInfo.num_sections][]const u8 = [_][]const u8{&.{}} ** ModuleDebugInfo.num_sections; + errdefer for (owned_sections) |section| allocator.free(section); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + for (shdrs) |*shdr| { if (shdr.sh_type == elf.SHT_NULL) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - if (mem.eql(u8, name, ".debug_info")) { - opt_debug_info = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_abbrev")) { - opt_debug_abbrev = try chopSlice(mapped_mem, shdr.sh_offset, 
shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_str")) { - opt_debug_str = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_str_offsets")) { - opt_debug_str_offsets = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_line")) { - opt_debug_line = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_line_str")) { - opt_debug_line_str = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_ranges")) { - opt_debug_ranges = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_loclists")) { - opt_debug_loclists = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_rnglists")) { - opt_debug_rnglists = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_addr")) { - opt_debug_addr = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_names")) { - opt_debug_names = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - } else if (mem.eql(u8, name, ".debug_frame")) { - opt_debug_frame = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@ptrCast([*:0]const u8, gnu_debuglink.ptr), 0); + const crc_offset = mem.alignForward(@ptrToInt(&debug_filename[debug_filename.len]) + 1, 4) - @ptrToInt(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset .. 
crc_offset + 4]; + separate_debug_crc = mem.readIntSliceNative(u32, crc_bytes); + separate_debug_filename = debug_filename; + continue; + } + + const sections = [_]struct { name: []const u8, out: *?[]const u8 }{ + .{ .name = ".debug_info", .out = &opt_debug_info }, + .{ .name = ".debug_abbrev", .out = &opt_debug_abbrev }, + .{ .name = ".debug_str", .out = &opt_debug_str }, + .{ .name = ".debug_str_offsets", .out = &opt_debug_str_offsets }, + .{ .name = ".debug_line", .out = &opt_debug_line }, + .{ .name = ".debug_line_str", .out = &opt_debug_line_str }, + .{ .name = ".debug_ranges", .out = &opt_debug_ranges }, + .{ .name = ".debug_loclists", .out = &opt_debug_loclists }, + .{ .name = ".debug_rnglists", .out = &opt_debug_rnglists }, + .{ .name = ".debug_addr", .out = &opt_debug_addr }, + .{ .name = ".debug_names", .out = &opt_debug_names }, + .{ .name = ".debug_frame", .out = &opt_debug_frame }, + }; + + var section_index = for (sections, 0..) |section, i| { + if (mem.eql(u8, section.name, name)) { + break i; + } + } else continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) { + var section_stream = io.fixedBufferStream(section_bytes); + var section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + + // TODO: Support ZSTD + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.zlibStream(allocator, section_stream.reader()) catch continue; + defer zlib_stream.deinit(); + + var decompressed_section = try allocator.alloc(u8, chdr.ch_size); + errdefer allocator.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + sections[section_index].out.* = decompressed_section; + owned_sections[section_index] = decompressed_section; + } else { + sections[section_index].out.* = section_bytes; + } + } + + const 
missing_debug_info = + opt_debug_info == null or + opt_debug_abbrev == null or + opt_debug_str == null or + opt_debug_line == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + // TODO: Determine the set of directories used by most distros for this path (check GDB sources) + }; + + // <global debug directory>/.build-id/<2-character id prefix>/<remaining id hex>.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break :blk; + + for (global_debug_directories) |global_directory| { + // TODO: joinBuf would be ideal (with a fs.MAX_PATH_BYTES buffer) + const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); + defer allocator.free(path); + std.debug.print(" Loading external debug info from {s}\n", .{path}); + return readElfDebugInfo(allocator, path, null, separate_debug_crc) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; + + // <cwd>/<gnu_debuglink> + if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc)) |debug_info| return debug_info else |_| {} + + // <cwd>/.debug/<gnu_debuglink> + { + const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); + defer allocator.free(path); + + if (readElfDebugInfo(allocator, path, null, 
separate_debug_crc)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const cwd_path = fs.cwd().realpath("", &cwd_buf) catch break :blk; + + // <global debug directory>/<absolute cwd path>/<gnu_debuglink> + for (global_debug_directories) |global_directory| { + const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); + defer allocator.free(path); + if (readElfDebugInfo(allocator, path, null, separate_debug_crc)) |debug_info| return debug_info else |_| {} + } } + + return error.MissingDebugInfo; } var di = DW.DwarfInfo{ .endian = endian, - .debug_info = opt_debug_info orelse return error.MissingDebugInfo, - .debug_abbrev = opt_debug_abbrev orelse return error.MissingDebugInfo, - .debug_str = opt_debug_str orelse return error.MissingDebugInfo, + .debug_info = opt_debug_info.?, + .debug_abbrev = opt_debug_abbrev.?, + .debug_str = opt_debug_str.?, .debug_str_offsets = opt_debug_str_offsets, - .debug_line = opt_debug_line orelse return error.MissingDebugInfo, + .debug_line = opt_debug_line.?, .debug_line_str = opt_debug_line_str, .debug_ranges = opt_debug_ranges, .debug_loclists = opt_debug_loclists, @@ -971,6 +1094,7 @@ pub fn readElfDebugInfo(allocator: mem.Allocator, elf_file: File) !ModuleDebugIn .base_address = undefined, .dwarf = di, .mapped_memory = mapped_mem, + .owned_sections = owned_sections, }; } } @@ -1359,6 +1483,7 @@ pub const DebugInfo = struct { // Output base_address: usize = undefined, name: []const u8 = undefined, + build_id: ?[]const u8 = undefined, } = .{ .address = address }; const CtxTy = @TypeOf(ctx); @@ -1375,16 +1500,30 @@ pub const DebugInfo = struct { const seg_start = info.dlpi_addr + phdr.p_vaddr; const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { // Android libc uses NULL instead of an empty string to mark the // main program context.name = mem.sliceTo(info.dlpi_name, 0) orelse ""; context.base_address = info.dlpi_addr; - // Stop the iteration - 
return error.Found; + break; } + } else return; + + for (info.dlpi_phdr[0..info.dlpi_phnum]) |phdr| { + if (phdr.p_type != elf.PT_NOTE) continue; + + const note_bytes = @intToPtr([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; + const name_size = mem.readIntSliceNative(u32, note_bytes[0..4]); + if (name_size != 4) continue; + const desc_size = mem.readIntSliceNative(u32, note_bytes[4..8]); + const note_type = mem.readIntSliceNative(u32, note_bytes[8..12]); + if (note_type != elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; + context.build_id = note_bytes[16 .. 16 + desc_size]; } + + // Stop the iteration + return error.Found; } }.callback)) { return error.MissingDebugInfo; @@ -1399,18 +1538,7 @@ pub const DebugInfo = struct { const obj_di = try self.allocator.create(ModuleDebugInfo); errdefer self.allocator.destroy(obj_di); - // TODO https://github.com/ziglang/zig/issues/5525 - const copy = if (ctx.name.len > 0) - fs.cwd().openFile(ctx.name, .{ .intended_io_mode = .blocking }) - else - fs.openSelfExe(.{ .intended_io_mode = .blocking }); - - const elf_file = copy catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - - obj_di.* = try readElfDebugInfo(self.allocator, elf_file); + obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null); obj_di.base_address = ctx.base_address; try self.address_map.putNoClobber(ctx.base_address, obj_di); @@ -1752,9 +1880,13 @@ pub const ModuleDebugInfo = switch (native_os) { base_address: usize, dwarf: DW.DwarfInfo, mapped_memory: []align(mem.page_size) const u8, + owned_sections: [num_sections][]const u8 = [_][]const u8{&.{}} ** num_sections, + + const num_sections = 12; fn deinit(self: *@This(), allocator: mem.Allocator) void { self.dwarf.deinit(allocator); + for (self.owned_sections) |section| allocator.free(section); os.munmap(self.mapped_memory); } diff --git 
a/lib/std/elf.zig b/lib/std/elf.zig index 3ea136fabe71..004e50896017 100644 --- a/lib/std/elf.zig +++ b/lib/std/elf.zig @@ -371,6 +371,9 @@ pub const SHT_LOUSER = 0x80000000; /// End of application-specific pub const SHT_HIUSER = 0xffffffff; +// Note type for .note.gnu.build_id +pub const NT_GNU_BUILD_ID = 3; + /// Local symbol pub const STB_LOCAL = 0; /// Global symbol @@ -1055,6 +1058,11 @@ pub const Shdr = switch (@sizeOf(usize)) { 8 => Elf64_Shdr, else => @compileError("expected pointer size of 32 or 64"), }; +pub const Chdr = switch (@sizeOf(usize)) { + 4 => Elf32_Chdr, + 8 => Elf64_Chdr, + else => @compileError("expected pointer size of 32 or 64"), +}; pub const Sym = switch (@sizeOf(usize)) { 4 => Elf32_Sym, 8 => Elf64_Sym, From f6148f123e35dcbec4b9ab7458967fa93277e443 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 4 May 2023 01:35:43 -0400 Subject: [PATCH 02/81] add CommonInformationEntry parser --- lib/std/debug.zig | 136 +++++++-------------------- lib/std/dwarf.zig | 217 ++++++++++++++++++++++++++++++++++++++----- lib/std/dwarf/EH.zig | 19 ++++ 3 files changed, 245 insertions(+), 127 deletions(-) create mode 100644 lib/std/dwarf/EH.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 083b2afb6c37..5e72c4d05203 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -800,52 +800,20 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe // This coff file has embedded DWARF debug info _ = sec; - const debug_info = coff_obj.getSectionDataAlloc(".debug_info", allocator) catch return error.MissingDebugInfo; - errdefer allocator.free(debug_info); - const debug_abbrev = coff_obj.getSectionDataAlloc(".debug_abbrev", allocator) catch return error.MissingDebugInfo; - errdefer allocator.free(debug_abbrev); - const debug_str = coff_obj.getSectionDataAlloc(".debug_str", allocator) catch return error.MissingDebugInfo; - errdefer allocator.free(debug_str); - const debug_line = coff_obj.getSectionDataAlloc(".debug_line", 
allocator) catch return error.MissingDebugInfo; - errdefer allocator.free(debug_line); - - const debug_str_offsets = coff_obj.getSectionDataAlloc(".debug_str_offsets", allocator) catch null; - const debug_line_str = coff_obj.getSectionDataAlloc(".debug_line_str", allocator) catch null; - const debug_ranges = coff_obj.getSectionDataAlloc(".debug_ranges", allocator) catch null; - const debug_loclists = coff_obj.getSectionDataAlloc(".debug_loclists", allocator) catch null; - const debug_rnglists = coff_obj.getSectionDataAlloc(".debug_rnglists", allocator) catch null; - const debug_addr = coff_obj.getSectionDataAlloc(".debug_addr", allocator) catch null; - const debug_names = coff_obj.getSectionDataAlloc(".debug_names", allocator) catch null; - const debug_frame = coff_obj.getSectionDataAlloc(".debug_frame", allocator) catch null; + const num_sections = std.enums.directEnumArrayLen(DW.DwarfSection, 0); + var sections: [num_sections]?[]const u8 = [_]?[]const u8{null} ** num_sections; + errdefer for (sections) |section| if (section) |s| allocator.free(s); + + inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { + sections[i] = try coff_obj.getSectionDataAlloc("." 
++ section.name, allocator); + } var dwarf = DW.DwarfInfo{ .endian = native_endian, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_str_offsets = debug_str_offsets, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - .debug_loclists = debug_loclists, - .debug_rnglists = debug_rnglists, - .debug_addr = debug_addr, - .debug_names = debug_names, - .debug_frame = debug_frame, - }; - - DW.openDwarfDebugInfo(&dwarf, allocator) catch |err| { - if (debug_str_offsets) |d| allocator.free(d); - if (debug_line_str) |d| allocator.free(d); - if (debug_ranges) |d| allocator.free(d); - if (debug_loclists) |d| allocator.free(d); - if (debug_rnglists) |d| allocator.free(d); - if (debug_addr) |d| allocator.free(d); - if (debug_names) |d| allocator.free(d); - if (debug_frame) |d| allocator.free(d); - return err; + .sections = sections, }; + try DW.openDwarfDebugInfo(&dwarf, allocator); di.debug_data = PdbOrDwarf{ .dwarf = dwarf }; return di; } @@ -901,7 +869,7 @@ pub fn readElfDebugInfo( }; const mapped_mem = try mapWholeFile(elf_file); - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32SmallWithPoly(.IEEE).hash(mapped_mem)) return error.MissingDebugInfo; + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32SmallWithPoly(.IEEE).hash(mapped_mem)) return error.InvalidDebugInfo; const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; @@ -916,36 +884,23 @@ pub fn readElfDebugInfo( const shoff = hdr.e_shoff; const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast( - &mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow], - )); - const header_strings = mapped_mem[str_shdr.sh_offset .. 
str_shdr.sh_offset + str_shdr.sh_size]; + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; const shdrs = @as( [*]const elf.Shdr, @ptrCast(@alignCast(&mapped_mem[shoff])), )[0..hdr.e_shnum]; - var opt_debug_info: ?[]const u8 = null; - var opt_debug_abbrev: ?[]const u8 = null; - var opt_debug_str: ?[]const u8 = null; - var opt_debug_str_offsets: ?[]const u8 = null; - var opt_debug_line: ?[]const u8 = null; - var opt_debug_line_str: ?[]const u8 = null; - var opt_debug_ranges: ?[]const u8 = null; - var opt_debug_loclists: ?[]const u8 = null; - var opt_debug_rnglists: ?[]const u8 = null; - var opt_debug_addr: ?[]const u8 = null; - var opt_debug_names: ?[]const u8 = null; - var opt_debug_frame: ?[]const u8 = null; - - var owned_sections: [ModuleDebugInfo.num_sections][]const u8 = [_][]const u8{&.{}} ** ModuleDebugInfo.num_sections; + const num_sections = std.enums.directEnumArrayLen(DW.DwarfSection, 0); + var sections: [num_sections]?[]const u8 = [_]?[]const u8{null} ** num_sections; + var owned_sections: [num_sections][]const u8 = [_][]const u8{&.{}} ** num_sections; errdefer for (owned_sections) |section| allocator.free(section); var separate_debug_filename: ?[]const u8 = null; var separate_debug_crc: ?u32 = null; for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL) continue; + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); if (mem.eql(u8, name, ".gnu_debuglink")) { @@ -958,26 +913,11 @@ pub fn readElfDebugInfo( continue; } - const sections = [_]struct { name: []const u8, out: *?[]const u8 }{ - .{ .name = ".debug_info", .out = &opt_debug_info }, - .{ .name = ".debug_abbrev", .out = &opt_debug_abbrev }, - .{ .name = ".debug_str", .out = &opt_debug_str }, - .{ .name = ".debug_str_offsets", .out = 
&opt_debug_str_offsets }, - .{ .name = ".debug_line", .out = &opt_debug_line }, - .{ .name = ".debug_line_str", .out = &opt_debug_line_str }, - .{ .name = ".debug_ranges", .out = &opt_debug_ranges }, - .{ .name = ".debug_loclists", .out = &opt_debug_loclists }, - .{ .name = ".debug_rnglists", .out = &opt_debug_rnglists }, - .{ .name = ".debug_addr", .out = &opt_debug_addr }, - .{ .name = ".debug_names", .out = &opt_debug_names }, - .{ .name = ".debug_frame", .out = &opt_debug_frame }, - }; - - var section_index = for (sections, 0..) |section, i| { - if (mem.eql(u8, section.name, name)) { - break i; - } - } else continue; + var section_index: ?usize = null; + inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "." ++ section.name, name)) section_index = i; + } + if (section_index == null) continue; const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) { @@ -997,25 +937,24 @@ pub fn readElfDebugInfo( const read = zlib_stream.reader().readAll(decompressed_section) catch continue; assert(read == decompressed_section.len); - sections[section_index].out.* = decompressed_section; - owned_sections[section_index] = decompressed_section; + sections[section_index.?] = decompressed_section; + owned_sections[section_index.?] = decompressed_section; } else { - sections[section_index].out.* = section_bytes; - } + sections[section_index.?] 
= section_bytes; + } } const missing_debug_info = - opt_debug_info == null or - opt_debug_abbrev == null or - opt_debug_str == null or - opt_debug_line == null; + sections[@enumToInt(DW.DwarfSection.debug_info)] == null or + sections[@enumToInt(DW.DwarfSection.debug_abbrev)] == null or + sections[@enumToInt(DW.DwarfSection.debug_str)] == null or + sections[@enumToInt(DW.DwarfSection.debug_line)] == null; // Attempt to load debug info from an external file // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html if (missing_debug_info) { const global_debug_directories = [_][]const u8{ "/usr/lib/debug", - // TODO: Determine the set of directories used by most distros for this path (check GDB sources) }; // /.build-id/<2-character id prefix>/.debug @@ -1074,18 +1013,7 @@ pub fn readElfDebugInfo( var di = DW.DwarfInfo{ .endian = endian, - .debug_info = opt_debug_info.?, - .debug_abbrev = opt_debug_abbrev.?, - .debug_str = opt_debug_str.?, - .debug_str_offsets = opt_debug_str_offsets, - .debug_line = opt_debug_line.?, - .debug_line_str = opt_debug_line_str, - .debug_ranges = opt_debug_ranges, - .debug_loclists = opt_debug_loclists, - .debug_rnglists = opt_debug_rnglists, - .debug_addr = opt_debug_addr, - .debug_names = opt_debug_names, - .debug_frame = opt_debug_frame, + .sections = sections, }; try DW.openDwarfDebugInfo(&di, allocator); @@ -1882,7 +1810,7 @@ pub const ModuleDebugInfo = switch (native_os) { mapped_memory: []align(mem.page_size) const u8, owned_sections: [num_sections][]const u8 = [_][]const u8{&.{}} ** num_sections, - const num_sections = 12; + const num_sections = 14; fn deinit(self: *@This(), allocator: mem.Allocator) void { self.dwarf.deinit(allocator); @@ -1916,7 +1844,7 @@ fn getSymbolFromDwarf(allocator: mem.Allocator, address: u64, di: *DW.DwarfInfo) if (nosuspend di.findCompileUnit(address)) |compile_unit| { return SymbolInfo{ .symbol_name = nosuspend di.getSymbolName(address) orelse "???", - .compile_unit_name = 
compile_unit.die.getAttrString(di, DW.AT.name, di.debug_str, compile_unit.*) catch |err| switch (err) { + .compile_unit_name = compile_unit.die.getAttrString(di, DW.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index aa1ac6959fd4..4028e15ee936 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -13,6 +13,7 @@ pub const OP = @import("dwarf/OP.zig"); pub const LANG = @import("dwarf/LANG.zig"); pub const FORM = @import("dwarf/FORM.zig"); pub const ATE = @import("dwarf/ATE.zig"); +pub const EH = @import("dwarf/EH.zig"); pub const LLE = struct { pub const end_of_list = 0x00; @@ -337,7 +338,7 @@ const Die = struct { FormValue.String => |value| return value, FormValue.StrPtr => |offset| return di.getString(offset), FormValue.StrOffset => |index| { - const debug_str_offsets = di.debug_str_offsets orelse return badDwarf(); + const debug_str_offsets = di.section(.debug_str_offsets) orelse return badDwarf(); if (compile_unit.str_offsets_base == 0) return badDwarf(); if (compile_unit.is_64) { const byte_offset = compile_unit.str_offsets_base + 8 * index; @@ -642,26 +643,36 @@ fn getAbbrevTableEntry(abbrev_table: *const AbbrevTable, abbrev_code: u64) ?*con return null; } +pub const DwarfSection = enum { + debug_info, + debug_abbrev, + debug_str, + debug_str_offsets, + debug_line, + debug_line_str, + debug_ranges, + debug_loclists, + debug_rnglists, + debug_addr, + debug_names, + debug_frame, + eh_frame, + eh_frame_hdr, +}; + pub const DwarfInfo = struct { endian: std.builtin.Endian, // No memory is owned by the DwarfInfo - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, - debug_str_offsets: ?[]const u8, - debug_line: []const u8, - debug_line_str: ?[]const u8, - debug_ranges: ?[]const u8, - 
debug_loclists: ?[]const u8, - debug_rnglists: ?[]const u8, - debug_addr: ?[]const u8, - debug_names: ?[]const u8, - debug_frame: ?[]const u8, + sections: [std.enums.directEnumArrayLen(DwarfSection, 0)]?[]const u8, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(AbbrevTableHeader) = .{}, compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{}, func_list: std.ArrayListUnmanaged(Func) = .{}, + pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 { + return di.sections[@enumToInt(dwarf_section)]; + } + pub fn deinit(di: *DwarfInfo, allocator: mem.Allocator) void { for (di.abbrev_table_list.items) |*abbrev| { abbrev.deinit(); @@ -691,7 +702,7 @@ pub const DwarfInfo = struct { } fn scanAllFunctions(di: *DwarfInfo, allocator: mem.Allocator) !void { - var stream = io.fixedBufferStream(di.debug_info); + var stream = io.fixedBufferStream(di.section(.debug_info).?); const in = stream.reader(); const seekable = &stream.seekableStream(); var this_unit_offset: u64 = 0; @@ -764,7 +775,7 @@ pub const DwarfInfo = struct { // Prevent endless loops while (depth > 0) : (depth -= 1) { if (this_die_obj.getAttr(AT.name)) |_| { - const name = try this_die_obj.getAttrString(di, AT.name, di.debug_str, compile_unit); + const name = try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit); break :x try allocator.dupe(u8, name); } else if (this_die_obj.getAttr(AT.abstract_origin)) |_| { // Follow the DIE it points to and repeat @@ -836,7 +847,7 @@ pub const DwarfInfo = struct { } fn scanAllCompileUnits(di: *DwarfInfo, allocator: mem.Allocator) !void { - var stream = io.fixedBufferStream(di.debug_info); + var stream = io.fixedBufferStream(di.section(.debug_info).?); const in = &stream.reader(); const seekable = &stream.seekableStream(); var this_unit_offset: u64 = 0; @@ -930,7 +941,7 @@ pub const DwarfInfo = struct { if (target_address >= range.start and target_address < range.end) return compile_unit; } - const 
opt_debug_ranges = if (compile_unit.version >= 5) di.debug_rnglists else di.debug_ranges; + const opt_debug_ranges = if (compile_unit.version >= 5) di.section(.debug_rnglists) else di.section(.debug_ranges); const debug_ranges = opt_debug_ranges orelse continue; const ranges_val = compile_unit.die.getAttr(AT.ranges) orelse continue; @@ -1065,7 +1076,7 @@ pub const DwarfInfo = struct { } fn parseAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, offset: u64) !AbbrevTable { - var stream = io.fixedBufferStream(di.debug_abbrev); + var stream = io.fixedBufferStream(di.section(.debug_abbrev).?); const in = &stream.reader(); const seekable = &stream.seekableStream(); @@ -1146,11 +1157,11 @@ pub const DwarfInfo = struct { compile_unit: CompileUnit, target_address: u64, ) !debug.LineInfo { - var stream = io.fixedBufferStream(di.debug_line); + var stream = io.fixedBufferStream(di.section(.debug_line).?); const in = &stream.reader(); const seekable = &stream.seekableStream(); - const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.debug_line_str, compile_unit); + const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); try seekable.seekTo(line_info_offset); @@ -1416,15 +1427,15 @@ pub const DwarfInfo = struct { } fn getString(di: DwarfInfo, offset: u64) ![]const u8 { - return getStringGeneric(di.debug_str, offset); + return getStringGeneric(di.section(.debug_str), offset); } fn getLineString(di: DwarfInfo, offset: u64) ![]const u8 { - return getStringGeneric(di.debug_line_str, offset); + return getStringGeneric(di.section(.debug_line_str), offset); } fn readDebugAddr(di: DwarfInfo, compile_unit: CompileUnit, index: u64) !u64 { - const debug_addr = di.debug_addr orelse return badDwarf(); + const debug_addr = di.section(.debug_addr) orelse return badDwarf(); // addr_base points to the first item after the 
header, however we // need to read the header to know the size of each item. Empirically, @@ -1455,6 +1466,12 @@ pub const DwarfInfo = struct { pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator) !void { try di.scanAllFunctions(allocator); try di.scanAllCompileUnits(allocator); + + // DEBUG + if (di.section(.eh_frame)) |eh_frame| { + _ = try CommonInformationEntry.parse(eh_frame, 8, .Little); + } + } /// This function is to make it handy to comment out the return and make it @@ -1477,3 +1494,157 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { const last = mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return badDwarf(); return str[casted_offset..last :0]; } + +const EhPointer = struct { + value: union(enum) { + signed: i64, + unsigned: u64, + }, + relative_to: u8, + + // address of the encoded value + pc: u64, + + // TODO: Function to resolve the value given input state (.text start, .eh_frame_hdr start, functions start) +}; + +fn readEhPointer(enc: u8, pc: usize, addr_size_bytes: u8, endian: std.builtin.Endian, reader: anytype) !?EhPointer { + if (enc == EH.PE.omit) return null; + return EhPointer{ + .value = switch (enc & 0x0f) { + EH.PE.absptr => .{ .unsigned = switch (addr_size_bytes) { + 2 => try reader.readInt(u16, endian), + 4 => try reader.readInt(u32, endian), + 8 => try reader.readInt(u64, endian), + else => return error.InvalidAddrSize, + } }, + EH.PE.uleb128 => .{ .unsigned = try leb.readULEB128(u64, reader) }, + EH.PE.udata2 => .{ .unsigned = try reader.readInt(u16, endian) }, + EH.PE.udata4 => .{ .unsigned = try reader.readInt(u32, endian) }, + EH.PE.udata8 => .{ .unsigned = try reader.readInt(u64, endian) }, + EH.PE.sleb128 => .{ .signed = try leb.readILEB128(i64, reader) }, + EH.PE.sdata2 => .{ .signed = try reader.readInt(i16, endian) }, + EH.PE.sdata4 => .{ .signed = try reader.readInt(i32, endian) }, + EH.PE.sdata8 => .{ .signed = try reader.readInt(i64, endian) }, + else => return badDwarf(), + 
}, + .relative_to = enc & 0xf0, + .pc = pc + }; +} + +const CommonInformationEntry = struct { + length: u32, + id: u32, + version: u8, + code_alignment_factor: u64, + data_alignment_factor: u64, + return_address_register: u64, + + // Augmented data + lsda_pointer_enc: ?u8, + personality_routine_pointer: ?EhPointer, + fde_pointer_enc: ?u8, + + initial_instructions: []const u8, + + // The returned struct references memory in `bytes`. + pub fn parse(bytes: []const u8, addr_size_bytes: u8, endian: std.builtin.Endian) !CommonInformationEntry { + if (addr_size_bytes > 8) return error.InvalidAddrSize; + if (bytes.len < 4) return badDwarf(); + const length = mem.readInt(u32, bytes[0..4], endian); + const cie_bytes = bytes[4..][0..length]; + + var stream = io.fixedBufferStream(cie_bytes); + const reader = stream.reader(); + + const id = try reader.readInt(u32, endian); + if (id != 0) return badDwarf(); + + const version = try reader.readByte(); + if (version != 1) return badDwarf(); + + var has_eh_data = false; + var has_aug_data = false; + + var aug_str_len: usize = 0; + var aug_str_start = stream.pos; + var aug_byte = try reader.readByte(); + while (aug_byte != 0) : (aug_byte = try reader.readByte()) { + switch (aug_byte) { + 'z' => { + if (aug_str_len != 0) return badDwarf(); + has_aug_data = true; + aug_str_start = stream.pos; + }, + 'e' => { + if (has_aug_data or aug_str_len != 0) return badDwarf(); + if (try reader.readByte() != 'h') return badDwarf(); + has_eh_data = true; + }, + else => { + if (has_eh_data) return badDwarf(); + aug_str_len += 1; + }, + } + } + + if (has_eh_data) { + // legacy data created by older versions of gcc - ignored here + for (0..addr_size_bytes) |_| _ = try reader.readByte(); + } + + const code_alignment_factor = try leb.readULEB128(u64, reader); + const data_alignment_factor = try leb.readULEB128(u64, reader); + const return_address_register = try leb.readULEB128(u64, reader); + + var lsda_pointer_enc: ?u8 = null; + var 
personality_routine_pointer: ?EhPointer = null; + var fde_pointer_enc: ?u8 = null; + + if (has_aug_data) { + const aug_data_len = try leb.readULEB128(usize, reader); + const aug_data_start = stream.pos; + + const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; + for (aug_str) |byte| { + switch (byte) { + 'L' => { + lsda_pointer_enc = try reader.readByte(); + }, + 'P' => { + const personality_enc = try reader.readByte(); + personality_routine_pointer = try readEhPointer( + personality_enc, + @ptrToInt(&cie_bytes[stream.pos]), + addr_size_bytes, + endian, + reader, + ); + }, + 'R' => { + fde_pointer_enc = try reader.readByte(); + }, + else => return badDwarf(), + } + } + + // verify length field + if (stream.pos != (aug_data_start + aug_data_len)) return badDwarf(); + } + + const initial_instructions = cie_bytes[stream.pos..]; + return .{ + .length = length, + .id = id, + .version = version, + .code_alignment_factor = code_alignment_factor, + .data_alignment_factor = data_alignment_factor, + .return_address_register = return_address_register, + .lsda_pointer_enc = lsda_pointer_enc, + .personality_routine_pointer = personality_routine_pointer, + .fde_pointer_enc = fde_pointer_enc, + .initial_instructions = initial_instructions, + }; + } +}; diff --git a/lib/std/dwarf/EH.zig b/lib/std/dwarf/EH.zig new file mode 100644 index 000000000000..2ffb1d166314 --- /dev/null +++ b/lib/std/dwarf/EH.zig @@ -0,0 +1,19 @@ +pub const PE = struct { + pub const absptr = 0x00; + pub const uleb128 = 0x01; + pub const udata2 = 0x02; + pub const udata4 = 0x03; + pub const udata8 = 0x04; + pub const sleb128 = 0x09; + pub const sdata2 = 0x0A; + pub const sdata4 = 0x0B; + pub const sdata8 = 0x0C; + + pub const pcrel = 0x10; + pub const textrel = 0x20; + pub const datarel = 0x30; + pub const funcrel = 0x40; + pub const aligned = 0x50; + + pub const omit = 0xff; +}; From 6c1d1aa45c7aa955c63c63b2bfbeb895dd694076 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 7 May 2023 13:36:23 -0400 
Subject: [PATCH 03/81] begin working on parsing unwind info --- lib/std/dwarf.zig | 338 +++++++++++++++++++++++++++++++++---------- lib/std/dwarf/EH.zig | 2 + 2 files changed, 265 insertions(+), 75 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 4028e15ee936..16baa84c2888 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -662,13 +662,19 @@ pub const DwarfSection = enum { pub const DwarfInfo = struct { endian: std.builtin.Endian, - // No memory is owned by the DwarfInfo + + // No section memory is owned by the DwarfInfo sections: [std.enums.directEnumArrayLen(DwarfSection, 0)]?[]const u8, + // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(AbbrevTableHeader) = .{}, compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{}, func_list: std.ArrayListUnmanaged(Func) = .{}, + cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .{}, + // Sorted by start_pc + fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{}, + pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 { return di.sections[@enumToInt(dwarf_section)]; } @@ -1434,6 +1440,7 @@ pub const DwarfInfo = struct { return getStringGeneric(di.section(.debug_line_str), offset); } + fn readDebugAddr(di: DwarfInfo, compile_unit: CompileUnit, index: u64) !u64 { const debug_addr = di.section(.debug_addr) orelse return badDwarf(); @@ -1459,6 +1466,56 @@ pub const DwarfInfo = struct { else => badDwarf(), }; } + + pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator) !void { + var has_eh_frame_hdr = false; + if (di.section(.eh_frame)) |eh_frame_hdr| { + has_eh_frame_hdr = true; + + // TODO: Parse this section + _ = eh_frame_hdr; + } + + if (di.section(.eh_frame)) |eh_frame| { + var stream = io.fixedBufferStream(eh_frame); + const reader = stream.reader(); + + while (stream.pos < stream.buffer.len) { + const length_offset = stream.pos; + var length: u64 = try reader.readInt(u32, di.endian); + if (length == 0) 
break; + + var is_64 = length == math.maxInt(u32); + if (is_64) { + length = try reader.readInt(u64, di.endian); + } + + const entry_bytes = eh_frame[stream.pos..][0..length]; + const id = try reader.readInt(u32, di.endian); + + // TODO: Get section_offset here (pass in from headers) + + if (id == 0) { + const cie = try CommonInformationEntry.parse(entry_bytes, @ptrToInt(eh_frame.ptr), 0, length_offset, @sizeOf(usize), di.endian); + try di.cie_map.put(allocator, length_offset, cie); + } else { + const cie_offset = stream.pos - 4 - id; + const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); + const fde = try FrameDescriptionEntry.parse(entry_bytes, @ptrToInt(eh_frame.ptr), 0, cie, @sizeOf(usize), di.endian); + try di.fde_list.append(allocator, fde); + } + } + + // TODO: Avoiding sorting if has_eh_frame_hdr exists + std.sort.sort(FrameDescriptionEntry, di.fde_list.items, {}, struct { + fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { + _ = ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + } + } + }; /// Initialize DWARF info. 
The caller has the responsibility to initialize most @@ -1467,11 +1524,8 @@ pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator) !void { try di.scanAllFunctions(allocator); try di.scanAllCompileUnits(allocator); - // DEBUG - if (di.section(.eh_frame)) |eh_frame| { - _ = try CommonInformationEntry.parse(eh_frame, 8, .Little); - } - + // Unwind info is not required + di.scanAllUnwindInfo(allocator) catch {}; } /// This function is to make it handy to comment out the return and make it @@ -1495,74 +1549,131 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { return str[casted_offset..last :0]; } -const EhPointer = struct { - value: union(enum) { - signed: i64, - unsigned: u64, - }, - relative_to: u8, +const EhPointerContext = struct { + // The address of the pointer field itself + pc_rel_base: u64, - // address of the encoded value - pc: u64, - - // TODO: Function to resolve the value given input state (.text start, .eh_frame_hdr start, functions start) + // These relative addressing modes are only used in specific cases, and + // might not be available / required in all parsing contexts + data_rel_base: ?u64 = null, + text_rel_base: ?u64 = null, + function_rel_base: ?u64 = null, }; -fn readEhPointer(enc: u8, pc: usize, addr_size_bytes: u8, endian: std.builtin.Endian, reader: anytype) !?EhPointer { +fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: std.builtin.Endian) !?u64 { if (enc == EH.PE.omit) return null; - return EhPointer{ - .value = switch (enc & 0x0f) { - EH.PE.absptr => .{ .unsigned = switch (addr_size_bytes) { + + const value: union(enum) { + signed: i64, + unsigned: u64, + } = switch (enc & 0x0f) { + EH.PE.absptr => .{ + .unsigned = switch (addr_size_bytes) { 2 => try reader.readInt(u16, endian), 4 => try reader.readInt(u32, endian), 8 => try reader.readInt(u64, endian), else => return error.InvalidAddrSize, - } }, - EH.PE.uleb128 => .{ .unsigned = try leb.readULEB128(u64, 
reader) }, - EH.PE.udata2 => .{ .unsigned = try reader.readInt(u16, endian) }, - EH.PE.udata4 => .{ .unsigned = try reader.readInt(u32, endian) }, - EH.PE.udata8 => .{ .unsigned = try reader.readInt(u64, endian) }, - EH.PE.sleb128 => .{ .signed = try leb.readILEB128(i64, reader) }, - EH.PE.sdata2 => .{ .signed = try reader.readInt(i16, endian) }, - EH.PE.sdata4 => .{ .signed = try reader.readInt(i32, endian) }, - EH.PE.sdata8 => .{ .signed = try reader.readInt(i64, endian) }, - else => return badDwarf(), + }, }, - .relative_to = enc & 0xf0, - .pc = pc + EH.PE.uleb128 => .{ .unsigned = try leb.readULEB128(u64, reader) }, + EH.PE.udata2 => .{ .unsigned = try reader.readInt(u16, endian) }, + EH.PE.udata4 => .{ .unsigned = try reader.readInt(u32, endian) }, + EH.PE.udata8 => .{ .unsigned = try reader.readInt(u64, endian) }, + EH.PE.sleb128 => .{ .signed = try leb.readILEB128(i64, reader) }, + EH.PE.sdata2 => .{ .signed = try reader.readInt(i16, endian) }, + EH.PE.sdata4 => .{ .signed = try reader.readInt(i32, endian) }, + EH.PE.sdata8 => .{ .signed = try reader.readInt(i64, endian) }, + else => return badDwarf(), }; + + const relative_to = enc & 0xf0; + var base = switch (relative_to) { + EH.PE.pcrel => ctx.pc_rel_base, + EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, + EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, + EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, + EH.PE.indirect => { + switch (addr_size_bytes) { + 2 => return @intToPtr(*const u16, value.unsigned).*, + 4 => return @intToPtr(*const u32, value.unsigned).*, + 8 => return @intToPtr(*const u64, value.unsigned).*, + else => return error.UnsupportedAddrSize, + } + }, + else => null, + }; + + if (base) |b| { + return switch (value) { + .signed => |s| @intCast(u64, s + @intCast(i64, b)), + .unsigned => |u| u + b, + }; + } else { + return switch (value) { + .signed => |s| @intCast(u64, s), + .unsigned => |u| 
u, + }; + } } -const CommonInformationEntry = struct { - length: u32, - id: u32, +pub const CommonInformationEntry = struct { + // Used in .eh_frame + pub const eh_id = 0; + + // Used in .debug_frame (DWARF32) + pub const dwarf32_id = std.math.maxInt(u32); + + // Used in .debug_frame (DWARF64) + pub const dwarf64_id = std.math.maxInt(u64); + + // Offset of the length field of this entry in the eh_frame section. + // This is the key that FDEs use to reference CIEs. + length_offset: u64, version: u8, - code_alignment_factor: u64, - data_alignment_factor: u64, - return_address_register: u64, - // Augmented data - lsda_pointer_enc: ?u8, - personality_routine_pointer: ?EhPointer, - fde_pointer_enc: ?u8, + code_alignment_factor: u32, + data_alignment_factor: i32, + return_address_register: u8, + aug_str: []const u8, + aug_data: []const u8, + lsda_pointer_enc: u8, + personality_enc: ?u8, + personality_routine_pointer: ?u64, + fde_pointer_enc: u8, initial_instructions: []const u8, - // The returned struct references memory in `bytes`. 
- pub fn parse(bytes: []const u8, addr_size_bytes: u8, endian: std.builtin.Endian) !CommonInformationEntry { - if (addr_size_bytes > 8) return error.InvalidAddrSize; - if (bytes.len < 4) return badDwarf(); - const length = mem.readInt(u32, bytes[0..4], endian); - const cie_bytes = bytes[4..][0..length]; + pub fn isSignalFrame(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'S') return true; + return false; + } + + pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'B') return true; + return false; + } + + pub fn mteTaggedFrame(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'G') return true; + return false; + } + + // The returned struct references memory backed by cie_bytes + pub fn parse( + cie_bytes: []const u8, + section_base: u64, + section_offset: u64, + length_offset: u64, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !CommonInformationEntry { + if (addr_size_bytes > 8) return error.UnsupportedAddrSize; var stream = io.fixedBufferStream(cie_bytes); const reader = stream.reader(); - const id = try reader.readInt(u32, endian); - if (id != 0) return badDwarf(); - const version = try reader.readByte(); - if (version != 1) return badDwarf(); + if (version != 1 and version != 3) return error.UnsupportedDwarfVersion; var has_eh_data = false; var has_aug_data = false; @@ -1575,76 +1686,153 @@ const CommonInformationEntry = struct { 'z' => { if (aug_str_len != 0) return badDwarf(); has_aug_data = true; - aug_str_start = stream.pos; }, 'e' => { if (has_aug_data or aug_str_len != 0) return badDwarf(); if (try reader.readByte() != 'h') return badDwarf(); has_eh_data = true; }, - else => { - if (has_eh_data) return badDwarf(); - aug_str_len += 1; - }, + else => if (has_eh_data) return badDwarf(), } + + aug_str_len += 1; } if (has_eh_data) { - // legacy data created by older versions of gcc - ignored here + // legacy data created by older versions of gcc - 
unsupported here for (0..addr_size_bytes) |_| _ = try reader.readByte(); } - const code_alignment_factor = try leb.readULEB128(u64, reader); - const data_alignment_factor = try leb.readULEB128(u64, reader); - const return_address_register = try leb.readULEB128(u64, reader); + const code_alignment_factor = try leb.readULEB128(u32, reader); + const data_alignment_factor = try leb.readILEB128(i32, reader); + const return_address_register = if (version == 1) try reader.readByte() else try leb.readULEB128(u8, reader); - var lsda_pointer_enc: ?u8 = null; - var personality_routine_pointer: ?EhPointer = null; - var fde_pointer_enc: ?u8 = null; + var lsda_pointer_enc: u8 = EH.PE.omit; + var personality_enc: ?u8 = null; + var personality_routine_pointer: ?u64 = null; + var fde_pointer_enc: u8 = EH.PE.absptr; - if (has_aug_data) { + var aug_data: []const u8 = &[_]u8{}; + const aug_str = if (has_aug_data) blk: { const aug_data_len = try leb.readULEB128(usize, reader); const aug_data_start = stream.pos; + aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; - for (aug_str) |byte| { + for (aug_str[1..]) |byte| { switch (byte) { 'L' => { lsda_pointer_enc = try reader.readByte(); }, 'P' => { - const personality_enc = try reader.readByte(); + personality_enc = try reader.readByte(); personality_routine_pointer = try readEhPointer( - personality_enc, - @ptrToInt(&cie_bytes[stream.pos]), + reader, + personality_enc.?, addr_size_bytes, + .{ .pc_rel_base = @ptrToInt(&cie_bytes[stream.pos]) - section_base + section_offset }, endian, - reader, ); }, 'R' => { fde_pointer_enc = try reader.readByte(); }, + 'S', 'B', 'G' => {}, else => return badDwarf(), } } - // verify length field - if (stream.pos != (aug_data_start + aug_data_len)) return badDwarf(); - } + // aug_data_len can include padding so the CIE ends on an address boundary + try stream.seekTo(aug_data_start + aug_data_len); + break :blk aug_str; + } else &[_]u8{}; 
const initial_instructions = cie_bytes[stream.pos..]; return .{ - .length = length, - .id = id, + .length_offset = length_offset, .version = version, .code_alignment_factor = code_alignment_factor, .data_alignment_factor = data_alignment_factor, .return_address_register = return_address_register, + .aug_str = aug_str, + .aug_data = aug_data, .lsda_pointer_enc = lsda_pointer_enc, + .personality_enc = personality_enc, .personality_routine_pointer = personality_routine_pointer, .fde_pointer_enc = fde_pointer_enc, .initial_instructions = initial_instructions, }; } }; + +pub const FrameDescriptionEntry = struct { + // Offset into eh_frame where the CIE for this FDE is stored + cie_length_offset: u64, + + pc_begin: u64, + pc_range: u64, + lsda_pointer: ?u64, + aug_data: []const u8, + instructions: []const u8, + + pub fn parse( + fde_bytes: []const u8, + section_base: u64, + section_offset: u64, + cie: CommonInformationEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !FrameDescriptionEntry { + if (addr_size_bytes > 8) return error.InvalidAddrSize; + + var stream = io.fixedBufferStream(fde_bytes); + const reader = stream.reader(); + + const pc_begin = try readEhPointer( + reader, + cie.fde_pointer_enc, + addr_size_bytes, + .{ .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset }, + endian, + ) orelse return badDwarf(); + + const pc_range = try readEhPointer( + reader, + cie.fde_pointer_enc & 0x0f, + addr_size_bytes, + .{ .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset }, + endian, + ) orelse return badDwarf(); + + var aug_data: []const u8 = &[_]u8{}; + const lsda_pointer = if (cie.aug_str.len > 0) blk: { + const aug_data_len = try leb.readULEB128(u64, reader); + const aug_data_start = stream.pos; + aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; + + const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) + try readEhPointer( + reader, + cie.lsda_pointer_enc & 0x0f, + addr_size_bytes, 
+ .{ .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) }, + endian, + ) + else + null; + + try stream.seekTo(aug_data_start + aug_data_len); + break :blk lsda_pointer; + } else null; + + const instructions = fde_bytes[stream.pos..]; + return .{ + .cie_length_offset = cie.length_offset, + .pc_begin = pc_begin, + .pc_range = pc_range, + .lsda_pointer = lsda_pointer, + .aug_data = aug_data, + .instructions = instructions, + }; + } +}; diff --git a/lib/std/dwarf/EH.zig b/lib/std/dwarf/EH.zig index 2ffb1d166314..94d306fc08b1 100644 --- a/lib/std/dwarf/EH.zig +++ b/lib/std/dwarf/EH.zig @@ -1,5 +1,6 @@ pub const PE = struct { pub const absptr = 0x00; + pub const uleb128 = 0x01; pub const udata2 = 0x02; pub const udata4 = 0x03; @@ -14,6 +15,7 @@ pub const PE = struct { pub const datarel = 0x30; pub const funcrel = 0x40; pub const aligned = 0x50; + pub const indirect = 0x80; pub const omit = 0xff; }; From 8b8d6271371c66eb703bdbe6f13e0426c4c2436f Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 7 May 2023 20:00:54 -0400 Subject: [PATCH 04/81] - add call frame instruction parser - add register printing --- lib/std/dwarf.zig | 9 +- lib/std/dwarf/abi.zig | 54 ++++++++ lib/std/dwarf/call_frame.zig | 237 +++++++++++++++++++++++++++++++++++ 3 files changed, 298 insertions(+), 2 deletions(-) create mode 100644 lib/std/dwarf/abi.zig create mode 100644 lib/std/dwarf/call_frame.zig diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 16baa84c2888..144c12470e82 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -14,6 +14,8 @@ pub const LANG = @import("dwarf/LANG.zig"); pub const FORM = @import("dwarf/FORM.zig"); pub const ATE = @import("dwarf/ATE.zig"); pub const EH = @import("dwarf/EH.zig"); +pub const abi = @import("dwarf/abi.zig"); +pub const call_frame = @import("dwarf/call_frame.zig"); pub const LLE = struct { pub const end_of_list = 0x00; @@ -1490,7 +1492,8 @@ pub const DwarfInfo = struct { length = try reader.readInt(u64, di.endian); } - const entry_bytes = 
eh_frame[stream.pos..][0..length]; + const id_len = @as(u8, if (is_64) 8 else 4); + const entry_bytes = eh_frame[stream.pos..][0..length - id_len]; const id = try reader.readInt(u32, di.endian); // TODO: Get section_offset here (pass in from headers) @@ -1658,7 +1661,8 @@ pub const CommonInformationEntry = struct { return false; } - // The returned struct references memory backed by cie_bytes + // This function expects to read the CIE starting with the version field. + // The returned struct references memory backed by cie_bytes. pub fn parse( cie_bytes: []const u8, section_base: u64, @@ -1775,6 +1779,7 @@ pub const FrameDescriptionEntry = struct { aug_data: []const u8, instructions: []const u8, + // This function expects to read the FDE starting with the PC Begin field pub fn parse( fde_bytes: []const u8, section_base: u64, diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig new file mode 100644 index 000000000000..9ffb908b7387 --- /dev/null +++ b/lib/std/dwarf/abi.zig @@ -0,0 +1,54 @@ +const std = @import("../std.zig"); + +fn writeUnknownReg(writer: anytype, reg_number: u8) !void { + try writer.print("reg{}", .{ reg_number }); +} + +pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number: u8) !void { + if (arch) |a| { + switch (a) { + .x86_64 => { + switch (reg_number) { + 0 => try writer.writeAll("RAX"), + 1 => try writer.writeAll("RDX"), + 2 => try writer.writeAll("RCX"), + 3 => try writer.writeAll("RBX"), + 4 => try writer.writeAll("RSI"), + 5 => try writer.writeAll("RDI"), + 6 => try writer.writeAll("RBP"), + 7 => try writer.writeAll("RSP"), + 8...15 => try writer.print("R{}", .{ reg_number }), + 16 => try writer.writeAll("RIP"), + 17...32 => try writer.print("XMM{}", .{ reg_number - 17 }), + 33...40 => try writer.print("ST{}", .{ reg_number - 33 }), + 41...48 => try writer.print("MM{}", .{ reg_number - 41 }), + 49 => try writer.writeAll("RFLAGS"), + 50 => try writer.writeAll("ES"), + 51 => try writer.writeAll("CS"), + 52 => 
try writer.writeAll("SS"), + 53 => try writer.writeAll("DS"), + 54 => try writer.writeAll("FS"), + 55 => try writer.writeAll("GS"), + // 56-57 Reserved + 58 => try writer.writeAll("FS.BASE"), + 59 => try writer.writeAll("GS.BASE"), + // 60-61 Reserved + 62 => try writer.writeAll("TR"), + 63 => try writer.writeAll("LDTR"), + 64 => try writer.writeAll("MXCSR"), + 65 => try writer.writeAll("FCW"), + 66 => try writer.writeAll("FSW"), + 67...82 => try writer.print("XMM{}", .{ reg_number - 51 }), + // 83-117 Reserved + 118...125 => try writer.print("K{}", .{ reg_number - 118 }), + // 126-129 Reserved + else => try writeUnknownReg(writer, reg_number), + } + }, + + // TODO: Add x86, aarch64 + + else => try writeUnknownReg(writer, reg_number), + } + } else try writeUnknownReg(writer, reg_number); +} diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig new file mode 100644 index 000000000000..34bd82f7c9d5 --- /dev/null +++ b/lib/std/dwarf/call_frame.zig @@ -0,0 +1,237 @@ +const std = @import("../std.zig"); +const debug = std.debug; +const leb = @import("../leb128.zig"); +const abi = @import("abi.zig"); +const dwarf = @import("../dwarf.zig"); + +// These enum values correspond to the opcode encoding itself, with +// the exception of the opcodes that include data in the opcode itself. +// For those, the enum value is the opcode with the lower 6 bits (the data) masked to 0. 
+const Opcode = enum(u8) { + // These are placeholders that define the range of vendor-specific opcodes + const lo_user = 0x1c; + const hi_user = 0x3f; + + advance_loc = 0x1 << 6, + offset = 0x2 << 6, + restore = 0x3 << 6, + nop = 0x00, + set_loc = 0x01, + advance_loc1 = 0x02, + advance_loc2 = 0x03, + advance_loc4 = 0x04, + offset_extended = 0x05, + restore_extended = 0x06, + undefined = 0x07, + same_value = 0x08, + register = 0x09, + remember_state = 0x0a, + restore_state = 0x0b, + def_cfa = 0x0c, + def_cfa_register = 0x0d, + def_cfa_offset = 0x0e, + def_cfa_expression = 0x0f, + expression = 0x10, + offset_extended_sf = 0x11, + def_cfa_sf = 0x12, + def_cfa_offset_sf = 0x13, + val_offset = 0x14, + val_offset_sf = 0x15, + val_expression = 0x16, + + _, +}; + +const Operand = enum { + opcode_delta, + opcode_register, + uleb128_register, + uleb128_offset, + sleb128_offset, + address, + u8_delta, + u16_delta, + u32_delta, + block, + + fn Storage(comptime self: Operand) type { + return switch (self) { + .opcode_delta, .opcode_register => u6, + .uleb128_register => u8, + .uleb128_offset => u64, + .sleb128_offset => i64, + .address => u64, + .u8_delta => u8, + .u16_delta => u16, + .u32_delta => u32, + .block => []const u8, + }; + } + + fn read( + comptime self: Operand, + reader: anytype, + opcode_value: ?u6, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Storage(self) { + return switch (self) { + .opcode_delta, .opcode_register => opcode_value orelse return error.InvalidOperand, + .uleb128_register => try leb.readULEB128(u8, reader), + .uleb128_offset => try leb.readULEB128(u64, reader), + .sleb128_offset => try leb.readILEB128(i64, reader), + .address => switch (addr_size_bytes) { + 2 => try reader.readInt(u16, endian), + 4 => try reader.readInt(u32, endian), + 8 => try reader.readInt(u64, endian), + else => return error.InvalidAddrSize, + }, + .u8_delta => try reader.readByte(), + .u16_delta => try reader.readInt(u16, endian), + .u32_delta => try 
reader.readInt(u32, endian), + .block => { + const block_len = try leb.readULEB128(u64, reader); + + // TODO: This feels like a kludge, change to FixedBufferStream param? + const block = reader.context.buffer[reader.context.pos..][0..block_len]; + reader.context.pos += block_len; + + return block; + } + }; + } +}; + +fn InstructionType(comptime definition: anytype) type { + const definition_type = @typeInfo(@TypeOf(definition)); + debug.assert(definition_type == .Struct); + + const definition_len = definition_type.Struct.fields.len; + comptime var fields: [definition_len]std.builtin.Type.StructField = undefined; + inline for (definition_type.Struct.fields, &fields) |definition_field, *operands_field| { + const opcode = std.enums.nameCast(Operand, @field(definition, definition_field.name)); + const storage_type = opcode.Storage(); + operands_field.* = .{ + .name = definition_field.name, + .type = storage_type, + .default_value = null, + .is_comptime = false, + .alignment = @alignOf(storage_type), + }; + } + + const InstructionOperands = @Type(.{ + .Struct = .{ + .layout = .Auto, + .fields = &fields, + .decls = &.{}, + .is_tuple = false, + }, + }); + + return struct { + const Self = @This(); + operands: InstructionOperands, + + pub fn read(reader: anytype, opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian) !Self { + var operands: InstructionOperands = undefined; + inline for (definition_type.Struct.fields) |definition_field| { + const operand = comptime std.enums.nameCast(Operand, @field(definition, definition_field.name)); + @field(operands, definition_field.name) = try operand.read(reader, opcode_value, addr_size_bytes, endian); + } + + return .{ .operands = operands }; + } + }; +} + +pub const Instruction = union(Opcode) { + advance_loc: InstructionType(.{ .delta = .opcode_delta }), + offset: InstructionType(.{ .register = .opcode_register, .offset = .uleb128_offset }), + restore: InstructionType(.{ .register = .opcode_register }), + nop: 
InstructionType(.{}), + set_loc: InstructionType(.{ .address = .address }), + advance_loc1: InstructionType(.{ .delta = .u8_delta }), + advance_loc2: InstructionType(.{ .delta = .u16_delta }), + advance_loc4: InstructionType(.{ .delta = .u32_delta }), + offset_extended: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), + restore_extended: InstructionType(.{ .register = .uleb128_register }), + undefined: InstructionType(.{ .register = .uleb128_register }), + same_value: InstructionType(.{ .register = .uleb128_register }), + register: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), + remember_state: InstructionType(.{}), + restore_state: InstructionType(.{}), + def_cfa: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), + def_cfa_register: InstructionType(.{ .register = .uleb128_register }), + def_cfa_offset: InstructionType(.{ .offset = .uleb128_offset }), + def_cfa_expression: InstructionType(.{ .block = .block }), + expression: InstructionType(.{ .register = .uleb128_register, .block = .block }), + offset_extended_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }), + def_cfa_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }), + def_cfa_offset_sf: InstructionType(.{ .offset = .sleb128_offset }), + val_offset: InstructionType(.{ .a = .uleb128_offset, .b = .uleb128_offset }), + val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }), + val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }), + + pub fn read(reader: anytype, addr_size_bytes: u8, endian: std.builtin.Endian) !Instruction { + const opcode = try reader.readByte(); + const upper = opcode & 0b11000000; + return switch (upper) { + inline @enumToInt(Opcode.advance_loc), @enumToInt(Opcode.offset), @enumToInt(Opcode.restore) => |u| @unionInit( + Instruction, + @tagName(@intToEnum(Opcode, u)), + try 
std.meta.TagPayload(Instruction, @intToEnum(Opcode, u)).read(reader, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian), + ), + 0 => blk: { + inline for (@typeInfo(Opcode).Enum.fields) |field| { + if (field.value == opcode) { + break :blk @unionInit( + Instruction, + @tagName(@intToEnum(Opcode, field.value)), + try std.meta.TagPayload(Instruction, @intToEnum(Opcode, field.value)).read(reader, null, addr_size_bytes, endian), + ); + } + } + break :blk error.UnknownOpcode; + }, + else => error.UnknownOpcode, + }; + } + + pub fn writeOperands(self: Instruction, writer: anytype, cie: dwarf.CommonInformationEntry, arch: ?std.Target.Cpu.Arch) !void { + switch (self) { + inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{ i.operands.delta * cie.code_alignment_factor }), + .offset => |i| { + try abi.writeRegisterName(writer, arch, i.operands.register); + try writer.print(" {}", .{ @intCast(i64, i.operands.offset) * cie.data_alignment_factor }); + }, + .restore => {}, + .nop => {}, + .set_loc => {}, + .offset_extended => {}, + .restore_extended => {}, + .undefined => {}, + .same_value => {}, + .register => {}, + .remember_state => {}, + .restore_state => {}, + .def_cfa => |i| { + try abi.writeRegisterName(writer, arch, i.operands.register); + try writer.print(" +{}", .{ i.operands.offset }); + }, + .def_cfa_register => {}, + .def_cfa_offset => {}, + .def_cfa_expression => |i| { + try writer.print("TODO parse expressions: {x}", .{ std.fmt.fmtSliceHexLower(i.operands.block) }); + }, + .expression => {}, + .offset_extended_sf => {}, + .def_cfa_sf => {}, + .def_cfa_offset_sf => {}, + .val_offset => {}, + .val_offset_sf => {}, + .val_expression => {}, + } + } + +}; From 38303d7b9cd3eb12c59636e84b9a411b07ad79af Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 8 May 2023 02:28:39 -0400 Subject: [PATCH 05/81] add VirtualMachine to run CFA instructions --- lib/std/dwarf/call_frame.zig | 163 ++++++++++++++++++++++++++++++++++- 1 
file changed, 160 insertions(+), 3 deletions(-) diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 34bd82f7c9d5..467b5061b1ff 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -217,13 +217,170 @@ pub const Instruction = union(Opcode) { .restore_state => {}, .def_cfa => |i| { try abi.writeRegisterName(writer, arch, i.operands.register); - try writer.print(" +{}", .{ i.operands.offset }); + try writer.print(" {}", .{ fmtOffset(@intCast(i64, i.operands.offset)) }); }, .def_cfa_register => {}, - .def_cfa_offset => {}, + .def_cfa_offset => |i| { + try writer.print("{}", .{ fmtOffset(@intCast(i64, i.operands.offset)) }); + }, .def_cfa_expression => |i| { - try writer.print("TODO parse expressions: {x}", .{ std.fmt.fmtSliceHexLower(i.operands.block) }); + try writer.print("TODO(parse expressions data {x})", .{ std.fmt.fmtSliceHexLower(i.operands.block) }); + }, + .expression => {}, + .offset_extended_sf => {}, + .def_cfa_sf => {}, + .def_cfa_offset_sf => {}, + .val_offset => {}, + .val_offset_sf => {}, + .val_expression => {}, + } + } + +}; + + +fn formatOffset(data: i64, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + if (data >= 0) try writer.writeByte('+'); + return std.fmt.formatInt(data, 10, .lower, options, writer); +} + +fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) { + return .{ .data = offset }; +} + +/// See section 6.4.1 of the DWARF5 specification +pub const VirtualMachine = struct { + + const RegisterRule = union(enum) { + undefined: void, + same_value: void, + offset: i64, + val_offset: i64, + register: u8, + expression: []const u8, + val_expression: []const u8, + architectural: void, + }; + + const Column = struct { + register: u8 = undefined, + rule: RegisterRule = .{ .undefined = {} }, + + pub fn writeRule(self: Column, writer: anytype, is_cfa: bool, arch: ?std.Target.Cpu.Arch) !void { + if (is_cfa) { + try writer.writeAll("CFA"); + } else 
{ + try abi.writeRegisterName(writer, arch, self.register); + } + + try writer.writeByte('='); + switch (self.rule) { + .undefined => {}, + .same_value => try writer.writeAll("S"), + .offset => |offset| { + if (is_cfa) { + try abi.writeRegisterName(writer, arch, self.register); + try writer.print("{}", .{ fmtOffset(offset) }); + } else { + try writer.print("[CFA{}]", .{ fmtOffset(offset) }); + } + }, + .val_offset => |offset| { + if (is_cfa) { + try abi.writeRegisterName(writer, arch, self.register); + try writer.print("{}", .{ fmtOffset(offset) }); + } else { + try writer.print("CFA{}", .{ fmtOffset(offset) }); + } + }, + .register => |register| try abi.writeRegisterName(writer, arch, register), + .expression => try writer.writeAll("TODO(expression)"), + .val_expression => try writer.writeAll("TODO(val_expression)"), + .architectural => try writer.writeAll("TODO(architectural)"), + } + } + }; + + pub const Row = struct { + /// Offset from pc_begin + offset: u64 = 0, + cfa: Column = .{}, + /// Index into `columns` of the first column in this row + columns_start: usize = undefined, + columns_len: u8 = 0, + }; + + rows: std.ArrayListUnmanaged(Row) = .{}, + columns: std.ArrayListUnmanaged(Column) = .{}, + current_row: Row = .{}, + + pub fn reset(self: *VirtualMachine) void { + self.rows.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + } + + pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { + self.rows.deinit(allocator); + self.columns.deinit(allocator); + self.* = undefined; + } + + pub fn getColumns(self: VirtualMachine, row: Row) []Column { + return self.columns.items[row.columns_start..][0..row.columns_len]; + } + + fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { + for (self.getColumns(self.current_row)) |*c| { + if (c.register == register) return c; + } + + if (self.current_row.columns_len == 0) { + self.current_row.columns_start = 
self.columns.items.len; + } + self.current_row.columns_len += 1; + + const column = try self.columns.addOne(allocator); + column.* = .{ + .register = register, + }; + + return column; + } + + pub fn step(self: *VirtualMachine, allocator: std.mem.Allocator, cie: dwarf.CommonInformationEntry, instruction: Instruction) !void { + switch (instruction) { + inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| { + self.current_row.offset += i.operands.delta; + }, + .offset => |i| { + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ .offset = @intCast(i64, i.operands.offset) * cie.data_alignment_factor }; + }, + .restore => {}, + .nop => {}, + .set_loc => {}, + .offset_extended => {}, + .restore_extended => {}, + .undefined => {}, + .same_value => {}, + .register => {}, + .remember_state => {}, + .restore_state => {}, + .def_cfa => |i| { + self.current_row.cfa = .{ + .register = i.operands.register, + .rule = .{ .offset = @intCast(i64, i.operands.offset) }, + }; + }, + .def_cfa_register => {}, + .def_cfa_offset => |i| { + self.current_row.cfa.rule = .{ + .offset = @intCast(i64, i.operands.offset) + }; }, + .def_cfa_expression => {}, .expression => {}, .offset_extended_sf => {}, .def_cfa_sf => {}, From f3f3c877e0da444e6e5208d7c7179776ddb8ecd8 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 9 May 2023 01:35:22 -0400 Subject: [PATCH 06/81] - add DWARF expression parser - change read apis to use a stream - add register formatters --- lib/std/dwarf/abi.zig | 34 ++++- lib/std/dwarf/call_frame.zig | 237 ++++++++++++++++++++++++---------- lib/std/dwarf/expressions.zig | 197 ++++++++++++++++++++++++++++ 3 files changed, 395 insertions(+), 73 deletions(-) create mode 100644 lib/std/dwarf/expressions.zig diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 9ffb908b7387..4d73a5d23ef9 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -1,7 +1,7 @@ const std = @import("../std.zig"); fn 
writeUnknownReg(writer: anytype, reg_number: u8) !void { - try writer.print("reg{}", .{ reg_number }); + try writer.print("reg{}", .{reg_number}); } pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number: u8) !void { @@ -17,11 +17,11 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number 5 => try writer.writeAll("RDI"), 6 => try writer.writeAll("RBP"), 7 => try writer.writeAll("RSP"), - 8...15 => try writer.print("R{}", .{ reg_number }), + 8...15 => try writer.print("R{}", .{reg_number}), 16 => try writer.writeAll("RIP"), - 17...32 => try writer.print("XMM{}", .{ reg_number - 17 }), - 33...40 => try writer.print("ST{}", .{ reg_number - 33 }), - 41...48 => try writer.print("MM{}", .{ reg_number - 41 }), + 17...32 => try writer.print("XMM{}", .{reg_number - 17}), + 33...40 => try writer.print("ST{}", .{reg_number - 33}), + 41...48 => try writer.print("MM{}", .{reg_number - 41}), 49 => try writer.writeAll("RFLAGS"), 50 => try writer.writeAll("ES"), 51 => try writer.writeAll("CS"), @@ -38,9 +38,9 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number 64 => try writer.writeAll("MXCSR"), 65 => try writer.writeAll("FCW"), 66 => try writer.writeAll("FSW"), - 67...82 => try writer.print("XMM{}", .{ reg_number - 51 }), + 67...82 => try writer.print("XMM{}", .{reg_number - 51}), // 83-117 Reserved - 118...125 => try writer.print("K{}", .{ reg_number - 118 }), + 118...125 => try writer.print("K{}", .{reg_number - 118}), // 126-129 Reserved else => try writeUnknownReg(writer, reg_number), } @@ -52,3 +52,23 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number } } else try writeUnknownReg(writer, reg_number); } + +const FormatRegisterData = struct { + reg_number: u8, + arch: ?std.Target.Cpu.Arch, +}; + +pub fn formatRegister( + data: FormatRegisterData, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = fmt; + _ = 
options; + try writeRegisterName(writer, data.arch, data.reg_number); +} + +pub fn fmtRegister(reg_number: u8, arch: ?std.Target.Cpu.Arch) std.fmt.Formatter(formatRegister) { + return .{ .data = .{ .reg_number = reg_number, .arch = arch } }; +} diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 467b5061b1ff..a40094d608ed 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -3,18 +3,13 @@ const debug = std.debug; const leb = @import("../leb128.zig"); const abi = @import("abi.zig"); const dwarf = @import("../dwarf.zig"); +const expressions = @import("expressions.zig"); -// These enum values correspond to the opcode encoding itself, with -// the exception of the opcodes that include data in the opcode itself. -// For those, the enum value is the opcode with the lower 6 bits (the data) masked to 0. const Opcode = enum(u8) { - // These are placeholders that define the range of vendor-specific opcodes - const lo_user = 0x1c; - const hi_user = 0x3f; - advance_loc = 0x1 << 6, offset = 0x2 << 6, restore = 0x3 << 6, + nop = 0x00, set_loc = 0x01, advance_loc1 = 0x02, @@ -39,7 +34,17 @@ const Opcode = enum(u8) { val_offset_sf = 0x15, val_expression = 0x16, - _, + // These opcodes encode an operand in the lower 6 bits of the opcode itself + pub const lo_inline = Opcode.advance_loc; + pub const hi_inline = Opcode.restore; + + // These opcodes are trailed by zero or more operands + pub const lo_reserved = Opcode.nop; + pub const hi_reserved = Opcode.val_expression; + + // Vendor-specific opcodes + pub const lo_user = 0x1c; + pub const hi_user = 0x3f; }; const Operand = enum { @@ -70,11 +75,12 @@ const Operand = enum { fn read( comptime self: Operand, - reader: anytype, + stream: *std.io.FixedBufferStream([]const u8), opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian, ) !Storage(self) { + const reader = stream.reader(); return switch (self) { .opcode_delta, .opcode_register => opcode_value orelse return 
error.InvalidOperand, .uleb128_register => try leb.readULEB128(u8, reader), @@ -91,13 +97,13 @@ const Operand = enum { .u32_delta => try reader.readInt(u32, endian), .block => { const block_len = try leb.readULEB128(u64, reader); + if (stream.pos + block_len > stream.buffer.len) return error.InvalidOperand; - // TODO: This feels like a kludge, change to FixedBufferStream param? - const block = reader.context.buffer[reader.context.pos..][0..block_len]; + const block = stream.buffer[stream.pos..][0..block_len]; reader.context.pos += block_len; return block; - } + }, }; } }; @@ -133,11 +139,16 @@ fn InstructionType(comptime definition: anytype) type { const Self = @This(); operands: InstructionOperands, - pub fn read(reader: anytype, opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian) !Self { + pub fn read( + stream: *std.io.FixedBufferStream([]const u8), + opcode_value: ?u6, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Self { var operands: InstructionOperands = undefined; inline for (definition_type.Struct.fields) |definition_field| { const operand = comptime std.enums.nameCast(Operand, @field(definition, definition_field.name)); - @field(operands, definition_field.name) = try operand.read(reader, opcode_value, addr_size_bytes, endian); + @field(operands, definition_field.name) = try operand.read(stream, opcode_value, addr_size_bytes, endian); } return .{ .operands = operands }; @@ -173,37 +184,44 @@ pub const Instruction = union(Opcode) { val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }), val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }), - pub fn read(reader: anytype, addr_size_bytes: u8, endian: std.builtin.Endian) !Instruction { - const opcode = try reader.readByte(); - const upper = opcode & 0b11000000; - return switch (upper) { - inline @enumToInt(Opcode.advance_loc), @enumToInt(Opcode.offset), @enumToInt(Opcode.restore) => |u| @unionInit( - Instruction, - 
@tagName(@intToEnum(Opcode, u)), - try std.meta.TagPayload(Instruction, @intToEnum(Opcode, u)).read(reader, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian), - ), - 0 => blk: { - inline for (@typeInfo(Opcode).Enum.fields) |field| { - if (field.value == opcode) { - break :blk @unionInit( - Instruction, - @tagName(@intToEnum(Opcode, field.value)), - try std.meta.TagPayload(Instruction, @intToEnum(Opcode, field.value)).read(reader, null, addr_size_bytes, endian), - ); - } - } - break :blk error.UnknownOpcode; + pub fn read( + stream: *std.io.FixedBufferStream([]const u8), + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Instruction { + @setEvalBranchQuota(1800); + + return switch (try stream.reader().readByte()) { + inline @enumToInt(Opcode.lo_inline)...@enumToInt(Opcode.hi_inline) => |opcode| blk: { + const e = @intToEnum(Opcode, opcode & 0b11000000); + const payload_type = std.meta.TagPayload(Instruction, e); + const value = try payload_type.read(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian); + break :blk @unionInit(Instruction, @tagName(e), value); }, - else => error.UnknownOpcode, + inline @enumToInt(Opcode.lo_reserved)...@enumToInt(Opcode.hi_reserved) => |opcode| blk: { + const e = @intToEnum(Opcode, opcode); + const payload_type = std.meta.TagPayload(Instruction, e); + const value = try payload_type.read(stream, null, addr_size_bytes, endian); + break :blk @unionInit(Instruction, @tagName(e), value); + }, + Opcode.lo_user...Opcode.hi_user => error.UnimplementedUserOpcode, + else => error.InvalidOpcode, }; } - pub fn writeOperands(self: Instruction, writer: anytype, cie: dwarf.CommonInformationEntry, arch: ?std.Target.Cpu.Arch) !void { + pub fn writeOperands( + self: Instruction, + writer: anytype, + cie: dwarf.CommonInformationEntry, + arch: ?std.Target.Cpu.Arch, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !void { switch (self) { - inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try 
writer.print("{}", .{ i.operands.delta * cie.code_alignment_factor }), + inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{i.operands.delta * cie.code_alignment_factor}), .offset => |i| { try abi.writeRegisterName(writer, arch, i.operands.register); - try writer.print(" {}", .{ @intCast(i64, i.operands.offset) * cie.data_alignment_factor }); + try writer.print(" {}", .{@intCast(i64, i.operands.offset) * cie.data_alignment_factor}); }, .restore => {}, .nop => {}, @@ -217,14 +235,14 @@ pub const Instruction = union(Opcode) { .restore_state => {}, .def_cfa => |i| { try abi.writeRegisterName(writer, arch, i.operands.register); - try writer.print(" {}", .{ fmtOffset(@intCast(i64, i.operands.offset)) }); + try writer.print(" {d:<1}", .{@intCast(i64, i.operands.offset)}); }, .def_cfa_register => {}, .def_cfa_offset => |i| { - try writer.print("{}", .{ fmtOffset(@intCast(i64, i.operands.offset)) }); + try writer.print("{d:<1}", .{@intCast(i64, i.operands.offset)}); }, .def_cfa_expression => |i| { - try writer.print("TODO(parse expressions data {x})", .{ std.fmt.fmtSliceHexLower(i.operands.block) }); + try writeExpression(writer, i.operands.block, arch, addr_size_bytes, endian); }, .expression => {}, .offset_extended_sf => {}, @@ -235,23 +253,83 @@ pub const Instruction = union(Opcode) { .val_expression => {}, } } - }; +fn writeExpression( + writer: anytype, + block: []const u8, + arch: ?std.Target.Cpu.Arch, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !void { + var stream = std.io.fixedBufferStream(block); + + // Generate a lookup table from opcode value to name + const opcode_lut_len = 256; + const opcode_lut: [opcode_lut_len]?[]const u8 = comptime blk: { + var lut: [opcode_lut_len]?[]const u8 = [_]?[]const u8{null} ** opcode_lut_len; + for (@typeInfo(dwarf.OP).Struct.decls) |decl| { + lut[@as(u8, @field(dwarf.OP, decl.name))] = decl.name; + } -fn formatOffset(data: i64, comptime fmt: []const u8, options: 
std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - if (data >= 0) try writer.writeByte('+'); - return std.fmt.formatInt(data, 10, .lower, options, writer); -} + break :blk lut; + }; -fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) { - return .{ .data = offset }; + switch (endian) { + inline .Little, .Big => |e| { + switch (addr_size_bytes) { + inline 2, 4, 8 => |size| { + const StackMachine = expressions.StackMachine(.{ + .addr_size = size, + .endian = e, + .call_frame_mode = true, + }); + + const reader = stream.reader(); + while (stream.pos < stream.buffer.len) { + if (stream.pos > 0) try writer.writeAll(", "); + + const opcode = try reader.readByte(); + if (opcode_lut[opcode]) |opcode_name| { + try writer.print("DW_OP_{s}", .{opcode_name}); + } else { + // TODO: See how llvm-dwarfdump prints these? + if (opcode >= dwarf.OP.lo_user and opcode <= dwarf.OP.lo_user) { + try writer.print("", .{opcode}); + } else { + try writer.print("", .{opcode}); + } + } + + if (try StackMachine.readOperand(&stream, opcode)) |value| { + switch (value) { + //.generic => |v| try writer.print("{d}", .{v}), + .generic => {}, // Constant values are implied by the opcode name + .register => |v| try writer.print(" {}", .{ abi.fmtRegister(v, arch) }), + .base_register => |v| try writer.print(" {}{d:<1}", .{ abi.fmtRegister(v.base_register, arch), v.offset }), + else => try writer.print(" TODO({s})", .{@tagName(value)}), + } + } + } + }, + else => return error.InvalidAddrSize, + } + }, + } } +// fn formatOffset(data: i64, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { +// _ = fmt; +// if (data >= 0) try writer.writeByte('+'); +// return std.fmt.formatInt(data, 10, .lower, options, writer); +// } + +// fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) { +// return .{ .data = offset }; +// } + /// See section 6.4.1 of the DWARF5 specification pub const VirtualMachine = struct { - const RegisterRule = union(enum) { undefined: 
void, same_value: void, @@ -263,11 +341,18 @@ pub const VirtualMachine = struct { architectural: void, }; - const Column = struct { + pub const Column = struct { register: u8 = undefined, rule: RegisterRule = .{ .undefined = {} }, - pub fn writeRule(self: Column, writer: anytype, is_cfa: bool, arch: ?std.Target.Cpu.Arch) !void { + pub fn writeRule( + self: Column, + writer: anytype, + is_cfa: bool, + arch: ?std.Target.Cpu.Arch, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !void { if (is_cfa) { try writer.writeAll("CFA"); } else { @@ -281,48 +366,54 @@ pub const VirtualMachine = struct { .offset => |offset| { if (is_cfa) { try abi.writeRegisterName(writer, arch, self.register); - try writer.print("{}", .{ fmtOffset(offset) }); + try writer.print("{d:<1}", .{offset}); } else { - try writer.print("[CFA{}]", .{ fmtOffset(offset) }); + try writer.print("[CFA{d:<1}]", .{offset}); } }, .val_offset => |offset| { if (is_cfa) { try abi.writeRegisterName(writer, arch, self.register); - try writer.print("{}", .{ fmtOffset(offset) }); + try writer.print("{d:<1}", .{offset}); } else { - try writer.print("CFA{}", .{ fmtOffset(offset) }); + try writer.print("CFA{d:<1}", .{offset}); } }, .register => |register| try abi.writeRegisterName(writer, arch, register), - .expression => try writer.writeAll("TODO(expression)"), + .expression => |expression| try writeExpression(writer, expression, arch, addr_size_bytes, endian), .val_expression => try writer.writeAll("TODO(val_expression)"), .architectural => try writer.writeAll("TODO(architectural)"), } } }; + /// Each row contains unwinding rules for a set of registers at a specific location in the program. pub const Row = struct { /// Offset from pc_begin offset: u64 = 0, + /// Special-case column that defines the CFA (Canonical Frame Address) rule. + /// The register field of this column defines the register that CFA is derived + /// from, while other columns define registers in terms of the CFA. 
cfa: Column = .{}, - /// Index into `columns` of the first column in this row + /// Index into `columns` of the first column in this row. columns_start: usize = undefined, columns_len: u8 = 0, }; - rows: std.ArrayListUnmanaged(Row) = .{}, columns: std.ArrayListUnmanaged(Column) = .{}, + row_stack: std.ArrayListUnmanaged(Row) = .{}, current_row: Row = .{}, + // TODO: Add stack machine stack + pub fn reset(self: *VirtualMachine) void { - self.rows.clearRetainingCapacity(); + self.row_stack.clearRetainingCapacity(); self.columns.clearRetainingCapacity(); self.current_row = .{}; } pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.rows.deinit(allocator); + self.row_stack.deinit(allocator); self.columns.deinit(allocator); self.* = undefined; } @@ -366,8 +457,20 @@ pub const VirtualMachine = struct { .undefined => {}, .same_value => {}, .register => {}, - .remember_state => {}, - .restore_state => {}, + .remember_state => { + + // TODO: The row stack only actually needs the column information + // TODO: Also it needs to copy the columns because changes can edit the referenced columns + // TODO: This function could push the column range onto the stack, the copy the columns and update current row + + try self.row_stack.append(allocator, self.current_row); + }, + .restore_state => { + if (self.row_stack.items.len == 0) return error.InvalidOperation; + const row = self.row_stack.pop(); + self.current_row.columns_len = row.columns_len; + self.current_row.columns_start = row.columns_start; + }, .def_cfa => |i| { self.current_row.cfa = .{ .register = i.operands.register, @@ -376,11 +479,14 @@ pub const VirtualMachine = struct { }, .def_cfa_register => {}, .def_cfa_offset => |i| { + self.current_row.cfa.rule = .{ .offset = @intCast(i64, i.operands.offset) }; + }, + .def_cfa_expression => |i| { + self.current_row.cfa.register = undefined; self.current_row.cfa.rule = .{ - .offset = @intCast(i64, i.operands.offset) + .expression = i.operands.block, }; 
}, - .def_cfa_expression => {}, .expression => {}, .offset_extended_sf => {}, .def_cfa_sf => {}, @@ -390,5 +496,4 @@ pub const VirtualMachine = struct { .val_expression => {}, } } - }; diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig new file mode 100644 index 000000000000..dd838ecf963f --- /dev/null +++ b/lib/std/dwarf/expressions.zig @@ -0,0 +1,197 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const OP = @import("OP.zig"); +const leb = @import("../leb128.zig"); + +pub const StackMachineOptions = struct { + /// The address size of the target architecture + addr_size: u8 = @sizeOf(usize), + + /// Endianess of the target architecture + endian: std.builtin.Endian = .Little, + + /// Restrict the stack machine to a subset of opcodes used in call frame instructions + call_frame_mode: bool = false, +}; + +/// A stack machine that can decode and run DWARF expressions. +/// Expressions can be decoded for non-native address size and endianness, +/// but can only be executed if the current target matches the configuration. 
+pub fn StackMachine(comptime options: StackMachineOptions) type { + const addr_type = switch(options.addr_size) { + 2 => u16, + 4 => u32, + 8 => u64, + else => @compileError("Unsupported address size of " ++ options.addr_size), + }; + + const addr_type_signed = switch(options.addr_size) { + 2 => i16, + 4 => i32, + 8 => i64, + else => @compileError("Unsupported address size of " ++ options.addr_size), + }; + + return struct { + const Value = union(enum) { + generic: addr_type, + const_type: []const u8, + register: u8, + base_register: struct { + base_register: u8, + offset: i64, + }, + composite_location: struct { + size: u64, + offset: i64, + }, + block: []const u8, + base_type: struct { + type_offset: u64, + value_bytes: []const u8, + }, + deref_type: struct { + size: u8, + offset: u64, + }, + }; + + stack: std.ArrayListUnmanaged(Value) = .{}, + + fn generic(value: anytype) Value { + const int_info = @typeInfo(@TypeOf(value)).Int; + if (@sizeOf(@TypeOf(value)) > options.addr_size) { + return .{ + .generic = switch (int_info.signedness) { + .signed => @bitCast(addr_type, @truncate(addr_type_signed, value)), + .unsigned => @truncate(addr_type, value), + } + }; + } else { + return .{ + .generic = switch (int_info.signedness) { + .signed => @bitCast(addr_type, @intCast(addr_type_signed, value)), + .unsigned => @intCast(addr_type, value), + } + }; + } + } + + pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8) !?Value { + const reader = stream.reader(); + return switch (opcode) { + OP.addr, + OP.call_ref, + => generic(try reader.readInt(addr_type, options.endian)), + OP.const1u, + OP.pick, + OP.deref_size, + OP.xderef_size, + => generic(try reader.readByte()), + OP.const1s => generic(try reader.readByteSigned()), + OP.const2u, + OP.call2, + OP.call4, + => generic(try reader.readInt(u16, options.endian)), + OP.const2s, + OP.bra, + OP.skip, + => generic(try reader.readInt(i16, options.endian)), + OP.const4u => generic(try reader.readInt(u32, 
options.endian)), + OP.const4s => generic(try reader.readInt(i32, options.endian)), + OP.const8u => generic(try reader.readInt(u64, options.endian)), + OP.const8s => generic(try reader.readInt(i64, options.endian)), + OP.constu, + OP.plus_uconst, + OP.addrx, + OP.constx, + OP.convert, + OP.reinterpret, + => generic(try leb.readULEB128(u64, reader)), + OP.consts, + OP.fbreg, + => generic(try leb.readILEB128(i64, reader)), + OP.lit0...OP.lit31 => |n| generic(n - OP.lit0), + OP.reg0...OP.reg31 => |n| .{ .register = n - OP.reg0 }, + OP.breg0...OP.breg31 => |n| .{ + .base_register = .{ + .base_register = n - OP.breg0, + .offset = try leb.readILEB128(i64, reader), + } + }, + OP.regx => .{ .register = try leb.readULEB128(u8, reader) }, + OP.bregx, + OP.regval_type => .{ + .base_register = .{ + .base_register = try leb.readULEB128(u8, reader), + .offset = try leb.readILEB128(i64, reader), + } + }, + OP.piece => .{ + .composite_location = .{ + .size = try leb.readULEB128(u8, reader), + .offset = 0, + }, + }, + OP.bit_piece => .{ + .composite_location = .{ + .size = try leb.readULEB128(u8, reader), + .offset = try leb.readILEB128(i64, reader), + }, + }, + OP.implicit_value, + OP.entry_value + => blk: { + const size = try leb.readULEB128(u8, reader); + if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; + const block = stream.buffer[stream.pos..][0..size]; + stream.pos += size; + break :blk .{ + .block = block, + }; + }, + OP.const_type => blk: { + const type_offset = try leb.readULEB128(u8, reader); + const size = try reader.readByte(); + if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; + const value_bytes = stream.buffer[stream.pos..][0..size]; + stream.pos += size; + break :blk .{ + .base_type = .{ + .type_offset = type_offset, + .value_bytes = value_bytes, + } + }; + }, + OP.deref_type, + OP.xderef_type, + => .{ + .deref_type = .{ + .size = try reader.readByte(), + .offset = try leb.readULEB128(u64, reader), + }, + }, + 
OP.lo_user...OP.hi_user => return error.UnimplementedUserOpcode, + else => null, + }; + } + + pub fn step( + self: *StackMachine, + stream: std.io.FixedBufferStream([]const u8), + allocator: std.mem.Allocator, + ) !void { + if (@sizeOf(usize) != addr_type or options.endian != builtin.target.cpu.arch.endian()) + @compileError("Execution of non-native address sizees / endianness is not supported"); + + const opcode = try stream.reader.readByte(); + _ = opcode; + _ = self; + _ = allocator; + + // switch (opcode) { + // OP.addr => try self.stack.append(allocator, try readOperand(stream, opcode)), + // } + } + }; +} From 338df862d147986c797902706967b3dae7c95cd6 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 9 May 2023 02:24:57 -0400 Subject: [PATCH 07/81] - fix remember_state - implement def_cfa_register --- lib/std/dwarf/call_frame.zig | 83 +++++++++++++++++------------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index a40094d608ed..fd440d96505b 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -233,17 +233,10 @@ pub const Instruction = union(Opcode) { .register => {}, .remember_state => {}, .restore_state => {}, - .def_cfa => |i| { - try abi.writeRegisterName(writer, arch, i.operands.register); - try writer.print(" {d:<1}", .{@intCast(i64, i.operands.offset)}); - }, - .def_cfa_register => {}, - .def_cfa_offset => |i| { - try writer.print("{d:<1}", .{@intCast(i64, i.operands.offset)}); - }, - .def_cfa_expression => |i| { - try writeExpression(writer, i.operands.block, arch, addr_size_bytes, endian); - }, + .def_cfa => |i| try writer.print("{} {d:<1}", .{ abi.fmtRegister(i.operands.register, arch), @intCast(i64, i.operands.offset)}), + .def_cfa_register => |i| try abi.writeRegisterName(writer, arch, i.operands.register), + .def_cfa_offset => |i| try writer.print("{d:<1}", .{@intCast(i64, i.operands.offset)}), + .def_cfa_expression => |i| try 
writeExpression(writer, i.operands.block, arch, addr_size_bytes, endian), .expression => {}, .offset_extended_sf => {}, .def_cfa_sf => {}, @@ -318,16 +311,6 @@ fn writeExpression( } } -// fn formatOffset(data: i64, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { -// _ = fmt; -// if (data >= 0) try writer.writeByte('+'); -// return std.fmt.formatInt(data, 10, .lower, options, writer); -// } - -// fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) { -// return .{ .data = offset }; -// } - /// See section 6.4.1 of the DWARF5 specification pub const VirtualMachine = struct { const RegisterRule = union(enum) { @@ -387,7 +370,7 @@ pub const VirtualMachine = struct { } }; - /// Each row contains unwinding rules for a set of registers at a specific location in the program. + /// Each row contains unwinding rules for a set of registers at a specific location in the pub const Row = struct { /// Offset from pc_begin offset: u64 = 0, @@ -395,31 +378,33 @@ pub const VirtualMachine = struct { /// The register field of this column defines the register that CFA is derived /// from, while other columns define registers in terms of the CFA. cfa: Column = .{}, + columns: ColumnRange = .{}, + }; + + const ColumnRange = struct { /// Index into `columns` of the first column in this row. 
- columns_start: usize = undefined, - columns_len: u8 = 0, + start: usize = undefined, + len: u8 = 0, }; columns: std.ArrayListUnmanaged(Column) = .{}, - row_stack: std.ArrayListUnmanaged(Row) = .{}, + stack: std.ArrayListUnmanaged(ColumnRange) = .{}, current_row: Row = .{}, - // TODO: Add stack machine stack - pub fn reset(self: *VirtualMachine) void { - self.row_stack.clearRetainingCapacity(); + self.stack.clearRetainingCapacity(); self.columns.clearRetainingCapacity(); self.current_row = .{}; } pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.row_stack.deinit(allocator); + self.stack.deinit(allocator); self.columns.deinit(allocator); self.* = undefined; } pub fn getColumns(self: VirtualMachine, row: Row) []Column { - return self.columns.items[row.columns_start..][0..row.columns_len]; + return self.columns.items[row.columns.start..][0..row.columns.len]; } fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { @@ -427,10 +412,10 @@ pub const VirtualMachine = struct { if (c.register == register) return c; } - if (self.current_row.columns_len == 0) { - self.current_row.columns_start = self.columns.items.len; + if (self.current_row.columns.len == 0) { + self.current_row.columns.start = self.columns.items.len; } - self.current_row.columns_len += 1; + self.current_row.columns.len += 1; const column = try self.columns.addOne(allocator); column.* = .{ @@ -443,7 +428,7 @@ pub const VirtualMachine = struct { pub fn step(self: *VirtualMachine, allocator: std.mem.Allocator, cie: dwarf.CommonInformationEntry, instruction: Instruction) !void { switch (instruction) { inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| { - self.current_row.offset += i.operands.delta; + self.current_row.offset += i.operands.delta * cie.code_alignment_factor; }, .offset => |i| { const column = try self.getOrAddColumn(allocator, i.operands.register); @@ -458,18 +443,22 @@ pub const VirtualMachine = struct { 
.same_value => {}, .register => {}, .remember_state => { - - // TODO: The row stack only actually needs the column information - // TODO: Also it needs to copy the columns because changes can edit the referenced columns - // TODO: This function could push the column range onto the stack, the copy the columns and update current row - - try self.row_stack.append(allocator, self.current_row); + try self.stack.append(allocator, self.current_row.columns); + errdefer _ = self.stack.pop(); + + const new_start = self.columns.items.len; + if (self.current_row.columns.len > 0) { + // Since we're copying from the same backing array, ensure it won't be reallocated + try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); + self.columns.appendSliceAssumeCapacity(self.getColumns(self.current_row)); + self.current_row.columns.start = new_start; + } }, .restore_state => { - if (self.row_stack.items.len == 0) return error.InvalidOperation; - const row = self.row_stack.pop(); - self.current_row.columns_len = row.columns_len; - self.current_row.columns_start = row.columns_start; + // TODO: Is it possible to remove the duplicate from above? 
Other instructions may have added columns since then though + const columns = self.stack.popOrNull() orelse return error.InvalidOperation; + self.current_row.columns.len = columns.len; + self.current_row.columns.start = columns.start; }, .def_cfa => |i| { self.current_row.cfa = .{ @@ -477,8 +466,12 @@ pub const VirtualMachine = struct { .rule = .{ .offset = @intCast(i64, i.operands.offset) }, }; }, - .def_cfa_register => {}, + .def_cfa_register => |i| { + // TODO: Verify the the current row is using a register and offset (validation) + self.current_row.cfa.register = i.operands.register; + }, .def_cfa_offset => |i| { + // TODO: Verify the the current row is using a register and offset (validation) self.current_row.cfa.rule = .{ .offset = @intCast(i64, i.operands.offset) }; }, .def_cfa_expression => |i| { From 7b4611cfb3eb0c1c3f4fcae8c0a4beaf28473803 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 9 May 2023 22:33:31 -0400 Subject: [PATCH 08/81] - move writing code to zig-dwarfdump - implement restore_state, restore_extended, offset_extended --- lib/std/dwarf.zig | 1 + lib/std/dwarf/call_frame.zig | 269 ++++++++++++++--------------------- 2 files changed, 104 insertions(+), 166 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 144c12470e82..eb9f7024059c 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -16,6 +16,7 @@ pub const ATE = @import("dwarf/ATE.zig"); pub const EH = @import("dwarf/EH.zig"); pub const abi = @import("dwarf/abi.zig"); pub const call_frame = @import("dwarf/call_frame.zig"); +pub const expressions = @import("dwarf/expressions.zig"); pub const LLE = struct { pub const end_of_list = 0x00; diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index fd440d96505b..8220a74e9f8b 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -1,9 +1,11 @@ +const builtin = @import("builtin"); const std = @import("../std.zig"); const debug = std.debug; const leb = @import("../leb128.zig"); 
const abi = @import("abi.zig"); const dwarf = @import("../dwarf.zig"); const expressions = @import("expressions.zig"); +const assert = std.debug.assert; const Opcode = enum(u8) { advance_loc = 0x1 << 6, @@ -36,7 +38,7 @@ const Opcode = enum(u8) { // These opcodes encode an operand in the lower 6 bits of the opcode itself pub const lo_inline = Opcode.advance_loc; - pub const hi_inline = Opcode.restore; + pub const hi_inline = @enumToInt(Opcode.restore) | 0b111111; // These opcodes are trailed by zero or more operands pub const lo_reserved = Opcode.nop; @@ -61,7 +63,7 @@ const Operand = enum { fn Storage(comptime self: Operand) type { return switch (self) { - .opcode_delta, .opcode_register => u6, + .opcode_delta, .opcode_register => u8, .uleb128_register => u8, .uleb128_offset => u64, .sleb128_offset => i64, @@ -110,7 +112,7 @@ const Operand = enum { fn InstructionType(comptime definition: anytype) type { const definition_type = @typeInfo(@TypeOf(definition)); - debug.assert(definition_type == .Struct); + assert(definition_type == .Struct); const definition_len = definition_type.Struct.fields.len; comptime var fields: [definition_len]std.builtin.Type.StructField = undefined; @@ -159,14 +161,14 @@ fn InstructionType(comptime definition: anytype) type { pub const Instruction = union(Opcode) { advance_loc: InstructionType(.{ .delta = .opcode_delta }), offset: InstructionType(.{ .register = .opcode_register, .offset = .uleb128_offset }), + offset_extended: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), restore: InstructionType(.{ .register = .opcode_register }), + restore_extended: InstructionType(.{ .register = .uleb128_register }), nop: InstructionType(.{}), set_loc: InstructionType(.{ .address = .address }), advance_loc1: InstructionType(.{ .delta = .u8_delta }), advance_loc2: InstructionType(.{ .delta = .u16_delta }), advance_loc4: InstructionType(.{ .delta = .u32_delta }), - offset_extended: InstructionType(.{ .register = 
.uleb128_register, .offset = .uleb128_offset }), - restore_extended: InstructionType(.{ .register = .uleb128_register }), undefined: InstructionType(.{ .register = .uleb128_register }), same_value: InstructionType(.{ .register = .uleb128_register }), register: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), @@ -192,7 +194,7 @@ pub const Instruction = union(Opcode) { @setEvalBranchQuota(1800); return switch (try stream.reader().readByte()) { - inline @enumToInt(Opcode.lo_inline)...@enumToInt(Opcode.hi_inline) => |opcode| blk: { + inline @enumToInt(Opcode.lo_inline)...Opcode.hi_inline => |opcode| blk: { const e = @intToEnum(Opcode, opcode & 0b11000000); const payload_type = std.meta.TagPayload(Instruction, e); const value = try payload_type.read(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian); @@ -205,112 +207,15 @@ pub const Instruction = union(Opcode) { break :blk @unionInit(Instruction, @tagName(e), value); }, Opcode.lo_user...Opcode.hi_user => error.UnimplementedUserOpcode, - else => error.InvalidOpcode, - }; - } + else => |opcode| blk: { + std.debug.print("Opcode {x}\n", .{opcode}); - pub fn writeOperands( - self: Instruction, - writer: anytype, - cie: dwarf.CommonInformationEntry, - arch: ?std.Target.Cpu.Arch, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !void { - switch (self) { - inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{i.operands.delta * cie.code_alignment_factor}), - .offset => |i| { - try abi.writeRegisterName(writer, arch, i.operands.register); - try writer.print(" {}", .{@intCast(i64, i.operands.offset) * cie.data_alignment_factor}); + break :blk error.InvalidOpcode; }, - .restore => {}, - .nop => {}, - .set_loc => {}, - .offset_extended => {}, - .restore_extended => {}, - .undefined => {}, - .same_value => {}, - .register => {}, - .remember_state => {}, - .restore_state => {}, - .def_cfa => |i| try writer.print("{} {d:<1}", .{ 
abi.fmtRegister(i.operands.register, arch), @intCast(i64, i.operands.offset)}), - .def_cfa_register => |i| try abi.writeRegisterName(writer, arch, i.operands.register), - .def_cfa_offset => |i| try writer.print("{d:<1}", .{@intCast(i64, i.operands.offset)}), - .def_cfa_expression => |i| try writeExpression(writer, i.operands.block, arch, addr_size_bytes, endian), - .expression => {}, - .offset_extended_sf => {}, - .def_cfa_sf => {}, - .def_cfa_offset_sf => {}, - .val_offset => {}, - .val_offset_sf => {}, - .val_expression => {}, - } + }; } }; -fn writeExpression( - writer: anytype, - block: []const u8, - arch: ?std.Target.Cpu.Arch, - addr_size_bytes: u8, - endian: std.builtin.Endian, -) !void { - var stream = std.io.fixedBufferStream(block); - - // Generate a lookup table from opcode value to name - const opcode_lut_len = 256; - const opcode_lut: [opcode_lut_len]?[]const u8 = comptime blk: { - var lut: [opcode_lut_len]?[]const u8 = [_]?[]const u8{null} ** opcode_lut_len; - for (@typeInfo(dwarf.OP).Struct.decls) |decl| { - lut[@as(u8, @field(dwarf.OP, decl.name))] = decl.name; - } - - break :blk lut; - }; - - switch (endian) { - inline .Little, .Big => |e| { - switch (addr_size_bytes) { - inline 2, 4, 8 => |size| { - const StackMachine = expressions.StackMachine(.{ - .addr_size = size, - .endian = e, - .call_frame_mode = true, - }); - - const reader = stream.reader(); - while (stream.pos < stream.buffer.len) { - if (stream.pos > 0) try writer.writeAll(", "); - - const opcode = try reader.readByte(); - if (opcode_lut[opcode]) |opcode_name| { - try writer.print("DW_OP_{s}", .{opcode_name}); - } else { - // TODO: See how llvm-dwarfdump prints these? 
- if (opcode >= dwarf.OP.lo_user and opcode <= dwarf.OP.lo_user) { - try writer.print("", .{opcode}); - } else { - try writer.print("", .{opcode}); - } - } - - if (try StackMachine.readOperand(&stream, opcode)) |value| { - switch (value) { - //.generic => |v| try writer.print("{d}", .{v}), - .generic => {}, // Constant values are implied by the opcode name - .register => |v| try writer.print(" {}", .{ abi.fmtRegister(v, arch) }), - .base_register => |v| try writer.print(" {}{d:<1}", .{ abi.fmtRegister(v.base_register, arch), v.offset }), - else => try writer.print(" TODO({s})", .{@tagName(value)}), - } - } - } - }, - else => return error.InvalidAddrSize, - } - }, - } -} - /// See section 6.4.1 of the DWARF5 specification pub const VirtualMachine = struct { const RegisterRule = union(enum) { @@ -324,52 +229,6 @@ pub const VirtualMachine = struct { architectural: void, }; - pub const Column = struct { - register: u8 = undefined, - rule: RegisterRule = .{ .undefined = {} }, - - pub fn writeRule( - self: Column, - writer: anytype, - is_cfa: bool, - arch: ?std.Target.Cpu.Arch, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !void { - if (is_cfa) { - try writer.writeAll("CFA"); - } else { - try abi.writeRegisterName(writer, arch, self.register); - } - - try writer.writeByte('='); - switch (self.rule) { - .undefined => {}, - .same_value => try writer.writeAll("S"), - .offset => |offset| { - if (is_cfa) { - try abi.writeRegisterName(writer, arch, self.register); - try writer.print("{d:<1}", .{offset}); - } else { - try writer.print("[CFA{d:<1}]", .{offset}); - } - }, - .val_offset => |offset| { - if (is_cfa) { - try abi.writeRegisterName(writer, arch, self.register); - try writer.print("{d:<1}", .{offset}); - } else { - try writer.print("CFA{d:<1}", .{offset}); - } - }, - .register => |register| try abi.writeRegisterName(writer, arch, register), - .expression => |expression| try writeExpression(writer, expression, arch, addr_size_bytes, endian), - .val_expression 
=> try writer.writeAll("TODO(val_expression)"), - .architectural => try writer.writeAll("TODO(architectural)"), - } - } - }; - /// Each row contains unwinding rules for a set of registers at a specific location in the pub const Row = struct { /// Offset from pc_begin @@ -381,6 +240,12 @@ pub const VirtualMachine = struct { columns: ColumnRange = .{}, }; + pub const Column = struct { + /// Register can only null in the case of the CFA column + register: ?u8 = null, + rule: RegisterRule = .{ .undefined = {} }, + }; + const ColumnRange = struct { /// Index into `columns` of the first column in this row. start: usize = undefined, @@ -391,10 +256,14 @@ pub const VirtualMachine = struct { stack: std.ArrayListUnmanaged(ColumnRange) = .{}, current_row: Row = .{}, + /// The result of executing the CIE's initial_instructions + cie_row: ?Row = null, + pub fn reset(self: *VirtualMachine) void { self.stack.clearRetainingCapacity(); self.columns.clearRetainingCapacity(); self.current_row = .{}; + self.cie_row = null; } pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { @@ -403,12 +272,14 @@ pub const VirtualMachine = struct { self.* = undefined; } - pub fn getColumns(self: VirtualMachine, row: Row) []Column { + /// Return a slice backed by the row's non-CFA columns + pub fn rowColumns(self: VirtualMachine, row: Row) []Column { return self.columns.items[row.columns.start..][0..row.columns.len]; } + /// Either retrieves or adds a column for `register` (non-CFA) in the current row fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { - for (self.getColumns(self.current_row)) |*c| { + for (self.rowColumns(self.current_row)) |*c| { if (c.register == register) return c; } @@ -425,20 +296,82 @@ pub const VirtualMachine = struct { return column; } - pub fn step(self: *VirtualMachine, allocator: std.mem.Allocator, cie: dwarf.CommonInformationEntry, instruction: Instruction) !void { + /// Runs the CIE instructions, then the 
FDE instructions. Execution halts + /// once the row that corresponds to `pc` is known, and it is returned. + pub fn unwindTo( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: dwarf.CommonInformationEntry, + fde: dwarf.FrameDescriptionEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Row { + assert(self.cie_row == null); + if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; + + var prev_row: Row = self.current_row; + const streams = .{ + std.io.fixedBufferStream(cie.initial_instructions), + std.io.fixedBufferStream(fde.instructions), + }; + + outer: for (streams, 0..) |*stream, i| { + while (stream.pos < stream.buffer.len) { + const instruction = try dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); + prev_row = try self.step(allocator, cie, i == 0, instruction); + if (pc < fde.pc_begin + self.current_row.offset) { + break :outer; + } + } + } + + return prev_row; + } + + pub fn unwindToNative( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: dwarf.CommonInformationEntry, + fde: dwarf.FrameDescriptionEntry, + ) void { + self.stepTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian()); + } + + /// Executes a single instruction. + /// If this instruction is from the CIE, `is_initial` should be set. 
+ /// Returns the value of `current_row` before executing this instruction + pub fn step( + self: *VirtualMachine, + allocator: std.mem.Allocator, + cie: dwarf.CommonInformationEntry, + is_initial: bool, + instruction: Instruction, + ) !Row { + // CIE instructions must be run before FDE instructions + assert(!is_initial or self.cie_row == null); + if (!is_initial and self.cie_row == null) self.cie_row = self.current_row; + + const prev_row = self.current_row; switch (instruction) { inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| { self.current_row.offset += i.operands.delta * cie.code_alignment_factor; }, - .offset => |i| { + inline .offset, .offset_extended => |i| { const column = try self.getOrAddColumn(allocator, i.operands.register); column.rule = .{ .offset = @intCast(i64, i.operands.offset) * cie.data_alignment_factor }; }, - .restore => {}, + inline .restore, .restore_extended => |i| { + if (self.cie_row) |cie_row| { + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = for (self.rowColumns(cie_row)) |cie_column| { + if (cie_column.register == i.operands.register) break cie_column.rule; + } else .{ .undefined = {} }; + } else return error.InvalidOperation; + }, .nop => {}, .set_loc => {}, - .offset_extended => {}, - .restore_extended => {}, .undefined => {}, .same_value => {}, .register => {}, @@ -448,17 +381,19 @@ pub const VirtualMachine = struct { const new_start = self.columns.items.len; if (self.current_row.columns.len > 0) { - // Since we're copying from the same backing array, ensure it won't be reallocated try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.getColumns(self.current_row)); + self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); self.current_row.columns.start = new_start; } }, .restore_state => { - // TODO: Is it possible to remove the duplicate from above? 
Other instructions may have added columns since then though - const columns = self.stack.popOrNull() orelse return error.InvalidOperation; - self.current_row.columns.len = columns.len; - self.current_row.columns.start = columns.start; + const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; + self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); + try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); + + self.current_row.columns.start = self.columns.items.len; + self.current_row.columns.len = restored_columns.len; + self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); }, .def_cfa => |i| { self.current_row.cfa = .{ @@ -488,5 +423,7 @@ pub const VirtualMachine = struct { .val_offset_sf => {}, .val_expression => {}, } + + return prev_row; } }; From a0a40c2e7e177bed7032ddd3d271016127ae9205 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 10 May 2023 00:48:48 -0400 Subject: [PATCH 09/81] - implement more opcodes --- lib/std/dwarf/call_frame.zig | 118 +++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 25 deletions(-) diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 8220a74e9f8b..0f10b7d45694 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -171,7 +171,7 @@ pub const Instruction = union(Opcode) { advance_loc4: InstructionType(.{ .delta = .u32_delta }), undefined: InstructionType(.{ .register = .uleb128_register }), same_value: InstructionType(.{ .register = .uleb128_register }), - register: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), + register: InstructionType(.{ .register = .uleb128_register, .target_register = .uleb128_register }), remember_state: InstructionType(.{}), restore_state: InstructionType(.{}), def_cfa: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), @@ -229,15 
+229,20 @@ pub const VirtualMachine = struct { architectural: void, }; - /// Each row contains unwinding rules for a set of registers at a specific location in the + /// Each row contains unwinding rules for a set of registers. pub const Row = struct { - /// Offset from pc_begin + /// Offset from `FrameDescriptionEntry.pc_begin` offset: u64 = 0, + /// Special-case column that defines the CFA (Canonical Frame Address) rule. /// The register field of this column defines the register that CFA is derived - /// from, while other columns define registers in terms of the CFA. + /// from, while other columns define register rules in terms of the CFA. cfa: Column = .{}, columns: ColumnRange = .{}, + + /// Indicates that the next write to any column in this row needs to copy + /// the backing column storage first. + copy_on_write: bool = false, }; pub const Column = struct { @@ -339,6 +344,17 @@ pub const VirtualMachine = struct { self.stepTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian()); } + fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { + if (!self.current_row.copy_on_write) return; + + const new_start = self.columns.items.len; + if (self.current_row.columns.len > 0) { + try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); + self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); + self.current_row.columns.start = new_start; + } + } + /// Executes a single instruction. /// If this instruction is from the CIE, `is_initial` should be set. 
/// Returns the value of `current_row` before executing this instruction @@ -355,14 +371,36 @@ pub const VirtualMachine = struct { const prev_row = self.current_row; switch (instruction) { - inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| { + .set_loc => |i| { + if (i.operands.address <= self.current_row.offset) return error.InvalidOperation; + // TODO: Check cie.segment_selector_size != for DWARFV4 + self.current_row.offset = i.operands.address; + }, + inline .advance_loc, + .advance_loc1, + .advance_loc2, + .advance_loc4, + => |i| { self.current_row.offset += i.operands.delta * cie.code_alignment_factor; + self.current_row.copy_on_write = true; }, - inline .offset, .offset_extended => |i| { + inline .offset, + .offset_extended, + .offset_extended_sf, + => |i| { + try self.resolveCopyOnWrite(allocator); const column = try self.getOrAddColumn(allocator, i.operands.register); column.rule = .{ .offset = @intCast(i64, i.operands.offset) * cie.data_alignment_factor }; }, - inline .restore, .restore_extended => |i| { + // .offset_extended_sf => |i| { + // try self.resolveCopyOnWrite(allocator); + // const column = try self.getOrAddColumn(allocator, i.operands.register); + // column.rule = .{ .offset = i.operands.offset * cie.data_alignment_factor }; + // }, + inline .restore, + .restore_extended, + => |i| { + try self.resolveCopyOnWrite(allocator); if (self.cie_row) |cie_row| { const column = try self.getOrAddColumn(allocator, i.operands.register); column.rule = for (self.rowColumns(cie_row)) |cie_column| { @@ -371,20 +409,31 @@ pub const VirtualMachine = struct { } else return error.InvalidOperation; }, .nop => {}, - .set_loc => {}, - .undefined => {}, - .same_value => {}, - .register => {}, + .undefined => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ .undefined = {} }; + }, + .same_value => |i| { + try self.resolveCopyOnWrite(allocator); + const 
column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ .same_value = {} }; + }, + .register => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ .register = i.operands.target_register }; + }, .remember_state => { try self.stack.append(allocator, self.current_row.columns); - errdefer _ = self.stack.pop(); - - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; - } + self.current_row.copy_on_write = true; + + // const new_start = self.columns.items.len; + // if (self.current_row.columns.len > 0) { + // try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); + // self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); + // self.current_row.columns.start = new_start; + // } }, .restore_state => { const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; @@ -396,29 +445,48 @@ pub const VirtualMachine = struct { self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); }, .def_cfa => |i| { + try self.resolveCopyOnWrite(allocator); self.current_row.cfa = .{ .register = i.operands.register, .rule = .{ .offset = @intCast(i64, i.operands.offset) }, }; }, + .def_cfa_sf => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa = .{ + .register = i.operands.register, + .rule = .{ .offset = i.operands.offset * cie.data_alignment_factor }, + }; + }, .def_cfa_register => |i| { - // TODO: Verify the the current row is using a register and offset (validation) + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .offset) return 
error.InvalidOperation; self.current_row.cfa.register = i.operands.register; }, .def_cfa_offset => |i| { - // TODO: Verify the the current row is using a register and offset (validation) + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .offset) return error.InvalidOperation; self.current_row.cfa.rule = .{ .offset = @intCast(i64, i.operands.offset) }; }, + .def_cfa_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ .offset = i.operands.offset * cie.data_alignment_factor }; + }, .def_cfa_expression => |i| { + try self.resolveCopyOnWrite(allocator); self.current_row.cfa.register = undefined; self.current_row.cfa.rule = .{ .expression = i.operands.block, }; }, - .expression => {}, - .offset_extended_sf => {}, - .def_cfa_sf => {}, - .def_cfa_offset_sf => {}, + .expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ + .expression = i.operands.block, + }; + }, .val_offset => {}, .val_offset_sf => {}, .val_expression => {}, From 69399fbb82ea74fce4fb6bbfec5ab2cbfa435c1a Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 11 May 2023 01:52:45 -0400 Subject: [PATCH 10/81] - add default register rule - fixup eh pointer decoding --- lib/std/dwarf.zig | 89 ++++++++++++++++++++++++------------ lib/std/dwarf/EH.zig | 6 +++ lib/std/dwarf/call_frame.zig | 53 +++++++++++---------- 3 files changed, 95 insertions(+), 53 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index eb9f7024059c..826fd3fca3b1 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1494,18 +1494,34 @@ pub const DwarfInfo = struct { } const id_len = @as(u8, if (is_64) 8 else 4); - const entry_bytes = eh_frame[stream.pos..][0..length - id_len]; + const entry_bytes = 
eh_frame[stream.pos..][0 .. length - id_len]; const id = try reader.readInt(u32, di.endian); // TODO: Get section_offset here (pass in from headers) if (id == 0) { - const cie = try CommonInformationEntry.parse(entry_bytes, @ptrToInt(eh_frame.ptr), 0, length_offset, @sizeOf(usize), di.endian); + const cie = try CommonInformationEntry.parse( + entry_bytes, + @ptrToInt(eh_frame.ptr), + 0, + true, + length_offset, + @sizeOf(usize), + di.endian, + ); try di.cie_map.put(allocator, length_offset, cie); } else { const cie_offset = stream.pos - 4 - id; const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); - const fde = try FrameDescriptionEntry.parse(entry_bytes, @ptrToInt(eh_frame.ptr), 0, cie, @sizeOf(usize), di.endian); + const fde = try FrameDescriptionEntry.parse( + entry_bytes, + @ptrToInt(eh_frame.ptr), + 0, + true, + cie, + @sizeOf(usize), + di.endian, + ); try di.fde_list.append(allocator, fde); } } @@ -1557,6 +1573,11 @@ const EhPointerContext = struct { // The address of the pointer field itself pc_rel_base: u64, + // Whether or not to follow indirect pointers. This should only be + // used when decoding pointers at runtime using the current process's + // debug info. 
+ follow_indirect: bool, + // These relative addressing modes are only used in specific cases, and // might not be available / required in all parsing contexts data_rel_base: ?u64 = null, @@ -1570,7 +1591,7 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo const value: union(enum) { signed: i64, unsigned: u64, - } = switch (enc & 0x0f) { + } = switch (enc & EH.PE.type_mask) { EH.PE.absptr => .{ .unsigned = switch (addr_size_bytes) { 2 => try reader.readInt(u16, endian), @@ -1590,33 +1611,31 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo else => return badDwarf(), }; - const relative_to = enc & 0xf0; - var base = switch (relative_to) { + var base = switch (enc & EH.PE.rel_mask) { EH.PE.pcrel => ctx.pc_rel_base, EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.indirect => { - switch (addr_size_bytes) { - 2 => return @intToPtr(*const u16, value.unsigned).*, - 4 => return @intToPtr(*const u32, value.unsigned).*, - 8 => return @intToPtr(*const u64, value.unsigned).*, - else => return error.UnsupportedAddrSize, - } - }, else => null, }; - if (base) |b| { - return switch (value) { - .signed => |s| @intCast(u64, s + @intCast(i64, b)), - .unsigned => |u| u + b, + const ptr = if (base) |b| switch (value) { + .signed => |s| @intCast(u64, s + @intCast(i64, b)), + .unsigned => |u| u + b, + } else switch (value) { + .signed => |s| @intCast(u64, s), + .unsigned => |u| u, + }; + + if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { + return switch (addr_size_bytes) { + 2 => return @intToPtr(*const u16, ptr).*, + 4 => return @intToPtr(*const u32, ptr).*, + 8 => return @intToPtr(*const u64, ptr).*, + else => return error.UnsupportedAddrSize, }; } else { - return switch (value) { - .signed => |s| 
@intCast(u64, s), - .unsigned => |u| u, - }; + return ptr; } } @@ -1668,6 +1687,7 @@ pub const CommonInformationEntry = struct { cie_bytes: []const u8, section_base: u64, section_offset: u64, + is_runtime: bool, length_offset: u64, addr_size_bytes: u8, endian: std.builtin.Endian, @@ -1735,7 +1755,10 @@ pub const CommonInformationEntry = struct { reader, personality_enc.?, addr_size_bytes, - .{ .pc_rel_base = @ptrToInt(&cie_bytes[stream.pos]) - section_base + section_offset }, + .{ + .pc_rel_base = @ptrToInt(&cie_bytes[stream.pos]) - section_base + section_offset, + .follow_indirect = is_runtime, + }, endian, ); }, @@ -1785,6 +1808,7 @@ pub const FrameDescriptionEntry = struct { fde_bytes: []const u8, section_base: u64, section_offset: u64, + is_runtime: bool, cie: CommonInformationEntry, addr_size_bytes: u8, endian: std.builtin.Endian, @@ -1798,15 +1822,21 @@ pub const FrameDescriptionEntry = struct { reader, cie.fde_pointer_enc, addr_size_bytes, - .{ .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset }, + .{ + .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset, + .follow_indirect = is_runtime, + }, endian, ) orelse return badDwarf(); const pc_range = try readEhPointer( reader, - cie.fde_pointer_enc & 0x0f, + cie.fde_pointer_enc, addr_size_bytes, - .{ .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset }, + .{ + .pc_rel_base = 0, + .follow_indirect = false, + }, endian, ) orelse return badDwarf(); @@ -1819,9 +1849,12 @@ pub const FrameDescriptionEntry = struct { const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) try readEhPointer( reader, - cie.lsda_pointer_enc & 0x0f, + cie.lsda_pointer_enc, addr_size_bytes, - .{ .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) }, + .{ + .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset, + .follow_indirect = is_runtime, + }, endian, ) else diff --git a/lib/std/dwarf/EH.zig b/lib/std/dwarf/EH.zig index 
94d306fc08b1..3ee7e0be0f07 100644 --- a/lib/std/dwarf/EH.zig +++ b/lib/std/dwarf/EH.zig @@ -1,6 +1,10 @@ pub const PE = struct { pub const absptr = 0x00; + pub const size_mask = 0x7; + pub const sign_mask = 0x8; + pub const type_mask = size_mask | sign_mask; + pub const uleb128 = 0x01; pub const udata2 = 0x02; pub const udata4 = 0x03; @@ -10,11 +14,13 @@ pub const PE = struct { pub const sdata4 = 0x0B; pub const sdata8 = 0x0C; + pub const rel_mask = 0x70; pub const pcrel = 0x10; pub const textrel = 0x20; pub const datarel = 0x30; pub const funcrel = 0x40; pub const aligned = 0x50; + pub const indirect = 0x80; pub const omit = 0xff; diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 0f10b7d45694..90f9458e4305 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -216,16 +216,35 @@ pub const Instruction = union(Opcode) { } }; -/// See section 6.4.1 of the DWARF5 specification +/// This is a virtual machine that runs DWARF call frame instructions. +/// See section 6.4.1 of the DWARF5 specification. pub const VirtualMachine = struct { + const RegisterRule = union(enum) { + // The spec says that the default rule for each column is the undefined rule. + // However, it also allows ABI / compiler authors to specify alternate defaults, so + // there is a distinction made here. 
+ default: void, + undefined: void, same_value: void, + + // offset(N) offset: i64, + + // val_offset(N) val_offset: i64, + + // register(R) register: u8, + + // expression(E) expression: []const u8, + + // val_expression(E) val_expression: []const u8, + + // Augmenter-defined rule architectural: void, }; @@ -248,7 +267,7 @@ pub const VirtualMachine = struct { pub const Column = struct { /// Register can only null in the case of the CFA column register: ?u8 = null, - rule: RegisterRule = .{ .undefined = {} }, + rule: RegisterRule = .{ .default = {} }, }; const ColumnRange = struct { @@ -264,13 +283,6 @@ pub const VirtualMachine = struct { /// The result of executing the CIE's initial_instructions cie_row: ?Row = null, - pub fn reset(self: *VirtualMachine) void { - self.stack.clearRetainingCapacity(); - self.columns.clearRetainingCapacity(); - self.current_row = .{}; - self.cie_row = null; - } - pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { self.stack.deinit(allocator); self.columns.deinit(allocator); @@ -357,7 +369,7 @@ pub const VirtualMachine = struct { /// Executes a single instruction. /// If this instruction is from the CIE, `is_initial` should be set. - /// Returns the value of `current_row` before executing this instruction + /// Returns the value of `current_row` before executing this instruction. 
pub fn step( self: *VirtualMachine, allocator: std.mem.Allocator, @@ -367,13 +379,16 @@ pub const VirtualMachine = struct { ) !Row { // CIE instructions must be run before FDE instructions assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) self.cie_row = self.current_row; + if (!is_initial and self.cie_row == null) { + self.cie_row = self.current_row; + self.current_row.copy_on_write = true; + } const prev_row = self.current_row; switch (instruction) { .set_loc => |i| { if (i.operands.address <= self.current_row.offset) return error.InvalidOperation; - // TODO: Check cie.segment_selector_size != for DWARFV4 + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 self.current_row.offset = i.operands.address; }, inline .advance_loc, @@ -392,11 +407,6 @@ pub const VirtualMachine = struct { const column = try self.getOrAddColumn(allocator, i.operands.register); column.rule = .{ .offset = @intCast(i64, i.operands.offset) * cie.data_alignment_factor }; }, - // .offset_extended_sf => |i| { - // try self.resolveCopyOnWrite(allocator); - // const column = try self.getOrAddColumn(allocator, i.operands.register); - // column.rule = .{ .offset = i.operands.offset * cie.data_alignment_factor }; - // }, inline .restore, .restore_extended, => |i| { @@ -405,7 +415,7 @@ pub const VirtualMachine = struct { const column = try self.getOrAddColumn(allocator, i.operands.register); column.rule = for (self.rowColumns(cie_row)) |cie_column| { if (cie_column.register == i.operands.register) break cie_column.rule; - } else .{ .undefined = {} }; + } else .{ .default = {} }; } else return error.InvalidOperation; }, .nop => {}, @@ -427,13 +437,6 @@ pub const VirtualMachine = struct { .remember_state => { try self.stack.append(allocator, self.current_row.columns); self.current_row.copy_on_write = true; - - // const new_start = self.columns.items.len; - // if (self.current_row.columns.len > 0) { - // try self.columns.ensureUnusedCapacity(allocator, 
self.current_row.columns.len); - // self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - // self.current_row.columns.start = new_start; - // } }, .restore_state => { const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; From b449d98a935a20429874d8eb379d9cc0e49c5fcd Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 15 May 2023 01:52:53 -0400 Subject: [PATCH 11/81] - rework StackIterator to optionally use debug_info to unwind the stack - add abi routines for getting register values - unwinding is working! --- lib/std/debug.zig | 267 ++++++++++++++++++++++++----------- lib/std/dwarf.zig | 147 ++++++++++++++++--- lib/std/dwarf/abi.zig | 106 ++++++++++++++ lib/std/dwarf/call_frame.zig | 115 +++++++++++---- 4 files changed, 513 insertions(+), 122 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 5e72c4d05203..5e63bd97045b 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -135,8 +135,9 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { /// Tries to print the stack trace starting from the supplied base pointer to stderr, /// unbuffered, and ignores any error returned. +/// `context` is either *const os.ucontext_t on posix, or the result of CONTEXT.getRegs() on Windows.
/// TODO multithreaded awareness -pub fn dumpStackTraceFromBase(bp: usize, ip: usize) void { +pub fn dumpStackTraceFromBase(context: anytype) void { nosuspend { if (comptime builtin.target.isWasm()) { if (native_os == .wasi) { @@ -156,12 +157,15 @@ pub fn dumpStackTraceFromBase(bp: usize, ip: usize) void { }; const tty_config = io.tty.detectConfig(io.getStdErr()); if (native_os == .windows) { - writeCurrentStackTraceWindows(stderr, debug_info, tty_config, ip) catch return; + writeCurrentStackTraceWindows(stderr, debug_info, tty_config, context.ip) catch return; return; } - printSourceAtAddress(debug_info, stderr, ip, tty_config) catch return; - var it = StackIterator.init(null, bp); + var it = StackIterator.initWithContext(null, debug_info, context) catch return; + + // TODO: Should `it.dwarf_context.pc` be `it.getIp()`? (but then the non-dwarf case has to store ip) + printSourceAtAddress(debug_info, stderr, it.dwarf_context.pc, tty_config) catch return; + while (it.next()) |return_address| { // On arm64 macOS, the address of the last frame is 0x0 rather than 0x1 as on x86_64 macOS, // therefore, we do a check for `return_address == 0` before subtracting 1 from it to avoid @@ -206,6 +210,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT } stack_trace.index = slice.len; } else { + // TODO: This should use the dwarf unwinder if it's available var it = StackIterator.init(first_address, null); for (stack_trace.instruction_addresses, 0..) |*addr, i| { addr.* = it.next() orelse { @@ -405,6 +410,11 @@ pub const StackIterator = struct { // Last known value of the frame pointer register. fp: usize, + // When DebugInfo and a register context is available, this iterator can unwind + // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). 
+ debug_info: ?*DebugInfo, + dwarf_context: if (@hasDecl(os, "ucontext_t")) DW.UnwindContext else void = undefined, + pub fn init(first_address: ?usize, fp: ?usize) StackIterator { if (native_arch == .sparc64) { // Flush all the register windows on stack. @@ -416,9 +426,17 @@ pub const StackIterator = struct { return StackIterator{ .first_address = first_address, .fp = fp orelse @frameAddress(), + .debug_info = null, }; } + pub fn initWithContext(first_address: ?usize, debug_info: *DebugInfo, context: *const os.ucontext_t) !StackIterator { + var iterator = init(first_address, null); + iterator.debug_info = debug_info; + iterator.dwarf_context = try DW.UnwindContext.init(context); + return iterator; + } + // Offset of the saved BP wrt the frame pointer. const fp_offset = if (native_arch.isRISCV()) // On RISC-V the frame pointer points to the top of the saved register @@ -500,7 +518,28 @@ pub const StackIterator = struct { } } + fn next_dwarf(self: *StackIterator) !void { + const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); + if (module.getDwarfInfo()) |di| { + try di.unwindFrame(self.debug_info.?.allocator, &self.dwarf_context, module.base_address); + } else return error.MissingDebugInfo; + } + fn next_internal(self: *StackIterator) ?usize { + if (self.debug_info != null) { + if (self.next_dwarf()) |_| { + return self.dwarf_context.pc; + } else |err| { + // Fall back to fp unwinding on the first failure, + // as the register context won't be updated + self.fp = self.dwarf_context.getFp() catch 0; + self.debug_info = null; + + // TODO: Remove + print("\ndwarf unwind error {}, placing fp at 0x{x}\n\n", .{err, self.fp}); + } + } + const fp = if (comptime native_arch.isSPARC()) // On SPARC the offset is positive. (!) 
math.add(usize, self.fp, fp_offset) catch return null @@ -540,6 +579,8 @@ pub fn writeCurrentStackTrace( if (native_os == .windows) { return writeCurrentStackTraceWindows(out_stream, debug_info, tty_config, start_addr); } + + // TODO: Capture a context and use initWithContext var it = StackIterator.init(start_addr, null); while (it.next()) |return_address| { // On arm64 macOS, the address of the last frame is 0x0 rather than 0x1 as on x86_64 macOS, @@ -800,12 +841,14 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe // This coff file has embedded DWARF debug info _ = sec; - const num_sections = std.enums.directEnumArrayLen(DW.DwarfSection, 0); - var sections: [num_sections]?[]const u8 = [_]?[]const u8{null} ** num_sections; - errdefer for (sections) |section| if (section) |s| allocator.free(s); + var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { - sections[i] = try coff_obj.getSectionDataAlloc("." ++ section.name, allocator); + sections[i] = .{ + .data = try coff_obj.getSectionDataAlloc("." 
++ section.name, allocator), + .owned = true, + }; } var dwarf = DW.DwarfInfo{ @@ -813,7 +856,7 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe .sections = sections, }; - try DW.openDwarfDebugInfo(&dwarf, allocator); + try DW.openDwarfDebugInfo(&dwarf, allocator, coff_bytes); di.debug_data = PdbOrDwarf{ .dwarf = dwarf }; return di; } @@ -854,6 +897,8 @@ pub fn readElfDebugInfo( elf_filename: ?[]const u8, build_id: ?[]const u8, expected_crc: ?u32, + parent_sections: *DW.DwarfInfo.SectionArray, + parent_mapped_mem: ?[]align(mem.page_size) const u8, ) !ModuleDebugInfo { nosuspend { @@ -891,10 +936,20 @@ pub fn readElfDebugInfo( @ptrCast(@alignCast(&mapped_mem[shoff])), )[0..hdr.e_shnum]; - const num_sections = std.enums.directEnumArrayLen(DW.DwarfSection, 0); - var sections: [num_sections]?[]const u8 = [_]?[]const u8{null} ** num_sections; - var owned_sections: [num_sections][]const u8 = [_][]const u8{&.{}} ** num_sections; - errdefer for (owned_sections) |section| allocator.free(section); + var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; + + // Take ownership over any owned sections from the parent scope + for (parent_sections, &sections) |*parent, *section| { + if (parent.*) |*p| { + section.* = p.*; + p.owned = false; + } + } + + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); + + // TODO: This function should take a ptr to GNU_EH_FRAME (which is .eh_frame_hdr) from the ELF headers + // and prefill sections[.eh_frame_hdr] var separate_debug_filename: ?[]const u8 = null; var separate_debug_crc: ?u32 = null; @@ -920,7 +975,7 @@ pub fn readElfDebugInfo( if (section_index == null) continue; const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) { + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { var section_stream = io.fixedBufferStream(section_bytes); var section_reader = section_stream.reader(); const chdr = section_reader.readStruct(elf.Chdr) catch continue; @@ -937,11 +992,14 @@ pub fn readElfDebugInfo( const read = zlib_stream.reader().readAll(decompressed_section) catch continue; assert(read == decompressed_section.len); - sections[section_index.?] = decompressed_section; - owned_sections[section_index.?] = decompressed_section; - } else { - sections[section_index.?] = section_bytes; - } + break :blk .{ + .data = decompressed_section, + .owned = true, + }; + } else .{ + .data = section_bytes, + .owned = false, + }; } const missing_debug_info = @@ -953,6 +1011,12 @@ pub fn readElfDebugInfo( // Attempt to load debug info from an external file // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + const global_debug_directories = [_][]const u8{ "/usr/lib/debug", }; @@ -977,8 +1041,9 @@ pub fn readElfDebugInfo( // TODO: joinBuf would be ideal (with a fs.MAX_PATH_BYTES buffer) const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); defer allocator.free(path); + // TODO: Remove std.debug.print(" Loading external debug info from {s}\n", .{path}); - return readElfDebugInfo(allocator, path, null, separate_debug_crc) catch continue; + return readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem) catch continue; } } @@ -987,14 +1052,14 @@ pub fn readElfDebugInfo( if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; // / - if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc)) |debug_info| return debug_info else |_| {} + if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, 
&sections, mapped_mem)) |debug_info| return debug_info else |_| {} // /.debug/ { const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); defer allocator.free(path); - if (readElfDebugInfo(allocator, path, null, separate_debug_crc)) |debug_info| return debug_info else |_| {} + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} } var cwd_buf: [fs.MAX_PATH_BYTES]u8 = undefined; @@ -1004,7 +1069,7 @@ pub fn readElfDebugInfo( for (global_debug_directories) |global_directory| { const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); defer allocator.free(path); - if (readElfDebugInfo(allocator, path, null, separate_debug_crc)) |debug_info| return debug_info else |_| {} + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} } } @@ -1016,13 +1081,13 @@ pub fn readElfDebugInfo( .sections = sections, }; - try DW.openDwarfDebugInfo(&di, allocator); + try DW.openDwarfDebugInfo(&di, allocator, parent_mapped_mem orelse mapped_mem); return ModuleDebugInfo{ .base_address = undefined, .dwarf = di, - .mapped_memory = mapped_mem, - .owned_sections = owned_sections, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, }; } } @@ -1426,7 +1491,8 @@ pub const DebugInfo = struct { for (phdrs) |*phdr| { if (phdr.p_type != elf.PT_LOAD) continue; - const seg_start = info.dlpi_addr + phdr.p_vaddr; + // Overflowing addition is used to handle the case of vDSOs having a p_vaddr = 0xffffffffff700000 + const seg_start = info.dlpi_addr +% phdr.p_vaddr; const seg_end = seg_start + phdr.p_memsz; if (context.address >= seg_start and context.address < seg_end) { // Android libc uses NULL instead of an empty string to mark the @@ -1437,6 +1503,8 @@ pub const DebugInfo = struct { } } else return; + // TODO: Look for the 
GNU_EH_FRAME section and pass it to readElfDebugInfo + for (info.dlpi_phdr[0..info.dlpi_phnum]) |phdr| { if (phdr.p_type != elf.PT_NOTE) continue; @@ -1447,7 +1515,7 @@ pub const DebugInfo = struct { const note_type = mem.readIntSliceNative(u32, note_bytes[8..12]); if (note_type != elf.NT_GNU_BUILD_ID) continue; if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; - context.build_id = note_bytes[16 .. 16 + desc_size]; + context.build_id = note_bytes[16..][0..desc_size]; } // Stop the iteration @@ -1466,7 +1534,10 @@ pub const DebugInfo = struct { const obj_di = try self.allocator.create(ModuleDebugInfo); errdefer self.allocator.destroy(obj_di); - obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null); + var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; + // TODO: If GNU_EH_FRAME was found, set it in sections + + obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null); obj_di.base_address = ctx.base_address; try self.address_map.putNoClobber(ctx.base_address, obj_di); @@ -1491,6 +1562,7 @@ pub const ModuleDebugInfo = switch (native_os) { .macos, .ios, .watchos, .tvos => struct { base_address: usize, mapped_memory: []align(mem.page_size) const u8, + external_mapped_memory: ?[]align(mem.page_size) const u8, symbols: []const MachoSymbol, strings: [:0]const u8, ofiles: OFileTable, @@ -1511,6 +1583,7 @@ pub const ModuleDebugInfo = switch (native_os) { self.ofiles.deinit(); allocator.free(self.symbols); os.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| os.munmap(m); } fn loadOFile(self: *@This(), allocator: mem.Allocator, o_file_path: []const u8) !OFileInfo { @@ -1723,6 +1796,12 @@ pub const ModuleDebugInfo = switch (native_os) { unreachable; } } + + pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { + // TODO: Implement + _ = self; + return null; + } }, .uefi, .windows => struct { 
base_address: usize, @@ -1803,19 +1882,24 @@ pub const ModuleDebugInfo = switch (native_os) { .line_info = opt_line_info, }; } + + pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { + return switch (self.debug_data) { + .dwarf => |*dwarf| dwarf, + else => null, + }; + } }, .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris => struct { base_address: usize, dwarf: DW.DwarfInfo, mapped_memory: []align(mem.page_size) const u8, - owned_sections: [num_sections][]const u8 = [_][]const u8{&.{}} ** num_sections, - - const num_sections = 14; + external_mapped_memory: ?[]align(mem.page_size) const u8, fn deinit(self: *@This(), allocator: mem.Allocator) void { self.dwarf.deinit(allocator); - for (self.owned_sections) |section| allocator.free(section); os.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| os.munmap(m); } pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { @@ -1823,6 +1907,10 @@ pub const ModuleDebugInfo = switch (native_os) { const relocated_address = address - self.base_address; return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); } + + pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { + return &self.dwarf; + } }, .wasi => struct { fn deinit(self: *@This(), allocator: mem.Allocator) void { @@ -1836,6 +1924,11 @@ pub const ModuleDebugInfo = switch (native_os) { _ = address; return SymbolInfo{}; } + + pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { + _ = self; + return null; + } }, else => DW.DwarfInfo, }; @@ -1992,55 +2085,69 @@ fn dumpSegfaultInfoPosix(sig: i32, addr: usize, ctx_ptr: ?*const anyopaque) void } catch os.abort(); switch (native_arch) { - .x86 => { - const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = @as(usize, @intCast(ctx.mcontext.gregs[os.REG.EIP])); - const bp = @as(usize, @intCast(ctx.mcontext.gregs[os.REG.EBP])); - dumpStackTraceFromBase(bp, ip); - }, - .x86_64 => { - const ctx: *const 
os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = switch (native_os) { - .linux, .netbsd, .solaris => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.RIP])), - .freebsd => @as(usize, @intCast(ctx.mcontext.rip)), - .openbsd => @as(usize, @intCast(ctx.sc_rip)), - .macos => @as(usize, @intCast(ctx.mcontext.ss.rip)), - else => unreachable, - }; - const bp = switch (native_os) { - .linux, .netbsd, .solaris => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.RBP])), - .openbsd => @as(usize, @intCast(ctx.sc_rbp)), - .freebsd => @as(usize, @intCast(ctx.mcontext.rbp)), - .macos => @as(usize, @intCast(ctx.mcontext.ss.rbp)), - else => unreachable, - }; - dumpStackTraceFromBase(bp, ip); - }, - .arm => { - const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = @as(usize, @intCast(ctx.mcontext.arm_pc)); - const bp = @as(usize, @intCast(ctx.mcontext.arm_fp)); - dumpStackTraceFromBase(bp, ip); - }, - .aarch64 => { - const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = switch (native_os) { - .macos => @as(usize, @intCast(ctx.mcontext.ss.pc)), - .netbsd => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.PC])), - .freebsd => @as(usize, @intCast(ctx.mcontext.gpregs.elr)), - else => @as(usize, @intCast(ctx.mcontext.pc)), - }; - // x29 is the ABI-designated frame pointer - const bp = switch (native_os) { - .macos => @as(usize, @intCast(ctx.mcontext.ss.fp)), - .netbsd => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.FP])), - .freebsd => @as(usize, @intCast(ctx.mcontext.gpregs.x[os.REG.FP])), - else => @as(usize, @intCast(ctx.mcontext.regs[29])), - }; - dumpStackTraceFromBase(bp, ip); + .x86, + .x86_64, + .arm, + .aarch64, + => { + const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + dumpStackTraceFromBase(ctx); }, else => {}, } + + // TODO: Move this logic to dwarf.abi.regBytes + + // switch (native_arch) { + // .x86 => { + // const ctx = @ptrCast(*const os.ucontext_t, 
@alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + // const ip = @intCast(usize, ctx.mcontext.gregs[os.REG.EIP]) ; + // const bp = @intCast(usize, ctx.mcontext.gregs[os.REG.EBP]); + // dumpStackTraceFromBase(bp, ip); + // }, + // .x86_64 => { + // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + // const ip = switch (native_os) { + // .linux, .netbsd, .solaris => @intCast(usize, ctx.mcontext.gregs[os.REG.RIP]), + // .freebsd => @intCast(usize, ctx.mcontext.rip), + // .openbsd => @intCast(usize, ctx.sc_rip), + // .macos => @intCast(usize, ctx.mcontext.ss.rip), + // else => unreachable, + // }; + // const bp = switch (native_os) { + // .linux, .netbsd, .solaris => @intCast(usize, ctx.mcontext.gregs[os.REG.RBP]), + // .openbsd => @intCast(usize, ctx.sc_rbp), + // .freebsd => @intCast(usize, ctx.mcontext.rbp), + // .macos => @intCast(usize, ctx.mcontext.ss.rbp), + // else => unreachable, + // }; + // dumpStackTraceFromBase(bp, ip); + // }, + // .arm => { + // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + // const ip = @intCast(usize, ctx.mcontext.arm_pc); + // const bp = @intCast(usize, ctx.mcontext.arm_fp); + // dumpStackTraceFromBase(bp, ip); + // }, + // .aarch64 => { + // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + // const ip = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.pc), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), + // else => @intCast(usize, ctx.mcontext.pc), + // }; + // // x29 is the ABI-designated frame pointer + // const bp = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.fp), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.FP]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), + // else => @intCast(usize, ctx.mcontext.regs[29]), + // }; + // dumpStackTraceFromBase(bp, 
ip); + // }, + // else => {}, + // } } fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(windows.WINAPI) c_long { @@ -2105,7 +2212,7 @@ fn dumpSegfaultInfoWindows(info: *windows.EXCEPTION_POINTERS, msg: u8, label: ?[ else => unreachable, } catch os.abort(); - dumpStackTraceFromBase(regs.bp, regs.ip); + dumpStackTraceFromBase(regs); } pub fn dumpStackPointerAddr(prefix: []const u8) void { diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 826fd3fca3b1..d36aceee93c6 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -3,6 +3,7 @@ const std = @import("std.zig"); const debug = std.debug; const fs = std.fs; const io = std.io; +const os = std.os; const mem = std.mem; const math = std.math; const leb = @import("leb128.zig"); @@ -664,10 +665,17 @@ pub const DwarfSection = enum { }; pub const DwarfInfo = struct { - endian: std.builtin.Endian, + pub const Section = struct { + data: []const u8, + owned: bool, + }; + + const num_sections = std.enums.directEnumArrayLen(DwarfSection, 0); + pub const SectionArray = [num_sections]?Section; + pub const null_section_array = [_]?Section{null} ** num_sections; - // No section memory is owned by the DwarfInfo - sections: [std.enums.directEnumArrayLen(DwarfSection, 0)]?[]const u8, + endian: std.builtin.Endian, + sections: SectionArray, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(AbbrevTableHeader) = .{}, @@ -679,10 +687,13 @@ pub const DwarfInfo = struct { fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{}, pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 { - return di.sections[@enumToInt(dwarf_section)]; + return if (di.sections[@enumToInt(dwarf_section)]) |s| s.data else null; } pub fn deinit(di: *DwarfInfo, allocator: mem.Allocator) void { + for (di.sections) |s| { + if (s.owned) allocator.free(s.data); + } for (di.abbrev_table_list.items) |*abbrev| { abbrev.deinit(); } @@ -696,6 +707,8 @@ pub const DwarfInfo = struct { 
func.deinit(allocator); } di.func_list.deinit(allocator); + di.cie_map.deinit(allocator); + di.fde_list.deinit(allocator); } pub fn getSymbolName(di: *DwarfInfo, address: u64) ?[]const u8 { @@ -1443,7 +1456,6 @@ pub const DwarfInfo = struct { return getStringGeneric(di.section(.debug_line_str), offset); } - fn readDebugAddr(di: DwarfInfo, compile_unit: CompileUnit, index: u64) !u64 { const debug_addr = di.section(.debug_addr) orelse return badDwarf(); @@ -1470,12 +1482,13 @@ pub const DwarfInfo = struct { }; } - pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator) !void { + pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator, binary_mem: []const u8) !void { var has_eh_frame_hdr = false; - if (di.section(.eh_frame)) |eh_frame_hdr| { + if (di.section(.eh_frame_hdr)) |eh_frame_hdr| { has_eh_frame_hdr = true; - // TODO: Parse this section + // TODO: Parse this section to get the lookup table, and skip loading the entire section + _ = eh_frame_hdr; } @@ -1494,16 +1507,14 @@ pub const DwarfInfo = struct { } const id_len = @as(u8, if (is_64) 8 else 4); + const id = if (is_64) try reader.readInt(u64, di.endian) else try reader.readInt(u32, di.endian); const entry_bytes = eh_frame[stream.pos..][0 .. 
length - id_len]; - const id = try reader.readInt(u32, di.endian); - - // TODO: Get section_offset here (pass in from headers) if (id == 0) { const cie = try CommonInformationEntry.parse( entry_bytes, @ptrToInt(eh_frame.ptr), - 0, + @ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), true, length_offset, @sizeOf(usize), @@ -1511,12 +1522,12 @@ pub const DwarfInfo = struct { ); try di.cie_map.put(allocator, length_offset, cie); } else { - const cie_offset = stream.pos - 4 - id; + const cie_offset = stream.pos - id_len - id; const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); const fde = try FrameDescriptionEntry.parse( entry_bytes, @ptrToInt(eh_frame.ptr), - 0, + @ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), true, cie, @sizeOf(usize), @@ -1524,6 +1535,8 @@ pub const DwarfInfo = struct { ); try di.fde_list.append(allocator, fde); } + + stream.pos += entry_bytes.len; } // TODO: Avoiding sorting if has_eh_frame_hdr exists @@ -1536,16 +1549,116 @@ pub const DwarfInfo = struct { } } + pub fn unwindFrame(di: *const DwarfInfo, allocator: mem.Allocator, context: *UnwindContext, module_base_address: usize) !void { + if (context.pc == 0) return; + + // TODO: Handle signal frame (ie. 
use_prev_instr in libunwind) + // TODO: Use eh_frame_hdr to accelerate the search if available + //const eh_frame_hdr = di.section(.eh_frame_hdr) orelse return error.MissingDebugInfo; + + // Find the FDE + const unmapped_pc = context.pc - module_base_address; + const index = std.sort.binarySearch(FrameDescriptionEntry, unmapped_pc, di.fde_list.items, {}, struct { + pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { + if (pc < mid_item.pc_begin) { + return .lt; + } else { + const range_end = mid_item.pc_begin + mid_item.pc_range; + if (pc < range_end) { + return .eq; + } + + return .gt; + } + } + }.compareFn); + + const fde = if (index) |i| &di.fde_list.items[i] else return error.MissingFDE; + const cie = di.cie_map.getPtr(fde.cie_length_offset) orelse return error.MissingCIE; + + // const prev_cfa = context.cfa; + // const prev_pc = context.pc; + + // TODO: Cache this on self so we can re-use the allocations? + var vm = call_frame.VirtualMachine{}; + defer vm.deinit(allocator); + + const row = try vm.runToNative(allocator, unmapped_pc, cie.*, fde.*); + context.cfa = switch (row.cfa.rule) { + .val_offset => |offset| blk: { + const register = row.cfa.register orelse return error.InvalidCFARule; + const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, register)); + + // TODO: Check isValidMemory? + break :blk try call_frame.applyOffset(value, offset); + }, + .expression => |expression| { + + // TODO: Evaluate expression + _ = expression; + return error.UnimplementedTODO; + + }, + else => return error.InvalidCFARule, + }; + + // Update the context with the unwound values + // TODO: Need old cfa and pc? 
+ + var next_ucontext = context.ucontext; + + var has_next_ip = false; + for (vm.rowColumns(row)) |column| { + if (column.register) |register| { + const dest = try abi.regBytes(&next_ucontext, register); + if (register == cie.return_address_register) { + has_next_ip = column.rule != .undefined; + } + + try column.resolveValue(context.*, dest); + } + } + + context.ucontext = next_ucontext; + + if (has_next_ip) { + context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, @enumToInt(abi.Register.ip))); + } else { + context.pc = 0; + } + + mem.writeIntSliceNative(usize, try abi.regBytes(&context.ucontext, @enumToInt(abi.Register.sp)), context.cfa.?); + } +}; + +pub const UnwindContext = struct { + cfa: ?usize, + pc: usize, + ucontext: os.ucontext_t, + + pub fn init(ucontext: *const os.ucontext_t) !UnwindContext { + const pc = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, @enumToInt(abi.Register.ip))); + return .{ + .cfa = null, + .pc = pc, + .ucontext = ucontext.*, + }; + } + + pub fn getFp(self: *const UnwindContext) !usize { + return mem.readIntSliceNative(usize, try abi.regBytes(&self.ucontext, @enumToInt(abi.Register.fp))); + } }; /// Initialize DWARF info. The caller has the responsibility to initialize most -/// the DwarfInfo fields before calling. -pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator) !void { +/// the DwarfInfo fields before calling. `binary_mem` is the raw bytes of the +/// main binary file (not the secondary debug info file). 
+pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator, binary_mem: []const u8) !void { try di.scanAllFunctions(allocator); try di.scanAllCompileUnits(allocator); // Unwind info is not required - di.scanAllUnwindInfo(allocator) catch {}; + di.scanAllUnwindInfo(allocator, binary_mem) catch {}; } /// This function is to make it handy to comment out the return and make it diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 4d73a5d23ef9..913743f0f877 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -1,4 +1,110 @@ +const builtin = @import("builtin"); const std = @import("../std.zig"); +const os = std.os; +const mem = std.mem; + +/// Maps register names to their DWARF register number. +/// `bp`, `ip`, and `sp` are provided as aliases. +pub const Register = switch (builtin.cpu.arch) { + .x86 => { + + //pub const ip = Register.eip; + //pub const sp = Register. + }, + .x86_64 => enum(u8) { + rax, + rdx, + rcx, + rbx, + rsi, + rdi, + rbp, + rsp, + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15, + rip, + xmm0, + xmm1, + xmm2, + xmm3, + xmm4, + xmm5, + xmm6, + xmm7, + xmm8, + xmm9, + xmm10, + xmm11, + xmm12, + xmm13, + xmm14, + xmm15, + + pub const fp = Register.rbp; + pub const ip = Register.rip; + pub const sp = Register.rsp; + }, + else => enum {}, +}; + +fn RegBytesReturnType(comptime ContextPtrType: type) type { + const info = @typeInfo(ContextPtrType); + if (info != .Pointer or info.Pointer.child != os.ucontext_t) { + @compileError("Expected a pointer to ucontext_t, got " ++ @typeName(@TypeOf(ContextPtrType))); + } + + return if (info.Pointer.is_const) return []const u8 else []u8; +} + +/// Returns a slice containing the backing storage for `reg_number` +pub fn regBytes(ucontext_ptr: anytype, reg_number: u8) !RegBytesReturnType(@TypeOf(ucontext_ptr)) { + var m = &ucontext_ptr.mcontext; + + return switch (builtin.cpu.arch) { + .x86_64 => switch (builtin.os.tag) { + .linux, .netbsd, .solaris => switch (reg_number) { + 0 => 
mem.asBytes(&m.gregs[os.REG.RAX]), + 1 => mem.asBytes(&m.gregs[os.REG.RDX]), + 2 => mem.asBytes(&m.gregs[os.REG.RCX]), + 3 => mem.asBytes(&m.gregs[os.REG.RBX]), + 4 => mem.asBytes(&m.gregs[os.REG.RSI]), + 5 => mem.asBytes(&m.gregs[os.REG.RDI]), + 6 => mem.asBytes(&m.gregs[os.REG.RBP]), + 7 => mem.asBytes(&m.gregs[os.REG.RSP]), + 8 => mem.asBytes(&m.gregs[os.REG.R8]), + 9 => mem.asBytes(&m.gregs[os.REG.R9]), + 10 => mem.asBytes(&m.gregs[os.REG.R10]), + 11 => mem.asBytes(&m.gregs[os.REG.R11]), + 12 => mem.asBytes(&m.gregs[os.REG.R12]), + 13 => mem.asBytes(&m.gregs[os.REG.R13]), + 14 => mem.asBytes(&m.gregs[os.REG.R14]), + 15 => mem.asBytes(&m.gregs[os.REG.R15]), + 16 => mem.asBytes(&m.gregs[os.REG.RIP]), + 17...32 => |i| mem.asBytes(&m.fpregs.xmm[i - 17]), + else => error.InvalidRegister, + }, + //.freebsd => @intCast(usize, ctx.mcontext.rip), + //.openbsd => @intCast(usize, ctx.sc_rip), + //.macos => @intCast(usize, ctx.mcontext.ss.rip), + else => error.UnimplementedOs, + }, + else => error.UnimplementedArch, + }; +} + +/// Returns the ABI-defined default value this register has in the unwinding table +/// before running any of the CIE instructions. 
+pub fn getRegDefaultValue(reg_number: u8, out: []u8) void { + // TODO: Implement any ABI-specific rules for the default value for registers + _ = reg_number; + @memset(out, undefined); +} fn writeUnknownReg(writer: anytype, reg_number: u8) !void { try writer.print("reg{}", .{reg_number}); diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 90f9458e4305..e9761206ed1d 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -1,5 +1,6 @@ const builtin = @import("builtin"); const std = @import("../std.zig"); +const mem = std.mem; const debug = std.debug; const leb = @import("../leb128.zig"); const abi = @import("abi.zig"); @@ -216,10 +217,19 @@ pub const Instruction = union(Opcode) { } }; +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. +pub fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @intCast(usize, offset)) + else + try std.math.sub(usize, base, @intCast(usize, -offset)); +} + /// This is a virtual machine that runs DWARF call frame instructions. -/// See section 6.4.1 of the DWARF5 specification. pub const VirtualMachine = struct { - + /// See section 6.4.1 of the DWARF5 specification for details on each const RegisterRule = union(enum) { // The spec says that the default rule for each column is the undefined rule. // However, it also allows ABI / compiler authors to specify alternate defaults, so @@ -254,20 +264,63 @@ pub const VirtualMachine = struct { offset: u64 = 0, /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived - /// from, while other columns define register rules in terms of the CFA. + /// The register field of this column defines the register that CFA is derived from. 
cfa: Column = .{}, + + /// The register fields in these columns define the register the rule applies to. columns: ColumnRange = .{}, /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first. + /// the backing column storage first, as it may be referenced by previous rows. copy_on_write: bool = false, }; pub const Column = struct { - /// Register can only null in the case of the CFA column register: ?u8 = null, rule: RegisterRule = .{ .default = {} }, + + /// Resolves the register rule and places the result into `out` (see dwarf.abi.regBytes) + pub fn resolveValue(self: Column, context: dwarf.UnwindContext, out: []u8) !void { + switch (self.rule) { + .default => { + const register = self.register orelse return error.InvalidRegister; + abi.getRegDefaultValue(register, out); + }, + .undefined => { + @memset(out, undefined); + }, + .same_value => {}, + .offset => |offset| { + if (context.cfa) |cfa| { + const ptr = @intToPtr(*const usize, try applyOffset(cfa, offset)); + + // TODO: context.isValidMemory(ptr) + mem.writeIntSliceNative(usize, out, ptr.*); + } else return error.InvalidCFA; + }, + .val_offset => |offset| { + if (context.cfa) |cfa| { + mem.writeIntSliceNative(usize, out, try applyOffset(cfa, offset)); + } else return error.InvalidCFA; + }, + .register => |register| { + const src = try abi.regBytes(&context.ucontext, register); + if (src.len != out.len) return error.RegisterTypeMismatch; + @memcpy(out, try abi.regBytes(&context.ucontext, register)); + }, + .expression => |expression| { + // TODO + _ = expression; + unreachable; + }, + .val_expression => |expression| { + // TODO + _ = expression; + unreachable; + }, + .architectural => return error.UnimplementedRule, + } + } }; const ColumnRange = struct { @@ -294,7 +347,7 @@ pub const VirtualMachine = struct { return self.columns.items[row.columns.start..][0..row.columns.len]; } - /// Either retrieves or adds a column for `register` (non-CFA) in the 
current row + /// Either retrieves or adds a column for `register` (non-CFA) in the current row. fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { for (self.rowColumns(self.current_row)) |*c| { if (c.register == register) return c; @@ -315,7 +368,7 @@ pub const VirtualMachine = struct { /// Runs the CIE instructions, then the FDE instructions. Execution halts /// once the row that corresponds to `pc` is known, and it is returned. - pub fn unwindTo( + pub fn runTo( self: *VirtualMachine, allocator: std.mem.Allocator, pc: u64, @@ -328,12 +381,15 @@ pub const VirtualMachine = struct { if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; var prev_row: Row = self.current_row; - const streams = .{ - std.io.fixedBufferStream(cie.initial_instructions), - std.io.fixedBufferStream(fde.instructions), + + var cie_stream = std.io.fixedBufferStream(cie.initial_instructions); + var fde_stream = std.io.fixedBufferStream(fde.instructions); + var streams = [_]*std.io.FixedBufferStream([]const u8){ + &cie_stream, + &fde_stream, }; - outer: for (streams, 0..) |*stream, i| { + outer: for (&streams, 0..) 
|stream, i| { while (stream.pos < stream.buffer.len) { const instruction = try dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); prev_row = try self.step(allocator, cie, i == 0, instruction); @@ -346,14 +402,14 @@ pub const VirtualMachine = struct { return prev_row; } - pub fn unwindToNative( + pub fn runToNative( self: *VirtualMachine, allocator: std.mem.Allocator, pc: u64, cie: dwarf.CommonInformationEntry, fde: dwarf.FrameDescriptionEntry, - ) void { - self.stepTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian()); + ) !Row { + return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian()); } fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { @@ -451,30 +507,30 @@ pub const VirtualMachine = struct { try self.resolveCopyOnWrite(allocator); self.current_row.cfa = .{ .register = i.operands.register, - .rule = .{ .offset = @intCast(i64, i.operands.offset) }, + .rule = .{ .val_offset = @intCast(i64, i.operands.offset) }, }; }, .def_cfa_sf => |i| { try self.resolveCopyOnWrite(allocator); self.current_row.cfa = .{ .register = i.operands.register, - .rule = .{ .offset = i.operands.offset * cie.data_alignment_factor }, + .rule = .{ .val_offset = i.operands.offset * cie.data_alignment_factor }, }; }, .def_cfa_register => |i| { try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .offset) return error.InvalidOperation; + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; self.current_row.cfa.register = i.operands.register; }, .def_cfa_offset => |i| { try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ .offset = @intCast(i64, i.operands.offset) }; + if (self.current_row.cfa.register == null or 
self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ .val_offset = @intCast(i64, i.operands.offset) }; }, .def_cfa_offset_sf => |i| { try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ .offset = i.operands.offset * cie.data_alignment_factor }; + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ .val_offset = i.operands.offset * cie.data_alignment_factor }; }, .def_cfa_expression => |i| { try self.resolveCopyOnWrite(allocator); @@ -490,9 +546,18 @@ pub const VirtualMachine = struct { .expression = i.operands.block, }; }, - .val_offset => {}, - .val_offset_sf => {}, - .val_expression => {}, + .val_offset => { + // TODO: Implement + unreachable; + }, + .val_offset_sf => { + // TODO: Implement + unreachable; + }, + .val_expression => { + // TODO: Implement + unreachable; + }, } return prev_row; From 9145ff7da073966ace27151f7a0921b20c7860f4 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 22 May 2023 14:45:24 -0400 Subject: [PATCH 12/81] dwarf: implement more register number mappings - add dwarf.abi.RegisterContext to handle register numbers changing based on DWARF format --- lib/std/debug.zig | 12 ++ lib/std/dwarf.zig | 24 ++-- lib/std/dwarf/abi.zig | 267 ++++++++++++++++++++++++++--------- lib/std/dwarf/call_frame.zig | 4 +- 4 files changed, 226 insertions(+), 81 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 5e63bd97045b..66dfdc183814 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -521,6 +521,8 @@ pub const StackIterator = struct { fn next_dwarf(self: *StackIterator) !void { const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); if (module.getDwarfInfo()) |di| { + self.dwarf_context.reg_ctx.eh_frame = true; + 
self.dwarf_context.reg_ctx.is_macho = di.is_macho; try di.unwindFrame(self.debug_info.?.allocator, &self.dwarf_context, module.base_address); } else return error.MissingDebugInfo; } @@ -532,6 +534,10 @@ pub const StackIterator = struct { } else |err| { // Fall back to fp unwinding on the first failure, // as the register context won't be updated + + // TODO: Could still attempt dwarf unwinding after this, maybe marking non-updated registers as + // invalid, so the unwind only fails if it requires out of date registers? + self.fp = self.dwarf_context.getFp() catch 0; self.debug_info = null; @@ -854,6 +860,7 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe var dwarf = DW.DwarfInfo{ .endian = native_endian, .sections = sections, + .is_macho = false, }; try DW.openDwarfDebugInfo(&dwarf, allocator, coff_bytes); @@ -1079,6 +1086,7 @@ pub fn readElfDebugInfo( var di = DW.DwarfInfo{ .endian = endian, .sections = sections, + .is_macho = false, }; try DW.openDwarfDebugInfo(&di, allocator, parent_mapped_mem orelse mapped_mem); @@ -1682,6 +1690,10 @@ pub const ModuleDebugInfo = switch (native_os) { var di = DW.DwarfInfo{ .endian = .Little, + .is_macho = true, + + // TODO: Get this compiling + .debug_info = try chopSlice(mapped_mem, debug_info.offset, debug_info.size), .debug_abbrev = try chopSlice(mapped_mem, debug_abbrev.offset, debug_abbrev.size), .debug_str = try chopSlice(mapped_mem, debug_str.offset, debug_str.size), diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index d36aceee93c6..99c26051c888 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -686,6 +686,8 @@ pub const DwarfInfo = struct { // Sorted by start_pc fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{}, + is_macho: bool, + pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 { return if (di.sections[@enumToInt(dwarf_section)]) |s| s.data else null; } @@ -712,6 +714,7 @@ pub const DwarfInfo = struct { } pub fn getSymbolName(di: 
*DwarfInfo, address: u64) ?[]const u8 { + // TODO: Can this be binary searched? for (di.func_list.items) |*func| { if (func.pc_range) |range| { if (address >= range.start and address < range.end) { @@ -853,6 +856,9 @@ pub const DwarfInfo = struct { } }; + // TODO: Debug issue where `puts` in Ubuntu's libc was not found + //if (fn_name != null and pc_range != null) debug.print("func_list: {s} -> 0x{x}-0x{x}\n", .{fn_name.?, pc_range.?.start, pc_range.?.end}); + try di.func_list.append(allocator, Func{ .name = fn_name, .pc_range = pc_range, @@ -1587,7 +1593,7 @@ pub const DwarfInfo = struct { context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; - const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, register)); + const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, register, context.reg_ctx)); // TODO: Check isValidMemory? break :blk try call_frame.applyOffset(value, offset); @@ -1602,15 +1608,13 @@ pub const DwarfInfo = struct { else => return error.InvalidCFARule, }; - // Update the context with the unwound values - // TODO: Need old cfa and pc? 
- + // Update the context with the previous frame's values var next_ucontext = context.ucontext; var has_next_ip = false; for (vm.rowColumns(row)) |column| { if (column.register) |register| { - const dest = try abi.regBytes(&next_ucontext, register); + const dest = try abi.regBytes(&next_ucontext, register, context.reg_ctx); if (register == cie.return_address_register) { has_next_ip = column.rule != .undefined; } @@ -1622,12 +1626,12 @@ pub const DwarfInfo = struct { context.ucontext = next_ucontext; if (has_next_ip) { - context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, @enumToInt(abi.Register.ip))); + context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, abi.ipRegNum(), context.reg_ctx)); } else { context.pc = 0; } - mem.writeIntSliceNative(usize, try abi.regBytes(&context.ucontext, @enumToInt(abi.Register.sp)), context.cfa.?); + mem.writeIntSliceNative(usize, try abi.regBytes(&context.ucontext, abi.spRegNum(context.reg_ctx), context.reg_ctx), context.cfa.?); } }; @@ -1635,18 +1639,20 @@ pub const UnwindContext = struct { cfa: ?usize, pc: usize, ucontext: os.ucontext_t, + reg_ctx: abi.RegisterContext, pub fn init(ucontext: *const os.ucontext_t) !UnwindContext { - const pc = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, @enumToInt(abi.Register.ip))); + const pc = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, abi.ipRegNum(), null)); return .{ .cfa = null, .pc = pc, .ucontext = ucontext.*, + .reg_ctx = undefined, }; } pub fn getFp(self: *const UnwindContext) !usize { - return mem.readIntSliceNative(usize, try abi.regBytes(&self.ucontext, @enumToInt(abi.Register.fp))); + return mem.readIntSliceNative(usize, try abi.regBytes(&self.ucontext, abi.fpRegNum(self.reg_ctx), self.reg_ctx)); } }; diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 913743f0f877..35f805bbf4da 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -3,56 +3,87 @@ const std = @import("../std.zig"); 
const os = std.os; const mem = std.mem; -/// Maps register names to their DWARF register number. -/// `bp`, `ip`, and `sp` are provided as aliases. -pub const Register = switch (builtin.cpu.arch) { - .x86 => { - - //pub const ip = Register.eip; - //pub const sp = Register. - }, - .x86_64 => enum(u8) { - rax, - rdx, - rcx, - rbx, - rsi, - rdi, - rbp, - rsp, - r8, - r9, - r10, - r11, - r12, - r13, - r14, - r15, - rip, - xmm0, - xmm1, - xmm2, - xmm3, - xmm4, - xmm5, - xmm6, - xmm7, - xmm8, - xmm9, - xmm10, - xmm11, - xmm12, - xmm13, - xmm14, - xmm15, - - pub const fp = Register.rbp; - pub const ip = Register.rip; - pub const sp = Register.rsp; - }, - else => enum {}, +pub const RegisterContext = struct { + eh_frame: bool, + is_macho: bool, }; +pub fn ipRegNum() u8 { + return switch (builtin.cpu.arch) { + .x86 => 8, + .x86_64 => 16, + .arm => error.InvalidRegister, // TODO + .aarch64 => error.InvalidRegister, // TODO + + // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + // const ip = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.pc), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), + // else => @intCast(usize, ctx.mcontext.pc), + // }; + // // x29 is the ABI-designated frame pointer + // const bp = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.fp), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.FP]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), + // else => @intCast(usize, ctx.mcontext.regs[29]), + // }; + else => unreachable, + }; +} + +pub fn fpRegNum(reg_ctx: RegisterContext) u8 { + return switch (builtin.cpu.arch) { + // GCC on OS X did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO + .x86 => if (reg_ctx.eh_frame and reg_ctx.is_macho) 4 else 5, + .x86_64 => 6, + .arm => error.InvalidRegister, // TODO + .aarch64 => 
error.InvalidRegister, // TODO + + // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + // const ip = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.pc), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), + // else => @intCast(usize, ctx.mcontext.pc), + // }; + // // x29 is the ABI-designated frame pointer + // const bp = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.fp), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.FP]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), + // else => @intCast(usize, ctx.mcontext.regs[29]), + // }; + else => unreachable, + }; +} + +pub fn spRegNum(reg_ctx: RegisterContext) u8 { + return switch (builtin.cpu.arch) { + .x86 => if (reg_ctx.eh_frame and reg_ctx.is_macho) 5 else 4, + .x86_64 => 7, + .arm => error.InvalidRegister, // TODO + .aarch64 => error.InvalidRegister, // TODO + + // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); + // const ip = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.pc), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), + // else => @intCast(usize, ctx.mcontext.pc), + // }; + // // x29 is the ABI-designated frame pointer + // const bp = switch (native_os) { + // .macos => @intCast(usize, ctx.mcontext.ss.fp), + // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.FP]), + // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), + // else => @intCast(usize, ctx.mcontext.regs[29]), + // }; + else => unreachable, + }; +} + fn RegBytesReturnType(comptime ContextPtrType: type) type { const info = @typeInfo(ContextPtrType); if (info != .Pointer or info.Pointer.child != os.ucontext_t) { @@ -62,36 +93,132 @@ fn RegBytesReturnType(comptime ContextPtrType: type) type { return if 
(info.Pointer.is_const) return []const u8 else []u8; } -/// Returns a slice containing the backing storage for `reg_number` -pub fn regBytes(ucontext_ptr: anytype, reg_number: u8) !RegBytesReturnType(@TypeOf(ucontext_ptr)) { +/// Returns a slice containing the backing storage for `reg_number`. +/// +/// `reg_ctx` describes in what context the register number is used, as it can have different +/// meanings depending on the DWARF container. It is only required when getting the stack or +/// frame pointer register on some architectures. +pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext) !RegBytesReturnType(@TypeOf(ucontext_ptr)) { var m = &ucontext_ptr.mcontext; return switch (builtin.cpu.arch) { + .x86 => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EAX]), + 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ECX]), + 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EDX]), + 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBX]), + 4...5 => if (reg_ctx) |r| bytes: { + if (reg_number == 4) { + break :bytes if (r.eh_frame and r.is_macho) + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBP]) + else + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESP]); + } else { + break :bytes if (r.eh_frame and r.is_macho) + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESP]) + else + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBP]); + } + } else error.RegisterContextRequired, + 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESI]), + 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EDI]), + 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EIP]), + 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EFL]), + 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.CS]), + 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.SS]), + 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.DS]), + 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ES]), + 14 => 
mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.FS]), + 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.GS]), + 16...23 => error.InvalidRegister, // TODO: Support loading ST0-ST7 from mcontext.fpregs + // TODO: Map TRAPNO, ERR, UESP + 32...39 => error.InvalidRegister, // TODO: Support loading XMM0-XMM7 from mcontext.fpregs + else => error.InvalidRegister, + }, .x86_64 => switch (builtin.os.tag) { .linux, .netbsd, .solaris => switch (reg_number) { - 0 => mem.asBytes(&m.gregs[os.REG.RAX]), - 1 => mem.asBytes(&m.gregs[os.REG.RDX]), - 2 => mem.asBytes(&m.gregs[os.REG.RCX]), - 3 => mem.asBytes(&m.gregs[os.REG.RBX]), - 4 => mem.asBytes(&m.gregs[os.REG.RSI]), - 5 => mem.asBytes(&m.gregs[os.REG.RDI]), - 6 => mem.asBytes(&m.gregs[os.REG.RBP]), - 7 => mem.asBytes(&m.gregs[os.REG.RSP]), - 8 => mem.asBytes(&m.gregs[os.REG.R8]), - 9 => mem.asBytes(&m.gregs[os.REG.R9]), - 10 => mem.asBytes(&m.gregs[os.REG.R10]), - 11 => mem.asBytes(&m.gregs[os.REG.R11]), - 12 => mem.asBytes(&m.gregs[os.REG.R12]), - 13 => mem.asBytes(&m.gregs[os.REG.R13]), - 14 => mem.asBytes(&m.gregs[os.REG.R14]), - 15 => mem.asBytes(&m.gregs[os.REG.R15]), - 16 => mem.asBytes(&m.gregs[os.REG.RIP]), + 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RAX]), + 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RDX]), + 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RCX]), + 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RBX]), + 4 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RSI]), + 5 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RDI]), + 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RBP]), + 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RSP]), + 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R8]), + 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R9]), + 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R10]), + 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R11]), + 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R12]), + 13 => 
mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R13]), + 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R14]), + 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R15]), + 16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RIP]), 17...32 => |i| mem.asBytes(&m.fpregs.xmm[i - 17]), else => error.InvalidRegister, }, - //.freebsd => @intCast(usize, ctx.mcontext.rip), - //.openbsd => @intCast(usize, ctx.sc_rip), - //.macos => @intCast(usize, ctx.mcontext.ss.rip), + .freebsd => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.rax), + 1 => mem.asBytes(&ucontext_ptr.mcontext.rdx), + 2 => mem.asBytes(&ucontext_ptr.mcontext.rcx), + 3 => mem.asBytes(&ucontext_ptr.mcontext.rbx), + 4 => mem.asBytes(&ucontext_ptr.mcontext.rsi), + 5 => mem.asBytes(&ucontext_ptr.mcontext.rdi), + 6 => mem.asBytes(&ucontext_ptr.mcontext.rbp), + 7 => mem.asBytes(&ucontext_ptr.mcontext.rsp), + 8 => mem.asBytes(&ucontext_ptr.mcontext.r8), + 9 => mem.asBytes(&ucontext_ptr.mcontext.r9), + 10 => mem.asBytes(&ucontext_ptr.mcontext.r10), + 11 => mem.asBytes(&ucontext_ptr.mcontext.r11), + 12 => mem.asBytes(&ucontext_ptr.mcontext.r12), + 13 => mem.asBytes(&ucontext_ptr.mcontext.r13), + 14 => mem.asBytes(&ucontext_ptr.mcontext.r14), + 15 => mem.asBytes(&ucontext_ptr.mcontext.r15), + 16 => mem.asBytes(&ucontext_ptr.mcontext.rip), + // TODO: Extract xmm state from mcontext.fpstate? 
+ else => error.InvalidRegister, + }, + .openbsd => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.sc_rax), + 1 => mem.asBytes(&ucontext_ptr.sc_rdx), + 2 => mem.asBytes(&ucontext_ptr.sc_rcx), + 3 => mem.asBytes(&ucontext_ptr.sc_rbx), + 4 => mem.asBytes(&ucontext_ptr.sc_rsi), + 5 => mem.asBytes(&ucontext_ptr.sc_rdi), + 6 => mem.asBytes(&ucontext_ptr.sc_rbp), + 7 => mem.asBytes(&ucontext_ptr.sc_rsp), + 8 => mem.asBytes(&ucontext_ptr.sc_r8), + 9 => mem.asBytes(&ucontext_ptr.sc_r9), + 10 => mem.asBytes(&ucontext_ptr.sc_r10), + 11 => mem.asBytes(&ucontext_ptr.sc_r11), + 12 => mem.asBytes(&ucontext_ptr.sc_r12), + 13 => mem.asBytes(&ucontext_ptr.sc_r13), + 14 => mem.asBytes(&ucontext_ptr.sc_r14), + 15 => mem.asBytes(&ucontext_ptr.sc_r15), + 16 => mem.asBytes(&ucontext_ptr.sc_rip), + // TODO: Extract xmm state from sc_fpstate? + else => error.InvalidRegister, + }, + .macos => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.ss.rax), + 1 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdx), + 2 => mem.asBytes(&ucontext_ptr.mcontext.ss.rcx), + 3 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbx), + 4 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsi), + 5 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdi), + 6 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbp), + 7 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsp), + 8 => mem.asBytes(&ucontext_ptr.mcontext.ss.r8), + 9 => mem.asBytes(&ucontext_ptr.mcontext.ss.r9), + 10 => mem.asBytes(&ucontext_ptr.mcontext.ss.r10), + 11 => mem.asBytes(&ucontext_ptr.mcontext.ss.r11), + 12 => mem.asBytes(&ucontext_ptr.mcontext.ss.r12), + 13 => mem.asBytes(&ucontext_ptr.mcontext.ss.r13), + 14 => mem.asBytes(&ucontext_ptr.mcontext.ss.r14), + 15 => mem.asBytes(&ucontext_ptr.mcontext.ss.r15), + 16 => mem.asBytes(&ucontext_ptr.mcontext.ss.rip), + else => error.InvalidRegister, + }, else => error.UnimplementedOs, }, else => error.UnimplementedArch, diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index e9761206ed1d..8a8d0830313c 
100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -304,9 +304,9 @@ pub const VirtualMachine = struct { } else return error.InvalidCFA; }, .register => |register| { - const src = try abi.regBytes(&context.ucontext, register); + const src = try abi.regBytes(&context.ucontext, register, context.reg_ctx); if (src.len != out.len) return error.RegisterTypeMismatch; - @memcpy(out, try abi.regBytes(&context.ucontext, register)); + @memcpy(out, try abi.regBytes(&context.ucontext, register, context.reg_ctx)); }, .expression => |expression| { // TODO From e72e762d1e81b06c38902348a3f6625a85e1dce0 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 22 May 2023 15:56:04 -0400 Subject: [PATCH 13/81] dwarf: implement more register mappings, fix up macos compile --- lib/std/debug.zig | 114 +++++++++--------------------------------- lib/std/dwarf.zig | 2 +- lib/std/dwarf/abi.zig | 92 +++++++++++++++++++++------------- 3 files changed, 80 insertions(+), 128 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 66dfdc183814..0bbc2b511dce 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -532,6 +532,8 @@ pub const StackIterator = struct { if (self.next_dwarf()) |_| { return self.dwarf_context.pc; } else |err| { + if (err != error.MissingFDE) print("DWARF unwind error: {}\n", .{err}); + // Fall back to fp unwinding on the first failure, // as the register context won't be updated @@ -540,9 +542,6 @@ pub const StackIterator = struct { self.fp = self.dwarf_context.getFp() catch 0; self.debug_info = null; - - // TODO: Remove - print("\ndwarf unwind error {}, placing fp at 0x{x}\n\n", .{err, self.fp}); } } @@ -1570,7 +1569,6 @@ pub const ModuleDebugInfo = switch (native_os) { .macos, .ios, .watchos, .tvos => struct { base_address: usize, mapped_memory: []align(mem.page_size) const u8, - external_mapped_memory: ?[]align(mem.page_size) const u8, symbols: []const MachoSymbol, strings: [:0]const u8, ofiles: OFileTable, @@ -1591,7 +1589,6 @@ 
pub const ModuleDebugInfo = switch (native_os) { self.ofiles.deinit(); allocator.free(self.symbols); os.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| os.munmap(m); } fn loadOFile(self: *@This(), allocator: mem.Allocator, o_file_path: []const u8) !OFileInfo { @@ -1637,102 +1634,37 @@ pub const ModuleDebugInfo = switch (native_os) { addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); } - var opt_debug_line: ?macho.section_64 = null; - var opt_debug_info: ?macho.section_64 = null; - var opt_debug_abbrev: ?macho.section_64 = null; - var opt_debug_str: ?macho.section_64 = null; - var opt_debug_str_offsets: ?macho.section_64 = null; - var opt_debug_line_str: ?macho.section_64 = null; - var opt_debug_ranges: ?macho.section_64 = null; - var opt_debug_loclists: ?macho.section_64 = null; - var opt_debug_rnglists: ?macho.section_64 = null; - var opt_debug_addr: ?macho.section_64 = null; - var opt_debug_names: ?macho.section_64 = null; - var opt_debug_frame: ?macho.section_64 = null; - + var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; for (segcmd.?.getSections()) |sect| { const name = sect.sectName(); - if (mem.eql(u8, name, "__debug_line")) { - opt_debug_line = sect; - } else if (mem.eql(u8, name, "__debug_info")) { - opt_debug_info = sect; - } else if (mem.eql(u8, name, "__debug_abbrev")) { - opt_debug_abbrev = sect; - } else if (mem.eql(u8, name, "__debug_str")) { - opt_debug_str = sect; - } else if (mem.eql(u8, name, "__debug_str_offsets")) { - opt_debug_str_offsets = sect; - } else if (mem.eql(u8, name, "__debug_line_str")) { - opt_debug_line_str = sect; - } else if (mem.eql(u8, name, "__debug_ranges")) { - opt_debug_ranges = sect; - } else if (mem.eql(u8, name, "__debug_loclists")) { - opt_debug_loclists = sect; - } else if (mem.eql(u8, name, "__debug_rnglists")) { - opt_debug_rnglists = sect; - } else if (mem.eql(u8, name, "__debug_addr")) { - opt_debug_addr = sect; - } else if (mem.eql(u8, name, 
"__debug_names")) { - opt_debug_names = sect; - } else if (mem.eql(u8, name, "__debug_frame")) { - opt_debug_frame = sect; + + var section_index: ?usize = null; + inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, name)) section_index = i; } + if (section_index == null) continue; + + const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index.?] = .{ + .data = section_bytes, + .owned = false, + }; } - const debug_line = opt_debug_line orelse - return error.MissingDebugInfo; - const debug_info = opt_debug_info orelse - return error.MissingDebugInfo; - const debug_str = opt_debug_str orelse - return error.MissingDebugInfo; - const debug_abbrev = opt_debug_abbrev orelse - return error.MissingDebugInfo; + const missing_debug_info = + sections[@enumToInt(DW.DwarfSection.debug_info)] == null or + sections[@enumToInt(DW.DwarfSection.debug_abbrev)] == null or + sections[@enumToInt(DW.DwarfSection.debug_str)] == null or + sections[@enumToInt(DW.DwarfSection.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; var di = DW.DwarfInfo{ .endian = .Little, + .sections = sections, .is_macho = true, - - // TODO: Get this compiling - - .debug_info = try chopSlice(mapped_mem, debug_info.offset, debug_info.size), - .debug_abbrev = try chopSlice(mapped_mem, debug_abbrev.offset, debug_abbrev.size), - .debug_str = try chopSlice(mapped_mem, debug_str.offset, debug_str.size), - .debug_str_offsets = if (opt_debug_str_offsets) |debug_str_offsets| - try chopSlice(mapped_mem, debug_str_offsets.offset, debug_str_offsets.size) - else - null, - .debug_line = try chopSlice(mapped_mem, debug_line.offset, debug_line.size), - .debug_line_str = if (opt_debug_line_str) |debug_line_str| - try chopSlice(mapped_mem, debug_line_str.offset, debug_line_str.size) - else - null, - .debug_ranges = if (opt_debug_ranges) |debug_ranges| - try chopSlice(mapped_mem, debug_ranges.offset, 
debug_ranges.size) - else - null, - .debug_loclists = if (opt_debug_loclists) |debug_loclists| - try chopSlice(mapped_mem, debug_loclists.offset, debug_loclists.size) - else - null, - .debug_rnglists = if (opt_debug_rnglists) |debug_rnglists| - try chopSlice(mapped_mem, debug_rnglists.offset, debug_rnglists.size) - else - null, - .debug_addr = if (opt_debug_addr) |debug_addr| - try chopSlice(mapped_mem, debug_addr.offset, debug_addr.size) - else - null, - .debug_names = if (opt_debug_names) |debug_names| - try chopSlice(mapped_mem, debug_names.offset, debug_names.size) - else - null, - .debug_frame = if (opt_debug_frame) |debug_frame| - try chopSlice(mapped_mem, debug_frame.offset, debug_frame.size) - else - null, }; - try DW.openDwarfDebugInfo(&di, allocator); + try DW.openDwarfDebugInfo(&di, allocator, mapped_mem); var info = OFileInfo{ .di = di, .addr_table = addr_table, @@ -1784,7 +1716,7 @@ pub const ModuleDebugInfo = switch (native_os) { .compile_unit_name = compile_unit.die.getAttrString( o_file_di, DW.AT.name, - o_file_di.debug_str, + o_file_di.section(.debug_str), compile_unit.*, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 99c26051c888..b5294b59876e 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1626,7 +1626,7 @@ pub const DwarfInfo = struct { context.ucontext = next_ucontext; if (has_next_ip) { - context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, abi.ipRegNum(), context.reg_ctx)); + context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, comptime abi.ipRegNum(), context.reg_ctx)); } else { context.pc = 0; } diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 35f805bbf4da..869993c8e887 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -12,23 +12,8 @@ pub fn ipRegNum() u8 { return switch (builtin.cpu.arch) { .x86 => 8, .x86_64 => 16, - .arm => error.InvalidRegister, // 
TODO - .aarch64 => error.InvalidRegister, // TODO - - // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); - // const ip = switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.pc), - // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), - // else => @intCast(usize, ctx.mcontext.pc), - // }; - // // x29 is the ABI-designated frame pointer - // const bp = switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.fp), - // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.FP]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), - // else => @intCast(usize, ctx.mcontext.regs[29]), - // }; + .arm => 15, + .aarch64 => 32, else => unreachable, }; } @@ -38,8 +23,8 @@ pub fn fpRegNum(reg_ctx: RegisterContext) u8 { // GCC on OS X did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO .x86 => if (reg_ctx.eh_frame and reg_ctx.is_macho) 4 else 5, .x86_64 => 6, - .arm => error.InvalidRegister, // TODO - .aarch64 => error.InvalidRegister, // TODO + .arm => 11, + .aarch64 => 29, // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); // const ip = switch (native_os) { @@ -63,23 +48,8 @@ pub fn spRegNum(reg_ctx: RegisterContext) u8 { return switch (builtin.cpu.arch) { .x86 => if (reg_ctx.eh_frame and reg_ctx.is_macho) 5 else 4, .x86_64 => 7, - .arm => error.InvalidRegister, // TODO - .aarch64 => error.InvalidRegister, // TODO - - // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); - // const ip = switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.pc), - // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), - // else => @intCast(usize, ctx.mcontext.pc), - // }; - // // x29 is the ABI-designated frame pointer - // const bp = 
switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.fp), - // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.FP]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), - // else => @intCast(usize, ctx.mcontext.regs[29]), - // }; + .arm => 13, + .aarch64 => 31, else => unreachable, }; } @@ -221,6 +191,56 @@ pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext }, else => error.UnimplementedOs, }, + .arm => switch (builtin.os.tag) { + .linux => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.arm_r0), + 1 => mem.asBytes(&ucontext_ptr.mcontext.arm_r1), + 2 => mem.asBytes(&ucontext_ptr.mcontext.arm_r2), + 3 => mem.asBytes(&ucontext_ptr.mcontext.arm_r3), + 4 => mem.asBytes(&ucontext_ptr.mcontext.arm_r4), + 5 => mem.asBytes(&ucontext_ptr.mcontext.arm_r5), + 6 => mem.asBytes(&ucontext_ptr.mcontext.arm_r6), + 7 => mem.asBytes(&ucontext_ptr.mcontext.arm_r7), + 8 => mem.asBytes(&ucontext_ptr.mcontext.arm_r8), + 9 => mem.asBytes(&ucontext_ptr.mcontext.arm_r9), + 10 => mem.asBytes(&ucontext_ptr.mcontext.arm_r10), + 11 => mem.asBytes(&ucontext_ptr.mcontext.arm_fp), + 12 => mem.asBytes(&ucontext_ptr.mcontext.arm_ip), + 13 => mem.asBytes(&ucontext_ptr.mcontext.arm_sp), + 14 => mem.asBytes(&ucontext_ptr.mcontext.arm_lr), + 15 => mem.asBytes(&ucontext_ptr.mcontext.arm_pc), + // CPSR is not allocated a register number (See: https://github.com/ARM-software/abi-aa/blob/main/aadwarf32/aadwarf32.rst, Section 4.1) + else => error.InvalidRegister, + }, + else => error.UnimplementedOs, + }, + .aarch64 => switch (builtin.os.tag) { + .macos => switch (reg_number) { + 0...28 => mem.asBytes(&ucontext_ptr.mcontext.ss.regs[reg_number]), + 29 => mem.asBytes(&ucontext_ptr.mcontext.ss.fp), + 30 => mem.asBytes(&ucontext_ptr.mcontext.ss.lr), + 31 => mem.asBytes(&ucontext_ptr.mcontext.ss.sp), + 32 => mem.asBytes(&ucontext_ptr.mcontext.ss.pc), + else => error.InvalidRegister, + }, + .netbsd => switch (reg_number) { + 0...34 
=> mem.asBytes(&ucontext_ptr.mcontext.gregs[reg_number]), + else => error.InvalidRegister, + }, + .freebsd => switch (reg_number) { + 0...29 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.x[reg_number]), + 30 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.lr), + 31 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.sp), + 32 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.elr), // TODO: This seems wrong, but it was in the old debug.zig code for PC, check this + else => error.InvalidRegister, + }, + else => switch (reg_number) { + 0...30 => mem.asBytes(&ucontext_ptr.mcontext.regs[reg_number]), + 31 => mem.asBytes(&ucontext_ptr.mcontext.sp), + 32 => mem.asBytes(&ucontext_ptr.mcontext.pc), + else => error.InvalidRegister, + }, + }, else => error.UnimplementedArch, }; } From c98e03fc7ed509536076132fe9ea5d9d00038c23 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 22 May 2023 17:59:20 -0400 Subject: [PATCH 14/81] - rework CFI instruction parsing to not use std.meta - move register formatting code to zig-dwarfdump --- lib/std/dwarf/abi.zig | 93 ++---------------------------------- lib/std/dwarf/call_frame.zig | 38 ++++++++++----- 2 files changed, 29 insertions(+), 102 deletions(-) diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 869993c8e887..fdaac05b8778 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -25,21 +25,6 @@ pub fn fpRegNum(reg_ctx: RegisterContext) u8 { .x86_64 => 6, .arm => 11, .aarch64 => 29, - - // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); - // const ip = switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.pc), - // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), - // else => @intCast(usize, ctx.mcontext.pc), - // }; - // // x29 is the ABI-designated frame pointer - // const bp = switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.fp), - // .netbsd => @intCast(usize,
ctx.mcontext.gregs[os.REG.FP]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), - // else => @intCast(usize, ctx.mcontext.regs[29]), - // }; else => unreachable, }; } @@ -231,7 +216,10 @@ pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext 0...29 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.x[reg_number]), 30 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.lr), 31 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.sp), - 32 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.elr), // TODO: This seems wrong, but it was in the old debug.zig code for PC, check this + + // TODO: This seems wrong, but it was in the previous debug.zig code for mapping PC, check this + 32 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.elr), + else => error.InvalidRegister, }, else => switch (reg_number) { @@ -252,76 +240,3 @@ pub fn getRegDefaultValue(reg_number: u8, out: []u8) void { _ = reg_number; @memset(out, undefined); } - -fn writeUnknownReg(writer: anytype, reg_number: u8) !void { - try writer.print("reg{}", .{reg_number}); -} - -pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number: u8) !void { - if (arch) |a| { - switch (a) { - .x86_64 => { - switch (reg_number) { - 0 => try writer.writeAll("RAX"), - 1 => try writer.writeAll("RDX"), - 2 => try writer.writeAll("RCX"), - 3 => try writer.writeAll("RBX"), - 4 => try writer.writeAll("RSI"), - 5 => try writer.writeAll("RDI"), - 6 => try writer.writeAll("RBP"), - 7 => try writer.writeAll("RSP"), - 8...15 => try writer.print("R{}", .{reg_number}), - 16 => try writer.writeAll("RIP"), - 17...32 => try writer.print("XMM{}", .{reg_number - 17}), - 33...40 => try writer.print("ST{}", .{reg_number - 33}), - 41...48 => try writer.print("MM{}", .{reg_number - 41}), - 49 => try writer.writeAll("RFLAGS"), - 50 => try writer.writeAll("ES"), - 51 => try writer.writeAll("CS"), - 52 => try writer.writeAll("SS"), - 53 => try writer.writeAll("DS"), - 54 => try writer.writeAll("FS"), - 55 => try 
writer.writeAll("GS"), - // 56-57 Reserved - 58 => try writer.writeAll("FS.BASE"), - 59 => try writer.writeAll("GS.BASE"), - // 60-61 Reserved - 62 => try writer.writeAll("TR"), - 63 => try writer.writeAll("LDTR"), - 64 => try writer.writeAll("MXCSR"), - 65 => try writer.writeAll("FCW"), - 66 => try writer.writeAll("FSW"), - 67...82 => try writer.print("XMM{}", .{reg_number - 51}), - // 83-117 Reserved - 118...125 => try writer.print("K{}", .{reg_number - 118}), - // 126-129 Reserved - else => try writeUnknownReg(writer, reg_number), - } - }, - - // TODO: Add x86, aarch64 - - else => try writeUnknownReg(writer, reg_number), - } - } else try writeUnknownReg(writer, reg_number); -} - -const FormatRegisterData = struct { - reg_number: u8, - arch: ?std.Target.Cpu.Arch, -}; - -pub fn formatRegister( - data: FormatRegisterData, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, -) !void { - _ = fmt; - _ = options; - try writeRegisterName(writer, data.arch, data.reg_number); -} - -pub fn fmtRegister(reg_number: u8, arch: ?std.Target.Cpu.Arch) std.fmt.Formatter(formatRegister) { - return .{ .data = .{ .reg_number = reg_number, .arch = arch } }; -} diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 8a8d0830313c..853297a2f290 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -38,12 +38,12 @@ const Opcode = enum(u8) { val_expression = 0x16, // These opcodes encode an operand in the lower 6 bits of the opcode itself - pub const lo_inline = Opcode.advance_loc; + pub const lo_inline = @enumToInt(Opcode.advance_loc); pub const hi_inline = @enumToInt(Opcode.restore) | 0b111111; // These opcodes are trailed by zero or more operands - pub const lo_reserved = Opcode.nop; - pub const hi_reserved = Opcode.val_expression; + pub const lo_reserved = @enumToInt(Opcode.nop); + pub const hi_reserved = @enumToInt(Opcode.val_expression); // Vendor-specific opcodes pub const lo_user = 0x1c; @@ -187,28 
+187,40 @@ pub const Instruction = union(Opcode) { val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }), val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }), + fn readOperands( + self: *Instruction, + stream: *std.io.FixedBufferStream([]const u8), + opcode_value: ?u6, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !void { + switch (self.*) { + inline else => |*inst| inst.* = try @TypeOf(inst.*).read(stream, opcode_value, addr_size_bytes, endian), + } + } + pub fn read( stream: *std.io.FixedBufferStream([]const u8), addr_size_bytes: u8, endian: std.builtin.Endian, ) !Instruction { - @setEvalBranchQuota(1800); - return switch (try stream.reader().readByte()) { - inline @enumToInt(Opcode.lo_inline)...Opcode.hi_inline => |opcode| blk: { + inline Opcode.lo_inline...Opcode.hi_inline => |opcode| blk: { const e = @intToEnum(Opcode, opcode & 0b11000000); - const payload_type = std.meta.TagPayload(Instruction, e); - const value = try payload_type.read(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian); - break :blk @unionInit(Instruction, @tagName(e), value); + var result = @unionInit(Instruction, @tagName(e), undefined); + try result.readOperands(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian); + break :blk result; }, - inline @enumToInt(Opcode.lo_reserved)...@enumToInt(Opcode.hi_reserved) => |opcode| blk: { + inline Opcode.lo_reserved...Opcode.hi_reserved => |opcode| blk: { const e = @intToEnum(Opcode, opcode); - const payload_type = std.meta.TagPayload(Instruction, e); - const value = try payload_type.read(stream, null, addr_size_bytes, endian); - break :blk @unionInit(Instruction, @tagName(e), value); + var result = @unionInit(Instruction, @tagName(e), undefined); + try result.readOperands(stream, null, addr_size_bytes, endian); + break :blk result; }, Opcode.lo_user...Opcode.hi_user => error.UnimplementedUserOpcode, else => |opcode| blk: { + + // TODO: Remove this 
std.debug.print("Opcode {x}\n", .{opcode}); break :blk error.InvalidOpcode; From a325d7f6d197f7ec1bb6b658075feb8b9e30a264 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 22 May 2023 19:24:43 -0400 Subject: [PATCH 15/81] fmt and cleanup --- lib/std/debug.zig | 57 +-------------------------------- lib/std/dwarf.zig | 1 - lib/std/dwarf/expressions.zig | 59 ++++++++++++++--------------------- 3 files changed, 24 insertions(+), 93 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 0bbc2b511dce..af2fe20b7667 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -162,8 +162,6 @@ pub fn dumpStackTraceFromBase(context: anytype) void { } var it = StackIterator.initWithContext(null, debug_info, context) catch return; - - // TODO: Should `it.dwarf_context.pc` be `it.getIp()`? (but then the non-dwarf case has to store ip) printSourceAtAddress(debug_info, stderr, it.dwarf_context.pc, tty_config) catch return; while (it.next()) |return_address| { @@ -528,7 +526,7 @@ pub const StackIterator = struct { } fn next_internal(self: *StackIterator) ?usize { - if (self.debug_info != null) { + if (self.debug_info != null) { if (self.next_dwarf()) |_| { return self.dwarf_context.pc; } else |err| { @@ -2039,59 +2037,6 @@ fn dumpSegfaultInfoPosix(sig: i32, addr: usize, ctx_ptr: ?*const anyopaque) void }, else => {}, } - - // TODO: Move this logic to dwarf.abi.regBytes - - // switch (native_arch) { - // .x86 => { - // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); - // const ip = @intCast(usize, ctx.mcontext.gregs[os.REG.EIP]) ; - // const bp = @intCast(usize, ctx.mcontext.gregs[os.REG.EBP]); - // dumpStackTraceFromBase(bp, ip); - // }, - // .x86_64 => { - // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); - // const ip = switch (native_os) { - // .linux, .netbsd, .solaris => @intCast(usize, ctx.mcontext.gregs[os.REG.RIP]), - // .freebsd => @intCast(usize, ctx.mcontext.rip), - // 
.openbsd => @intCast(usize, ctx.sc_rip), - // .macos => @intCast(usize, ctx.mcontext.ss.rip), - // else => unreachable, - // }; - // const bp = switch (native_os) { - // .linux, .netbsd, .solaris => @intCast(usize, ctx.mcontext.gregs[os.REG.RBP]), - // .openbsd => @intCast(usize, ctx.sc_rbp), - // .freebsd => @intCast(usize, ctx.mcontext.rbp), - // .macos => @intCast(usize, ctx.mcontext.ss.rbp), - // else => unreachable, - // }; - // dumpStackTraceFromBase(bp, ip); - // }, - // .arm => { - // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); - // const ip = @intCast(usize, ctx.mcontext.arm_pc); - // const bp = @intCast(usize, ctx.mcontext.arm_fp); - // dumpStackTraceFromBase(bp, ip); - // }, - // .aarch64 => { - // const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)); - // const ip = switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.pc), - // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.PC]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.elr), - // else => @intCast(usize, ctx.mcontext.pc), - // }; - // // x29 is the ABI-designated frame pointer - // const bp = switch (native_os) { - // .macos => @intCast(usize, ctx.mcontext.ss.fp), - // .netbsd => @intCast(usize, ctx.mcontext.gregs[os.REG.FP]), - // .freebsd => @intCast(usize, ctx.mcontext.gpregs.x[os.REG.FP]), - // else => @intCast(usize, ctx.mcontext.regs[29]), - // }; - // dumpStackTraceFromBase(bp, ip); - // }, - // else => {}, - // } } fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(windows.WINAPI) c_long { diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index b5294b59876e..0ad342cae86f 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1603,7 +1603,6 @@ pub const DwarfInfo = struct { // TODO: Evaluate expression _ = expression; return error.UnimplementedTODO; - }, else => return error.InvalidCFARule, }; diff --git a/lib/std/dwarf/expressions.zig 
b/lib/std/dwarf/expressions.zig index dd838ecf963f..6d94138d68fe 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -18,14 +18,14 @@ pub const StackMachineOptions = struct { /// Expressions can be decoded for non-native address size and endianness, /// but can only be executed if the current target matches the configuration. pub fn StackMachine(comptime options: StackMachineOptions) type { - const addr_type = switch(options.addr_size) { + const addr_type = switch (options.addr_size) { 2 => u16, 4 => u32, 8 => u64, else => @compileError("Unsupported address size of " ++ options.addr_size), }; - const addr_type_signed = switch(options.addr_size) { + const addr_type_signed = switch (options.addr_size) { 2 => i16, 4 => i32, 8 => i64, @@ -61,19 +61,15 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { fn generic(value: anytype) Value { const int_info = @typeInfo(@TypeOf(value)).Int; if (@sizeOf(@TypeOf(value)) > options.addr_size) { - return .{ - .generic = switch (int_info.signedness) { - .signed => @bitCast(addr_type, @truncate(addr_type_signed, value)), - .unsigned => @truncate(addr_type, value), - } - }; + return .{ .generic = switch (int_info.signedness) { + .signed => @bitCast(addr_type, @truncate(addr_type_signed, value)), + .unsigned => @truncate(addr_type, value), + } }; } else { - return .{ - .generic = switch (int_info.signedness) { - .signed => @bitCast(addr_type, @intCast(addr_type_signed, value)), - .unsigned => @intCast(addr_type, value), - } - }; + return .{ .generic = switch (int_info.signedness) { + .signed => @bitCast(addr_type, @intCast(addr_type_signed, value)), + .unsigned => @intCast(addr_type, value), + } }; } } @@ -113,20 +109,15 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { => generic(try leb.readILEB128(i64, reader)), OP.lit0...OP.lit31 => |n| generic(n - OP.lit0), OP.reg0...OP.reg31 => |n| .{ .register = n - OP.reg0 }, - OP.breg0...OP.breg31 => |n| .{ - .base_register = 
.{ - .base_register = n - OP.breg0, - .offset = try leb.readILEB128(i64, reader), - } - }, + OP.breg0...OP.breg31 => |n| .{ .base_register = .{ + .base_register = n - OP.breg0, + .offset = try leb.readILEB128(i64, reader), + } }, OP.regx => .{ .register = try leb.readULEB128(u8, reader) }, - OP.bregx, - OP.regval_type => .{ - .base_register = .{ - .base_register = try leb.readULEB128(u8, reader), - .offset = try leb.readILEB128(i64, reader), - } - }, + OP.bregx, OP.regval_type => .{ .base_register = .{ + .base_register = try leb.readULEB128(u8, reader), + .offset = try leb.readILEB128(i64, reader), + } }, OP.piece => .{ .composite_location = .{ .size = try leb.readULEB128(u8, reader), @@ -139,9 +130,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { .offset = try leb.readILEB128(i64, reader), }, }, - OP.implicit_value, - OP.entry_value - => blk: { + OP.implicit_value, OP.entry_value => blk: { const size = try leb.readULEB128(u8, reader); if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; const block = stream.buffer[stream.pos..][0..size]; @@ -156,12 +145,10 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; const value_bytes = stream.buffer[stream.pos..][0..size]; stream.pos += size; - break :blk .{ - .base_type = .{ - .type_offset = type_offset, - .value_bytes = value_bytes, - } - }; + break :blk .{ .base_type = .{ + .type_offset = type_offset, + .value_bytes = value_bytes, + } }; }, OP.deref_type, OP.xderef_type, From d1a9bb1fea6491fb9ad4ad7552d8edf816c59aef Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 25 May 2023 09:52:58 -0400 Subject: [PATCH 16/81] debug: fixup context detection for wasi --- lib/std/debug.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index af2fe20b7667..2652ad57a84c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -411,7 +411,8 
@@ pub const StackIterator = struct { // When DebugInfo and a register context is available, this iterator can unwind // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). debug_info: ?*DebugInfo, - dwarf_context: if (@hasDecl(os, "ucontext_t")) DW.UnwindContext else void = undefined, + dwarf_context: if (supports_context) DW.UnwindContext else void = undefined, + const supports_context = @hasDecl(os.system, "ucontext_t"); pub fn init(first_address: ?usize, fp: ?usize) StackIterator { if (native_arch == .sparc64) { @@ -526,7 +527,7 @@ pub const StackIterator = struct { } fn next_internal(self: *StackIterator) ?usize { - if (self.debug_info != null) { + if (supports_context and self.debug_info != null) { if (self.next_dwarf()) |_| { return self.dwarf_context.pc; } else |err| { From d74c8acdfbf297587db2c85a85808bcedbb9e219 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 25 May 2023 10:15:02 -0400 Subject: [PATCH 17/81] dwarf: fixup for sort changes --- lib/std/dwarf.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 0ad342cae86f..bafef1c6729d 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1546,7 +1546,7 @@ pub const DwarfInfo = struct { } // TODO: Avoiding sorting if has_eh_frame_hdr exists - std.sort.sort(FrameDescriptionEntry, di.fde_list.items, {}, struct { + std.mem.sort(FrameDescriptionEntry, di.fde_list.items, {}, struct { fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { _ = ctx; return a.pc_begin < b.pc_begin; From 2f75d20d87fe68eb2695acd37fc2364c06c4c582 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 25 May 2023 13:11:21 -0400 Subject: [PATCH 18/81] debug: use an explicit context type instead of anytype for dumpStackTraceFromBase, update crash_report to use this for exceptions --- lib/std/debug.zig | 13 +++++++-- src/crash_report.zig | 63 +++++++------------------------------------- 2 files changed, 20 insertions(+), 56 
deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 2652ad57a84c..a8e8b7bce867 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -133,11 +133,20 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { } } +pub const StackTraceContext = blk: { + if (native_os == .windows) { + break :blk @typeInfo(@TypeOf(os.windows.CONTEXT.getRegs)).Fn.return_type.?; + } else if (@hasDecl(os.system, "ucontext_t")) { + break :blk *const os.ucontext_t; + } else { + break :blk void; + } +}; + /// Tries to print the stack trace starting from the supplied base pointer to stderr, /// unbuffered, and ignores any error returned. -/// `context` is either *const os.ucontext_t on posix, or the result of CONTEXT.getRegs() on Windows. /// TODO multithreaded awareness -pub fn dumpStackTraceFromBase(context: anytype) void { +pub fn dumpStackTraceFromBase(context: StackTraceContext) void { nosuspend { if (comptime builtin.target.isWasm()) { if (native_os == .wasi) { diff --git a/src/crash_report.zig b/src/crash_report.zig index fc41528321c7..5cd00c5b1312 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -203,53 +203,11 @@ fn handleSegfaultPosix(sig: i32, info: *const os.siginfo_t, ctx_ptr: ?*const any }; const stack_ctx: StackContext = switch (builtin.cpu.arch) { - .x86 => ctx: { - const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = @as(usize, @intCast(ctx.mcontext.gregs[os.REG.EIP])); - const bp = @as(usize, @intCast(ctx.mcontext.gregs[os.REG.EBP])); - break :ctx StackContext{ .exception = .{ .bp = bp, .ip = ip } }; - }, - .x86_64 => ctx: { - const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = switch (builtin.os.tag) { - .linux, .netbsd, .solaris => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.RIP])), - .freebsd => @as(usize, @intCast(ctx.mcontext.rip)), - .openbsd => @as(usize, @intCast(ctx.sc_rip)), - .macos => @as(usize, @intCast(ctx.mcontext.ss.rip)), - else => unreachable, - }; - const bp = switch 
(builtin.os.tag) { - .linux, .netbsd, .solaris => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.RBP])), - .openbsd => @as(usize, @intCast(ctx.sc_rbp)), - .freebsd => @as(usize, @intCast(ctx.mcontext.rbp)), - .macos => @as(usize, @intCast(ctx.mcontext.ss.rbp)), - else => unreachable, - }; - break :ctx StackContext{ .exception = .{ .bp = bp, .ip = ip } }; - }, - .arm => ctx: { - const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = @as(usize, @intCast(ctx.mcontext.arm_pc)); - const bp = @as(usize, @intCast(ctx.mcontext.arm_fp)); - break :ctx StackContext{ .exception = .{ .bp = bp, .ip = ip } }; - }, - .aarch64 => ctx: { - const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); - const ip = switch (native_os) { - .macos => @as(usize, @intCast(ctx.mcontext.ss.pc)), - .netbsd => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.PC])), - .freebsd => @as(usize, @intCast(ctx.mcontext.gpregs.elr)), - else => @as(usize, @intCast(ctx.mcontext.pc)), - }; - // x29 is the ABI-designated frame pointer - const bp = switch (native_os) { - .macos => @as(usize, @intCast(ctx.mcontext.ss.fp)), - .netbsd => @as(usize, @intCast(ctx.mcontext.gregs[os.REG.FP])), - .freebsd => @as(usize, @intCast(ctx.mcontext.gpregs.x[os.REG.FP])), - else => @as(usize, @intCast(ctx.mcontext.regs[29])), - }; - break :ctx StackContext{ .exception = .{ .bp = bp, .ip = ip } }; - }, + .x86, + .x86_64, + .arm, + .aarch64, + => StackContext{ .exception = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)) }, else => .not_supported, }; @@ -277,7 +235,7 @@ fn handleSegfaultWindowsExtra(info: *os.windows.EXCEPTION_POINTERS, comptime msg const stack_ctx = if (@hasDecl(os.windows, "CONTEXT")) ctx: { const regs = info.ContextRecord.getRegs(); - break :ctx StackContext{ .exception = .{ .bp = regs.bp, .ip = regs.ip } }; + break :ctx StackContext{ .exception = regs }; } else ctx: { const addr = @intFromPtr(info.ExceptionRecord.ExceptionAddress); break :ctx StackContext{ 
.current = .{ .ret_addr = addr } }; @@ -314,10 +272,7 @@ const StackContext = union(enum) { current: struct { ret_addr: ?usize, }, - exception: struct { - bp: usize, - ip: usize, - }, + exception: debug.StackTraceContext, not_supported: void, pub fn dumpStackTrace(ctx: @This()) void { @@ -325,8 +280,8 @@ const StackContext = union(enum) { .current => |ct| { debug.dumpCurrentStackTrace(ct.ret_addr); }, - .exception => |ex| { - debug.dumpStackTraceFromBase(ex.bp, ex.ip); + .exception => |context| { + debug.dumpStackTraceFromBase(context); }, .not_supported => { const stderr = io.getStdErr().writer(); From 551f153718ee1f670ce95c4e485bfb759b84632e Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 26 May 2023 18:47:21 -0400 Subject: [PATCH 19/81] dwarf: fixes for non-64 bit systems --- lib/std/dwarf.zig | 20 ++++++++++++-------- lib/std/dwarf/call_frame.zig | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index bafef1c6729d..549b004c4984 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -693,8 +693,8 @@ pub const DwarfInfo = struct { } pub fn deinit(di: *DwarfInfo, allocator: mem.Allocator) void { - for (di.sections) |s| { - if (s.owned) allocator.free(s.data); + for (di.sections) |opt_section| { + if (opt_section) |s| if (s.owned) allocator.free(s.data); } for (di.abbrev_table_list.items) |*abbrev| { abbrev.deinit(); @@ -1504,12 +1504,12 @@ pub const DwarfInfo = struct { while (stream.pos < stream.buffer.len) { const length_offset = stream.pos; - var length: u64 = try reader.readInt(u32, di.endian); + var length: usize = try reader.readInt(u32, di.endian); if (length == 0) break; var is_64 = length == math.maxInt(u32); if (is_64) { - length = try reader.readInt(u64, di.endian); + length = std.math.cast(usize, try reader.readInt(u64, di.endian)) orelse return error.LengthOverflow; } const id_len = @as(u8, if (is_64) 8 else 4); @@ -1746,10 +1746,14 @@ fn readEhPointer(reader: anytype, enc: u8, 
addr_size_bytes: u8, ctx: EhPointerCo }; if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { + if (@sizeOf(usize) != addr_size_bytes) { + // See the documentation for `follow_indirect` + return error.NonNativeIndirection; + } + + const native_ptr = math.cast(usize, ptr) orelse return error.PointerOverflow; return switch (addr_size_bytes) { - 2 => return @intToPtr(*const u16, ptr).*, - 4 => return @intToPtr(*const u32, ptr).*, - 8 => return @intToPtr(*const u64, ptr).*, + 2, 4, 8 => return @intToPtr(*const usize, native_ptr).*, else => return error.UnsupportedAddrSize, }; } else { @@ -1960,7 +1964,7 @@ pub const FrameDescriptionEntry = struct { var aug_data: []const u8 = &[_]u8{}; const lsda_pointer = if (cie.aug_str.len > 0) blk: { - const aug_data_len = try leb.readULEB128(u64, reader); + const aug_data_len = try leb.readULEB128(usize, reader); const aug_data_start = stream.pos; aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 853297a2f290..f473356716de 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -99,7 +99,7 @@ const Operand = enum { .u16_delta => try reader.readInt(u16, endian), .u32_delta => try reader.readInt(u32, endian), .block => { - const block_len = try leb.readULEB128(u64, reader); + const block_len = try leb.readULEB128(usize, reader); if (stream.pos + block_len > stream.buffer.len) return error.InvalidOperand; const block = stream.buffer[stream.pos..][0..block_len]; From 5ebca4392e3d3e616607c00785d67c4ae31330cb Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 27 May 2023 14:14:43 -0400 Subject: [PATCH 20/81] debug: fixing more compile errors on arches that I hadn't tested on yet --- lib/std/debug.zig | 87 ++++++++++++++++++++++++++++--------------- lib/std/dwarf.zig | 1 + lib/std/dwarf/abi.zig | 11 ++++++ 3 files changed, 70 insertions(+), 29 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 
a8e8b7bce867..57e7883862e5 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -421,7 +421,11 @@ pub const StackIterator = struct { // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). debug_info: ?*DebugInfo, dwarf_context: if (supports_context) DW.UnwindContext else void = undefined, - const supports_context = @hasDecl(os.system, "ucontext_t"); + const supports_context = @hasDecl(os.system, "ucontext_t") and + (builtin.os.tag != .linux or switch (builtin.cpu.arch) { + .mips, .riscv64 => false, + else => true, + }); pub fn init(first_address: ?usize, fp: ?usize) StackIterator { if (native_arch == .sparc64) { @@ -528,7 +532,7 @@ pub const StackIterator = struct { fn next_dwarf(self: *StackIterator) !void { const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); - if (module.getDwarfInfo()) |di| { + if (try module.getDwarfInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc)) |di| { self.dwarf_context.reg_ctx.eh_frame = true; self.dwarf_context.reg_ctx.is_macho = di.is_macho; try di.unwindFrame(self.debug_info.?.allocator, &self.dwarf_context, module.base_address); @@ -1684,6 +1688,26 @@ pub const ModuleDebugInfo = switch (native_os) { return info; } + fn getOFileForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*OFileInfo { + nosuspend { + const relocated_address = address - self.base_address; + const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse + return null; + + const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); + var o_file_info = self.ofiles.get(o_file_path) orelse + (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return null, + else => return err, + }); + + return &o_file_info.di; + } + } + pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { nosuspend { // Translate the VA into an address 
into this object @@ -1749,40 +1773,38 @@ pub const ModuleDebugInfo = switch (native_os) { } } - pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { - // TODO: Implement - _ = self; - return null; + pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const DW.DwarfInfo { + nosuspend { + const relocated_address = address - self.base_address; + const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse + return null; + + const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); + var o_file_info = self.ofiles.get(o_file_path) orelse + (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return null, + else => return err, + }); + + return &o_file_info.di; + } } }, .uefi, .windows => struct { base_address: usize, debug_data: PdbOrDwarf, coff_image_base: u64, + /// Only used if debug_data is .pdb coff_section_headers: []coff.SectionHeader, fn deinit(self: *@This(), allocator: mem.Allocator) void { - switch (self.debug_data) { - .dwarf => |*dwarf| { - allocator.free(dwarf.debug_info); - allocator.free(dwarf.debug_abbrev); - allocator.free(dwarf.debug_str); - allocator.free(dwarf.debug_line); - if (dwarf.debug_str_offsets) |d| allocator.free(d); - if (dwarf.debug_line_str) |d| allocator.free(d); - if (dwarf.debug_ranges) |d| allocator.free(d); - if (dwarf.debug_loclists) |d| allocator.free(d); - if (dwarf.debug_rnglists) |d| allocator.free(d); - if (dwarf.debug_addr) |d| allocator.free(d); - if (dwarf.debug_names) |d| allocator.free(d); - if (dwarf.debug_frame) |d| allocator.free(d); - }, - .pdb => { - allocator.free(self.coff_section_headers); - }, - } - self.debug_data.deinit(allocator); + if (self.debug_data == .pdb) { + allocator.free(self.coff_section_headers); + } } pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { @@ -1835,7 +1857,10 @@ pub const 
ModuleDebugInfo = switch (native_os) { }; } - pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { + pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const DW.DwarfInfo { + _ = allocator; + _ = address; + return switch (self.debug_data) { .dwarf => |*dwarf| dwarf, else => null, @@ -1860,7 +1885,9 @@ pub const ModuleDebugInfo = switch (native_os) { return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); } - pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { + pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const DW.DwarfInfo { + _ = allocator; + _ = address; return &self.dwarf; } }, @@ -1877,8 +1904,10 @@ pub const ModuleDebugInfo = switch (native_os) { return SymbolInfo{}; } - pub fn getDwarfInfo(self: *@This()) ?*const DW.DwarfInfo { + pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const DW.DwarfInfo { _ = self; + _ = allocator; + _ = address; return null; } }, diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 549b004c4984..e18efc446b50 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1556,6 +1556,7 @@ pub const DwarfInfo = struct { } pub fn unwindFrame(di: *const DwarfInfo, allocator: mem.Allocator, context: *UnwindContext, module_base_address: usize) !void { + if (!comptime abi.isSupportedArch(builtin.target.cpu.arch)) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return; // TODO: Handle signal frame (ie. 
use_prev_instr in libunwind) diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index fdaac05b8778..9d594fa5fe97 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -8,6 +8,17 @@ pub const RegisterContext = struct { is_macho: bool, }; +pub fn isSupportedArch(arch: std.Target.Cpu.Arch) bool { + return switch (arch) { + .x86, + .x86_64, + .arm, + .aarch64, + => true, + else => false, + }; +} + pub fn ipRegNum() u8 { return switch (builtin.cpu.arch) { .x86 => 8, From 865d4d2d8c066fd72b1e81cfc81c927aeccde8db Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 27 May 2023 16:43:45 -0400 Subject: [PATCH 21/81] debug: more fixups for mips linux not having ucontext_t - increase test-std max_rss to 1.1 above the CI observed amount --- lib/std/debug.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 57e7883862e5..2a1039e748af 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -423,7 +423,7 @@ pub const StackIterator = struct { dwarf_context: if (supports_context) DW.UnwindContext else void = undefined, const supports_context = @hasDecl(os.system, "ucontext_t") and (builtin.os.tag != .linux or switch (builtin.cpu.arch) { - .mips, .riscv64 => false, + .mips, .mipsel, .mips64, .mips64el, .riscv64 => false, else => true, }); From dd2035735fa2160f85bf8fb8cd8b978d77fac292 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 27 May 2023 17:33:13 -0400 Subject: [PATCH 22/81] debug: fix memory leak when an error occurs opening a pdb file --- lib/std/debug.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 2a1039e748af..248ace447dac 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -881,6 +881,7 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe // Only used by pdb path di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); + errdefer allocator.free(di.coff_section_headers); var path_buf: 
[windows.MAX_PATH]u8 = undefined; const len = try coff_obj.getPdbPath(path_buf[0..]); From 5781016c35c27a64b08c0701a92ad7c1a253a869 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 29 May 2023 01:26:30 -0400 Subject: [PATCH 23/81] dwarf: add support for .eh_frame_hdr when unwinding - .eh_frame_hdr contains a binary-searchable data structure for finding an FDE. If present, we can use this section to avoid having to parse the entire FDE/CIE list in the binary, instead only entries that are actually required for unwinding are read. - rework the inputs pc-relative pointer decoding to support both already-mapped sections as well as sections mapped from a file - store the VirtualMachine on UnwindContext so the allocations can be reused --- lib/std/debug.zig | 64 ++++-- lib/std/dwarf.zig | 401 +++++++++++++++++++++++++++-------- lib/std/dwarf/call_frame.zig | 7 + 3 files changed, 363 insertions(+), 109 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 248ace447dac..d74939121239 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -171,6 +171,7 @@ pub fn dumpStackTraceFromBase(context: StackTraceContext) void { } var it = StackIterator.initWithContext(null, debug_info, context) catch return; + defer it.deinit(); printSourceAtAddress(debug_info, stderr, it.dwarf_context.pc, tty_config) catch return; while (it.next()) |return_address| { @@ -219,6 +220,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT } else { // TODO: This should use the dwarf unwinder if it's available var it = StackIterator.init(first_address, null); + defer it.deinit(); for (stack_trace.instruction_addresses, 0..) 
|*addr, i| { addr.* = it.next() orelse { stack_trace.index = i; @@ -445,10 +447,18 @@ pub const StackIterator = struct { pub fn initWithContext(first_address: ?usize, debug_info: *DebugInfo, context: *const os.ucontext_t) !StackIterator { var iterator = init(first_address, null); iterator.debug_info = debug_info; - iterator.dwarf_context = try DW.UnwindContext.init(context); + iterator.dwarf_context = try DW.UnwindContext.init(context, &isValidMemory); return iterator; } + pub fn deinit(self: *StackIterator) void { + if (supports_context) { + if (self.debug_info) |debug_info| { + self.dwarf_context.deinit(debug_info.allocator); + } + } + } + // Offset of the saved BP wrt the frame pointer. const fp_offset = if (native_arch.isRISCV()) // On RISC-V the frame pointer points to the top of the saved register @@ -599,6 +609,8 @@ pub fn writeCurrentStackTrace( // TODO: Capture a context and use initWithContext var it = StackIterator.init(start_addr, null); + defer it.deinit(); + while (it.next()) |return_address| { // On arm64 macOS, the address of the last frame is 0x0 rather than 0x1 as on x86_64 macOS, // therefore, we do a check for `return_address == 0` before subtracting 1 from it to avoid @@ -957,19 +969,15 @@ pub fn readElfDebugInfo( var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; - // Take ownership over any owned sections from the parent scope + // Combine section list. This takes ownership over any owned sections from the parent scope. for (parent_sections, §ions) |*parent, *section| { if (parent.*) |*p| { section.* = p.*; p.owned = false; } } - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - // TODO: This function should take a ptr to GNU_EH_FRAME (which is .eh_frame_hdr) from the ELF headers - // and prefil sections[.eh_frame_hdr] - var separate_debug_filename: ?[]const u8 = null; var separate_debug_crc: ?u32 = null; @@ -992,6 +1000,7 @@ pub fn readElfDebugInfo( if (mem.eql(u8, "." 
++ section.name, name)) section_index = i; } if (section_index == null) continue; + if (sections[section_index.?] != null) continue; const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { @@ -1496,7 +1505,8 @@ pub const DebugInfo = struct { // Output base_address: usize = undefined, name: []const u8 = undefined, - build_id: ?[]const u8 = undefined, + build_id: ?[]const u8 = null, + gnu_eh_frame: ?[]const u8 = null, } = .{ .address = address }; const CtxTy = @TypeOf(ctx); @@ -1523,19 +1533,24 @@ pub const DebugInfo = struct { } } else return; - // TODO: Look for the GNU_EH_FRAME section and pass it to readElfDebugInfo - for (info.dlpi_phdr[0..info.dlpi_phnum]) |phdr| { - if (phdr.p_type != elf.PT_NOTE) continue; - - const note_bytes = @intToPtr([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; - const name_size = mem.readIntSliceNative(u32, note_bytes[0..4]); - if (name_size != 4) continue; - const desc_size = mem.readIntSliceNative(u32, note_bytes[4..8]); - const note_type = mem.readIntSliceNative(u32, note_bytes[8..12]); - if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; - context.build_id = note_bytes[16..][0..desc_size]; + switch (phdr.p_type) { + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const note_bytes = @intToPtr([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; + const name_size = mem.readIntSliceNative(u32, note_bytes[0..4]); + if (name_size != 4) continue; + const desc_size = mem.readIntSliceNative(u32, note_bytes[4..8]); + const note_type = mem.readIntSliceNative(u32, note_bytes[8..12]); + if (note_type != elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; + context.build_id = note_bytes[16..][0..desc_size]; + }, + elf.PT_GNU_EH_FRAME => { + context.gnu_eh_frame = @intToPtr([*]const u8, info.dlpi_addr + 
phdr.p_vaddr)[0..phdr.p_memsz]; + }, + else => {}, + } } // Stop the iteration @@ -1555,7 +1570,16 @@ pub const DebugInfo = struct { errdefer self.allocator.destroy(obj_di); var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; - // TODO: If GNU_EH_FRAME was found, set it in sections + if (ctx.gnu_eh_frame) |eh_frame_hdr| { + // This is a special case - pointer offsets inside .eh_frame_hdr + // are encoded relative to its base address, so we must use the + // version that is already memory mapped, and not the one that + // will be mapped separately from the ELF file. + sections[@enumToInt(DW.DwarfSection.eh_frame_hdr)] = .{ + .data = eh_frame_hdr, + .owned = false, + }; + } obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, §ions, null); obj_di.base_address = ctx.base_address; diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index e18efc446b50..46c8b98797a6 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -682,6 +682,8 @@ pub const DwarfInfo = struct { compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{}, func_list: std.ArrayListUnmanaged(Func) = .{}, + eh_frame_hdr: ?ExceptionFrameHeader = null, + // These lookup tables are only used if `eh_frame_hdr` is null cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .{}, // Sorted by start_pc fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{}, @@ -1489,60 +1491,79 @@ pub const DwarfInfo = struct { } pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator, binary_mem: []const u8) !void { - var has_eh_frame_hdr = false; - if (di.section(.eh_frame_hdr)) |eh_frame_hdr| { - has_eh_frame_hdr = true; + if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { + var stream = io.fixedBufferStream(eh_frame_hdr); + const reader = stream.reader(); - // TODO: Parse this section to get the lookup table, and skip loading the entire section + const version = try reader.readByte(); + if (version != 1) 
break :blk; + + const eh_frame_ptr_enc = try reader.readByte(); + if (eh_frame_ptr_enc == EH.PE.omit) break :blk; + const fde_count_enc = try reader.readByte(); + if (fde_count_enc == EH.PE.omit) break :blk; + const table_enc = try reader.readByte(); + if (table_enc == EH.PE.omit) break :blk; + + const eh_frame_ptr = std.math.cast(usize, try readEhPointer(reader, eh_frame_ptr_enc, @sizeOf(usize), .{ + .pc_rel_base = @ptrToInt(&eh_frame_hdr[stream.pos]), + .follow_indirect = true, + }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf(); + + const fde_count = std.math.cast(usize, try readEhPointer(reader, fde_count_enc, @sizeOf(usize), .{ + .pc_rel_base = @ptrToInt(&eh_frame_hdr[stream.pos]), + .follow_indirect = true, + }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf(); + + const entry_size = try ExceptionFrameHeader.entrySize(table_enc); + const entries_len = fde_count * entry_size; + if (entries_len > eh_frame_hdr.len - stream.pos) return badDwarf(); + + di.eh_frame_hdr = .{ + .eh_frame_ptr = eh_frame_ptr, + .table_enc = table_enc, + .fde_count = fde_count, + .entries = eh_frame_hdr[stream.pos..][0..entries_len], + }; - _ = eh_frame_hdr; + // No need to scan .eh_frame, we have a binary search table already + return; } if (di.section(.eh_frame)) |eh_frame| { var stream = io.fixedBufferStream(eh_frame); - const reader = stream.reader(); - while (stream.pos < stream.buffer.len) { - const length_offset = stream.pos; - var length: usize = try reader.readInt(u32, di.endian); - if (length == 0) break; - - var is_64 = length == math.maxInt(u32); - if (is_64) { - length = std.math.cast(usize, try reader.readInt(u64, di.endian)) orelse return error.LengthOverflow; - } - - const id_len = @as(u8, if (is_64) 8 else 4); - const id = if (is_64) try reader.readInt(u64, di.endian) else try reader.readInt(u32, di.endian); - const entry_bytes = eh_frame[stream.pos..][0 .. 
length - id_len]; - - if (id == 0) { - const cie = try CommonInformationEntry.parse( - entry_bytes, - @ptrToInt(eh_frame.ptr), - @ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), - true, - length_offset, - @sizeOf(usize), - di.endian, - ); - try di.cie_map.put(allocator, length_offset, cie); - } else { - const cie_offset = stream.pos - id_len - id; - const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); - const fde = try FrameDescriptionEntry.parse( - entry_bytes, - @ptrToInt(eh_frame.ptr), - @ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), - true, - cie, - @sizeOf(usize), - di.endian, - ); - try di.fde_list.append(allocator, fde); + const entry_header = try EntryHeader.read(&stream, di.endian); + switch (entry_header.type) { + .cie => { + const cie = try CommonInformationEntry.parse( + entry_header.entry_bytes, + -@intCast(isize, @ptrToInt(binary_mem.ptr)), + //@ptrToInt(eh_frame.ptr), + //@ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), + true, + entry_header.length_offset, + @sizeOf(usize), + di.endian, + ); + try di.cie_map.put(allocator, entry_header.length_offset, cie); + }, + .fde => |cie_offset| { + const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); + const fde = try FrameDescriptionEntry.parse( + entry_header.entry_bytes, + -@intCast(isize, @ptrToInt(binary_mem.ptr)), + //@ptrToInt(eh_frame.ptr), + //@ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), + true, + cie, + @sizeOf(usize), + di.endian, + ); + try di.fde_list.append(allocator, fde); + }, + .terminator => break, } - - stream.pos += entry_bytes.len; } // TODO: Avoiding sorting if has_eh_frame_hdr exists @@ -1560,59 +1581,67 @@ pub const DwarfInfo = struct { if (context.pc == 0) return; // TODO: Handle signal frame (ie. 
use_prev_instr in libunwind) - // TOOD: Use eh_frame_hdr to accelerate the search if available - //const eh_frame_hdr = di.section(.eh_frame_hdr) orelse return error.MissingDebugInfo; - - // Find the FDE - const unmapped_pc = context.pc - module_base_address; - const index = std.sort.binarySearch(FrameDescriptionEntry, unmapped_pc, di.fde_list.items, {}, struct { - pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { - if (pc < mid_item.pc_begin) { - return .lt; - } else { - const range_end = mid_item.pc_begin + mid_item.pc_range; - if (pc < range_end) { - return .eq; - } - return .gt; - } - } - }.compareFn); + // Find the FDE and CIE + var cie: CommonInformationEntry = undefined; + var fde: FrameDescriptionEntry = undefined; - const fde = if (index) |i| &di.fde_list.items[i] else return error.MissingFDE; - const cie = di.cie_map.getPtr(fde.cie_length_offset) orelse return error.MissingCIE; + // In order to support reading .eh_frame from the ELF file (vs using the already-mapped section), + // scanAllUnwindInfo has already mapped any pc-relative offsets such that they will be relative to zero + // instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly + // as pointers will be decoded relative to the already-mapped .eh_frame.
+ var mapped_pc: usize = undefined; - // const prev_cfa = context.cfa; - // const prev_pc = context.pc; + if (di.eh_frame_hdr) |header| { + mapped_pc = context.pc; + try header.findEntry(context.isValidMemory, @ptrToInt(di.section(.eh_frame_hdr).?.ptr), mapped_pc, &cie, &fde); + } else { + mapped_pc = context.pc - module_base_address; + const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct { + pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { + if (pc < mid_item.pc_begin) { + return .lt; + } else { + const range_end = mid_item.pc_begin + mid_item.pc_range; + if (pc < range_end) { + return .eq; + } - // TODO: Cache this on self so we can re-use the allocations? - var vm = call_frame.VirtualMachine{}; - defer vm.deinit(allocator); + return .gt; + } + } + }.compareFn); - const row = try vm.runToNative(allocator, unmapped_pc, cie.*, fde.*); + fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE; + cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; + } + + context.vm.reset(); + + const row = try context.vm.runToNative(allocator, mapped_pc, cie, fde); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, register, context.reg_ctx)); - - // TODO: Check isValidMemory? 
break :blk try call_frame.applyOffset(value, offset); }, .expression => |expression| { // TODO: Evaluate expression _ = expression; + return error.UnimplementedTODO; }, else => return error.InvalidCFARule, }; + if (!context.isValidMemory(context.cfa.?)) return error.InvalidCFA; + // Update the context with the previous frame's values var next_ucontext = context.ucontext; var has_next_ip = false; - for (vm.rowColumns(row)) |column| { + for (context.vm.rowColumns(row)) |column| { if (column.register) |register| { const dest = try abi.regBytes(&next_ucontext, register, context.reg_ctx); if (register == cie.return_address_register) { @@ -1640,17 +1669,24 @@ pub const UnwindContext = struct { pc: usize, ucontext: os.ucontext_t, reg_ctx: abi.RegisterContext, + isValidMemory: *const fn (address: usize) bool, + vm: call_frame.VirtualMachine = .{}, - pub fn init(ucontext: *const os.ucontext_t) !UnwindContext { + pub fn init(ucontext: *const os.ucontext_t, isValidMemory: *const fn (address: usize) bool) !UnwindContext { const pc = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, abi.ipRegNum(), null)); return .{ .cfa = null, .pc = pc, .ucontext = ucontext.*, .reg_ctx = undefined, + .isValidMemory = isValidMemory, }; } + pub fn deinit(self: *UnwindContext, allocator: mem.Allocator) void { + self.vm.deinit(allocator); + } + pub fn getFp(self: *const UnwindContext) !usize { return mem.readIntSliceNative(usize, try abi.regBytes(&self.ucontext, abi.fpRegNum(self.reg_ctx), self.reg_ctx)); } @@ -1694,7 +1730,7 @@ const EhPointerContext = struct { // Whether or not to follow indirect pointers. This should only be // used when decoding pointers at runtime using the current process's - // debug info. 
+ // debug info follow_indirect: bool, // These relative addressing modes are only used in specific cases, and @@ -1762,15 +1798,178 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo } } +/// This represents the decoded .eh_frame_hdr header +pub const ExceptionFrameHeader = struct { + eh_frame_ptr: usize, + table_enc: u8, + fde_count: usize, + entries: []const u8, + + pub fn entrySize(table_enc: u8) !u8 { + return switch (table_enc & EH.PE.type_mask) { + EH.PE.udata2, + EH.PE.sdata2, + => 4, + EH.PE.udata4, + EH.PE.sdata4, + => 8, + EH.PE.udata8, + EH.PE.sdata8, + => 16, + // This is a binary search table, so all entries must be the same length + else => return badDwarf(), + }; + } + + pub fn findEntry( + self: ExceptionFrameHeader, + isValidMemory: *const fn (address: usize) bool, + eh_frame_hdr_ptr: usize, + pc: usize, + cie: *CommonInformationEntry, + fde: *FrameDescriptionEntry, + ) !void { + const entry_size = try entrySize(self.table_enc); + + var left: usize = 0; + var len: usize = self.fde_count; + + var stream = io.fixedBufferStream(self.entries); + const reader = stream.reader(); + + while (len > 1) { + const mid = left + len / 2; + + try stream.seekTo(mid * entry_size); + const pc_begin = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @ptrToInt(&self.entries[stream.pos]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }, builtin.cpu.arch.endian()) orelse return badDwarf(); + + if (pc >= pc_begin) left = mid; + if (pc == pc_begin) break; + + len /= 2; + } + + try stream.seekTo(left * entry_size); + + // Read past pc_begin + _ = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @ptrToInt(&self.entries[stream.pos]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }, builtin.cpu.arch.endian()) orelse return badDwarf(); + + const fde_ptr = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = 
@ptrToInt(&self.entries[stream.pos]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }, builtin.cpu.arch.endian()) orelse return badDwarf(); + + // TODO: Should this also do isValidMemory(fde_ptr) + 11 (worst case header size)? + + // The length of the .eh_frame section is unknown at this point, since .eh_frame_hdr only provides the start + if (!isValidMemory(fde_ptr) or fde_ptr < self.eh_frame_ptr) return badDwarf(); + const eh_frame = @intToPtr([*]const u8, self.eh_frame_ptr)[0..math.maxInt(usize)]; + const fde_offset = fde_ptr - self.eh_frame_ptr; + + var eh_frame_stream = io.fixedBufferStream(eh_frame); + try eh_frame_stream.seekTo(fde_offset); + + const fde_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); + if (!isValidMemory(@ptrToInt(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]))) return badDwarf(); + if (fde_entry_header.type != .fde) return badDwarf(); + + const cie_offset = fde_entry_header.type.fde; + try eh_frame_stream.seekTo(cie_offset); + const cie_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); + if (!isValidMemory(@ptrToInt(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]))) return badDwarf(); + if (cie_entry_header.type != .cie) return badDwarf(); + + cie.* = try CommonInformationEntry.parse( + cie_entry_header.entry_bytes, + 0, + true, + cie_entry_header.length_offset, + @sizeOf(usize), + builtin.cpu.arch.endian(), + ); + + fde.* = try FrameDescriptionEntry.parse( + fde_entry_header.entry_bytes, + 0, + true, + cie.*, + @sizeOf(usize), + builtin.cpu.arch.endian(), + ); + } +}; + +pub const EntryHeader = struct { + /// Offset of the length in the backing buffer + length_offset: usize, + is_64: bool, + type: union(enum) { + cie, + /// Value is the offset of the corresponding CIE + fde: u64, + terminator: void, + }, + /// The entry's contents, not including the ID field + entry_bytes: []const u8, + + /// Reads a header for 
either an FDE or a CIE, then advances the stream to the position after the trailing structure. + /// `stream` must be a stream backed by the .eh_frame section. + pub fn read(stream: *std.io.FixedBufferStream([]const u8), endian: std.builtin.Endian) !EntryHeader { + const reader = stream.reader(); + const length_offset = stream.pos; + + var is_64: bool = undefined; + const length = math.cast(usize, try readUnitLength(reader, endian, &is_64)) orelse return badDwarf(); + if (length == 0) return .{ + .length_offset = length_offset, + .is_64 = is_64, + .type = .{ .terminator = {} }, + .entry_bytes = &.{}, + }; + + const id_len = @as(u8, if (is_64) 8 else 4); + const id = if (is_64) try reader.readInt(u64, endian) else try reader.readInt(u32, endian); + const entry_bytes = stream.buffer[stream.pos..][0 .. length - id_len]; + + const result = EntryHeader{ + .length_offset = length_offset, + .is_64 = is_64, + .type = switch (id) { + 0 => .{ .cie = {} }, + // TODO: Support CommonInformationEntry.dwarf32_id, CommonInformationEntry.dwarf64_id + else => .{ .fde = stream.pos - id_len - id }, + }, + .entry_bytes = entry_bytes, + }; + + stream.pos += entry_bytes.len; + return result; + } + + /// The length of the entry including the ID field, but not the length field itself + pub fn entryLength(self: EntryHeader) usize { + return self.entry_bytes.len + @as(u8, if (self.is_64) 8 else 4); + } +}; + pub const CommonInformationEntry = struct { // Used in .eh_frame pub const eh_id = 0; // Used in .debug_frame (DWARF32) - pub const dwarf32_id = std.math.maxInt(u32); + pub const dwarf32_id = math.maxInt(u32); // Used in .debug_frame (DWARF64) - pub const dwarf64_id = std.math.maxInt(u64); + pub const dwarf64_id = math.maxInt(u64); // Offset of the length field of this entry in the eh_frame section. // This is the key that FDEs use to reference CIEs. 
@@ -1804,12 +2003,17 @@ pub const CommonInformationEntry = struct { return false; } - // This function expects to read the CIE starting with the version field. - // The returned struct references memory backed by cie_bytes. + /// This function expects to read the CIE starting with the version field. + /// The returned struct references memory backed by cie_bytes. + /// + /// See the FrameDescriptionEntry.parse documentation for the description + /// of `pc_rel_offset` and `is_runtime`. + /// + /// `length_offset` specifies the offset of this CIE's length field in the + /// .eh_frame section. pub fn parse( cie_bytes: []const u8, - section_base: u64, - section_offset: u64, + pc_rel_offset: i64, is_runtime: bool, length_offset: u64, addr_size_bytes: u8, @@ -1879,7 +2083,7 @@ pub const CommonInformationEntry = struct { personality_enc.?, addr_size_bytes, .{ - .pc_rel_base = @ptrToInt(&cie_bytes[stream.pos]) - section_base + section_offset, + .pc_rel_base = try pcRelBase(@ptrToInt(&cie_bytes[stream.pos]), pc_rel_offset), .follow_indirect = is_runtime, }, endian, @@ -1926,11 +2130,22 @@ pub const FrameDescriptionEntry = struct { aug_data: []const u8, instructions: []const u8, - // This function expects to read the FDE starting with the PC Begin field + /// This function expects to read the FDE starting at the PC Begin field. + /// The returned struct references memory backed by fde_bytes. + /// + /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values + /// used when decoding pointers. This should be set to zero if fde_bytes is + /// backed by the memory of the .eh_frame section in the running executable. + /// + /// Otherwise, it should be the relative offset to translate addresses from + /// where the section is currently stored in memory, to where it *would* be + /// stored at runtime: section runtime offset - backing section data base ptr. 
+ /// + /// Similarly, `is_runtime` specifies this function is being called on a runtime section, and so + /// indirect pointers can be followed. pub fn parse( fde_bytes: []const u8, - section_base: u64, - section_offset: u64, + pc_rel_offset: i64, is_runtime: bool, cie: CommonInformationEntry, addr_size_bytes: u8, @@ -1946,7 +2161,7 @@ pub const FrameDescriptionEntry = struct { cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset, + .pc_rel_base = try pcRelBase(@ptrToInt(&fde_bytes[stream.pos]), pc_rel_offset), .follow_indirect = is_runtime, }, endian, @@ -1975,7 +2190,7 @@ pub const FrameDescriptionEntry = struct { cie.lsda_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = @ptrToInt(&fde_bytes[stream.pos]) - section_base + section_offset, + .pc_rel_base = try pcRelBase(@ptrToInt(&fde_bytes[stream.pos]), pc_rel_offset), .follow_indirect = is_runtime, }, endian, @@ -1998,3 +2213,11 @@ pub const FrameDescriptionEntry = struct { }; } }; + +fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { + if (pc_rel_offset < 0) { + return math.sub(usize, field_ptr, @intCast(usize, -pc_rel_offset)); + } else { + return math.add(usize, field_ptr, @intCast(usize, pc_rel_offset)); + } +} diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index f473356716de..f28fd7ccc49f 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -354,6 +354,13 @@ pub const VirtualMachine = struct { self.* = undefined; } + pub fn reset(self: *VirtualMachine) void { + self.stack.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + self.cie_row = null; + } + /// Return a slice backed by the row's non-CFA columns pub fn rowColumns(self: VirtualMachine, row: Row) []Column { return self.columns.items[row.columns.start..][0..row.columns.len]; From adbc5bbdb317847d1446395d24cd92c0ffc7fc49 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 2 Jun 
2023 10:11:46 -0400 Subject: [PATCH 24/81] dwarf: fixup pointer cast --- lib/std/dwarf.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 46c8b98797a6..6178a69db2b6 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1862,11 +1862,11 @@ pub const ExceptionFrameHeader = struct { .data_rel_base = eh_frame_hdr_ptr, }, builtin.cpu.arch.endian()) orelse return badDwarf(); - const fde_ptr = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ + const fde_ptr = math.cast(usize, try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ .pc_rel_base = @ptrToInt(&self.entries[stream.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, - }, builtin.cpu.arch.endian()) orelse return badDwarf(); + }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf(); // TODO: Should this also do isValidMemory(fde_ptr) + 11 (worst case header size)? From a47212c72e9d156e06cb278ac681e51e11e140b7 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 23 Jun 2023 11:54:06 -0400 Subject: [PATCH 25/81] - rebase and update to latest master --- lib/std/debug.zig | 28 +++++++++++++--------------- lib/std/dwarf.zig | 32 ++++++++++++++++---------------- lib/std/dwarf/call_frame.zig | 14 +++++++------- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index d74939121239..ec841617fdf8 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -988,7 +988,7 @@ pub fn readElfDebugInfo( if (mem.eql(u8, name, ".gnu_debuglink")) { const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); const debug_filename = mem.sliceTo(@ptrCast([*:0]const u8, gnu_debuglink.ptr), 0); - const crc_offset = mem.alignForward(@ptrToInt(&debug_filename[debug_filename.len]) + 1, 4) - @ptrToInt(gnu_debuglink.ptr); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) -
@intFromPtr(gnu_debuglink.ptr); const crc_bytes = gnu_debuglink[crc_offset .. crc_offset + 4]; separate_debug_crc = mem.readIntSliceNative(u32, crc_bytes); separate_debug_filename = debug_filename; @@ -1007,11 +1007,9 @@ pub fn readElfDebugInfo( var section_stream = io.fixedBufferStream(section_bytes); var section_reader = section_stream.reader(); const chdr = section_reader.readStruct(elf.Chdr) catch continue; - - // TODO: Support ZSTD if (chdr.ch_type != .ZLIB) continue; - var zlib_stream = std.compress.zlib.zlibStream(allocator, section_stream.reader()) catch continue; + var zlib_stream = std.compress.zlib.decompressStream(allocator, section_stream.reader()) catch continue; defer zlib_stream.deinit(); var decompressed_section = try allocator.alloc(u8, chdr.ch_size); @@ -1031,10 +1029,10 @@ pub fn readElfDebugInfo( } const missing_debug_info = - sections[@enumToInt(DW.DwarfSection.debug_info)] == null or - sections[@enumToInt(DW.DwarfSection.debug_abbrev)] == null or - sections[@enumToInt(DW.DwarfSection.debug_str)] == null or - sections[@enumToInt(DW.DwarfSection.debug_line)] == null; + sections[@intFromEnum(DW.DwarfSection.debug_info)] == null or + sections[@intFromEnum(DW.DwarfSection.debug_abbrev)] == null or + sections[@intFromEnum(DW.DwarfSection.debug_str)] == null or + sections[@intFromEnum(DW.DwarfSection.debug_line)] == null; // Attempt to load debug info from an external file // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html @@ -1537,7 +1535,7 @@ pub const DebugInfo = struct { switch (phdr.p_type) { elf.PT_NOTE => { // Look for .note.gnu.build-id - const note_bytes = @intToPtr([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; + const note_bytes = @ptrFromInt([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; const name_size = mem.readIntSliceNative(u32, note_bytes[0..4]); if (name_size != 4) continue; const desc_size = mem.readIntSliceNative(u32, note_bytes[4..8]); @@ -1547,7 +1545,7 @@ pub const 
DebugInfo = struct { context.build_id = note_bytes[16..][0..desc_size]; }, elf.PT_GNU_EH_FRAME => { - context.gnu_eh_frame = @intToPtr([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; + context.gnu_eh_frame = @ptrFromInt([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; }, else => {}, } @@ -1575,7 +1573,7 @@ pub const DebugInfo = struct { // are encoded relative to its base address, so we must use the // version that is already memory mapped, and not the one that // will be mapped separately from the ELF file. - sections[@enumToInt(DW.DwarfSection.eh_frame_hdr)] = .{ + sections[@intFromEnum(DW.DwarfSection.eh_frame_hdr)] = .{ .data = eh_frame_hdr, .owned = false, }; @@ -1689,10 +1687,10 @@ pub const ModuleDebugInfo = switch (native_os) { } const missing_debug_info = - sections[@enumToInt(DW.DwarfSection.debug_info)] == null or - sections[@enumToInt(DW.DwarfSection.debug_abbrev)] == null or - sections[@enumToInt(DW.DwarfSection.debug_str)] == null or - sections[@enumToInt(DW.DwarfSection.debug_line)] == null; + sections[@intFromEnum(DW.DwarfSection.debug_info)] == null or + sections[@intFromEnum(DW.DwarfSection.debug_abbrev)] == null or + sections[@intFromEnum(DW.DwarfSection.debug_str)] == null or + sections[@intFromEnum(DW.DwarfSection.debug_line)] == null; if (missing_debug_info) return error.MissingDebugInfo; var di = DW.DwarfInfo{ diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 6178a69db2b6..97bacb298b86 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -691,7 +691,7 @@ pub const DwarfInfo = struct { is_macho: bool, pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 { - return if (di.sections[@enumToInt(dwarf_section)]) |s| s.data else null; + return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; } pub fn deinit(di: *DwarfInfo, allocator: mem.Allocator) void { @@ -1506,12 +1506,12 @@ pub const DwarfInfo = struct { if (table_enc == EH.PE.omit) break :blk; const eh_frame_ptr = 
std.math.cast(usize, try readEhPointer(reader, eh_frame_ptr_enc, @sizeOf(usize), .{ - .pc_rel_base = @ptrToInt(&eh_frame_hdr[stream.pos]), + .pc_rel_base = @intFromPtr(&eh_frame_hdr[stream.pos]), .follow_indirect = true, }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf(); const fde_count = std.math.cast(usize, try readEhPointer(reader, fde_count_enc, @sizeOf(usize), .{ - .pc_rel_base = @ptrToInt(&eh_frame_hdr[stream.pos]), + .pc_rel_base = @intFromPtr(&eh_frame_hdr[stream.pos]), .follow_indirect = true, }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf(); @@ -1538,7 +1538,7 @@ pub const DwarfInfo = struct { .cie => { const cie = try CommonInformationEntry.parse( entry_header.entry_bytes, - -@intCast(isize, @ptrToInt(binary_mem.ptr)), + -@intCast(isize, @intFromPtr(binary_mem.ptr)), //@ptrToInt(eh_frame.ptr), //@ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), true, @@ -1552,7 +1552,7 @@ pub const DwarfInfo = struct { const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); const fde = try FrameDescriptionEntry.parse( entry_header.entry_bytes, - -@intCast(isize, @ptrToInt(binary_mem.ptr)), + -@intCast(isize, @intFromPtr(binary_mem.ptr)), //@ptrToInt(eh_frame.ptr), //@ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), true, @@ -1594,7 +1594,7 @@ pub const DwarfInfo = struct { if (di.eh_frame_hdr) |header| { mapped_pc = context.pc; - try header.findEntry(context.isValidMemory, @ptrToInt(di.section(.eh_frame_hdr).?.ptr), mapped_pc, &cie, &fde); + try header.findEntry(context.isValidMemory, @intFromPtr(di.section(.eh_frame_hdr).?.ptr), mapped_pc, &cie, &fde); } else { mapped_pc = context.pc - module_base_address; const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct { @@ -1790,7 +1790,7 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo const native_ptr = math.cast(usize, ptr) orelse return error.PointerOverflow; return 
switch (addr_size_bytes) { - 2, 4, 8 => return @intToPtr(*const usize, native_ptr).*, + 2, 4, 8 => return @ptrFromInt(*const usize, native_ptr).*, else => return error.UnsupportedAddrSize, }; } else { @@ -1842,7 +1842,7 @@ pub const ExceptionFrameHeader = struct { try stream.seekTo(mid * entry_size); const pc_begin = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @ptrToInt(&self.entries[stream.pos]), + .pc_rel_base = @intFromPtr(&self.entries[stream.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, }, builtin.cpu.arch.endian()) orelse return badDwarf(); @@ -1857,13 +1857,13 @@ pub const ExceptionFrameHeader = struct { // Read past pc_begin _ = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @ptrToInt(&self.entries[stream.pos]), + .pc_rel_base = @intFromPtr(&self.entries[stream.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, }, builtin.cpu.arch.endian()) orelse return badDwarf(); const fde_ptr = math.cast(usize, try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @ptrToInt(&self.entries[stream.pos]), + .pc_rel_base = @intFromPtr(&self.entries[stream.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf(); @@ -1872,20 +1872,20 @@ pub const ExceptionFrameHeader = struct { // The length of the .eh_frame section is unknown at this point, since .eh_frame_hdr only provides the start if (!isValidMemory(fde_ptr) or fde_ptr < self.eh_frame_ptr) return badDwarf(); - const eh_frame = @intToPtr([*]const u8, self.eh_frame_ptr)[0..math.maxInt(usize)]; + const eh_frame = @ptrFromInt([*]const u8, self.eh_frame_ptr)[0..math.maxInt(usize)]; const fde_offset = fde_ptr - self.eh_frame_ptr; var eh_frame_stream = io.fixedBufferStream(eh_frame); try eh_frame_stream.seekTo(fde_offset); const fde_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); - 
if (!isValidMemory(@ptrToInt(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]))) return badDwarf(); + if (!isValidMemory(@intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]))) return badDwarf(); if (fde_entry_header.type != .fde) return badDwarf(); const cie_offset = fde_entry_header.type.fde; try eh_frame_stream.seekTo(cie_offset); const cie_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); - if (!isValidMemory(@ptrToInt(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]))) return badDwarf(); + if (!isValidMemory(@intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]))) return badDwarf(); if (cie_entry_header.type != .cie) return badDwarf(); cie.* = try CommonInformationEntry.parse( @@ -2083,7 +2083,7 @@ pub const CommonInformationEntry = struct { personality_enc.?, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@ptrToInt(&cie_bytes[stream.pos]), pc_rel_offset), + .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[stream.pos]), pc_rel_offset), .follow_indirect = is_runtime, }, endian, @@ -2161,7 +2161,7 @@ pub const FrameDescriptionEntry = struct { cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@ptrToInt(&fde_bytes[stream.pos]), pc_rel_offset), + .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[stream.pos]), pc_rel_offset), .follow_indirect = is_runtime, }, endian, @@ -2190,7 +2190,7 @@ pub const FrameDescriptionEntry = struct { cie.lsda_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@ptrToInt(&fde_bytes[stream.pos]), pc_rel_offset), + .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[stream.pos]), pc_rel_offset), .follow_indirect = is_runtime, }, endian, diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index f28fd7ccc49f..cbf267aba388 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -38,12 +38,12 @@ const Opcode = enum(u8) { val_expression 
= 0x16, // These opcodes encode an operand in the lower 6 bits of the opcode itself - pub const lo_inline = @enumToInt(Opcode.advance_loc); - pub const hi_inline = @enumToInt(Opcode.restore) | 0b111111; + pub const lo_inline = @intFromEnum(Opcode.advance_loc); + pub const hi_inline = @intFromEnum(Opcode.restore) | 0b111111; // These opcodes are trailed by zero or more operands - pub const lo_reserved = @enumToInt(Opcode.nop); - pub const hi_reserved = @enumToInt(Opcode.val_expression); + pub const lo_reserved = @intFromEnum(Opcode.nop); + pub const hi_reserved = @intFromEnum(Opcode.val_expression); // Vendor-specific opcodes pub const lo_user = 0x1c; @@ -206,13 +206,13 @@ pub const Instruction = union(Opcode) { ) !Instruction { return switch (try stream.reader().readByte()) { inline Opcode.lo_inline...Opcode.hi_inline => |opcode| blk: { - const e = @intToEnum(Opcode, opcode & 0b11000000); + const e = @enumFromInt(Opcode, opcode & 0b11000000); var result = @unionInit(Instruction, @tagName(e), undefined); try result.readOperands(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian); break :blk result; }, inline Opcode.lo_reserved...Opcode.hi_reserved => |opcode| blk: { - const e = @intToEnum(Opcode, opcode); + const e = @enumFromInt(Opcode, opcode); var result = @unionInit(Instruction, @tagName(e), undefined); try result.readOperands(stream, null, addr_size_bytes, endian); break :blk result; @@ -304,7 +304,7 @@ pub const VirtualMachine = struct { .same_value => {}, .offset => |offset| { if (context.cfa) |cfa| { - const ptr = @intToPtr(*const usize, try applyOffset(cfa, offset)); + const ptr = @ptrFromInt(*const usize, try applyOffset(cfa, offset)); // TODO: context.isValidMemory(ptr) mem.writeIntSliceNative(usize, out, ptr.*); From 521988299d3e87c00388207319b09eb4ebd5d443 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 23 Jun 2023 16:08:11 -0400 Subject: [PATCH 26/81] add more safety checks when searching for eh_frame entries using findEntry --- 
lib/std/dwarf.zig | 51 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 97bacb298b86..db9a905c52af 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1580,7 +1580,7 @@ pub const DwarfInfo = struct { if (!comptime abi.isSupportedArch(builtin.target.cpu.arch)) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return; - // TODO: Handle signal frame (ie. use_prev_instr in libunwind) + // TODO: Handle unwinding from a signal frame (ie. use_prev_instr in libunwind) // Find the FDE and CIE var cie: CommonInformationEntry = undefined; @@ -1594,7 +1594,14 @@ pub const DwarfInfo = struct { if (di.eh_frame_hdr) |header| { mapped_pc = context.pc; - try header.findEntry(context.isValidMemory, @intFromPtr(di.section(.eh_frame_hdr).?.ptr), mapped_pc, &cie, &fde); + try header.findEntry( + context.isValidMemory, + null, // TODO: Check di for this + @intFromPtr(di.section(.eh_frame_hdr).?.ptr), + mapped_pc, + &cie, + &fde, + ); } else { mapped_pc = context.pc - module_base_address; const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct { @@ -1821,9 +1828,28 @@ pub const ExceptionFrameHeader = struct { }; } + fn isValidPtr( + self: ExceptionFrameHeader, + ptr: usize, + isValidMemory: *const fn (address: usize) bool, + eh_frame_len: ?usize, + ) bool { + if (eh_frame_len) |len| { + return ptr >= self.eh_frame_ptr and ptr < self.eh_frame_ptr + len; + } else { + return isValidMemory(ptr); + } + } + + /// Find an entry by binary searching the eh_frame_hdr section. + /// + /// Since the length of the eh_frame section (`eh_frame_len`) may not be known by the caller, + /// `isValidMemory` will be called before accessing any memory referenced by + /// the header entries. If `eh_frame_len` is provided, then these checks can be skipped. 
pub fn findEntry( self: ExceptionFrameHeader, isValidMemory: *const fn (address: usize) bool, + eh_frame_len: ?usize, eh_frame_hdr_ptr: usize, pc: usize, cie: *CommonInformationEntry, @@ -1855,7 +1881,7 @@ pub const ExceptionFrameHeader = struct { try stream.seekTo(left * entry_size); - // Read past pc_begin + // Read past the pc_begin field of the entry _ = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{ .pc_rel_base = @intFromPtr(&self.entries[stream.pos]), .follow_indirect = true, @@ -1868,24 +1894,29 @@ pub const ExceptionFrameHeader = struct { .data_rel_base = eh_frame_hdr_ptr, }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf(); - // TODO: Should this also do isValidMemory(fde_ptr) + 11 (worst case header size)? + // Verify the length fields of the FDE header are readable + if (!self.isValidPtr(fde_ptr, isValidMemory, eh_frame_len) or fde_ptr < self.eh_frame_ptr) return badDwarf(); - // The length of the .eh_frame section is unknown at this point, since .eh_frame_hdr only provides the start - if (!isValidMemory(fde_ptr) or fde_ptr < self.eh_frame_ptr) return badDwarf(); - const eh_frame = @ptrFromInt([*]const u8, self.eh_frame_ptr)[0..math.maxInt(usize)]; - const fde_offset = fde_ptr - self.eh_frame_ptr; + var fde_entry_header_len: usize = 4; + if (!self.isValidPtr(fde_ptr + 3, isValidMemory, eh_frame_len)) return badDwarf(); + if (self.isValidPtr(fde_ptr + 11, isValidMemory, eh_frame_len)) fde_entry_header_len = 12; + // Even if eh_frame_len is not specified, all ranges accssed are checked by isValidPtr + const eh_frame = @ptrFromInt([*]const u8, self.eh_frame_ptr)[0..eh_frame_len orelse math.maxInt(u32)]; + + const fde_offset = fde_ptr - self.eh_frame_ptr; var eh_frame_stream = io.fixedBufferStream(eh_frame); try eh_frame_stream.seekTo(fde_offset); const fde_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); - if 
(!isValidMemory(@intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]))) return badDwarf(); + if (!self.isValidPtr(@intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf(); if (fde_entry_header.type != .fde) return badDwarf(); + // CIEs always come before FDEs (the offset is a subtration), so we can assume this memory is readable const cie_offset = fde_entry_header.type.fde; try eh_frame_stream.seekTo(cie_offset); const cie_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); - if (!isValidMemory(@intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]))) return badDwarf(); + if (!self.isValidPtr(@intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf(); if (cie_entry_header.type != .cie) return badDwarf(); cie.* = try CommonInformationEntry.parse( From 84a1244b6c8d8b093d514cf267c832ad24d4400f Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 25 Jun 2023 01:58:55 -0400 Subject: [PATCH 27/81] dwarf: use eh_frame length if it's available --- lib/std/dwarf.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index db9a905c52af..47a7563beb12 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1593,10 +1593,11 @@ pub const DwarfInfo = struct { var mapped_pc: usize = undefined; if (di.eh_frame_hdr) |header| { + const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; mapped_pc = context.pc; try header.findEntry( context.isValidMemory, - null, // TODO: Check di for this + eh_frame_len, @intFromPtr(di.section(.eh_frame_hdr).?.ptr), mapped_pc, &cie, From 6abf1fbfe6f7f3c06e70e331a3c9e5101624d501 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 25 Jun 2023 02:47:48 -0400 Subject: [PATCH 28/81] update to new builtin syntax --- lib/std/debug.zig | 8 ++++---- 
lib/std/dwarf.zig | 20 ++++++++------------ lib/std/dwarf/call_frame.zig | 18 +++++++++--------- lib/std/dwarf/expressions.zig | 8 ++++---- src/crash_report.zig | 2 +- 5 files changed, 26 insertions(+), 30 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index ec841617fdf8..1ae26da2470d 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -987,7 +987,7 @@ pub fn readElfDebugInfo( if (mem.eql(u8, name, ".gnu_debuglink")) { const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - const debug_filename = mem.sliceTo(@ptrCast([*:0]const u8, gnu_debuglink.ptr), 0); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); const crc_bytes = gnu_debuglink[crc_offset .. crc_offset + 4]; separate_debug_crc = mem.readIntSliceNative(u32, crc_bytes); @@ -1535,7 +1535,7 @@ pub const DebugInfo = struct { switch (phdr.p_type) { elf.PT_NOTE => { // Look for .note.gnu.build-id - const note_bytes = @ptrFromInt([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; + const note_bytes = @as([*]const u8, @ptrFromInt(info.dlpi_addr + phdr.p_vaddr))[0..phdr.p_memsz]; const name_size = mem.readIntSliceNative(u32, note_bytes[0..4]); if (name_size != 4) continue; const desc_size = mem.readIntSliceNative(u32, note_bytes[4..8]); @@ -1545,7 +1545,7 @@ pub const DebugInfo = struct { context.build_id = note_bytes[16..][0..desc_size]; }, elf.PT_GNU_EH_FRAME => { - context.gnu_eh_frame = @ptrFromInt([*]const u8, info.dlpi_addr + phdr.p_vaddr)[0..phdr.p_memsz]; + context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.dlpi_addr + phdr.p_vaddr))[0..phdr.p_memsz]; }, else => {}, } @@ -2094,7 +2094,7 @@ fn dumpSegfaultInfoPosix(sig: i32, addr: usize, ctx_ptr: ?*const anyopaque) void .arm, .aarch64, => { - const ctx = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), 
ctx_ptr)); + const ctx: *const os.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); dumpStackTraceFromBase(ctx); }, else => {}, diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 47a7563beb12..ce8c59c6e9d9 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1538,9 +1538,7 @@ pub const DwarfInfo = struct { .cie => { const cie = try CommonInformationEntry.parse( entry_header.entry_bytes, - -@intCast(isize, @intFromPtr(binary_mem.ptr)), - //@ptrToInt(eh_frame.ptr), - //@ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), + -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), true, entry_header.length_offset, @sizeOf(usize), @@ -1552,9 +1550,7 @@ pub const DwarfInfo = struct { const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); const fde = try FrameDescriptionEntry.parse( entry_header.entry_bytes, - -@intCast(isize, @intFromPtr(binary_mem.ptr)), - //@ptrToInt(eh_frame.ptr), - //@ptrToInt(eh_frame.ptr) - @ptrToInt(binary_mem.ptr), + -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), true, cie, @sizeOf(usize), @@ -1783,10 +1779,10 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo }; const ptr = if (base) |b| switch (value) { - .signed => |s| @intCast(u64, s + @intCast(i64, b)), + .signed => |s| @as(u64, @intCast(s + @as(i64, @intCast(b)))), .unsigned => |u| u + b, } else switch (value) { - .signed => |s| @intCast(u64, s), + .signed => |s| @as(u64, @intCast(s)), .unsigned => |u| u, }; @@ -1798,7 +1794,7 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo const native_ptr = math.cast(usize, ptr) orelse return error.PointerOverflow; return switch (addr_size_bytes) { - 2, 4, 8 => return @ptrFromInt(*const usize, native_ptr).*, + 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, else => return error.UnsupportedAddrSize, }; } else { @@ -1903,7 +1899,7 @@ pub const ExceptionFrameHeader = struct { if (self.isValidPtr(fde_ptr + 11, isValidMemory, eh_frame_len)) fde_entry_header_len = 
12; // Even if eh_frame_len is not specified, all ranges accssed are checked by isValidPtr - const eh_frame = @ptrFromInt([*]const u8, self.eh_frame_ptr)[0..eh_frame_len orelse math.maxInt(u32)]; + const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse math.maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; var eh_frame_stream = io.fixedBufferStream(eh_frame); @@ -2248,8 +2244,8 @@ pub const FrameDescriptionEntry = struct { fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { if (pc_rel_offset < 0) { - return math.sub(usize, field_ptr, @intCast(usize, -pc_rel_offset)); + return math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); } else { - return math.add(usize, field_ptr, @intCast(usize, pc_rel_offset)); + return math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index cbf267aba388..460d157bf9ef 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -206,13 +206,13 @@ pub const Instruction = union(Opcode) { ) !Instruction { return switch (try stream.reader().readByte()) { inline Opcode.lo_inline...Opcode.hi_inline => |opcode| blk: { - const e = @enumFromInt(Opcode, opcode & 0b11000000); + const e: Opcode = @enumFromInt(opcode & 0b11000000); var result = @unionInit(Instruction, @tagName(e), undefined); - try result.readOperands(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian); + try result.readOperands(stream, @as(u6, @intCast(opcode & 0b111111)), addr_size_bytes, endian); break :blk result; }, inline Opcode.lo_reserved...Opcode.hi_reserved => |opcode| blk: { - const e = @enumFromInt(Opcode, opcode); + const e: Opcode = @enumFromInt(opcode); var result = @unionInit(Instruction, @tagName(e), undefined); try result.readOperands(stream, null, addr_size_bytes, endian); break :blk result; @@ -234,9 +234,9 @@ pub const Instruction = union(Opcode) { /// an error and fall 
back to FP-based unwinding. pub fn applyOffset(base: usize, offset: i64) !usize { return if (offset >= 0) - try std.math.add(usize, base, @intCast(usize, offset)) + try std.math.add(usize, base, @as(usize, @intCast(offset))) else - try std.math.sub(usize, base, @intCast(usize, -offset)); + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); } /// This is a virtual machine that runs DWARF call frame instructions. @@ -304,7 +304,7 @@ pub const VirtualMachine = struct { .same_value => {}, .offset => |offset| { if (context.cfa) |cfa| { - const ptr = @ptrFromInt(*const usize, try applyOffset(cfa, offset)); + const ptr: *const usize = @ptrFromInt(try applyOffset(cfa, offset)); // TODO: context.isValidMemory(ptr) mem.writeIntSliceNative(usize, out, ptr.*); @@ -480,7 +480,7 @@ pub const VirtualMachine = struct { => |i| { try self.resolveCopyOnWrite(allocator); const column = try self.getOrAddColumn(allocator, i.operands.register); - column.rule = .{ .offset = @intCast(i64, i.operands.offset) * cie.data_alignment_factor }; + column.rule = .{ .offset = @as(i64, @intCast(i.operands.offset)) * cie.data_alignment_factor }; }, inline .restore, .restore_extended, @@ -526,7 +526,7 @@ pub const VirtualMachine = struct { try self.resolveCopyOnWrite(allocator); self.current_row.cfa = .{ .register = i.operands.register, - .rule = .{ .val_offset = @intCast(i64, i.operands.offset) }, + .rule = .{ .val_offset = @intCast(i.operands.offset) }, }; }, .def_cfa_sf => |i| { @@ -544,7 +544,7 @@ pub const VirtualMachine = struct { .def_cfa_offset => |i| { try self.resolveCopyOnWrite(allocator); if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ .val_offset = @intCast(i64, i.operands.offset) }; + self.current_row.cfa.rule = .{ .val_offset = @intCast(i.operands.offset) }; }, .def_cfa_offset_sf => |i| { try self.resolveCopyOnWrite(allocator); diff --git a/lib/std/dwarf/expressions.zig 
b/lib/std/dwarf/expressions.zig index 6d94138d68fe..f2b8bfc88136 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -62,13 +62,13 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { const int_info = @typeInfo(@TypeOf(value)).Int; if (@sizeOf(@TypeOf(value)) > options.addr_size) { return .{ .generic = switch (int_info.signedness) { - .signed => @bitCast(addr_type, @truncate(addr_type_signed, value)), - .unsigned => @truncate(addr_type, value), + .signed => @bitCast(@as(addr_type_signed, @truncate(value))), + .unsigned => @truncate(value), } }; } else { return .{ .generic = switch (int_info.signedness) { - .signed => @bitCast(addr_type, @intCast(addr_type_signed, value)), - .unsigned => @intCast(addr_type, value), + .signed => @bitCast(@as(addr_type_signed, @intCast(value))), + .unsigned => @intCast(value), } }; } } diff --git a/src/crash_report.zig b/src/crash_report.zig index 5cd00c5b1312..83c5af7ba077 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -207,7 +207,7 @@ fn handleSegfaultPosix(sig: i32, info: *const os.siginfo_t, ctx_ptr: ?*const any .x86_64, .arm, .aarch64, - => StackContext{ .exception = @ptrCast(*const os.ucontext_t, @alignCast(@alignOf(os.ucontext_t), ctx_ptr)) }, + => StackContext{ .exception = @ptrCast(@alignCast(ctx_ptr)) }, else => .not_supported, }; From 41832aa1e651a4a19f3ab275a54c28ed27180cd5 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 26 Jun 2023 00:57:28 -0400 Subject: [PATCH 29/81] linux: add getcontext for x86_64 --- lib/std/os/linux.zig | 3 +- lib/std/os/linux/x86_64.zig | 68 +++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 6362e9ece1a4..4f5cffc75303 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -86,6 +86,7 @@ pub const timeval = arch_bits.timeval; pub const timezone = arch_bits.timezone; pub const ucontext_t = arch_bits.ucontext_t; pub const 
user_desc = arch_bits.user_desc; +pub const getcontext = arch_bits.getcontext; pub const tls = @import("linux/tls.zig"); pub const pie = @import("linux/start_pie.zig"); @@ -4694,7 +4695,7 @@ else /// processes. RTPRIO, - /// Maximum CPU time in µs that a process scheduled under a real-time + /// Maximum CPU time in µs that a process scheduled under a real-time /// scheduling policy may consume without making a blocking system /// call before being forcibly descheduled. RTTIME, diff --git a/lib/std/os/linux/x86_64.zig b/lib/std/os/linux/x86_64.zig index 41c9c9ea46ca..0beb70e69126 100644 --- a/lib/std/os/linux/x86_64.zig +++ b/lib/std/os/linux/x86_64.zig @@ -395,3 +395,71 @@ pub const ucontext_t = extern struct { sigmask: sigset_t, fpregs_mem: [64]usize, }; + +fn gpRegisterOffset(comptime reg_index: comptime_int) usize { + return @offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "gregs") + @sizeOf(usize) * reg_index; +} + +pub inline fn getcontext(context: *ucontext_t) usize { + asm volatile ( + \\ movq %%r8, (%[r8_offset])(%[context]) + \\ movq %%r9, (%[r9_offset])(%[context]) + \\ movq %%r10, (%[r10_offset])(%[context]) + \\ movq %%r11, (%[r11_offset])(%[context]) + \\ movq %%r12, (%[r12_offset])(%[context]) + \\ movq %%r13, (%[r13_offset])(%[context]) + \\ movq %%r14, (%[r14_offset])(%[context]) + \\ movq %%r15, (%[r15_offset])(%[context]) + \\ movq %%rdi, (%[rdi_offset])(%[context]) + \\ movq %%rsi, (%[rsi_offset])(%[context]) + \\ movq %%rbp, (%[rbp_offset])(%[context]) + \\ movq %%rbx, (%[rbx_offset])(%[context]) + \\ movq %%rdx, (%[rdx_offset])(%[context]) + \\ movq %%rax, (%[rax_offset])(%[context]) + \\ movq %%rcx, (%[rcx_offset])(%[context]) + \\ movq %%rsp, (%[rsp_offset])(%[context]) + \\ leaq (%%rip), %%rcx + \\ movq %%rcx, (%[rip_offset])(%[context]) + \\ pushfq + \\ popq (%[efl_offset])(%[context]) + \\ leaq (%[fpmem_offset])(%[context]), %%rcx + \\ movq %%rcx, (%[fpstate_offset])(%[context]) + \\ fnstenv (%%rcx) + \\ stmxcsr 
(%[mxcsr_offset])(%[context]) + : + : [context] "{rdi}" (context), + [r8_offset] "p" (comptime gpRegisterOffset(REG.R8)), + [r9_offset] "p" (comptime gpRegisterOffset(REG.R9)), + [r10_offset] "p" (comptime gpRegisterOffset(REG.R10)), + [r11_offset] "p" (comptime gpRegisterOffset(REG.R11)), + [r12_offset] "p" (comptime gpRegisterOffset(REG.R12)), + [r13_offset] "p" (comptime gpRegisterOffset(REG.R13)), + [r14_offset] "p" (comptime gpRegisterOffset(REG.R14)), + [r15_offset] "p" (comptime gpRegisterOffset(REG.R15)), + [rdi_offset] "p" (comptime gpRegisterOffset(REG.RDI)), + [rsi_offset] "p" (comptime gpRegisterOffset(REG.RSI)), + [rbp_offset] "p" (comptime gpRegisterOffset(REG.RBP)), + [rbx_offset] "p" (comptime gpRegisterOffset(REG.RBX)), + [rdx_offset] "p" (comptime gpRegisterOffset(REG.RDX)), + [rax_offset] "p" (comptime gpRegisterOffset(REG.RAX)), + [rcx_offset] "p" (comptime gpRegisterOffset(REG.RCX)), + [rsp_offset] "p" (comptime gpRegisterOffset(REG.RSP)), + [rip_offset] "p" (comptime gpRegisterOffset(REG.RIP)), + [efl_offset] "p" (comptime gpRegisterOffset(REG.EFL)), + [fpstate_offset] "p" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")), + [fpmem_offset] "p" (@offsetOf(ucontext_t, "fpregs_mem")), + [mxcsr_offset] "p" (@offsetOf(ucontext_t, "fpregs_mem") + @offsetOf(fpstate, "mxcsr")), + : "memory", "rcx" + ); + + // TODO: Read GS/FS registers? 
+ + // TODO: `flags` isn't present in the getcontext man page, figure out what to write here + context.flags = 0; + context.link = null; + + const altstack_result = linux.sigaltstack(null, &context.stack); + if (altstack_result != 0) return altstack_result; + + return linux.sigprocmask(0, null, &context.sigmask); +} From a9b6f2d92984e1b4d4a9fe2b0ba0b14ec8b812c5 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 26 Jun 2023 00:58:42 -0400 Subject: [PATCH 30/81] dwarf: add support for .debug_frame and CIE version 4 --- lib/std/dwarf.zig | 126 ++++++++++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 48 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index ce8c59c6e9d9..f15287619189 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -7,6 +7,7 @@ const os = std.os; const mem = std.mem; const math = std.math; const leb = @import("leb128.zig"); +const assert = std.debug.assert; pub const TAG = @import("dwarf/TAG.zig"); pub const AT = @import("dwarf/AT.zig"); @@ -1530,45 +1531,49 @@ pub const DwarfInfo = struct { return; } - if (di.section(.eh_frame)) |eh_frame| { - var stream = io.fixedBufferStream(eh_frame); - while (stream.pos < stream.buffer.len) { - const entry_header = try EntryHeader.read(&stream, di.endian); - switch (entry_header.type) { - .cie => { - const cie = try CommonInformationEntry.parse( - entry_header.entry_bytes, - -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), - true, - entry_header.length_offset, - @sizeOf(usize), - di.endian, - ); - try di.cie_map.put(allocator, entry_header.length_offset, cie); - }, - .fde => |cie_offset| { - const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); - const fde = try FrameDescriptionEntry.parse( - entry_header.entry_bytes, - -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), - true, - cie, - @sizeOf(usize), - di.endian, - ); - try di.fde_list.append(allocator, fde); - }, - .terminator => break, + const frame_sections = [2]DwarfSection{ .eh_frame, .debug_frame 
}; + for (frame_sections) |frame_section| { + if (di.section(frame_section)) |eh_frame| { + var stream = io.fixedBufferStream(eh_frame); + while (stream.pos < stream.buffer.len) { + const entry_header = try EntryHeader.read(&stream, frame_section, di.endian); + switch (entry_header.type) { + .cie => { + const cie = try CommonInformationEntry.parse( + entry_header.entry_bytes, + -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), + true, + frame_section, + entry_header.length_offset, + @sizeOf(usize), + di.endian, + ); + try di.cie_map.put(allocator, entry_header.length_offset, cie); + }, + .fde => |cie_offset| { + const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); + const fde = try FrameDescriptionEntry.parse( + entry_header.entry_bytes, + -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), + true, + cie, + @sizeOf(usize), + di.endian, + ); + try di.fde_list.append(allocator, fde); + }, + .terminator => break, + } } - } - // TODO: Avoiding sorting if has_eh_frame_hdr exists - std.mem.sort(FrameDescriptionEntry, di.fde_list.items, {}, struct { - fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { - _ = ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); + // TODO: Avoiding sorting if has_eh_frame_hdr exists + std.mem.sort(FrameDescriptionEntry, di.fde_list.items, {}, struct { + fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { + _ = ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + } } } @@ -1905,14 +1910,14 @@ pub const ExceptionFrameHeader = struct { var eh_frame_stream = io.fixedBufferStream(eh_frame); try eh_frame_stream.seekTo(fde_offset); - const fde_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); + const fde_entry_header = try EntryHeader.read(&eh_frame_stream, .eh_frame, builtin.cpu.arch.endian()); if (!self.isValidPtr(@intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return 
badDwarf(); if (fde_entry_header.type != .fde) return badDwarf(); // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable const cie_offset = fde_entry_header.type.fde; try eh_frame_stream.seekTo(cie_offset); - const cie_entry_header = try EntryHeader.read(&eh_frame_stream, builtin.cpu.arch.endian()); + const cie_entry_header = try EntryHeader.read(&eh_frame_stream, .eh_frame, builtin.cpu.arch.endian()); if (!self.isValidPtr(@intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf(); if (cie_entry_header.type != .cie) return badDwarf(); @@ -1920,6 +1925,7 @@ pub const ExceptionFrameHeader = struct { cie_entry_header.entry_bytes, 0, true, + .eh_frame, cie_entry_header.length_offset, @sizeOf(usize), builtin.cpu.arch.endian(), @@ -1937,7 +1943,7 @@ pub const ExceptionFrameHeader = struct { }; pub const EntryHeader = struct { - /// Offset of the length in the backing buffer + /// Offset of the length field in the backing buffer length_offset: usize, is_64: bool, type: union(enum) { @@ -1950,8 +1956,10 @@ pub const EntryHeader = struct { entry_bytes: []const u8, /// Reads a header for either an FDE or a CIE, then advances the stream to the position after the trailing structure. - /// `stream` must be a stream backed by the .eh_frame section. - pub fn read(stream: *std.io.FixedBufferStream([]const u8), endian: std.builtin.Endian) !EntryHeader { + /// `stream` must be a stream backed by either the .eh_frame or .debug_frame sections. 
+ pub fn read(stream: *std.io.FixedBufferStream([]const u8), dwarf_section: DwarfSection, endian: std.builtin.Endian) !EntryHeader { + assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); + const reader = stream.reader(); const length_offset = stream.pos; @@ -1967,14 +1975,21 @@ pub const EntryHeader = struct { const id_len = @as(u8, if (is_64) 8 else 4); const id = if (is_64) try reader.readInt(u64, endian) else try reader.readInt(u32, endian); const entry_bytes = stream.buffer[stream.pos..][0 .. length - id_len]; + const cie_id: u64 = switch (dwarf_section) { + .eh_frame => CommonInformationEntry.eh_id, + .debug_frame => if (is_64) CommonInformationEntry.dwarf64_id else CommonInformationEntry.dwarf32_id, + else => unreachable, + }; const result = EntryHeader{ .length_offset = length_offset, .is_64 = is_64, - .type = switch (id) { - 0 => .{ .cie = {} }, - // TODO: Support CommonInformationEntry.dwarf32_id, CommonInformationEntry.dwarf64_id - else => .{ .fde = stream.pos - id_len - id }, + .type = if (id == cie_id) .{ .cie = {} } else .{ + .fde = switch (dwarf_section) { + .eh_frame => stream.pos - id_len - id, + .debug_frame => id, + else => unreachable, + }, }, .entry_bytes = entry_bytes, }; @@ -2004,6 +2019,11 @@ pub const CommonInformationEntry = struct { length_offset: u64, version: u8, + address_size: u8, + + // Only present in version 4 + segment_selector_size: ?u8, + code_alignment_factor: u32, data_alignment_factor: i32, return_address_register: u8, @@ -2038,11 +2058,12 @@ pub const CommonInformationEntry = struct { /// of `pc_rel_offset` and `is_runtime`. /// /// `length_offset` specifies the offset of this CIE's length field in the - /// .eh_frame section. + /// .eh_frame / .debug_frame section. 
pub fn parse( cie_bytes: []const u8, pc_rel_offset: i64, is_runtime: bool, + dwarf_section: DwarfSection, length_offset: u64, addr_size_bytes: u8, endian: std.builtin.Endian, @@ -2053,7 +2074,11 @@ pub const CommonInformationEntry = struct { const reader = stream.reader(); const version = try reader.readByte(); - if (version != 1 and version != 3) return error.UnsupportedDwarfVersion; + switch (dwarf_section) { + .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, + .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, + else => return error.UnsupportedDwarfSection, + } var has_eh_data = false; var has_aug_data = false; @@ -2083,6 +2108,9 @@ pub const CommonInformationEntry = struct { for (0..addr_size_bytes) |_| _ = try reader.readByte(); } + const address_size = if (version == 4) try reader.readByte() else addr_size_bytes; + const segment_selector_size = if (version == 4) try reader.readByte() else null; + const code_alignment_factor = try leb.readULEB128(u32, reader); const data_alignment_factor = try leb.readILEB128(i32, reader); const return_address_register = if (version == 1) try reader.readByte() else try leb.readULEB128(u8, reader); @@ -2134,6 +2162,8 @@ pub const CommonInformationEntry = struct { return .{ .length_offset = length_offset, .version = version, + .address_size = address_size, + .segment_selector_size = segment_selector_size, .code_alignment_factor = code_alignment_factor, .data_alignment_factor = data_alignment_factor, .return_address_register = return_address_register, From 5cd8ab2473a4255f081f417eb3ec95e1a3a9e9d8 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 26 Jun 2023 00:59:28 -0400 Subject: [PATCH 31/81] debug: enhance writeCurrentStackTrace to use context-based unwinding when available --- lib/std/debug.zig | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 1ae26da2470d..5428d7a33b0e 100644 --- 
a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -137,7 +137,7 @@ pub const StackTraceContext = blk: { if (native_os == .windows) { break :blk @typeInfo(@TypeOf(os.windows.CONTEXT.getRegs)).Fn.return_type.?; } else if (@hasDecl(os.system, "ucontext_t")) { - break :blk *const os.ucontext_t; + break :blk os.ucontext_t; } else { break :blk void; } @@ -146,7 +146,7 @@ pub const StackTraceContext = blk: { /// Tries to print the stack trace starting from the supplied base pointer to stderr, /// unbuffered, and ignores any error returned. /// TODO multithreaded awareness -pub fn dumpStackTraceFromBase(context: StackTraceContext) void { +pub fn dumpStackTraceFromBase(context: *const StackTraceContext) void { nosuspend { if (comptime builtin.target.isWasm()) { if (native_os == .wasi) { @@ -413,6 +413,14 @@ pub fn writeStackTrace( } } +inline fn getContext(context: *StackTraceContext) bool { + if (native_os == .windows) { + @compileError("Syscall please!"); + } + + return @hasDecl(os.system, "getcontext") and os.system.getcontext(context) == 0; +} + pub const StackIterator = struct { // Skip every frame before this address is found. first_address: ?usize, @@ -423,7 +431,7 @@ pub const StackIterator = struct { // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). 
debug_info: ?*DebugInfo, dwarf_context: if (supports_context) DW.UnwindContext else void = undefined, - const supports_context = @hasDecl(os.system, "ucontext_t") and + pub const supports_context = @hasDecl(os.system, "ucontext_t") and (builtin.os.tag != .linux or switch (builtin.cpu.arch) { .mips, .mipsel, .mips64, .mips64el, .riscv64 => false, else => true, @@ -607,8 +615,10 @@ pub fn writeCurrentStackTrace( return writeCurrentStackTraceWindows(out_stream, debug_info, tty_config, start_addr); } - // TODO: Capture a context and use initWithContext - var it = StackIterator.init(start_addr, null); + var context: StackTraceContext = undefined; + var it = (if (getContext(&context)) blk: { + break :blk StackIterator.initWithContext(start_addr, debug_info, &context) catch null; + } else null) orelse StackIterator.init(start_addr, null); defer it.deinit(); while (it.next()) |return_address| { @@ -2163,7 +2173,7 @@ fn dumpSegfaultInfoWindows(info: *windows.EXCEPTION_POINTERS, msg: u8, label: ?[ else => unreachable, } catch os.abort(); - dumpStackTraceFromBase(regs); + dumpStackTraceFromBase(&regs); } pub fn dumpStackPointerAddr(prefix: []const u8) void { From 89ef004646896a145ec0607678882a395fabda3d Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 27 Jun 2023 02:02:49 -0400 Subject: [PATCH 32/81] debug: x86 unwinding support, more unwinding fixes - Fix unwindFrame using the previous FDE row instead of the current one - Handle unwinding through noreturn functions - Add x86-linux getcontext - Fixup x86_64-linux getcontext not restoring the fp env - Fix start_addr filtering on x86-windows --- lib/std/debug.zig | 67 ++++++++++++++++++++++++------------ lib/std/dwarf.zig | 19 +++++++--- lib/std/dwarf/call_frame.zig | 4 ++- lib/std/os/linux/x86.zig | 54 +++++++++++++++++++++++++++++ lib/std/os/linux/x86_64.zig | 1 + src/crash_report.zig | 11 +++--- 6 files changed, 123 insertions(+), 33 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 
5428d7a33b0e..2c3f93b55ea2 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -135,7 +135,7 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { pub const StackTraceContext = blk: { if (native_os == .windows) { - break :blk @typeInfo(@TypeOf(os.windows.CONTEXT.getRegs)).Fn.return_type.?; + break :blk std.os.windows.CONTEXT; } else if (@hasDecl(os.system, "ucontext_t")) { break :blk os.ucontext_t; } else { @@ -166,7 +166,14 @@ pub fn dumpStackTraceFromBase(context: *const StackTraceContext) void { }; const tty_config = io.tty.detectConfig(io.getStdErr()); if (native_os == .windows) { - writeCurrentStackTraceWindows(stderr, debug_info, tty_config, context.ip) catch return; + // On x86_64 and aarch64, the stack will be unwound using RtlVirtualUnwind using the context + // provided by the exception handler. On x86, RtlVirtualUnwind doesn't exist. Instead, a new backtrace + // will be captured and frames prior to the exception will be filtered. + // The caveat is that RtlCaptureStackBackTrace does not include the KiUserExceptionDispatcher frame, + // which is where the IP in `context` points to, so it can't be used as start_addr. + // Instead, start_addr is recovered from the stack. + const start_addr = if (builtin.cpu.arch == .x86) @as(*const usize, @ptrFromInt(context.getRegs().bp + 4)).* else null; + writeStackTraceWindows(stderr, debug_info, tty_config, context, start_addr) catch return; return; } @@ -196,12 +203,12 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT if (native_os == .windows) { const addrs = stack_trace.instruction_addresses; const first_addr = first_address orelse { - stack_trace.index = walkStackWindows(addrs[0..]); + stack_trace.index = walkStackWindows(addrs[0..], null); return; }; var addr_buf_stack: [32]usize = undefined; const addr_buf = if (addr_buf_stack.len > addrs.len) addr_buf_stack[0..] 
else addrs; - const n = walkStackWindows(addr_buf[0..]); + const n = walkStackWindows(addr_buf[0..], null); const first_index = for (addr_buf[0..n], 0..) |addr, i| { if (addr == first_addr) { break i; @@ -218,7 +225,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT } stack_trace.index = slice.len; } else { - // TODO: This should use the dwarf unwinder if it's available + // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required) var it = StackIterator.init(first_address, null); defer it.deinit(); for (stack_trace.instruction_addresses, 0..) |*addr, i| { @@ -415,10 +422,18 @@ pub fn writeStackTrace( inline fn getContext(context: *StackTraceContext) bool { if (native_os == .windows) { - @compileError("Syscall please!"); + context.* = std.mem.zeroes(windows.CONTEXT); + windows.ntdll.RtlCaptureContext(context); + return true; } - return @hasDecl(os.system, "getcontext") and os.system.getcontext(context) == 0; + const supports_getcontext = @hasDecl(os.system, "getcontext") and + (builtin.os.tag != .linux or switch (builtin.cpu.arch) { + .x86, .x86_64 => true, + else => false, + }); + + return supports_getcontext and os.system.getcontext(context) == 0; } pub const StackIterator = struct { @@ -431,6 +446,7 @@ pub const StackIterator = struct { // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). 
debug_info: ?*DebugInfo, dwarf_context: if (supports_context) DW.UnwindContext else void = undefined, + pub const supports_context = @hasDecl(os.system, "ucontext_t") and (builtin.os.tag != .linux or switch (builtin.cpu.arch) { .mips, .mipsel, .mips64, .mips64el, .riscv64 => false, @@ -548,19 +564,20 @@ pub const StackIterator = struct { } } - fn next_dwarf(self: *StackIterator) !void { + fn next_dwarf(self: *StackIterator) !usize { const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); if (try module.getDwarfInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc)) |di| { self.dwarf_context.reg_ctx.eh_frame = true; self.dwarf_context.reg_ctx.is_macho = di.is_macho; - try di.unwindFrame(self.debug_info.?.allocator, &self.dwarf_context, module.base_address); + return di.unwindFrame(self.debug_info.?.allocator, &self.dwarf_context, module.base_address); } else return error.MissingDebugInfo; } fn next_internal(self: *StackIterator) ?usize { if (supports_context and self.debug_info != null) { - if (self.next_dwarf()) |_| { - return self.dwarf_context.pc; + if (self.dwarf_context.pc == 0) return null; + if (self.next_dwarf()) |return_address| { + return return_address; } else |err| { if (err != error.MissingFDE) print("DWARF unwind error: {}\n", .{err}); @@ -611,12 +628,13 @@ pub fn writeCurrentStackTrace( tty_config: io.tty.Config, start_addr: ?usize, ) !void { + var context: StackTraceContext = undefined; + const has_context = getContext(&context); if (native_os == .windows) { - return writeCurrentStackTraceWindows(out_stream, debug_info, tty_config, start_addr); + return writeStackTraceWindows(out_stream, debug_info, tty_config, &context, start_addr); } - var context: StackTraceContext = undefined; - var it = (if (getContext(&context)) blk: { + var it = (if (has_context) blk: { break :blk StackIterator.initWithContext(start_addr, debug_info, &context) catch null; } else null) orelse StackIterator.init(start_addr, null); defer 
it.deinit(); @@ -632,7 +650,7 @@ pub fn writeCurrentStackTrace( } } -pub noinline fn walkStackWindows(addresses: []usize) usize { +pub noinline fn walkStackWindows(addresses: []usize, existing_context: ?*const windows.CONTEXT) usize { if (builtin.cpu.arch == .x86) { // RtlVirtualUnwind doesn't exist on x86 return windows.ntdll.RtlCaptureStackBackTrace(0, addresses.len, @as(**anyopaque, @ptrCast(addresses.ptr)), null); @@ -640,8 +658,13 @@ pub noinline fn walkStackWindows(addresses: []usize) usize { const tib = @as(*const windows.NT_TIB, @ptrCast(&windows.teb().Reserved1)); - var context: windows.CONTEXT = std.mem.zeroes(windows.CONTEXT); - windows.ntdll.RtlCaptureContext(&context); + var context: windows.CONTEXT = undefined; + if (existing_context) |context_ptr| { + context = context_ptr.*; + } else { + context = std.mem.zeroes(windows.CONTEXT); + windows.ntdll.RtlCaptureContext(&context); + } var i: usize = 0; var image_base: usize = undefined; @@ -683,14 +706,15 @@ pub noinline fn walkStackWindows(addresses: []usize) usize { return i; } -pub fn writeCurrentStackTraceWindows( +pub fn writeStackTraceWindows( out_stream: anytype, debug_info: *DebugInfo, tty_config: io.tty.Config, + context: *const windows.CONTEXT, start_addr: ?usize, ) !void { var addr_buf: [1024]usize = undefined; - const n = walkStackWindows(addr_buf[0..]); + const n = walkStackWindows(addr_buf[0..], context); const addrs = addr_buf[0..n]; var start_i: usize = if (start_addr) |saddr| blk: { for (addrs, 0..) 
|addr, i| { @@ -2164,16 +2188,15 @@ fn handleSegfaultWindowsExtra( } fn dumpSegfaultInfoWindows(info: *windows.EXCEPTION_POINTERS, msg: u8, label: ?[]const u8) void { - const regs = info.ContextRecord.getRegs(); const stderr = io.getStdErr().writer(); _ = switch (msg) { 0 => stderr.print("{s}\n", .{label.?}), 1 => stderr.print("Segmentation fault at address 0x{x}\n", .{info.ExceptionRecord.ExceptionInformation[1]}), - 2 => stderr.print("Illegal instruction at address 0x{x}\n", .{regs.ip}), + 2 => stderr.print("Illegal instruction at address 0x{x}\n", .{info.ContextRecord.getRegs().ip}), else => unreachable, } catch os.abort(); - dumpStackTraceFromBase(&regs); + dumpStackTraceFromBase(info.ContextRecord); } pub fn dumpStackPointerAddr(prefix: []const u8) void { diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index f15287619189..06714aad2fb7 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1577,9 +1577,9 @@ pub const DwarfInfo = struct { } } - pub fn unwindFrame(di: *const DwarfInfo, allocator: mem.Allocator, context: *UnwindContext, module_base_address: usize) !void { + pub fn unwindFrame(di: *const DwarfInfo, allocator: mem.Allocator, context: *UnwindContext, module_base_address: usize) !usize { if (!comptime abi.isSupportedArch(builtin.target.cpu.arch)) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return; + if (context.pc == 0) return 0; // TODO: Handle unwinding from a signal frame (ie. 
use_prev_instr in libunwind) @@ -1626,8 +1626,11 @@ pub const DwarfInfo = struct { } context.vm.reset(); + context.reg_ctx.eh_frame = cie.version != 4; + + _ = try context.vm.runToNative(allocator, mapped_pc, cie, fde); + const row = &context.vm.current_row; - const row = try context.vm.runToNative(allocator, mapped_pc, cie, fde); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; @@ -1650,7 +1653,7 @@ pub const DwarfInfo = struct { var next_ucontext = context.ucontext; var has_next_ip = false; - for (context.vm.rowColumns(row)) |column| { + for (context.vm.rowColumns(row.*)) |column| { if (column.register) |register| { const dest = try abi.regBytes(&next_ucontext, register, context.reg_ctx); if (register == cie.return_address_register) { @@ -1670,6 +1673,14 @@ pub const DwarfInfo = struct { } mem.writeIntSliceNative(usize, try abi.regBytes(&context.ucontext, abi.spRegNum(context.reg_ctx), context.reg_ctx), context.cfa.?); + + // The call instruction will have pushed the address of the instruction that follows the call as the return address + // However, this return address may be past the end of the function if the caller was `noreturn`. + // TODO: Check this on non-x86_64 + const return_address = context.pc; + if (context.pc > 0) context.pc -= 1; + + return return_address; } }; diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 460d157bf9ef..f512d7a909bc 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -386,7 +386,9 @@ pub const VirtualMachine = struct { } /// Runs the CIE instructions, then the FDE instructions. Execution halts - /// once the row that corresponds to `pc` is known, and it is returned. + /// once the row that corresponds to `pc` is known (and set as `current_row`). + /// + /// The state of the row prior to the last execution step is returned. 
pub fn runTo( self: *VirtualMachine, allocator: std.mem.Allocator, diff --git a/lib/std/os/linux/x86.zig b/lib/std/os/linux/x86.zig index 05c012c77cdb..81a3bac92b86 100644 --- a/lib/std/os/linux/x86.zig +++ b/lib/std/os/linux/x86.zig @@ -389,3 +389,57 @@ pub const SC = struct { pub const recvmmsg = 19; pub const sendmmsg = 20; }; + +fn gpRegisterOffset(comptime reg_index: comptime_int) usize { + return @offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "gregs") + @sizeOf(usize) * reg_index; +} + +pub inline fn getcontext(context: *ucontext_t) usize { + asm volatile ( + \\ movl %%edi, (%[edi_offset])(%[context]) + \\ movl %%esi, (%[esi_offset])(%[context]) + \\ movl %%ebp, (%[ebp_offset])(%[context]) + \\ movl %%esp, (%[esp_offset])(%[context]) + \\ movl %%ebx, (%[ebx_offset])(%[context]) + \\ movl %%edx, (%[edx_offset])(%[context]) + \\ movl %%ecx, (%[ecx_offset])(%[context]) + \\ movl %%eax, (%[eax_offset])(%[context]) + \\ xorl %%ecx, %%ecx + \\ movw %%fs, %%cx + \\ movl %%ecx, (%[fs_offset])(%[context]) + \\ leal (%[regspace_offset])(%[context]), %%ecx + \\ movl %%ecx, (%[fpregs_offset])(%[context]) + \\ fnstenv (%%ecx) + \\ fldenv (%%ecx) + \\ call getcontext_read_eip + \\ getcontext_read_eip: pop %%ecx + \\ movl %%ecx, (%[eip_offset])(%[context]) + : + : [context] "{edi}" (context), + [edi_offset] "p" (comptime gpRegisterOffset(REG.EDI)), + [esi_offset] "p" (comptime gpRegisterOffset(REG.ESI)), + [ebp_offset] "p" (comptime gpRegisterOffset(REG.EBP)), + [esp_offset] "p" (comptime gpRegisterOffset(REG.ESP)), + [ebx_offset] "p" (comptime gpRegisterOffset(REG.EBX)), + [edx_offset] "p" (comptime gpRegisterOffset(REG.EDX)), + [ecx_offset] "p" (comptime gpRegisterOffset(REG.ECX)), + [eax_offset] "p" (comptime gpRegisterOffset(REG.EAX)), + [eip_offset] "p" (comptime gpRegisterOffset(REG.EIP)), + [fs_offset] "p" (comptime gpRegisterOffset(REG.FS)), + [fpregs_offset] "p" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")), + [regspace_offset] 
"p" (@offsetOf(ucontext_t, "regspace")), + : "memory", "ecx" + ); + + // TODO: Read CS/SS registers? + // TODO: Store mxcsr state, need an actual definition of fpstate for that + + // TODO: `flags` isn't present in the getcontext man page, figure out what to write here + context.flags = 0; + context.link = null; + + const altstack_result = linux.sigaltstack(null, &context.stack); + if (altstack_result != 0) return altstack_result; + + return linux.sigprocmask(0, null, &context.sigmask); +} diff --git a/lib/std/os/linux/x86_64.zig b/lib/std/os/linux/x86_64.zig index 0beb70e69126..786a95c7a596 100644 --- a/lib/std/os/linux/x86_64.zig +++ b/lib/std/os/linux/x86_64.zig @@ -425,6 +425,7 @@ pub inline fn getcontext(context: *ucontext_t) usize { \\ leaq (%[fpmem_offset])(%[context]), %%rcx \\ movq %%rcx, (%[fpstate_offset])(%[context]) \\ fnstenv (%%rcx) + \\ fldenv (%%rcx) \\ stmxcsr (%[mxcsr_offset])(%[context]) : : [context] "{rdi}" (context), diff --git a/src/crash_report.zig b/src/crash_report.zig index 83c5af7ba077..f09fce14f9a2 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -233,10 +233,9 @@ fn handleSegfaultWindows(info: *os.windows.EXCEPTION_POINTERS) callconv(os.windo fn handleSegfaultWindowsExtra(info: *os.windows.EXCEPTION_POINTERS, comptime msg: WindowsSegfaultMessage) noreturn { PanicSwitch.preDispatch(); - const stack_ctx = if (@hasDecl(os.windows, "CONTEXT")) ctx: { - const regs = info.ContextRecord.getRegs(); - break :ctx StackContext{ .exception = regs }; - } else ctx: { + const stack_ctx = if (@hasDecl(os.windows, "CONTEXT")) + StackContext{ .exception = info.ContextRecord } + else ctx: { const addr = @intFromPtr(info.ExceptionRecord.ExceptionAddress); break :ctx StackContext{ .current = .{ .ret_addr = addr } }; }; @@ -251,7 +250,7 @@ fn handleSegfaultWindowsExtra(info: *os.windows.EXCEPTION_POINTERS, comptime msg }, .illegal_instruction => { const ip: ?usize = switch (stack_ctx) { - .exception => |ex| ex.ip, + .exception => |ex| 
ex.getRegs().ip, .current => |cur| cur.ret_addr, .not_supported => null, }; @@ -272,7 +271,7 @@ const StackContext = union(enum) { current: struct { ret_addr: ?usize, }, - exception: debug.StackTraceContext, + exception: *const debug.StackTraceContext, not_supported: void, pub fn dumpStackTrace(ctx: @This()) void { From caa334712fc8f540349e54fa3008aa3fdef64e13 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 27 Jun 2023 23:53:05 -0400 Subject: [PATCH 33/81] linux: rework getcontext to closer match the specification (saved IP/SP match the state after it would return) debug: fixup ucontext_t check --- lib/std/debug.zig | 2 +- lib/std/os/linux/x86.zig | 85 ++++++++++++++++++++++----------- lib/std/os/linux/x86_64.zig | 95 +++++++++++++++++++++++-------------- 3 files changed, 118 insertions(+), 64 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 2c3f93b55ea2..7b74be24af68 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -136,7 +136,7 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { pub const StackTraceContext = blk: { if (native_os == .windows) { break :blk std.os.windows.CONTEXT; - } else if (@hasDecl(os.system, "ucontext_t")) { + } else if (StackIterator.supports_context) { break :blk os.ucontext_t; } else { break :blk void; diff --git a/lib/std/os/linux/x86.zig b/lib/std/os/linux/x86.zig index 81a3bac92b86..e5d75c1831eb 100644 --- a/lib/std/os/linux/x86.zig +++ b/lib/std/os/linux/x86.zig @@ -394,28 +394,51 @@ fn gpRegisterOffset(comptime reg_index: comptime_int) usize { return @offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "gregs") + @sizeOf(usize) * reg_index; } -pub inline fn getcontext(context: *ucontext_t) usize { +noinline fn getContextReturnAddress() usize { + return @returnAddress(); +} + +pub fn getContextInternal() callconv(.Naked) void { asm volatile ( - \\ movl %%edi, (%[edi_offset])(%[context]) - \\ movl %%esi, (%[esi_offset])(%[context]) - \\ movl %%ebp, (%[ebp_offset])(%[context]) - \\ movl %%esp, 
(%[esp_offset])(%[context]) - \\ movl %%ebx, (%[ebx_offset])(%[context]) - \\ movl %%edx, (%[edx_offset])(%[context]) - \\ movl %%ecx, (%[ecx_offset])(%[context]) - \\ movl %%eax, (%[eax_offset])(%[context]) + \\ movl $0, (%[flags_offset])(%%edx) + \\ movl $0, (%[link_offset])(%%edx) + \\ movl %%edi, (%[edi_offset])(%%edx) + \\ movl %%esi, (%[esi_offset])(%%edx) + \\ movl %%ebp, (%[ebp_offset])(%%edx) + \\ movl %%ebx, (%[ebx_offset])(%%edx) + \\ movl %%edx, (%[edx_offset])(%%edx) + \\ movl %%ecx, (%[ecx_offset])(%%edx) + \\ movl %%eax, (%[eax_offset])(%%edx) + \\ movl (%%esp), %%ecx + \\ movl %%ecx, (%[eip_offset])(%%edx) + \\ leal 4(%%esp), %%ecx + \\ movl %%ecx, (%[esp_offset])(%%edx) \\ xorl %%ecx, %%ecx \\ movw %%fs, %%cx - \\ movl %%ecx, (%[fs_offset])(%[context]) - \\ leal (%[regspace_offset])(%[context]), %%ecx - \\ movl %%ecx, (%[fpregs_offset])(%[context]) + \\ movl %%ecx, (%[fs_offset])(%%edx) + \\ leal (%[regspace_offset])(%%edx), %%ecx + \\ movl %%ecx, (%[fpregs_offset])(%%edx) \\ fnstenv (%%ecx) \\ fldenv (%%ecx) - \\ call getcontext_read_eip - \\ getcontext_read_eip: pop %%ecx - \\ movl %%ecx, (%[eip_offset])(%[context]) + \\ pushl %%ebx + \\ pushl %%esi + \\ xorl %%ebx, %%ebx + \\ movl %[sigaltstack], %%eax + \\ leal (%[stack_offset])(%%edx), %%ecx + \\ int $0x80 + \\ cmpl $0, %%eax + \\ jne return + \\ movl %[sigprocmask], %%eax + \\ xorl %%ecx, %%ecx + \\ leal (%[sigmask_offset])(%%edx), %%edx + \\ movl %[sigset_size], %%esi + \\ int $0x80 + \\ return: + \\ popl %%esi + \\ popl %%ebx : - : [context] "{edi}" (context), + : [flags_offset] "p" (@offsetOf(ucontext_t, "flags")), + [link_offset] "p" (@offsetOf(ucontext_t, "link")), [edi_offset] "p" (comptime gpRegisterOffset(REG.EDI)), [esi_offset] "p" (comptime gpRegisterOffset(REG.ESI)), [ebp_offset] "p" (comptime gpRegisterOffset(REG.EBP)), @@ -428,18 +451,24 @@ pub inline fn getcontext(context: *ucontext_t) usize { [fs_offset] "p" (comptime gpRegisterOffset(REG.FS)), [fpregs_offset] "p" 
(@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")), [regspace_offset] "p" (@offsetOf(ucontext_t, "regspace")), - : "memory", "ecx" + [sigaltstack] "i" (@intFromEnum(linux.SYS.sigaltstack)), + [stack_offset] "p" (@offsetOf(ucontext_t, "stack")), + [sigprocmask] "i" (@intFromEnum(linux.SYS.rt_sigprocmask)), + [sigmask_offset] "p" (@offsetOf(ucontext_t, "sigmask")), + [sigset_size] "i" (linux.NSIG / 8), + : "memory", "eax", "ecx", "edx" ); +} - // TODO: Read CS/SS registers? - // TODO: Store mxcsr state, need an actual definition of fpstate for that - - // TODO: `flags` isn't present in the getcontext man page, figure out what to write here - context.flags = 0; - context.link = null; - - const altstack_result = linux.sigaltstack(null, &context.stack); - if (altstack_result != 0) return altstack_result; - - return linux.sigprocmask(0, null, &context.sigmask); +pub inline fn getcontext(context: *ucontext_t) usize { + // This method is used so that getContextInternal can control + // its prologue in order to read ESP from a constant offset. + // The unused &getContextInternal input is required so the function is included in the binary. + return asm volatile ( + \\ call os.linux.x86.getContextInternal + : [ret] "={eax}" (-> usize), + : [context] "{edx}" (context), + [getContextInternal] "X" (&getContextInternal), + : "memory", "ecx" + ); } diff --git a/lib/std/os/linux/x86_64.zig b/lib/std/os/linux/x86_64.zig index 786a95c7a596..e5febce14d30 100644 --- a/lib/std/os/linux/x86_64.zig +++ b/lib/std/os/linux/x86_64.zig @@ -400,35 +400,53 @@ fn gpRegisterOffset(comptime reg_index: comptime_int) usize { return @offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "gregs") + @sizeOf(usize) * reg_index; } -pub inline fn getcontext(context: *ucontext_t) usize { +fn getContextInternal() callconv(.Naked) void { + // TODO: Read GS/FS registers? 
asm volatile ( - \\ movq %%r8, (%[r8_offset])(%[context]) - \\ movq %%r9, (%[r9_offset])(%[context]) - \\ movq %%r10, (%[r10_offset])(%[context]) - \\ movq %%r11, (%[r11_offset])(%[context]) - \\ movq %%r12, (%[r12_offset])(%[context]) - \\ movq %%r13, (%[r13_offset])(%[context]) - \\ movq %%r14, (%[r14_offset])(%[context]) - \\ movq %%r15, (%[r15_offset])(%[context]) - \\ movq %%rdi, (%[rdi_offset])(%[context]) - \\ movq %%rsi, (%[rsi_offset])(%[context]) - \\ movq %%rbp, (%[rbp_offset])(%[context]) - \\ movq %%rbx, (%[rbx_offset])(%[context]) - \\ movq %%rdx, (%[rdx_offset])(%[context]) - \\ movq %%rax, (%[rax_offset])(%[context]) - \\ movq %%rcx, (%[rcx_offset])(%[context]) - \\ movq %%rsp, (%[rsp_offset])(%[context]) - \\ leaq (%%rip), %%rcx - \\ movq %%rcx, (%[rip_offset])(%[context]) + \\ movq $0, (%[flags_offset])(%%rdi) + \\ movq $0, (%[link_offset])(%%rdi) + \\ movq %%r8, (%[r8_offset])(%%rdi) + \\ movq %%r9, (%[r9_offset])(%%rdi) + \\ movq %%r10, (%[r10_offset])(%%rdi) + \\ movq %%r11, (%[r11_offset])(%%rdi) + \\ movq %%r12, (%[r12_offset])(%%rdi) + \\ movq %%r13, (%[r13_offset])(%%rdi) + \\ movq %%r14, (%[r14_offset])(%%rdi) + \\ movq %%r15, (%[r15_offset])(%%rdi) + \\ movq %%rdi, (%[rdi_offset])(%%rdi) + \\ movq %%rsi, (%[rsi_offset])(%%rdi) + \\ movq %%rbp, (%[rbp_offset])(%%rdi) + \\ movq %%rbx, (%[rbx_offset])(%%rdi) + \\ movq %%rdx, (%[rdx_offset])(%%rdi) + \\ movq %%rax, (%[rax_offset])(%%rdi) + \\ movq %%rcx, (%[rcx_offset])(%%rdi) + \\ movq (%%rsp), %%rcx + \\ movq %%rcx, (%[rip_offset])(%%rdi) + \\ leaq 8(%%rsp), %%rcx + \\ movq %%rcx, (%[rsp_offset])(%%rdi) \\ pushfq - \\ popq (%[efl_offset])(%[context]) - \\ leaq (%[fpmem_offset])(%[context]), %%rcx - \\ movq %%rcx, (%[fpstate_offset])(%[context]) + \\ popq (%[efl_offset])(%%rdi) + \\ leaq (%[fpmem_offset])(%%rdi), %%rcx + \\ movq %%rcx, (%[fpstate_offset])(%%rdi) \\ fnstenv (%%rcx) \\ fldenv (%%rcx) - \\ stmxcsr (%[mxcsr_offset])(%[context]) + \\ stmxcsr (%[mxcsr_offset])(%%rdi) + \\ leaq 
(%[stack_offset])(%%rdi), %%rsi + \\ movq %%rdi, %%r8 + \\ xorq %%rdi, %%rdi + \\ movq %[sigaltstack], %%rax + \\ syscall + \\ cmpq $0, %%rax + \\ jne return + \\ movq %[sigprocmask], %%rax + \\ xorq %%rsi, %%rsi + \\ leaq (%[sigmask_offset])(%%r8), %%rdx + \\ movq %[sigset_size], %%r10 + \\ syscall + \\ return: : - : [context] "{rdi}" (context), + : [flags_offset] "p" (@offsetOf(ucontext_t, "flags")), + [link_offset] "p" (@offsetOf(ucontext_t, "link")), [r8_offset] "p" (comptime gpRegisterOffset(REG.R8)), [r9_offset] "p" (comptime gpRegisterOffset(REG.R9)), [r10_offset] "p" (comptime gpRegisterOffset(REG.R10)), @@ -450,17 +468,24 @@ pub inline fn getcontext(context: *ucontext_t) usize { [fpstate_offset] "p" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")), [fpmem_offset] "p" (@offsetOf(ucontext_t, "fpregs_mem")), [mxcsr_offset] "p" (@offsetOf(ucontext_t, "fpregs_mem") + @offsetOf(fpstate, "mxcsr")), - : "memory", "rcx" + [sigaltstack] "i" (@intFromEnum(linux.SYS.sigaltstack)), + [stack_offset] "p" (@offsetOf(ucontext_t, "stack")), + [sigprocmask] "i" (@intFromEnum(linux.SYS.rt_sigprocmask)), + [sigmask_offset] "p" (@offsetOf(ucontext_t, "sigmask")), + [sigset_size] "i" (linux.NSIG / 8), + : "memory", "rcx", "rdx", "rdi", "rsi", "r8", "r10", "r11" ); +} - // TODO: Read GS/FS registers? - - // TODO: `flags` isn't present in the getcontext man page, figure out what to write here - context.flags = 0; - context.link = null; - - const altstack_result = linux.sigaltstack(null, &context.stack); - if (altstack_result != 0) return altstack_result; - - return linux.sigprocmask(0, null, &context.sigmask); +pub inline fn getcontext(context: *ucontext_t) usize { + // This method is used so that getContextInternal can control + // its prologue in order to read RSP from a constant offset + // The unused &getContextInternal input is required so the function is included in the binary. 
+ return asm volatile ( + \\ call os.linux.x86_64.getContextInternal + : [ret] "={rax}" (-> usize), + : [context] "{rdi}" (context), + [getContextInternal] "X" (&getContextInternal), + : "memory", "rcx", "rdx", "rdi", "rsi", "r8", "r10", "r11" + ); } From f991b9dc05706613839743f970a32d516085f182 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 28 Jun 2023 20:37:45 -0400 Subject: [PATCH 34/81] debug: fix reading -gdwarf generated debug sections in COFF files I had accidentally regressed support for -gdwarf in 461fb499f3cff9038a427eae120fb34defc9ab38 when I changed the logic to use the already-mapped exe/dll image instead of loading it from disk. The string table is mapped as all zeroes by the loader, so if a section header's name is longer than 8 bytes (like the ones generated by -gdwarf), then the name can't be read. Now, if any section headers require the string table, the file is mapped from disk. windows: Add NtCreateSection/NtMapViewOfSection/NtUnmapViewOfSection --- lib/std/coff.zig | 5 ++ lib/std/debug.zig | 124 ++++++++++++++++++++++++++++++----- lib/std/os/windows.zig | 29 ++++++++ lib/std/os/windows/ntdll.zig | 26 ++++++++ 4 files changed, 169 insertions(+), 15 deletions(-) diff --git a/lib/std/coff.zig b/lib/std/coff.zig index a08c2c514d18..706f888fbc60 100644 --- a/lib/std/coff.zig +++ b/lib/std/coff.zig @@ -1214,6 +1214,11 @@ pub const Coff = struct { return Strtab{ .buffer = self.data[offset..][0..size] }; } + pub fn strtabRequired(self: *const Coff) bool { + for (self.getSectionHeaders()) |*sect_hdr| if (sect_hdr.getName() == null) return true; + return false; + } + pub fn getSectionHeaders(self: *const Coff) []align(1) const SectionHeader { const coff_header = self.getCoffHeader(); const offset = self.coff_header_offset + @sizeOf(CoffHeader) + coff_header.size_of_optional_header; diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 7b74be24af68..ddef223a02f9 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -887,12 +887,8 @@ pub fn 
openSelfDebugInfo(allocator: mem.Allocator) OpenSelfDebugInfoError!DebugI } } -fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDebugInfo { +fn readCoffDebugInfo(allocator: mem.Allocator, coff_obj: *coff.Coff) !ModuleDebugInfo { nosuspend { - const coff_obj = try allocator.create(coff.Coff); - defer allocator.destroy(coff_obj); - coff_obj.* = try coff.Coff.init(coff_bytes); - var di = ModuleDebugInfo{ .base_address = undefined, .coff_image_base = coff_obj.getImageBase(), @@ -908,9 +904,14 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { - sections[i] = .{ - .data = try coff_obj.getSectionDataAlloc("." ++ section.name, allocator), - .owned = true, + sections[i] = if (coff_obj.getSectionDataAlloc("." ++ section.name, allocator)) |data| blk: { + break :blk .{ + .data = data, + .owned = true, + }; + } else |err| blk: { + if (err == error.MissingCoffSection) break :blk null; + return err; }; } @@ -920,7 +921,7 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe .is_macho = false, }; - try DW.openDwarfDebugInfo(&dwarf, allocator, coff_bytes); + try DW.openDwarfDebugInfo(&dwarf, allocator, coff_obj.data); di.debug_data = PdbOrDwarf{ .dwarf = dwarf }; return di; } @@ -1358,6 +1359,21 @@ pub const WindowsModuleInfo = struct { base_address: usize, size: u32, name: []const u8, + handle: windows.HMODULE, + + // Set when the image file needed to be mapped from disk + mapped_file: ?struct { + file: File, + section_handle: windows.HANDLE, + section_view: []const u8, + + pub fn deinit(self: @This()) void { + const process_handle = windows.kernel32.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(@ptrCast(self.section_view.ptr))) == .SUCCESS); + windows.CloseHandle(self.section_handle); + 
self.file.close(); + } + } = null, }; pub const DebugInfo = struct { @@ -1373,6 +1389,8 @@ pub const DebugInfo = struct { }; if (native_os == .windows) { + errdefer debug_info.modules.deinit(allocator); + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); if (handle == windows.INVALID_HANDLE_VALUE) { switch (windows.kernel32.GetLastError()) { @@ -1390,9 +1408,16 @@ pub const DebugInfo = struct { var module_valid = true; while (module_valid) { const module_info = try debug_info.modules.addOne(allocator); - module_info.base_address = @intFromPtr(module_entry.modBaseAddr); - module_info.size = module_entry.modBaseSize; - module_info.name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; + const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; + errdefer allocator.free(name); + + module_info.* = .{ + .base_address = @intFromPtr(module_entry.modBaseAddr), + .size = module_entry.modBaseSize, + .name = name, + .handle = module_entry.hModule, + }; + module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; } } @@ -1411,6 +1436,7 @@ pub const DebugInfo = struct { if (native_os == .windows) { for (self.modules.items) |module| { self.allocator.free(module.name); + if (module.mapped_file) |mapped_file| mapped_file.deinit(); } self.modules.deinit(self.allocator); } @@ -1500,17 +1526,85 @@ pub const DebugInfo = struct { } fn lookupModuleWin32(self: *DebugInfo, address: usize) !*ModuleDebugInfo { - for (self.modules.items) |module| { + for (self.modules.items) |*module| { if (address >= module.base_address and address < module.base_address + module.size) { if (self.address_map.get(module.base_address)) |obj_di| { return obj_di; } - const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; const obj_di = try self.allocator.create(ModuleDebugInfo); errdefer self.allocator.destroy(obj_di); - obj_di.* = try 
readCoffDebugInfo(self.allocator, mapped_module); + const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; + var coff_obj = try coff.Coff.init(mapped_module); + + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. + if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + // openFileAbsoluteW requires the prefix to be present + mem.copy(u16, name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); + + const process_handle = windows.kernel32.GetCurrentProcess(); + const len = windows.kernel32.K32GetModuleFileNameExW( + process_handle, + module.handle, + @ptrCast(&name_buffer[4]), + windows.PATH_MAX_WIDE, + ); + + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + errdefer coff_file.close(); + + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + &section_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6.
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + + var coff_len: usize = 0; + var base_ptr: usize = 0; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(&base_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); + + const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; + coff_obj = try coff.Coff.init(section_view); + + module.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); + + obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); obj_di.base_address = module.base_address; try self.address_map.putNoClobber(module.base_address, obj_di); diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 1337efdd341d..9f8aa326a99b 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -3301,6 +3301,35 @@ pub const REGSAM = ACCESS_MASK; pub const ACCESS_MASK = DWORD; pub const LSTATUS = LONG; +pub const SECTION_INHERIT = enum(c_int) { + ViewShare = 0, + ViewUnmap = 1, +}; + +pub const SECTION_QUERY = 0x0001; +pub const SECTION_MAP_WRITE = 0x0002; +pub const SECTION_MAP_READ = 0x0004; +pub const SECTION_MAP_EXECUTE = 0x0008; +pub const SECTION_EXTEND_SIZE = 0x0010; +pub const SECTION_ALL_ACCESS = + STANDARD_RIGHTS_REQUIRED | + SECTION_QUERY | + SECTION_MAP_WRITE | + SECTION_MAP_READ | + SECTION_MAP_EXECUTE | + SECTION_EXTEND_SIZE; + +pub const SEC_64K_PAGES = 0x80000; +pub const SEC_FILE = 0x800000; +pub const SEC_IMAGE = 0x1000000; +pub const SEC_PROTECTED_IMAGE = 0x2000000; +pub const SEC_RESERVE = 0x4000000; +pub const 
SEC_COMMIT = 0x8000000; +pub const SEC_IMAGE_NO_EXECUTE = SEC_IMAGE | SEC_NOCACHE; +pub const SEC_NOCACHE = 0x10000000; +pub const SEC_WRITECOMBINE = 0x40000000; +pub const SEC_LARGE_PAGES = 0x80000000; + pub const HKEY = *opaque {}; pub const HKEY_LOCAL_MACHINE: HKEY = @as(HKEY, @ptrFromInt(0x80000002)); diff --git a/lib/std/os/windows/ntdll.zig b/lib/std/os/windows/ntdll.zig index 328ecb80f562..8c14f1fad9ed 100644 --- a/lib/std/os/windows/ntdll.zig +++ b/lib/std/os/windows/ntdll.zig @@ -36,6 +36,7 @@ const THREADINFOCLASS = windows.THREADINFOCLASS; const PROCESSINFOCLASS = windows.PROCESSINFOCLASS; const LPVOID = windows.LPVOID; const LPCVOID = windows.LPCVOID; +const SECTION_INHERIT = windows.SECTION_INHERIT; pub extern "ntdll" fn NtQueryInformationProcess( ProcessHandle: HANDLE, @@ -125,6 +126,31 @@ pub extern "ntdll" fn NtCreateFile( EaBuffer: ?*anyopaque, EaLength: ULONG, ) callconv(WINAPI) NTSTATUS; +pub extern "ntdll" fn NtCreateSection( + SectionHandle: *HANDLE, + DesiredAccess: ACCESS_MASK, + ObjectAttributes: ?*OBJECT_ATTRIBUTES, + MaximumSize: ?*LARGE_INTEGER, + SectionPageProtection: ULONG, + AllocationAttributes: ULONG, + FileHandle: ?HANDLE, +) callconv(WINAPI) NTSTATUS; +pub extern "ntdll" fn NtMapViewOfSection( + SectionHandle: HANDLE, + ProcessHandle: HANDLE, + BaseAddress: *PVOID, + ZeroBits: ?*ULONG, + CommitSize: SIZE_T, + SectionOffset: ?*LARGE_INTEGER, + ViewSize: *SIZE_T, + InheritDispostion: SECTION_INHERIT, + AllocationType: ULONG, + Win32Protect: ULONG, +) callconv(WINAPI) NTSTATUS; +pub extern "ntdll" fn NtUnmapViewOfSection( + ProcessHandle: HANDLE, + BaseAddress: PVOID, +) callconv(WINAPI) NTSTATUS; pub extern "ntdll" fn NtDeviceIoControlFile( FileHandle: HANDLE, Event: ?HANDLE, From 23d9b59b868e7f256668535adf9803facf75e538 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 1 Jul 2023 01:39:29 -0400 Subject: [PATCH 35/81] c: add getcontext debug: make getContext public --- lib/std/c.zig | 2 ++ lib/std/debug.zig | 2 +- 2 files changed, 
3 insertions(+), 1 deletion(-) diff --git a/lib/std/c.zig b/lib/std/c.zig index 149f3ab7e199..94b7f576574a 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -413,6 +413,8 @@ pub extern "c" fn timer_delete(timerid: c.timer_t) c_int; pub extern "c" fn timer_settime(timerid: c.timer_t, flags: c_int, new_value: *const c.itimerspec, old_value: *c.itimerspec) c_int; pub extern "c" fn timer_gettime(timerid: c.timer_t, flags: c_int, curr_value: *c.itimerspec) c_int; +pub extern "c" fn getcontext(ucp: *std.os.ucontext_t) c_int; + pub const max_align_t = if (builtin.abi == .msvc) f64 else if (builtin.target.isDarwin()) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index ddef223a02f9..4905672f211c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -420,7 +420,7 @@ pub fn writeStackTrace( } } -inline fn getContext(context: *StackTraceContext) bool { +pub inline fn getContext(context: *StackTraceContext) bool { if (native_os == .windows) { context.* = std.mem.zeroes(windows.CONTEXT); windows.ntdll.RtlCaptureContext(context); From 9c908ea814b6b5ae67622c0a9cce4704c6734590 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 1 Jul 2023 01:40:34 -0400 Subject: [PATCH 36/81] test: add standalone test for DWARF unwinding with -fomit-frame-pointer --- test/standalone.zig | 4 ++ test/standalone/dwarf_unwinding/build.zig | 37 ++++++++++++++++++ test/standalone/dwarf_unwinding/main.zig | 40 ++++++++++++++++++++ test/standalone/dwarf_unwinding/shared_lib.c | 24 ++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 test/standalone/dwarf_unwinding/build.zig create mode 100644 test/standalone/dwarf_unwinding/main.zig create mode 100644 test/standalone/dwarf_unwinding/shared_lib.c diff --git a/test/standalone.zig b/test/standalone.zig index cfdb09ea0714..c9277e26a872 100644 --- a/test/standalone.zig +++ b/test/standalone.zig @@ -230,6 +230,10 @@ pub const build_cases = [_]BuildCase{ .build_root = "test/standalone/zerolength_check", .import = 
@import("standalone/zerolength_check/build.zig"), }, + .{ + .build_root = "test/standalone/dwarf_unwinding", + .import = @import("standalone/dwarf_unwinding/build.zig"), + }, }; const std = @import("std"); diff --git a/test/standalone/dwarf_unwinding/build.zig b/test/standalone/dwarf_unwinding/build.zig new file mode 100644 index 000000000000..85d0a9ac7d7a --- /dev/null +++ b/test/standalone/dwarf_unwinding/build.zig @@ -0,0 +1,37 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const test_step = b.step("test", "Test it"); + b.default_step = test_step; + + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + if (!std.debug.StackIterator.supports_context) return; + + const c_shared_lib = b.addSharedLibrary(.{ + .name = "c_shared_lib", + .target = target, + .optimize = optimize, + }); + + if (target.isWindows()) c_shared_lib.defineCMacro("LIB_API", "__declspec(dllexport)"); + + c_shared_lib.strip = false; + c_shared_lib.addCSourceFile("shared_lib.c", &.{"-fomit-frame-pointer"}); + c_shared_lib.linkLibC(); + + const exe = b.addExecutable(.{ + .name = "main", + .root_source_file = .{ .path = "main.zig" }, + .target = target, + .optimize = optimize, + }); + + exe.omit_frame_pointer = true; + exe.linkLibrary(c_shared_lib); + b.installArtifact(exe); + + const run_cmd = b.addRunArtifact(exe); + test_step.dependOn(&run_cmd.step); +} diff --git a/test/standalone/dwarf_unwinding/main.zig b/test/standalone/dwarf_unwinding/main.zig new file mode 100644 index 000000000000..2edf93ea9454 --- /dev/null +++ b/test/standalone/dwarf_unwinding/main.zig @@ -0,0 +1,40 @@ +const std = @import("std"); +const debug = std.debug; +const testing = std.testing; + +noinline fn frame4(expected: *[4]usize, unwound: *[4]usize) void { + expected[0] = @returnAddress(); + + var context: debug.StackTraceContext = undefined; + testing.expect(debug.getContext(&context)) catch @panic("failed to getContext"); + + var debug_info = 
debug.getSelfDebugInfo() catch @panic("failed to openSelfDebugInfo"); + var it = debug.StackIterator.initWithContext(null, debug_info, &context) catch @panic("failed to initWithContext"); + defer it.deinit(); + + for (unwound) |*addr| { + if (it.next()) |return_address| addr.* = return_address; + } +} + +noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { + expected[1] = @returnAddress(); + frame4(expected, unwound); +} + +fn frame2(expected: *[4]usize, unwound: *[4]usize) callconv(.C) void { + frame3(expected, unwound); +} + +extern fn frame0( + expected: *[4]usize, + unwound: *[4]usize, + frame_2: *const fn (expected: *[4]usize, unwound: *[4]usize) callconv(.C) void, +) void; + +pub fn main() !void { + var expected: [4]usize = undefined; + var unwound: [4]usize = undefined; + frame0(&expected, &unwound, &frame2); + try testing.expectEqual(expected, unwound); +} diff --git a/test/standalone/dwarf_unwinding/shared_lib.c b/test/standalone/dwarf_unwinding/shared_lib.c new file mode 100644 index 000000000000..2329fedda9b6 --- /dev/null +++ b/test/standalone/dwarf_unwinding/shared_lib.c @@ -0,0 +1,24 @@ +#include + +#ifndef LIB_API +#define LIB_API +#endif + +__attribute__((noinline)) void frame1( + void** expected, + void** unwound, + void (*frame2)(void** expected, void** unwound)) { + expected[2] = &&frame_2_ret; + frame2(expected, unwound); + frame_2_ret: +} + +LIB_API void frame0( + void** expected, + void** unwound, + void (*frame2)(void** expected, void** unwound)) { + expected[3] = &&frame_1_ret; + frame1(expected, unwound, frame2); + frame_1_ret: +} + From 7bc1695f15e83bb7e0e1401d2411660b05ab65e3 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 1 Jul 2023 10:43:34 -0400 Subject: [PATCH 37/81] c: musl doesn't implement getcontext, so defer to our implementation in that case --- lib/std/c.zig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/std/c.zig b/lib/std/c.zig index 94b7f576574a..6f193fb9eea0 100644 --- 
a/lib/std/c.zig +++ b/lib/std/c.zig @@ -413,7 +413,12 @@ pub extern "c" fn timer_delete(timerid: c.timer_t) c_int; pub extern "c" fn timer_settime(timerid: c.timer_t, flags: c_int, new_value: *const c.itimerspec, old_value: *c.itimerspec) c_int; pub extern "c" fn timer_gettime(timerid: c.timer_t, flags: c_int, curr_value: *c.itimerspec) c_int; -pub extern "c" fn getcontext(ucp: *std.os.ucontext_t) c_int; +pub usingnamespace if (builtin.os.tag == .linux and builtin.target.isMusl()) struct { + // musl does not implement getcontext + const getcontext = std.os.linux.getcontext; +} else struct { + extern "c" fn getcontext(ucp: *std.os.ucontext_t) c_int; +}; pub const max_align_t = if (builtin.abi == .msvc) f64 From ccc9f8206898275474b3651a7456ec8b5c6210ff Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 1 Jul 2023 15:23:37 -0400 Subject: [PATCH 38/81] c: fixup getcontext debug: supports_context -> have_ucontext, supports_getcontext -> have_getcontext test: rework dwarf_unwind test case to also test the non-libc path --- lib/std/c.zig | 4 +- lib/std/debug.zig | 34 ++++----- test/standalone/dwarf_unwinding/build.zig | 69 +++++++++++-------- .../{main.zig => shared_lib_unwind.zig} | 4 +- .../standalone/dwarf_unwinding/zig_unwind.zig | 42 +++++++++++ 5 files changed, 106 insertions(+), 47 deletions(-) rename test/standalone/dwarf_unwinding/{main.zig => shared_lib_unwind.zig} (85%) create mode 100644 test/standalone/dwarf_unwinding/zig_unwind.zig diff --git a/lib/std/c.zig b/lib/std/c.zig index 6f193fb9eea0..860fdab92900 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -415,9 +415,9 @@ pub extern "c" fn timer_gettime(timerid: c.timer_t, flags: c_int, curr_value: *c pub usingnamespace if (builtin.os.tag == .linux and builtin.target.isMusl()) struct { // musl does not implement getcontext - const getcontext = std.os.linux.getcontext; + pub const getcontext = std.os.linux.getcontext; } else struct { - extern "c" fn getcontext(ucp: *std.os.ucontext_t) c_int; + pub extern "c" 
fn getcontext(ucp: *std.os.ucontext_t) c_int; }; pub const max_align_t = if (builtin.abi == .msvc) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4905672f211c..66ab27bbf049 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -136,7 +136,7 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { pub const StackTraceContext = blk: { if (native_os == .windows) { break :blk std.os.windows.CONTEXT; - } else if (StackIterator.supports_context) { + } else if (have_ucontext) { break :blk os.ucontext_t; } else { break :blk void; @@ -420,6 +420,18 @@ pub fn writeStackTrace( } } +pub const have_getcontext = @hasDecl(os.system, "getcontext") and + (builtin.os.tag != .linux or switch (builtin.cpu.arch) { + .x86, .x86_64 => true, + else => false, +}); + +pub const have_ucontext = @hasDecl(os.system, "ucontext_t") and + (builtin.os.tag != .linux or switch (builtin.cpu.arch) { + .mips, .mipsel, .mips64, .mips64el, .riscv64 => false, + else => true, +}); + pub inline fn getContext(context: *StackTraceContext) bool { if (native_os == .windows) { context.* = std.mem.zeroes(windows.CONTEXT); @@ -427,13 +439,7 @@ pub inline fn getContext(context: *StackTraceContext) bool { return true; } - const supports_getcontext = @hasDecl(os.system, "getcontext") and - (builtin.os.tag != .linux or switch (builtin.cpu.arch) { - .x86, .x86_64 => true, - else => false, - }); - - return supports_getcontext and os.system.getcontext(context) == 0; + return have_getcontext and os.system.getcontext(context) == 0; } pub const StackIterator = struct { @@ -445,13 +451,7 @@ pub const StackIterator = struct { // When DebugInfo and a register context is available, this iterator can unwind // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). 
debug_info: ?*DebugInfo, - dwarf_context: if (supports_context) DW.UnwindContext else void = undefined, - - pub const supports_context = @hasDecl(os.system, "ucontext_t") and - (builtin.os.tag != .linux or switch (builtin.cpu.arch) { - .mips, .mipsel, .mips64, .mips64el, .riscv64 => false, - else => true, - }); + dwarf_context: if (have_ucontext) DW.UnwindContext else void = undefined, pub fn init(first_address: ?usize, fp: ?usize) StackIterator { if (native_arch == .sparc64) { @@ -476,7 +476,7 @@ pub const StackIterator = struct { } pub fn deinit(self: *StackIterator) void { - if (supports_context) { + if (have_ucontext) { if (self.debug_info) |debug_info| { self.dwarf_context.deinit(debug_info.allocator); } @@ -574,7 +574,7 @@ pub const StackIterator = struct { } fn next_internal(self: *StackIterator) ?usize { - if (supports_context and self.debug_info != null) { + if (have_ucontext and self.debug_info != null) { if (self.dwarf_context.pc == 0) return null; if (self.next_dwarf()) |return_address| { return return_address; diff --git a/test/standalone/dwarf_unwinding/build.zig b/test/standalone/dwarf_unwinding/build.zig index 85d0a9ac7d7a..c59effda9fde 100644 --- a/test/standalone/dwarf_unwinding/build.zig +++ b/test/standalone/dwarf_unwinding/build.zig @@ -7,31 +7,46 @@ pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - if (!std.debug.StackIterator.supports_context) return; - - const c_shared_lib = b.addSharedLibrary(.{ - .name = "c_shared_lib", - .target = target, - .optimize = optimize, - }); - - if (target.isWindows()) c_shared_lib.defineCMacro("LIB_API", "__declspec(dllexport)"); - - c_shared_lib.strip = false; - c_shared_lib.addCSourceFile("shared_lib.c", &.{"-fomit-frame-pointer"}); - c_shared_lib.linkLibC(); - - const exe = b.addExecutable(.{ - .name = "main", - .root_source_file = .{ .path = "main.zig" }, - .target = target, - .optimize = optimize, - }); - - 
exe.omit_frame_pointer = true; - exe.linkLibrary(c_shared_lib); - b.installArtifact(exe); - - const run_cmd = b.addRunArtifact(exe); - test_step.dependOn(&run_cmd.step); + // Test unwinding pure zig code (no libc) + { + const exe = b.addExecutable(.{ + .name = "zig_unwind", + .root_source_file = .{ .path = "zig_unwind.zig" }, + .target = target, + .optimize = optimize, + }); + + exe.omit_frame_pointer = true; + + const run_cmd = b.addRunArtifact(exe); + test_step.dependOn(&run_cmd.step); + } + + // Test unwinding through a C shared library + { + const c_shared_lib = b.addSharedLibrary(.{ + .name = "c_shared_lib", + .target = target, + .optimize = optimize, + }); + + if (target.isWindows()) c_shared_lib.defineCMacro("LIB_API", "__declspec(dllexport)"); + + c_shared_lib.strip = false; + c_shared_lib.addCSourceFile("shared_lib.c", &.{"-fomit-frame-pointer"}); + c_shared_lib.linkLibC(); + + const exe = b.addExecutable(.{ + .name = "shared_lib_unwind", + .root_source_file = .{ .path = "shared_lib_unwind.zig" }, + .target = target, + .optimize = optimize, + }); + + exe.omit_frame_pointer = true; + exe.linkLibrary(c_shared_lib); + + const run_cmd = b.addRunArtifact(exe); + test_step.dependOn(&run_cmd.step); + } } diff --git a/test/standalone/dwarf_unwinding/main.zig b/test/standalone/dwarf_unwinding/shared_lib_unwind.zig similarity index 85% rename from test/standalone/dwarf_unwinding/main.zig rename to test/standalone/dwarf_unwinding/shared_lib_unwind.zig index 2edf93ea9454..8f4219797295 100644 --- a/test/standalone/dwarf_unwinding/main.zig +++ b/test/standalone/dwarf_unwinding/shared_lib_unwind.zig @@ -9,7 +9,7 @@ noinline fn frame4(expected: *[4]usize, unwound: *[4]usize) void { testing.expect(debug.getContext(&context)) catch @panic("failed to getContext"); var debug_info = debug.getSelfDebugInfo() catch @panic("failed to openSelfDebugInfo"); - var it = debug.StackIterator.initWithContext(null, debug_info, &context) catch @panic("failed to initWithContext"); + var it 
= debug.StackIterator.initWithContext(expected[0], debug_info, &context) catch @panic("failed to initWithContext"); defer it.deinit(); for (unwound) |*addr| { @@ -33,6 +33,8 @@ extern fn frame0( ) void; pub fn main() !void { + if (!std.debug.have_ucontext or !std.debug.have_getcontext) return; + var expected: [4]usize = undefined; var unwound: [4]usize = undefined; frame0(&expected, &unwound, &frame2); diff --git a/test/standalone/dwarf_unwinding/zig_unwind.zig b/test/standalone/dwarf_unwinding/zig_unwind.zig new file mode 100644 index 000000000000..707c2b763211 --- /dev/null +++ b/test/standalone/dwarf_unwinding/zig_unwind.zig @@ -0,0 +1,42 @@ +const std = @import("std"); +const debug = std.debug; +const testing = std.testing; + +noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { + expected[0] = @returnAddress(); + + var context: debug.StackTraceContext = undefined; + testing.expect(debug.getContext(&context)) catch @panic("failed to getContext"); + + var debug_info = debug.getSelfDebugInfo() catch @panic("failed to openSelfDebugInfo"); + var it = debug.StackIterator.initWithContext(expected[0], debug_info, &context) catch @panic("failed to initWithContext"); + defer it.deinit(); + + for (unwound) |*addr| { + if (it.next()) |return_address| addr.* = return_address; + } +} + +noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { + expected[1] = @returnAddress(); + frame3(expected, unwound); +} + +noinline fn frame1(expected: *[4]usize, unwound: *[4]usize) void { + expected[2] = @returnAddress(); + frame2(expected, unwound); +} + +noinline fn frame0(expected: *[4]usize, unwound: *[4]usize) void { + expected[3] = @returnAddress(); + frame1(expected, unwound); +} + +pub fn main() !void { + if (!std.debug.have_ucontext or !std.debug.have_getcontext) return; + + var expected: [4]usize = undefined; + var unwound: [4]usize = undefined; + frame0(&expected, &unwound); + try testing.expectEqual(expected, unwound); +} From 
6a5e2b713fbaa009bb4ec73cf132a0bab6af6205 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 2 Jul 2023 02:08:06 -0400 Subject: [PATCH 39/81] debug: de-duplicate some code in macos ModuleDebugInfo --- lib/std/debug.zig | 96 ++++++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 56 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 66ab27bbf049..539dc8034923 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1754,7 +1754,7 @@ pub const ModuleDebugInfo = switch (native_os) { os.munmap(self.mapped_memory); } - fn loadOFile(self: *@This(), allocator: mem.Allocator, o_file_path: []const u8) !OFileInfo { + fn loadOFile(self: *@This(), allocator: mem.Allocator, o_file_path: []const u8) !*OFileInfo { const o_file = try fs.cwd().openFile(o_file_path, .{ .intended_io_mode = .blocking }); const mapped_mem = try mapWholeFile(o_file); @@ -1799,11 +1799,11 @@ pub const ModuleDebugInfo = switch (native_os) { var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; for (segcmd.?.getSections()) |sect| { - const name = sect.sectName(); + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; var section_index: ?usize = null; inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, name)) section_index = i; + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; } if (section_index == null) continue; @@ -1834,65 +1834,31 @@ pub const ModuleDebugInfo = switch (native_os) { }; // Add the debug info to the cache - try self.ofiles.putNoClobber(o_file_path, info); + const result = try self.ofiles.getOrPut(o_file_path); + assert(!result.found_existing); + result.value_ptr.* = info; - return info; - } - - fn getOFileForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*OFileInfo { - nosuspend { - const relocated_address = address - self.base_address; - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse - return null; - - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - var o_file_info = self.ofiles.get(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return null, - else => return err, - }); - - return &o_file_info.di; - } + return result.value_ptr; } pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { nosuspend { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - - // Find the .o file where this symbol is defined - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse - return SymbolInfo{}; - const addr_off = relocated_address - symbol.addr; + const result = try self.getOFileInfoForAddress(allocator, address); + if (result.symbol == null) return .{}; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(self.strings[symbol.strx..], 0); - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - - // Check if its debug infos are already in the cache - var o_file_info = 
self.ofiles.get(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => { - return SymbolInfo{ .symbol_name = stab_symbol }; - }, - else => return err, - }); - const o_file_di = &o_file_info.di; + const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); + if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; // Translate again the address, this time into an address inside the // .o file - const relocated_address_o = o_file_info.addr_table.get(stab_symbol) orelse return SymbolInfo{ + const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ .symbol_name = "???", }; + const addr_off = result.relocated_address - result.symbol.?.addr; + const o_file_di = &result.o_file_info.?.di; if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { return SymbolInfo{ .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", @@ -1919,30 +1885,48 @@ pub const ModuleDebugInfo = switch (native_os) { }, else => return err, } - - unreachable; } } - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const DW.DwarfInfo { + fn getOFileInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !struct { + relocated_address: usize, + symbol: ?*const MachoSymbol = null, + o_file_info: ?*OFileInfo = null, + } { nosuspend { + // Translate the VA into an address into this object const relocated_address = address - self.base_address; - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse - return null; + // Find the .o file where this symbol is defined + const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ + .relocated_address = relocated_address, + }; + + // Check if its debug infos are already in the cache const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - var 
o_file_info = self.ofiles.get(o_file_path) orelse + var o_file_info = self.ofiles.getPtr(o_file_path) orelse (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { error.FileNotFound, error.MissingDebugInfo, error.InvalidDebugInfo, - => return null, + => return .{ + .relocated_address = relocated_address, + .symbol = symbol, + }, else => return err, }); - return &o_file_info.di; + return .{ + .relocated_address = relocated_address, + .symbol = symbol, + .o_file_info = o_file_info, + }; } } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const DW.DwarfInfo { + return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; + } }, .uefi, .windows => struct { base_address: usize, From 395ab474eba31f5bbe95e96cab06ab066384c4af Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 2 Jul 2023 02:57:35 -0400 Subject: [PATCH 40/81] dwarf: fix logic error in eh_frame_hdr binary search --- lib/std/dwarf.zig | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 06714aad2fb7..218180692c53 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1886,10 +1886,13 @@ pub const ExceptionFrameHeader = struct { .data_rel_base = eh_frame_hdr_ptr, }, builtin.cpu.arch.endian()) orelse return badDwarf(); - if (pc >= pc_begin) left = mid; - if (pc == pc_begin) break; - - len /= 2; + if (pc < pc_begin) { + len /= 2; + } else { + left = mid; + if (pc == pc_begin) break; + len -= len / 2; + } } try stream.seekTo(left * entry_size); From f04f9705cca5cccdfaff2eab0e57e9bd253e8441 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 2 Jul 2023 18:13:19 -0400 Subject: [PATCH 41/81] dwarf: add support for DWARF5 DW_AT_ranges in subprograms, add DebugRangeIterator Some DWARF5 subprograms have non-contiguous instruction ranges. An example of such a function is `puts` in Ubuntu's libc. 
This change fixes name lookups for functions that use DW_AT_ranges in their DIE. --- lib/std/dwarf.zig | 213 ++++++++++++++++++++++++++---------------- lib/std/dwarf/abi.zig | 3 +- 2 files changed, 135 insertions(+), 81 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 218180692c53..6aa5064dc225 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -717,7 +717,6 @@ pub const DwarfInfo = struct { } pub fn getSymbolName(di: *DwarfInfo, address: u64) ?[]const u8 { - // TODO: Can this be binary searched? for (di.func_list.items) |*func| { if (func.pc_range) |range| { if (address >= range.start and address < range.end) { @@ -835,37 +834,56 @@ pub const DwarfInfo = struct { break :x null; }; - const pc_range = x: { - if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| { - if (die_obj.getAttr(AT.high_pc)) |high_pc_value| { - const pc_end = switch (high_pc_value.*) { - FormValue.Address => |value| value, - FormValue.Const => |value| b: { - const offset = try value.asUnsignedLe(); - break :b (low_pc + offset); - }, - else => return badDwarf(), - }; - break :x PcRange{ + var found_range = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: { + if (die_obj.getAttr(AT.high_pc)) |high_pc_value| { + const pc_end = switch (high_pc_value.*) { + FormValue.Address => |value| value, + FormValue.Const => |value| b: { + const offset = try value.asUnsignedLe(); + break :b (low_pc + offset); + }, + else => return badDwarf(), + }; + + try di.func_list.append(allocator, Func{ + .name = fn_name, + .pc_range = .{ .start = low_pc, .end = pc_end, - }; - } else { - break :x null; - } - } else |err| { - if (err != error.MissingDebugInfo) return err; - break :x null; + }, + }); } + + break :blk true; + } else |err| blk: { + if (err != error.MissingDebugInfo) return err; + break :blk false; }; - // TODO: Debug issue where `puts` in Ubuntu's libc was not found - //if (fn_name != null and pc_range != null) debug.print("func_list: {s} -> 
0x{x}-0x{x}\n", .{fn_name.?, pc_range.?.start, pc_range.?.end}); + if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: { + var iter = DebugRangeIterator.init(ranges_value, di, &compile_unit) catch |err| { + if (err != error.MissingDebugInfo) return err; + break :blk; + }; + + while (try iter.next()) |range| { + found_range = true; + try di.func_list.append(allocator, Func{ + .name = fn_name, + .pc_range = .{ + .start = range.start_addr, + .end = range.end_addr, + }, + }); + } + } - try di.func_list.append(allocator, Func{ - .name = fn_name, - .pc_range = pc_range, - }); + if (!found_range) { + try di.func_list.append(allocator, Func{ + .name = fn_name, + .pc_range = null, + }); + } }, else => {}, } @@ -966,17 +984,18 @@ pub const DwarfInfo = struct { } } - pub fn findCompileUnit(di: *DwarfInfo, target_address: u64) !*const CompileUnit { - for (di.compile_unit_list.items) |*compile_unit| { - if (compile_unit.pc_range) |range| { - if (target_address >= range.start and target_address < range.end) return compile_unit; - } + const DebugRangeIterator = struct { + base_address: u64, + section_type: DwarfSection, + di: *DwarfInfo, + compile_unit: *const CompileUnit, + stream: io.FixedBufferStream([]const u8), - const opt_debug_ranges = if (compile_unit.version >= 5) di.section(.debug_rnglists) else di.section(.debug_ranges); - const debug_ranges = opt_debug_ranges orelse continue; + pub fn init(ranges_value: *const FormValue, di: *DwarfInfo, compile_unit: *const CompileUnit) !@This() { + const section_type = if (compile_unit.version >= 5) DwarfSection.debug_rnglists else DwarfSection.debug_ranges; + const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo; - const ranges_val = compile_unit.die.getAttr(AT.ranges) orelse continue; - const ranges_offset = switch (ranges_val.*) { + const ranges_offset = switch (ranges_value.*) { .SecOffset => |off| off, .Const => |c| try c.asUnsignedLe(), .RangeListOffset => |idx| off: { @@ -996,8 +1015,7 @@ pub const 
DwarfInfo = struct { }; var stream = io.fixedBufferStream(debug_ranges); - const in = &stream.reader(); - const seekable = &stream.seekableStream(); + try stream.seekTo(ranges_offset); // All the addresses in the list are relative to the value // specified by DW_AT.low_pc or to some other value encoded @@ -1008,86 +1026,122 @@ pub const DwarfInfo = struct { else => return err, }; - try seekable.seekTo(ranges_offset); + return .{ + .base_address = base_address, + .section_type = section_type, + .di = di, + .compile_unit = compile_unit, + .stream = stream, + }; + } - if (compile_unit.version >= 5) { - while (true) { + // Returns the next range in the list, or null if the end was reached. + pub fn next(self: *@This()) !?struct{ start_addr: u64, end_addr: u64 } { + const in = self.stream.reader(); + switch (self.section_type) { + .debug_rnglists => { const kind = try in.readByte(); switch (kind) { - RLE.end_of_list => break, + RLE.end_of_list => return null, RLE.base_addressx => { const index = try leb.readULEB128(usize, in); - base_address = try di.readDebugAddr(compile_unit.*, index); + self.base_address = try self.di.readDebugAddr(self.compile_unit.*, index); + return try self.next(); }, RLE.startx_endx => { const start_index = try leb.readULEB128(usize, in); - const start_addr = try di.readDebugAddr(compile_unit.*, start_index); + const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index); const end_index = try leb.readULEB128(usize, in); - const end_addr = try di.readDebugAddr(compile_unit.*, end_index); + const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index); - if (target_address >= start_addr and target_address < end_addr) { - return compile_unit; - } + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; }, RLE.startx_length => { const start_index = try leb.readULEB128(usize, in); - const start_addr = try di.readDebugAddr(compile_unit.*, start_index); + const start_addr = try 
self.di.readDebugAddr(self.compile_unit.*, start_index); const len = try leb.readULEB128(usize, in); const end_addr = start_addr + len; - if (target_address >= start_addr and target_address < end_addr) { - return compile_unit; - } + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; }, RLE.offset_pair => { const start_addr = try leb.readULEB128(usize, in); const end_addr = try leb.readULEB128(usize, in); + // This is the only kind that uses the base address - if (target_address >= base_address + start_addr and target_address < base_address + end_addr) { - return compile_unit; - } + return .{ + .start_addr = self.base_address + start_addr, + .end_addr = self.base_address + end_addr, + }; }, RLE.base_address => { - base_address = try in.readInt(usize, di.endian); + self.base_address = try in.readInt(usize, self.di.endian); + return try self.next(); }, RLE.start_end => { - const start_addr = try in.readInt(usize, di.endian); - const end_addr = try in.readInt(usize, di.endian); - if (target_address >= start_addr and target_address < end_addr) { - return compile_unit; - } + const start_addr = try in.readInt(usize, self.di.endian); + const end_addr = try in.readInt(usize, self.di.endian); + + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; }, RLE.start_length => { - const start_addr = try in.readInt(usize, di.endian); + const start_addr = try in.readInt(usize, self.di.endian); const len = try leb.readULEB128(usize, in); const end_addr = start_addr + len; - if (target_address >= start_addr and target_address < end_addr) { - return compile_unit; - } + + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; }, else => return badDwarf(), } - } - } else { - while (true) { - const begin_addr = try in.readInt(usize, di.endian); - const end_addr = try in.readInt(usize, di.endian); - if (begin_addr == 0 and end_addr == 0) { - break; - } + }, + .debug_ranges => { + const start_addr = try in.readInt(usize, self.di.endian); + const 
end_addr = try in.readInt(usize, self.di.endian); + if (start_addr == 0 and end_addr == 0) return null; + // This entry selects a new value for the base address - if (begin_addr == math.maxInt(usize)) { - base_address = end_addr; - continue; - } - if (target_address >= base_address + begin_addr and target_address < base_address + end_addr) { - return compile_unit; + if (start_addr == math.maxInt(usize)) { + self.base_address = end_addr; + return try self.next(); } - } + + return .{ + .start_addr = self.base_address + start_addr, + .end_addr = self.base_address + end_addr, + }; + }, + else => unreachable, } } + }; + + pub fn findCompileUnit(di: *DwarfInfo, target_address: u64) !*const CompileUnit { + for (di.compile_unit_list.items) |*compile_unit| { + if (compile_unit.pc_range) |range| { + if (target_address >= range.start and target_address < range.end) return compile_unit; + } + + const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, di, compile_unit) catch continue; + while (try iter.next()) |range| { + if (target_address >= range.start_addr and target_address < range.end_addr) return compile_unit; + } + } + return missingDwarf(); } @@ -1566,7 +1620,6 @@ pub const DwarfInfo = struct { } } - // TODO: Avoiding sorting if has_eh_frame_hdr exists std.mem.sort(FrameDescriptionEntry, di.fde_list.items, {}, struct { fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { _ = ctx; diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 9d594fa5fe97..ab1213eb92c4 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -245,7 +245,8 @@ pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext } /// Returns the ABI-defined default value this register has in the unwinding table -/// before running any of the CIE instructions. +/// before running any of the CIE instructions. 
The DWARF spec defines these values +// to be undefined, but allows ABI authors to override that default. pub fn getRegDefaultValue(reg_number: u8, out: []u8) void { // TODO: Implement any ABI-specific rules for the default value for registers _ = reg_number; From 62598c2187a63a7eb2d8c9f3dca0664ec5db270e Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 2 Jul 2023 18:15:36 -0400 Subject: [PATCH 42/81] debug: rework how unwind errors are printed, and add module name lookup for linux This change enhances stack trace output to include a note that debug info was missing, and therefore the stack trace may not be accurate. For example, if the user is using a libc compiled with -fomit-frame-pointer and doesn't have debug symbols installed, any traces that begin in a libc function may not unwind correctly. This allows the user to notice this and potentially install debug symbols to improve the output. --- lib/std/debug.zig | 105 ++++++++++++++++++++++++++++++++++++++++------ lib/std/dwarf.zig | 2 +- 2 files changed, 93 insertions(+), 14 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 539dc8034923..18c13ada83b2 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -182,6 +182,9 @@ pub fn dumpStackTraceFromBase(context: *const StackTraceContext) void { printSourceAtAddress(debug_info, stderr, it.dwarf_context.pc, tty_config) catch return; while (it.next()) |return_address| { + if (it.getLastError()) |unwind_error| + printUnwindError(debug_info, stderr, unwind_error.address, unwind_error.err, tty_config) catch {}; + // On arm64 macOS, the address of the last frame is 0x0 rather than 0x1 as on x86_64 macOS, // therefore, we do a check for `return_address == 0` before subtracting 1 from it to avoid // an overflow. 
We do not need to signal `StackIterator` as it will correctly detect this @@ -225,7 +228,9 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT } stack_trace.index = slice.len; } else { - // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required) + // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required). + // A new path for loading DebugInfo needs to be created which will only attempt to parse in-memory sections, because + // stopping to load other debug info (ie. source line info) from disk here is not required for unwinding. var it = StackIterator.init(first_address, null); defer it.deinit(); for (stack_trace.instruction_addresses, 0..) |*addr, i| { @@ -442,6 +447,11 @@ pub inline fn getContext(context: *StackTraceContext) bool { return have_getcontext and os.system.getcontext(context) == 0; } +pub const UnwindError = if (have_ucontext) + @typeInfo(@typeInfo(@TypeOf(StackIterator.next_dwarf)).Fn.return_type.?).ErrorUnion.error_set +else + void; + pub const StackIterator = struct { // Skip every frame before this address is found. first_address: ?usize, @@ -452,6 +462,8 @@ pub const StackIterator = struct { // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). 
debug_info: ?*DebugInfo, dwarf_context: if (have_ucontext) DW.UnwindContext else void = undefined, + last_error: if (have_ucontext) ?UnwindError else void = undefined, + last_error_address: if (have_ucontext) usize else void = undefined, pub fn init(first_address: ?usize, fp: ?usize) StackIterator { if (native_arch == .sparc64) { @@ -472,6 +484,7 @@ pub const StackIterator = struct { var iterator = init(first_address, null); iterator.debug_info = debug_info; iterator.dwarf_context = try DW.UnwindContext.init(context, &isValidMemory); + iterator.last_error = null; return iterator; } @@ -483,6 +496,23 @@ pub const StackIterator = struct { } } + pub fn getLastError(self: *StackIterator) ?struct { + address: usize, + err: UnwindError, + } { + if (have_ucontext) { + if (self.last_error) |err| { + self.last_error = null; + return .{ + .address = self.last_error_address, + .err = err, + }; + } + } + + return null; + } + // Offset of the saved BP wrt the frame pointer. const fp_offset = if (native_arch.isRISCV()) // On RISC-V the frame pointer points to the top of the saved register @@ -579,14 +609,10 @@ pub const StackIterator = struct { if (self.next_dwarf()) |return_address| { return return_address; } else |err| { - if (err != error.MissingFDE) print("DWARF unwind error: {}\n", .{err}); - - // Fall back to fp unwinding on the first failure, - // as the register context won't be updated - - // TODO: Could still attempt dwarf unwinding after this, maybe marking non-updated registers as - // invalid, so the unwind only fails if it requires out of date registers? 
+ self.last_error = err; + self.last_error_address = self.dwarf_context.pc; + // Fall back to fp unwinding on the first failure, as the register context won't have been updated self.fp = self.dwarf_context.getFp() catch 0; self.debug_info = null; } @@ -640,6 +666,9 @@ pub fn writeCurrentStackTrace( defer it.deinit(); while (it.next()) |return_address| { + if (it.getLastError()) |unwind_error| + try printUnwindError(debug_info, out_stream, unwind_error.address, unwind_error.err, tty_config); + // On arm64 macOS, the address of the last frame is 0x0 rather than 0x1 as on x86_64 macOS, // therefore, we do a check for `return_address == 0` before subtracting 1 from it to avoid // an overflow. We do not need to signal `StackIterator` as it will correctly detect this @@ -785,6 +814,17 @@ fn printUnknownSource(debug_info: *DebugInfo, out_stream: anytype, address: usiz ); } +pub fn printUnwindError(debug_info: *DebugInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { + const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; + try tty_config.setColor(out_stream, .dim); + if (err != error.MissingDebugInfo) { + try out_stream.print("Unwind information for {s} was not available ({}), trace may be incomplete\n\n", .{ module_name, err }); + } else { + try out_stream.print("Unwind information for {s} was not available, trace may be incomplete\n\n", .{module_name}); + } + try tty_config.setColor(out_stream, .reset); +} + pub fn printSourceAtAddress(debug_info: *DebugInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), @@ -1099,16 +1139,14 @@ pub fn readElfDebugInfo( ) catch break :blk; for (global_debug_directories) |global_directory| { - // TODO: joinBuf would be ideal (with a 
fs.MAX_PATH_BYTES buffer) const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); defer allocator.free(path); - // TODO: Remove - std.debug.print(" Loading external debug info from {s}\n", .{path}); + return readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem) catch continue; } } - // use the path from .gnu_debuglink, in the search order as gdb + // use the path from .gnu_debuglink, in the same search order as gdb if (separate_debug_filename) |separate_filename| blk: { if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; @@ -1456,6 +1494,9 @@ pub const DebugInfo = struct { } } + // Returns the module name for a given address. + // This can be called when getModuleForAddress fails, so implementations should provide + // a path that doesn't rely on any side-effects of successful module lookup. pub fn getModuleNameForAddress(self: *DebugInfo, address: usize) ?[]const u8 { if (comptime builtin.target.isDarwin()) { return null; @@ -1466,7 +1507,7 @@ pub const DebugInfo = struct { } else if (comptime builtin.target.isWasm()) { return null; } else { - return null; + return self.lookupModuleNameDl(address); } } @@ -1624,6 +1665,44 @@ pub const DebugInfo = struct { return null; } + fn lookupModuleNameDl(self: *DebugInfo, address: usize) ?[]const u8 { + _ = self; + + var ctx: struct { + // Input + address: usize, + // Output + name: []const u8 = "", + } = .{ .address = address }; + const CtxTy = @TypeOf(ctx); + + if (os.dl_iterate_phdr(&ctx, error{Found}, struct { + fn callback(info: *os.dl_phdr_info, size: usize, context: *CtxTy) !void { + _ = size; + if (context.address < info.dlpi_addr) return; + const phdrs = info.dlpi_phdr[0..info.dlpi_phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + const seg_start = info.dlpi_addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start 
and context.address < seg_end) { + context.name = mem.sliceTo(info.dlpi_name, 0) orelse ""; + break; + } + } else return; + + return error.Found; + } + }.callback)) { + return null; + } else |err| switch (err) { + error.Found => return fs.path.basename(ctx.name), + } + + return null; + } + fn lookupModuleDl(self: *DebugInfo, address: usize) !*ModuleDebugInfo { var ctx: struct { // Input diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 6aa5064dc225..abfd3f935814 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1036,7 +1036,7 @@ pub const DwarfInfo = struct { } // Returns the next range in the list, or null if the end was reached. - pub fn next(self: *@This()) !?struct{ start_addr: u64, end_addr: u64 } { + pub fn next(self: *@This()) !?struct { start_addr: u64, end_addr: u64 } { const in = self.stream.reader(); switch (self.section_type) { .debug_rnglists => { From b85f84061aebb6c61ab9ca42d8147e8b76154818 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 3 Jul 2023 03:45:06 -0400 Subject: [PATCH 43/81] dwarf: don't dupe function names, as they are backed by the memory mapped sections dwarf: const-correctness fixups dwarf: implement the remaining register rules dwarf: start implementing the DWARF expression stack machine --- lib/std/debug.zig | 10 +- lib/std/dwarf.zig | 79 +++++++----- lib/std/dwarf/call_frame.zig | 69 +++++++---- lib/std/dwarf/expressions.zig | 220 +++++++++++++++++++++++++++++----- 4 files changed, 291 insertions(+), 87 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 18c13ada83b2..f231a4ac471c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -483,16 +483,14 @@ pub const StackIterator = struct { pub fn initWithContext(first_address: ?usize, debug_info: *DebugInfo, context: *const os.ucontext_t) !StackIterator { var iterator = init(first_address, null); iterator.debug_info = debug_info; - iterator.dwarf_context = try 
DW.UnwindContext.init(debug_info.allocator, context, &isValidMemory); iterator.last_error = null; return iterator; } pub fn deinit(self: *StackIterator) void { - if (have_ucontext) { - if (self.debug_info) |debug_info| { - self.dwarf_context.deinit(debug_info.allocator); - } + if (have_ucontext and self.debug_info != null) { + self.dwarf_context.deinit(); } } @@ -599,7 +597,7 @@ pub const StackIterator = struct { if (try module.getDwarfInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc)) |di| { self.dwarf_context.reg_ctx.eh_frame = true; self.dwarf_context.reg_ctx.is_macho = di.is_macho; - return di.unwindFrame(self.debug_info.?.allocator, &self.dwarf_context, module.base_address); + return di.unwindFrame(&self.dwarf_context, module.base_address); } else return error.MissingDebugInfo; } diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index abfd3f935814..807bc09a2fcb 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -163,15 +163,9 @@ const PcRange = struct { const Func = struct { pc_range: ?PcRange, name: ?[]const u8, - - fn deinit(func: *Func, allocator: mem.Allocator) void { - if (func.name) |name| { - allocator.free(name); - } - } }; -const CompileUnit = struct { +pub const CompileUnit = struct { version: u16, is_64: bool, die: *Die, @@ -181,6 +175,7 @@ const CompileUnit = struct { addr_base: usize, rnglists_base: usize, loclists_base: usize, + frame_base: ?*const FormValue, }; const AbbrevTable = std.ArrayList(AbbrevTableEntry); @@ -216,7 +211,7 @@ const AbbrevAttr = struct { payload: i64, }; -const FormValue = union(enum) { +pub const FormValue = union(enum) { Address: u64, AddrOffset: usize, Block: []u8, @@ -298,7 +293,7 @@ const Die = struct { fn getAttrAddr( self: *const Die, - di: *DwarfInfo, + di: *const DwarfInfo, id: u64, compile_unit: CompileUnit, ) error{ InvalidDebugInfo, MissingDebugInfo }!u64 { @@ -708,9 +703,6 @@ pub const DwarfInfo = struct { allocator.destroy(cu.die); } di.compile_unit_list.deinit(allocator); - for 
(di.func_list.items) |*func| { - func.deinit(allocator); - } di.func_list.deinit(allocator); di.cie_map.deinit(allocator); di.fde_list.deinit(allocator); @@ -793,6 +785,7 @@ pub const DwarfInfo = struct { .addr_base = if (die_obj.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0, .rnglists_base = if (die_obj.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, .loclists_base = if (die_obj.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, + .frame_base = die_obj.getAttr(AT.frame_base), }; }, TAG.subprogram, TAG.inlined_subroutine, TAG.subroutine, TAG.entry_point => { @@ -802,8 +795,7 @@ pub const DwarfInfo = struct { // Prevent endless loops while (depth > 0) : (depth -= 1) { if (this_die_obj.getAttr(AT.name)) |_| { - const name = try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit); - break :x try allocator.dupe(u8, name); + break :x try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit); } else if (this_die_obj.getAttr(AT.abstract_origin)) |_| { // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin); @@ -834,7 +826,7 @@ pub const DwarfInfo = struct { break :x null; }; - var found_range = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: { + var range_added = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: { if (die_obj.getAttr(AT.high_pc)) |high_pc_value| { const pc_end = switch (high_pc_value.*) { FormValue.Address => |value| value, @@ -852,9 +844,11 @@ pub const DwarfInfo = struct { .end = pc_end, }, }); + + break :blk true; } - break :blk true; + break :blk false; } else |err| blk: { if (err != error.MissingDebugInfo) return err; break :blk false; @@ -867,7 +861,7 @@ pub const DwarfInfo = struct { }; while (try iter.next()) |range| { - found_range = true; + range_added = true; try di.func_list.append(allocator, Func{ .name = fn_name, .pc_range = .{ @@ -878,7 +872,7 @@ pub const DwarfInfo 
= struct { } } - if (!found_range) { + if (fn_name != null and !range_added) { try di.func_list.append(allocator, Func{ .name = fn_name, .pc_range = null, @@ -952,6 +946,7 @@ pub const DwarfInfo = struct { .addr_base = if (compile_unit_die.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0, .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, + .frame_base = compile_unit_die.getAttr(AT.frame_base), }; compile_unit.pc_range = x: { @@ -987,11 +982,11 @@ pub const DwarfInfo = struct { const DebugRangeIterator = struct { base_address: u64, section_type: DwarfSection, - di: *DwarfInfo, + di: *const DwarfInfo, compile_unit: *const CompileUnit, stream: io.FixedBufferStream([]const u8), - pub fn init(ranges_value: *const FormValue, di: *DwarfInfo, compile_unit: *const CompileUnit) !@This() { + pub fn init(ranges_value: *const FormValue, di: *const DwarfInfo, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) DwarfSection.debug_rnglists else DwarfSection.debug_ranges; const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo; @@ -1129,7 +1124,7 @@ pub const DwarfInfo = struct { } }; - pub fn findCompileUnit(di: *DwarfInfo, target_address: u64) !*const CompileUnit { + pub fn findCompileUnit(di: *const DwarfInfo, target_address: u64) !*const CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; @@ -1630,7 +1625,7 @@ pub const DwarfInfo = struct { } } - pub fn unwindFrame(di: *const DwarfInfo, allocator: mem.Allocator, context: *UnwindContext, module_base_address: usize) !usize { + pub fn unwindFrame(di: *const DwarfInfo, context: *UnwindContext, module_base_address: usize) !usize { if (!comptime 
abi.isSupportedArch(builtin.target.cpu.arch)) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return 0; @@ -1678,10 +1673,11 @@ pub const DwarfInfo = struct { cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; } + const compile_unit: ?*const CompileUnit = di.findCompileUnit(fde.pc_begin) catch null; context.vm.reset(); context.reg_ctx.eh_frame = cie.version != 4; - _ = try context.vm.runToNative(allocator, mapped_pc, cie, fde); + _ = try context.vm.runToNative(context.allocator, mapped_pc, cie, fde); const row = &context.vm.current_row; context.cfa = switch (row.cfa.rule) { @@ -1690,12 +1686,19 @@ pub const DwarfInfo = struct { const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, register, context.reg_ctx)); break :blk try call_frame.applyOffset(value, offset); }, - .expression => |expression| { - - // TODO: Evaluate expression - _ = expression; - - return error.UnimplementedTODO; + .expression => |expression| blk: { + context.stack_machine.reset(); + const value = try context.stack_machine.run( + expression, + context.allocator, + compile_unit, + &context.ucontext, + context.reg_ctx, + context.cfa orelse 0, + ); + + if (value != .generic) return error.InvalidExpressionValue; + break :blk value.generic; }, else => return error.InvalidCFARule, }; @@ -1713,7 +1716,13 @@ pub const DwarfInfo = struct { has_next_ip = column.rule != .undefined; } - try column.resolveValue(context.*, dest); + try column.resolveValue( + context, + compile_unit, + &context.ucontext, + context.reg_ctx, + dest, + ); } } @@ -1738,16 +1747,19 @@ pub const DwarfInfo = struct { }; pub const UnwindContext = struct { + allocator: mem.Allocator, cfa: ?usize, pc: usize, ucontext: os.ucontext_t, reg_ctx: abi.RegisterContext, isValidMemory: *const fn (address: usize) bool, vm: call_frame.VirtualMachine = .{}, + stack_machine: expressions.StackMachine(.{ .call_frame_mode = true }) = .{}, - pub fn init(ucontext: *const os.ucontext_t, 
isValidMemory: *const fn (address: usize) bool) !UnwindContext { + pub fn init(allocator: mem.Allocator, ucontext: *const os.ucontext_t, isValidMemory: *const fn (address: usize) bool) !UnwindContext { const pc = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, abi.ipRegNum(), null)); return .{ + .allocator = allocator, .cfa = null, .pc = pc, .ucontext = ucontext.*, @@ -1756,8 +1768,9 @@ pub const UnwindContext = struct { }; } - pub fn deinit(self: *UnwindContext, allocator: mem.Allocator) void { - self.vm.deinit(allocator); + pub fn deinit(self: *UnwindContext) void { + self.vm.deinit(self.allocator); + self.stack_machine.deinit(self.allocator); } pub fn getFp(self: *const UnwindContext) !usize { diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index f512d7a909bc..0fabaa70f095 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -183,9 +183,9 @@ pub const Instruction = union(Opcode) { offset_extended_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }), def_cfa_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }), def_cfa_offset_sf: InstructionType(.{ .offset = .sleb128_offset }), - val_offset: InstructionType(.{ .a = .uleb128_offset, .b = .uleb128_offset }), - val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }), - val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }), + val_offset: InstructionType(.{ .register = .uleb128_register, .offset = .uleb128_offset }), + val_offset_sf: InstructionType(.{ .register = .uleb128_register, .offset = .sleb128_offset }), + val_expression: InstructionType(.{ .register = .uleb128_register, .block = .block }), fn readOperands( self: *Instruction, @@ -292,7 +292,14 @@ pub const VirtualMachine = struct { rule: RegisterRule = .{ .default = {} }, /// Resolves the register rule and places the result into `out` (see dwarf.abi.regBytes) - pub fn resolveValue(self: Column, 
context: dwarf.UnwindContext, out: []u8) !void { + pub fn resolveValue( + self: Column, + context: *dwarf.UnwindContext, + compile_unit: ?*const dwarf.CompileUnit, + ucontext: *const std.os.ucontext_t, + reg_ctx: abi.RegisterContext, + out: []u8, + ) !void { switch (self.rule) { .default => { const register = self.register orelse return error.InvalidRegister; @@ -321,14 +328,21 @@ pub const VirtualMachine = struct { @memcpy(out, try abi.regBytes(&context.ucontext, register, context.reg_ctx)); }, .expression => |expression| { - // TODO - _ = expression; - unreachable; + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.allocator, compile_unit, ucontext, reg_ctx, context.cfa.?); + + if (value != .generic) return error.InvalidExpressionValue; + if (!context.isValidMemory(value.generic)) return error.InvalidExpressionAddress; + + const ptr: *usize = @ptrFromInt(value.generic); + mem.writeIntSliceNative(usize, out, ptr.*); }, .val_expression => |expression| { - // TODO - _ = expression; - unreachable; + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.allocator, compile_unit, ucontext, reg_ctx, context.cfa.?); + + if (value != .generic) return error.InvalidExpressionValue; + mem.writeIntSliceNative(usize, out, value.generic); }, .architectural => return error.UnimplementedRule, } @@ -546,12 +560,16 @@ pub const VirtualMachine = struct { .def_cfa_offset => |i| { try self.resolveCopyOnWrite(allocator); if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ .val_offset = @intCast(i.operands.offset) }; + self.current_row.cfa.rule = .{ + .val_offset = @intCast(i.operands.offset), + }; }, .def_cfa_offset_sf => |i| { try self.resolveCopyOnWrite(allocator); if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - 
self.current_row.cfa.rule = .{ .val_offset = i.operands.offset * cie.data_alignment_factor }; + self.current_row.cfa.rule = .{ + .val_offset = i.operands.offset * cie.data_alignment_factor, + }; }, .def_cfa_expression => |i| { try self.resolveCopyOnWrite(allocator); @@ -567,17 +585,26 @@ pub const VirtualMachine = struct { .expression = i.operands.block, }; }, - .val_offset => { - // TODO: Implement - unreachable; + .val_offset => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ + .val_offset = @as(i64, @intCast(i.operands.offset)) * cie.data_alignment_factor, + }; }, - .val_offset_sf => { - // TODO: Implement - unreachable; + .val_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ + .val_offset = i.operands.offset * cie.data_alignment_factor, + }; }, - .val_expression => { - // TODO: Implement - unreachable; + .val_expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.operands.register); + column.rule = .{ + .val_expression = i.operands.block, + }; }, } diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index f2b8bfc88136..49c548aecde2 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -2,6 +2,9 @@ const std = @import("std"); const builtin = @import("builtin"); const OP = @import("OP.zig"); const leb = @import("../leb128.zig"); +const dwarf = @import("../dwarf.zig"); +const abi = dwarf.abi; +const mem = std.mem; pub const StackMachineOptions = struct { /// The address size of the target architecture @@ -33,9 +36,10 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { }; return struct { - const Value = union(enum) { + const Self = @This(); + + const Operand = union(enum) { generic: addr_type, - const_type: []const u8, register: u8, 
base_register: struct { base_register: u8, @@ -46,7 +50,11 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { offset: i64, }, block: []const u8, - base_type: struct { + register_type: struct { + register: u8, + type_offset: u64, + }, + const_type: struct { type_offset: u64, value_bytes: []const u8, }, @@ -56,9 +64,31 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { }, }; + const Value = union(enum) { + generic: addr_type, + regval_type: struct { + // Offset of DW_TAG_base_type DIE + type_offset: u64, + value: addr_type, + }, + const_type: struct { + // Offset of DW_TAG_base_type DIE + type_offset: u64, + value_bytes: []const u8, + }, + }; + stack: std.ArrayListUnmanaged(Value) = .{}, - fn generic(value: anytype) Value { + pub fn reset(self: *Self) void { + self.stack.clearRetainingCapacity(); + } + + pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { + self.stack.deinit(allocator); + } + + fn generic(value: anytype) Operand { const int_info = @typeInfo(@TypeOf(value)).Int; if (@sizeOf(@TypeOf(value)) > options.addr_size) { return .{ .generic = switch (int_info.signedness) { @@ -73,7 +103,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { } } - pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8) !?Value { + pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8) !?Operand { const reader = stream.reader(); return switch (opcode) { OP.addr, @@ -87,8 +117,8 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { OP.const1s => generic(try reader.readByteSigned()), OP.const2u, OP.call2, - OP.call4, => generic(try reader.readInt(u16, options.endian)), + OP.call4 => generic(try reader.readInt(u32, options.endian)), OP.const2s, OP.bra, OP.skip, @@ -114,21 +144,35 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { .offset = try leb.readILEB128(i64, reader), } }, OP.regx => .{ .register = try leb.readULEB128(u8, reader) }, - 
OP.bregx, OP.regval_type => .{ .base_register = .{ - .base_register = try leb.readULEB128(u8, reader), - .offset = try leb.readILEB128(i64, reader), - } }, + OP.bregx => blk: { + const base_register = try leb.readULEB128(u8, reader); + const offset = try leb.readILEB128(i64, reader); + break :blk .{ .base_register = .{ + .base_register = base_register, + .offset = offset, + } }; + }, + OP.regval_type => blk: { + const register = try leb.readULEB128(u8, reader); + const type_offset = try leb.readULEB128(u64, reader); + break :blk .{ .register_type = .{ + .register = register, + .type_offset = type_offset, + } }; + }, OP.piece => .{ .composite_location = .{ .size = try leb.readULEB128(u8, reader), .offset = 0, }, }, - OP.bit_piece => .{ - .composite_location = .{ - .size = try leb.readULEB128(u8, reader), - .offset = try leb.readILEB128(i64, reader), - }, + OP.bit_piece => blk: { + const size = try leb.readULEB128(u8, reader); + const offset = try leb.readILEB128(i64, reader); + break :blk .{ .composite_location = .{ + .size = size, + .offset = offset, + } }; }, OP.implicit_value, OP.entry_value => blk: { const size = try leb.readULEB128(u8, reader); @@ -145,7 +189,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; const value_bytes = stream.buffer[stream.pos..][0..size]; stream.pos += size; - break :blk .{ .base_type = .{ + break :blk .{ .const_type = .{ .type_offset = type_offset, .value_bytes = value_bytes, } }; @@ -163,22 +207,144 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { }; } + pub fn run( + self: *Self, + expression: []const u8, + allocator: std.mem.Allocator, + compile_unit: ?*const dwarf.CompileUnit, + ucontext: *const std.os.ucontext_t, + reg_ctx: abi.RegisterContext, + initial_value: usize, + ) !Value { + try self.stack.append(allocator, .{ .generic = initial_value }); + var stream = std.io.fixedBufferStream(expression); + while (try 
self.step(&stream, allocator, compile_unit, ucontext, reg_ctx)) {} + if (self.stack.items.len == 0) return error.InvalidExpression; + return self.stack.items[self.stack.items.len - 1]; + } + + /// Reads an opcode and its operands from the stream and executes it pub fn step( - self: *StackMachine, - stream: std.io.FixedBufferStream([]const u8), + self: *Self, + stream: *std.io.FixedBufferStream([]const u8), allocator: std.mem.Allocator, - ) !void { - if (@sizeOf(usize) != addr_type or options.endian != builtin.target.cpu.arch.endian()) + compile_unit: ?*const dwarf.CompileUnit, + ucontext: *const std.os.ucontext_t, + reg_ctx: dwarf.abi.RegisterContext, + ) !bool { + if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian()) @compileError("Execution of non-native address sizees / endianness is not supported"); - const opcode = try stream.reader.readByte(); - _ = opcode; - _ = self; - _ = allocator; + const opcode = try stream.reader().readByte(); + if (options.call_frame_mode) { + // Certain opcodes are not allowed in a CFA context, see 6.4.2 + switch (opcode) { + OP.addrx, + OP.call2, + OP.call4, + OP.call_ref, + OP.const_type, + OP.constx, + OP.convert, + OP.deref_type, + OP.regval_type, + OP.reinterpret, + OP.push_object_address, + OP.call_frame_cfa, + => return error.InvalidCFAExpression, + else => {}, + } + } + + switch (opcode) { + + // 2.5.1.1: Literal Encodings + OP.lit0...OP.lit31, + OP.addr, + OP.const1u, + OP.const2u, + OP.const4u, + OP.const8u, + OP.const1s, + OP.const2s, + OP.const4s, + OP.const8s, + OP.constu, + OP.consts, + => try self.stack.append(allocator, .{ .generic = (try readOperand(stream, opcode)).?.generic }), + + OP.const_type => { + const const_type = (try readOperand(stream, opcode)).?.const_type; + try self.stack.append(allocator, .{ .const_type = .{ + .type_offset = const_type.type_offset, + .value_bytes = const_type.value_bytes, + } }); + }, + + OP.addrx, OP.constx => { + const 
debug_addr_index = (try readOperand(stream, opcode)).?.generic; + + // TODO: Read item from .debug_addr, this requires the DW_AT_addr_base of the compile unit, push onto stack as generic + + _ = debug_addr_index; + unreachable; + }, + + // 2.5.1.2: Register Values + OP.fbreg => { + if (compile_unit == null) return error.ExpressionRequiresCompileUnit; + if (compile_unit.?.frame_base == null) return error.ExpressionRequiresFrameBase; + + const offset: i64 = @intCast((try readOperand(stream, opcode)).?.generic); + _ = offset; + + switch (compile_unit.?.frame_base.?.*) { + .ExprLoc => { + // TODO: Run this expression in a nested stack machine + return error.UnimplementedOpcode; + }, + .LocListOffset => { + // TODO: Read value from .debug_loclists + return error.UnimplementedOpcode; + }, + .SecOffset => { + // TODO: Read value from .debug_loclists + return error.UnimplementedOpcode; + }, + else => return error.InvalidFrameBase, + } + }, + OP.breg0...OP.breg31, OP.bregx => { + const base_register = (try readOperand(stream, opcode)).?.base_register; + var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(ucontext, base_register.base_register, reg_ctx))); + value += base_register.offset; + try self.stack.append(allocator, .{ .generic = @intCast(value) }); + }, + OP.regval_type => { + const register_type = (try readOperand(stream, opcode)).?.register_type; + const value = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, register_type.register, reg_ctx)); + try self.stack.append(allocator, .{ + .regval_type = .{ + .value = value, + .type_offset = register_type.type_offset, + }, + }); + }, + + // 2.5.1.3: Stack Operations + + OP.dup => {}, + + else => { + std.debug.print("Unimplemented DWARF expression opcode: {x}\n", .{opcode}); + unreachable; + }, + + // These have already been handled by readOperand + OP.lo_user...OP.hi_user => unreachable, + } - // switch (opcode) { - // OP.addr => try self.stack.append(allocator, try readOperand(stream,
opcode)), - // } + return stream.pos < stream.buffer.len; } }; } From 412cd789bf38a8fb6126803b8eab601b700e5a9e Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 3 Jul 2023 14:31:29 -0400 Subject: [PATCH 44/81] debug: fixup base address calculations for macho dwarf: fixup x86 register mapping logic dwarf: change the register context update to update in-place instead of copying debug: always print the unwind error type --- lib/std/c/darwin.zig | 2 ++ lib/std/c/darwin/x86_64.zig | 23 +++++++++++++ lib/std/debug.zig | 32 ++++++++++-------- lib/std/dwarf.zig | 35 ++++++++++++++++--- lib/std/dwarf/abi.zig | 67 +++++++++++++++++++------------------ 5 files changed, 108 insertions(+), 51 deletions(-) diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 0f60c2f841d4..5f281912215b 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -148,11 +148,13 @@ pub const ucontext_t = extern struct { link: ?*ucontext_t, mcsize: u64, mcontext: *mcontext_t, + __mcontext_data: mcontext_t, }; pub const mcontext_t = extern struct { es: arch_bits.exception_state, ss: arch_bits.thread_state, + fs: arch_bits.float_state, }; extern "c" fn __error() *c_int; diff --git a/lib/std/c/darwin/x86_64.zig b/lib/std/c/darwin/x86_64.zig index c7671bc23a14..db94840d9da2 100644 --- a/lib/std/c/darwin/x86_64.zig +++ b/lib/std/c/darwin/x86_64.zig @@ -31,6 +31,29 @@ pub const thread_state = extern struct { gs: u64, }; +const stmm_reg = [16]u8; +const xmm_reg = [16]u8; +pub const float_state = extern struct { + reserved: [2]c_int, + fcw: u16, + fsw: u16, + ftw: u8, + rsrv1: u8, + fop: u16, + ip: u32, + cs: u16, + rsrv2: u16, + dp: u32, + ds: u16, + rsrv3: u16, + mxcsr: u32, + mxcsrmask: u32, + stmm: [8]stmm_reg, + xmm: [16]xmm_reg, + rsrv4: [96]u8, + reserved1: c_int, +}; + pub const THREAD_STATE = 4; pub const THREAD_STATE_COUNT: c.mach_msg_type_number_t = @sizeOf(thread_state) / @sizeOf(c_int); diff --git a/lib/std/debug.zig b/lib/std/debug.zig index f231a4ac471c..249674e0d40d 100644 --- 
a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -444,7 +444,10 @@ pub inline fn getContext(context: *StackTraceContext) bool { return true; } - return have_getcontext and os.system.getcontext(context) == 0; + const result = have_getcontext and os.system.getcontext(context) == 0; + if (native_os == .macos) assert(context.mcsize == @sizeOf(std.c.mcontext_t)); + + return result; } pub const UnwindError = if (have_ucontext) @@ -553,6 +556,7 @@ pub const StackIterator = struct { if (native_os == .freestanding) return true; const aligned_address = address & ~@as(usize, @intCast((mem.page_size - 1))); + if (aligned_address == 0) return false; const aligned_memory = @as([*]align(mem.page_size) u8, @ptrFromInt(aligned_address))[0..mem.page_size]; if (native_os != .windows) { @@ -815,11 +819,7 @@ fn printUnknownSource(debug_info: *DebugInfo, out_stream: anytype, address: usiz pub fn printUnwindError(debug_info: *DebugInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); - if (err != error.MissingDebugInfo) { - try out_stream.print("Unwind information for {s} was not available ({}), trace may be incomplete\n\n", .{ module_name, err }); - } else { - try out_stream.print("Unwind information for {s} was not available, trace may be incomplete\n\n", .{module_name}); - } + try out_stream.print("Unwind information for {s} was not available ({}), trace may be incomplete\n\n", .{ module_name, err }); try tty_config.setColor(out_stream, .reset); } @@ -1309,6 +1309,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn return ModuleDebugInfo{ .base_address = undefined, + .vmaddr_slide = undefined, .mapped_memory = mapped_mem, .ofiles = ModuleDebugInfo.OFileTable.init(allocator), .symbols = symbols, @@ -1514,11 +1515,10 @@ pub const DebugInfo = struct { var i: u32 = 0; while (i < image_count) : (i 
+= 1) { - const base_address = std.c._dyld_get_image_vmaddr_slide(i); - - if (address < base_address) continue; - const header = std.c._dyld_get_image_header(i) orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); var it = macho.LoadCommandIterator{ .ncmds = header.ncmds, @@ -1527,14 +1527,16 @@ pub const DebugInfo = struct { @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), )[0..header.sizeofcmds]), }; + while (it.next()) |cmd| switch (cmd.cmd()) { .SEGMENT_64 => { const segment_cmd = cmd.cast(macho.segment_command_64).?; - const rebased_address = address - base_address; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const original_address = address - vmaddr_slide; const seg_start = segment_cmd.vmaddr; const seg_end = seg_start + segment_cmd.vmsize; - - if (rebased_address >= seg_start and rebased_address < seg_end) { + if (original_address >= seg_start and original_address < seg_end) { if (self.address_map.get(base_address)) |obj_di| { return obj_di; } @@ -1551,6 +1553,7 @@ pub const DebugInfo = struct { }; obj_di.* = try readMachODebugInfo(self.allocator, macho_file); obj_di.base_address = base_address; + obj_di.vmaddr_slide = vmaddr_slide; try self.address_map.putNoClobber(base_address, obj_di); @@ -1808,6 +1811,7 @@ pub const DebugInfo = struct { pub const ModuleDebugInfo = switch (native_os) { .macos, .ios, .watchos, .tvos => struct { base_address: usize, + vmaddr_slide: usize, mapped_memory: []align(mem.page_size) const u8, symbols: []const MachoSymbol, strings: [:0]const u8, @@ -1972,7 +1976,7 @@ pub const ModuleDebugInfo = switch (native_os) { } { nosuspend { // Translate the VA into an address into this object - const relocated_address = address - self.base_address; + const relocated_address = address - self.vmaddr_slide; // Find the .o file where this symbol is defined const symbol = 
machoSearchSymbols(self.symbols, relocated_address) orelse return .{ diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 807bc09a2fcb..d5f41a74bd7e 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1705,28 +1705,53 @@ pub const DwarfInfo = struct { if (!context.isValidMemory(context.cfa.?)) return error.InvalidCFA; - // Update the context with the previous frame's values - var next_ucontext = context.ucontext; + // Buffering the modifications is done because copying the ucontext is not portable, + // some implementations (ie. darwin) use internal pointers to the mcontext. + var arena = std.heap.ArenaAllocator.init(context.allocator); + defer arena.deinit(); + const update_allocator = arena.allocator(); + + const RegisterUpdate = struct { + // Backed by ucontext + old_value: []u8, + // Backed by arena + new_value: []const u8, + prev: ?*@This(), + }; + var update_tail: ?*RegisterUpdate = null; var has_next_ip = false; for (context.vm.rowColumns(row.*)) |column| { if (column.register) |register| { - const dest = try abi.regBytes(&next_ucontext, register, context.reg_ctx); if (register == cie.return_address_register) { has_next_ip = column.rule != .undefined; } + const old_value = try abi.regBytes(&context.ucontext, register, context.reg_ctx); + const new_value = try update_allocator.alloc(u8, old_value.len); + + const prev = update_tail; + update_tail = try update_allocator.create(RegisterUpdate); + update_tail.?.* = .{ + .old_value = old_value, + .new_value = new_value, + .prev = prev, + }; + try column.resolveValue( context, compile_unit, &context.ucontext, context.reg_ctx, - dest, + new_value, ); } } - context.ucontext = next_ucontext; + while (update_tail) |tail| { + @memcpy(tail.old_value, tail.new_value); + update_tail = tail.prev; + } if (has_next_ip) { context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, comptime abi.ipRegNum(), context.reg_ctx)); diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 
ab1213eb92c4..7b1418a293ae 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -68,38 +68,41 @@ pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext var m = &ucontext_ptr.mcontext; return switch (builtin.cpu.arch) { - .x86 => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EAX]), - 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ECX]), - 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EDX]), - 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBX]), - 4...5 => if (reg_ctx) |r| bytes: { - if (reg_number == 4) { - break :bytes if (r.eh_frame and r.is_macho) - mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBP]) - else - mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESP]); - } else { - break :bytes if (r.eh_frame and r.is_macho) - mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESP]) - else - mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBP]); - } - } else error.RegisterContextRequired, - 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESI]), - 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EDI]), - 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EIP]), - 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EFL]), - 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.CS]), - 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.SS]), - 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.DS]), - 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ES]), - 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.FS]), - 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.GS]), - 16...23 => error.InvalidRegister, // TODO: Support loading ST0-ST7 from mcontext.fpregs - // TODO: Map TRAPNO, ERR, UESP - 32...39 => error.InvalidRegister, // TODO: Support loading XMM0-XMM7 from mcontext.fpregs - else => error.InvalidRegister, + .x86 => switch (builtin.os.tag) { + .linux, .netbsd, .solaris => switch (reg_number) { + 0 => 
mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EAX]), + 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ECX]), + 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EDX]), + 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBX]), + 4...5 => if (reg_ctx) |r| bytes: { + if (reg_number == 4) { + break :bytes if (r.eh_frame and r.is_macho) + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBP]) + else + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESP]); + } else { + break :bytes if (r.eh_frame and r.is_macho) + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESP]) + else + mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBP]); + } + } else error.RegisterContextRequired, + 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ESI]), + 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EDI]), + 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EIP]), + 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EFL]), + 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.CS]), + 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.SS]), + 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.DS]), + 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ES]), + 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.FS]), + 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.GS]), + 16...23 => error.InvalidRegister, // TODO: Support loading ST0-ST7 from mcontext.fpregs + // TODO: Map TRAPNO, ERR, UESP + 32...39 => error.InvalidRegister, // TODO: Support loading XMM0-XMM7 from mcontext.fpregs + else => error.InvalidRegister, + }, + else => error.UnimplementedOs, }, .x86_64 => switch (builtin.os.tag) { .linux, .netbsd, .solaris => switch (reg_number) { From 576ffaa3298b4e99b7e46d4eccad4763b18985f8 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 4 Jul 2023 13:40:50 -0400 Subject: [PATCH 45/81] darwin: update mcontext_t definition for aarch64 to add neon state --- lib/std/c/darwin.zig | 6 +----- lib/std/c/darwin/aarch64.zig | 13 +++++++++++++ 
lib/std/c/darwin/x86_64.zig | 6 ++++++ lib/std/debug.zig | 9 ++++++++- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 5f281912215b..1901271b83c2 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -151,11 +151,7 @@ pub const ucontext_t = extern struct { __mcontext_data: mcontext_t, }; -pub const mcontext_t = extern struct { - es: arch_bits.exception_state, - ss: arch_bits.thread_state, - fs: arch_bits.float_state, -}; +pub const mcontext_t = arch_bits.mcontext_t; extern "c" fn __error() *c_int; pub extern "c" fn NSVersionOfRunTimeLibrary(library_name: [*:0]const u8) u32; diff --git a/lib/std/c/darwin/aarch64.zig b/lib/std/c/darwin/aarch64.zig index 48b03363a165..d00b92af8383 100644 --- a/lib/std/c/darwin/aarch64.zig +++ b/lib/std/c/darwin/aarch64.zig @@ -1,5 +1,12 @@ // See C headers in // lib/libc/include/aarch64-macos.12-gnu/mach/arm/_structs.h +// lib/libc/include/aarch64-macos.13-none/arm/_mcontext.h + +pub const mcontext_t = extern struct { + es: exception_state, + ss: thread_state, + ns: neon_state, +}; pub const exception_state = extern struct { far: u64, // Virtual Fault Address @@ -17,6 +24,12 @@ pub const thread_state = extern struct { __pad: u32, }; +pub const neon_state = extern struct { + q: [32]u128, + fpsr: u32, + fpcr: u32, +}; + pub const EXC_TYPES_COUNT = 14; pub const EXC_MASK_MACHINE = 0; diff --git a/lib/std/c/darwin/x86_64.zig b/lib/std/c/darwin/x86_64.zig index db94840d9da2..7b66fb2e9798 100644 --- a/lib/std/c/darwin/x86_64.zig +++ b/lib/std/c/darwin/x86_64.zig @@ -1,5 +1,11 @@ const c = @import("../darwin.zig"); +pub const mcontext_t = extern struct { + es: exception_state, + ss: thread_state, + fs: float_state, +}; + pub const exception_state = extern struct { trapno: u16, cpu: u16, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 249674e0d40d..eef65bd6770e 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -445,7 +445,14 @@ pub inline fn 
getContext(context: *StackTraceContext) bool { } const result = have_getcontext and os.system.getcontext(context) == 0; - if (native_os == .macos) assert(context.mcsize == @sizeOf(std.c.mcontext_t)); + if (native_os == .macos) { + // TODO: Temp, to discover this size via aarch64 CI + if (context.mcsize != @sizeOf(std.c.mcontext_t)) { + print("context.mcsize does not match! {} vs {}\n", .{ context.mcsize, @sizeOf(std.c.mcontext_t)}); + } + + assert(context.mcsize == @sizeOf(std.c.mcontext_t)); + } return result; } From ad5f74c0b1762e16d98115d7cc7c7f58c40aee7b Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 5 Jul 2023 09:55:54 -0400 Subject: [PATCH 46/81] dwarf: introduce ExpressionContext, add more expression opcodes --- lib/std/debug.zig | 2 +- lib/std/dwarf.zig | 18 ++-- lib/std/dwarf/call_frame.zig | 24 ++--- lib/std/dwarf/expressions.zig | 188 ++++++++++++++++++++++++++++++---- 4 files changed, 186 insertions(+), 46 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index eef65bd6770e..38b2cfc59b5a 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -448,7 +448,7 @@ pub inline fn getContext(context: *StackTraceContext) bool { if (native_os == .macos) { // TODO: Temp, to discover this size via aarch64 CI if (context.mcsize != @sizeOf(std.c.mcontext_t)) { - print("context.mcsize does not match! {} vs {}\n", .{ context.mcsize, @sizeOf(std.c.mcontext_t)}); + print("context.mcsize does not match! 
{} vs {}\n", .{ context.mcsize, @sizeOf(std.c.mcontext_t) }); } assert(context.mcsize == @sizeOf(std.c.mcontext_t)); diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index d5f41a74bd7e..3a8ea847bf22 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1673,7 +1673,14 @@ pub const DwarfInfo = struct { cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; } - const compile_unit: ?*const CompileUnit = di.findCompileUnit(fde.pc_begin) catch null; + var expression_context = .{ + .isValidMemory = context.isValidMemory, + .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, + .ucontext = &context.ucontext, + .reg_ctx = context.reg_ctx, + .cfa = context.cfa, + }; + context.vm.reset(); context.reg_ctx.eh_frame = cie.version != 4; @@ -1691,9 +1698,7 @@ pub const DwarfInfo = struct { const value = try context.stack_machine.run( expression, context.allocator, - compile_unit, - &context.ucontext, - context.reg_ctx, + expression_context, context.cfa orelse 0, ); @@ -1704,6 +1709,7 @@ pub const DwarfInfo = struct { }; if (!context.isValidMemory(context.cfa.?)) return error.InvalidCFA; + expression_context.cfa = context.cfa; // Buffering the modifications is done because copying the ucontext is not portable, // some implementations (ie. darwin) use internal pointers to the mcontext. 
@@ -1740,9 +1746,7 @@ pub const DwarfInfo = struct { try column.resolveValue( context, - compile_unit, - &context.ucontext, - context.reg_ctx, + expression_context, new_value, ); } diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 0fabaa70f095..45c947d88b4b 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -218,13 +218,7 @@ pub const Instruction = union(Opcode) { break :blk result; }, Opcode.lo_user...Opcode.hi_user => error.UnimplementedUserOpcode, - else => |opcode| blk: { - - // TODO: Remove this - std.debug.print("Opcode {x}\n", .{opcode}); - - break :blk error.InvalidOpcode; - }, + else => error.InvalidOpcode, }; } }; @@ -295,9 +289,7 @@ pub const VirtualMachine = struct { pub fn resolveValue( self: Column, context: *dwarf.UnwindContext, - compile_unit: ?*const dwarf.CompileUnit, - ucontext: *const std.os.ucontext_t, - reg_ctx: abi.RegisterContext, + expression_context: dwarf.expressions.ExpressionContext, out: []u8, ) !void { switch (self.rule) { @@ -311,9 +303,9 @@ pub const VirtualMachine = struct { .same_value => {}, .offset => |offset| { if (context.cfa) |cfa| { - const ptr: *const usize = @ptrFromInt(try applyOffset(cfa, offset)); - - // TODO: context.isValidMemory(ptr) + const addr = try applyOffset(cfa, offset); + if (expression_context.isValidMemory) |isValidMemory| if (!isValidMemory(addr)) return error.InvalidAddress; + const ptr: *const usize = @ptrFromInt(addr); mem.writeIntSliceNative(usize, out, ptr.*); } else return error.InvalidCFA; }, @@ -329,7 +321,7 @@ pub const VirtualMachine = struct { }, .expression => |expression| { context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, compile_unit, ucontext, reg_ctx, context.cfa.?); + const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); if (value != .generic) return error.InvalidExpressionValue; if 
(!context.isValidMemory(value.generic)) return error.InvalidExpressionAddress; @@ -339,12 +331,12 @@ pub const VirtualMachine = struct { }, .val_expression => |expression| { context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, compile_unit, ucontext, reg_ctx, context.cfa.?); + const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); if (value != .generic) return error.InvalidExpressionValue; mem.writeIntSliceNative(usize, out, value.generic); }, - .architectural => return error.UnimplementedRule, + .architectural => return error.UnimplementedRegisterRule, } } }; diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index 49c548aecde2..e5f553ae2957 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -17,6 +17,24 @@ pub const StackMachineOptions = struct { call_frame_mode: bool = false, }; +/// Expressions can be evaluated in different contexts, each requiring its own set of inputs. +/// Callers should specify all the fields relevant to their context. If a field is required +/// by the expression and it isn't in the context, error.IncompleteExpressionContext is returned. +pub const ExpressionContext = struct { + /// If specified, any addresses will pass through this function before being + isValidMemory: ?*const fn (address: usize) bool = null, + + /// The compilation unit this expression relates to, if any + compile_unit: ?*const dwarf.CompileUnit = null, + + /// Register context + ucontext: ?*std.os.ucontext_t, + reg_ctx: ?abi.RegisterContext, + + /// Call frame address, if in a CFI context + cfa: ?usize, +}; + /// A stack machine that can decode and run DWARF expressions. /// Expressions can be decoded for non-native address size and endianness, /// but can only be executed if the current target matches the configuration. 
@@ -41,6 +59,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { const Operand = union(enum) { generic: addr_type, register: u8, + type_size: u8, base_register: struct { base_register: u8, offset: i64, @@ -60,22 +79,46 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { }, deref_type: struct { size: u8, - offset: u64, + type_offset: u64, }, }; const Value = union(enum) { generic: addr_type, + + // Typed value with a maximum size of a register regval_type: struct { // Offset of DW_TAG_base_type DIE type_offset: u64, + type_size: u8, value: addr_type, }, + + // Typed value specified directly in the instruction stream const_type: struct { // Offset of DW_TAG_base_type DIE type_offset: u64, + // Backed by the instruction stream value_bytes: []const u8, }, + + pub fn asIntegral(self: Value) !addr_type { + return switch (self) { + .generic => |v| v, + + // TODO: For these two prongs, look up the type and assert it's integral? + .regval_type => |regval_type| regval_type.value, + .const_type => |const_type| { + return switch (const_type.value_bytes.len) { + 1 => mem.readIntSliceNative(u8, const_type.value_bytes), + 2 => mem.readIntSliceNative(u16, const_type.value_bytes), + 4 => mem.readIntSliceNative(u32, const_type.value_bytes), + 8 => mem.readIntSliceNative(u64, const_type.value_bytes), + else => return error.InvalidIntegralTypeLength, + }; + }, + }; + } }; stack: std.ArrayListUnmanaged(Value) = .{}, @@ -111,9 +154,10 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { => generic(try reader.readInt(addr_type, options.endian)), OP.const1u, OP.pick, + => generic(try reader.readByte()), OP.deref_size, OP.xderef_size, - => generic(try reader.readByte()), + => .{ .type_size = try reader.readByte() }, OP.const1s => generic(try reader.readByteSigned()), OP.const2u, OP.call2, @@ -199,7 +243,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { => .{ .deref_type = .{ .size = try reader.readByte(), - .offset = 
try leb.readULEB128(u64, reader), + .type_offset = try leb.readULEB128(u64, reader), }, }, OP.lo_user...OP.hi_user => return error.UnimplementedUserOpcode, @@ -211,14 +255,12 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { self: *Self, expression: []const u8, allocator: std.mem.Allocator, - compile_unit: ?*const dwarf.CompileUnit, - ucontext: *const std.os.ucontext_t, - reg_ctx: abi.RegisterContext, + context: ExpressionContext, initial_value: usize, ) !Value { try self.stack.append(allocator, .{ .generic = initial_value }); var stream = std.io.fixedBufferStream(expression); - while (try self.step(&stream, allocator, compile_unit, ucontext, reg_ctx)) {} + while (try self.step(&stream, allocator, context)) {} if (self.stack.items.len == 0) return error.InvalidExpression; return self.stack.items[self.stack.items.len - 1]; } @@ -228,9 +270,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { self: *Self, stream: *std.io.FixedBufferStream([]const u8), allocator: std.mem.Allocator, - compile_unit: ?*const dwarf.CompileUnit, - ucontext: *const std.os.ucontext_t, - reg_ctx: dwarf.abi.RegisterContext, + context: ExpressionContext, ) !bool { if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian()) @compileError("Execution of non-native address sizees / endianness is not supported"); @@ -281,7 +321,9 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { } }); }, - OP.addrx, OP.constx => { + OP.addrx, + OP.constx, + => { const debug_addr_index = (try readOperand(stream, opcode)).?.generic; // TODO: Read item from .debug_addr, this requires need DW_AT_addr_base of the compile unit, push onto stack as generic @@ -292,13 +334,13 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { // 2.5.1.2: Register Values OP.fbreg => { - if (compile_unit == null) return error.ExpressionRequiresCompileUnit; - if (compile_unit.?.frame_base == null) return 
error.ExpressionRequiresFrameBase; + if (context.compile_unit == null) return error.ExpressionRequiresCompileUnit; + if (context.compile_unit.?.frame_base == null) return error.ExpressionRequiresFrameBase; const offset: i64 = @intCast((try readOperand(stream, opcode)).?.generic); _ = offset; - switch (compile_unit.?.frame_base.?.*) { + switch (context.compile_unit.?.frame_base.?.*) { .ExprLoc => { // TODO: Run this expression in a nested stack machine return error.UnimplementedOpcode; @@ -314,34 +356,136 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { else => return error.InvalidFrameBase, } }, - OP.breg0...OP.breg31, OP.bregx => { + OP.breg0...OP.breg31, + OP.bregx, + => { + if (context.ucontext == null) return error.IncompleteExpressionContext; + const base_register = (try readOperand(stream, opcode)).?.base_register; - var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(ucontext, base_register.base_register, reg_ctx))); + var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, base_register.base_register, context.reg_ctx))); value += base_register.offset; try self.stack.append(allocator, .{ .generic = @intCast(value) }); }, OP.regval_type => { const register_type = (try readOperand(stream, opcode)).?.register_type; - const value = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, register_type.register, reg_ctx)); + const value = mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, register_type.register, context.reg_ctx)); try self.stack.append(allocator, .{ .regval_type = .{ - .value = value, .type_offset = register_type.type_offset, + .type_size = @sizeOf(addr_type), + .value = value, }, }); }, // 2.5.1.3: Stack Operations + OP.dup => { + if (self.stack.items.len == 0) return error.InvalidExpression; + try self.stack.append(allocator, self.stack.items[self.stack.items.len - 1]); + }, + OP.drop => { + _ = self.stack.pop(); + }, + OP.pick, OP.over => { + const 
stack_index = if (opcode == OP.over) 1 else (try readOperand(stream, opcode)).?.generic; + if (stack_index >= self.stack.items.len) return error.InvalidExpression; + try self.stack.append(allocator, self.stack.items[self.stack.items.len - 1 - stack_index]); + }, + OP.swap => { + if (self.stack.items.len < 2) return error.InvalidExpression; + mem.swap(Value, &self.stack.items[self.stack.items.len - 1], &self.stack.items[self.stack.items.len - 2]); + }, + OP.rot => { + if (self.stack.items.len < 3) return error.InvalidExpression; + const first = self.stack.items[self.stack.items.len - 1]; + self.stack.items[self.stack.items.len - 1] = self.stack.items[self.stack.items.len - 2]; + self.stack.items[self.stack.items.len - 2] = self.stack.items[self.stack.items.len - 3]; + self.stack.items[self.stack.items.len - 3] = first; + }, + OP.deref, + OP.xderef, + OP.deref_size, + OP.xderef_size, + OP.deref_type, + OP.xderef_type, + => { + if (self.stack.items.len == 0) return error.InvalidExpression; + var addr = try self.stack.pop().asIntegral(); + const addr_space_identifier: ?usize = switch (opcode) { + OP.xderef, + OP.xderef_size, + OP.xderef_type, + => try self.stack.pop().asIntegral(), + else => null, + }; - OP.dup => {}, + // Usage of addr_space_identifier in the address calculation is implementation defined. + // This code will need to be updated to handle any architectures that utilize this. 
+ _ = addr_space_identifier; - else => { - std.debug.print("Unimplemented DWARF expression opcode: {x}\n", .{opcode}); - unreachable; + if (context.isValidMemory) |isValidMemory| if (!isValidMemory(addr)) return error.InvalidExpression; + + const operand = try readOperand(stream, opcode); + const size = switch (opcode) { + OP.deref => @sizeOf(addr_type), + OP.deref_size, + OP.xderef_size, + => operand.?.type_size, + OP.deref_type, + OP.xderef_type, + => operand.?.deref_type.size, + else => unreachable, + }; + + const value: u64 = switch (size) { + 1 => @as(*const u8, @ptrFromInt(addr)).*, + 2 => @as(*const u16, @ptrFromInt(addr)).*, + 4 => @as(*const u32, @ptrFromInt(addr)).*, + 8 => @as(*const u64, @ptrFromInt(addr)).*, + else => return error.InvalidExpression, + }; + + if (opcode == OP.deref_type) { + try self.stack.append(allocator, .{ + .regval_type = .{ + .type_offset = operand.?.deref_type.type_offset, + .type_size = operand.?.deref_type.size, + .value = value, + }, + }); + } else { + try self.stack.append(allocator, .{ .generic = value }); + } + }, + OP.push_object_address, + OP.form_tls_address, + => { + return error.UnimplementedExpressionOpcode; + }, + OP.call_frame_cfa => { + if (context.cfa) |cfa| { + try self.stack.append(allocator, .{ .generic = cfa }); + } else return error.IncompleteExpressionContext; + }, + + // 2.5.1.4: Arithmetic and Logical Operations + OP.abs => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const value: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ .generic = std.math.absCast(value) }; + }, + OP.@"and" => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ .generic = a & try self.stack.items[self.stack.items.len - 1].asIntegral() }; }, // These have already been handled by readOperand OP.lo_user...OP.hi_user => 
unreachable, + else => { + //std.debug.print("Unimplemented DWARF expression opcode: {x}\n", .{opcode}); + return error.UnknownExpressionOpcode; + }, } return stream.pos < stream.buffer.len; From 424b1299a88b4ac3ae50128118ed510a79c3ab4b Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 5 Jul 2023 13:36:28 -0400 Subject: [PATCH 47/81] dwarf: add expression writer --- lib/std/dwarf/expressions.zig | 503 ++++++++++++++++++++++++++++++++-- 1 file changed, 483 insertions(+), 20 deletions(-) diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index e5f553ae2957..cc6b68793094 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -5,17 +5,7 @@ const leb = @import("../leb128.zig"); const dwarf = @import("../dwarf.zig"); const abi = dwarf.abi; const mem = std.mem; - -pub const StackMachineOptions = struct { - /// The address size of the target architecture - addr_size: u8 = @sizeOf(usize), - - /// Endianess of the target architecture - endian: std.builtin.Endian = .Little, - - /// Restrict the stack machine to a subset of opcodes used in call frame instructions - call_frame_mode: bool = false, -}; +const assert = std.debug.assert; /// Expressions can be evaluated in different contexts, each requiring its own set of inputs. /// Callers should specify all the fields relevant to their context. If a field is required @@ -35,10 +25,21 @@ pub const ExpressionContext = struct { cfa: ?usize, }; +pub const ExpressionOptions = struct { + /// The address size of the target architecture + addr_size: u8 = @sizeOf(usize), + + /// Endianess of the target architecture + endian: std.builtin.Endian = .Little, + + /// Restrict the stack machine to a subset of opcodes used in call frame instructions + call_frame_mode: bool = false, +}; + /// A stack machine that can decode and run DWARF expressions. 
/// Expressions can be decoded for non-native address size and endianness, /// but can only be executed if the current target matches the configuration. -pub fn StackMachine(comptime options: StackMachineOptions) type { +pub fn StackMachine(comptime options: ExpressionOptions) type { const addr_type = switch (options.addr_size) { 2 => u16, 4 => u32, @@ -60,6 +61,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { generic: addr_type, register: u8, type_size: u8, + branch_offset: i16, base_register: struct { base_register: u8, offset: i64, @@ -114,7 +116,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { 2 => mem.readIntSliceNative(u16, const_type.value_bytes), 4 => mem.readIntSliceNative(u32, const_type.value_bytes), 8 => mem.readIntSliceNative(u64, const_type.value_bytes), - else => return error.InvalidIntegralTypeLength, + else => error.InvalidIntegralTypeLength, }; }, }; @@ -163,10 +165,10 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { OP.call2, => generic(try reader.readInt(u16, options.endian)), OP.call4 => generic(try reader.readInt(u32, options.endian)), - OP.const2s, + OP.const2s => generic(try reader.readInt(i16, options.endian)), OP.bra, OP.skip, - => generic(try reader.readInt(i16, options.endian)), + => .{ .branch_offset = try reader.readInt(i16, options.endian) }, OP.const4u => generic(try reader.readInt(u32, options.endian)), OP.const4s => generic(try reader.readInt(i32, options.endian)), OP.const8u => generic(try reader.readInt(u64, options.endian)), @@ -362,13 +364,21 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { if (context.ucontext == null) return error.IncompleteExpressionContext; const base_register = (try readOperand(stream, opcode)).?.base_register; - var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, base_register.base_register, context.reg_ctx))); + var value: i64 = @intCast(mem.readIntSliceNative(usize, try 
abi.regBytes( + context.ucontext.?, + base_register.base_register, + context.reg_ctx, + ))); value += base_register.offset; try self.stack.append(allocator, .{ .generic = @intCast(value) }); }, OP.regval_type => { const register_type = (try readOperand(stream, opcode)).?.register_type; - const value = mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, register_type.register, context.reg_ctx)); + const value = mem.readIntSliceNative(usize, try abi.regBytes( + context.ucontext.?, + register_type.register, + context.reg_ctx, + )); try self.stack.append(allocator, .{ .regval_type = .{ .type_offset = register_type.type_offset, @@ -472,18 +482,242 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { OP.abs => { if (self.stack.items.len == 0) return error.InvalidExpression; const value: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); - self.stack.items[self.stack.items.len - 1] = .{ .generic = std.math.absCast(value) }; + self.stack.items[self.stack.items.len - 1] = .{ + .generic = std.math.absCast(value), + }; }, OP.@"and" => { if (self.stack.items.len < 2) return error.InvalidExpression; const a = try self.stack.pop().asIntegral(); - self.stack.items[self.stack.items.len - 1] = .{ .generic = a & try self.stack.items[self.stack.items.len - 1].asIntegral() }; + self.stack.items[self.stack.items.len - 1] = .{ + .generic = a & try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + OP.div => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(try std.math.divTrunc(isize, b, a)), + }; + }, + OP.minus => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const b = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = 
.{ + .generic = try std.math.sub(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), b), + }; + }, + OP.mod => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(@mod(b, a)), + }; + }, + OP.mul => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(@mulWithOverflow(a, b)[0]), + }; + }, + OP.neg => { + if (self.stack.items.len == 0) return error.InvalidExpression; + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast( + try std.math.negate( + @as(isize, @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral())), + ), + ), + }; + }, + OP.not => { + if (self.stack.items.len == 0) return error.InvalidExpression; + self.stack.items[self.stack.items.len - 1] = .{ + .generic = ~try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + OP.@"or" => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = a | try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + OP.plus => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const b = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = try std.math.add(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), b), + }; + }, + OP.plus_uconst => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const constant = (try readOperand(stream, opcode)).?.generic; + 
self.stack.items[self.stack.items.len - 1] = .{ + .generic = try std.math.add(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), constant), + }; + }, + OP.shl => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + const b = try self.stack.items[self.stack.items.len - 1].asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = std.math.shl(usize, b, a), + }; + }, + OP.shr => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + const b = try self.stack.items[self.stack.items.len - 1].asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = std.math.shr(usize, b, a), + }; + }, + OP.shra => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(std.math.shr(isize, b, a)), + }; + }, + OP.xor => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = a ^ try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + + // 2.5.1.5: Control Flow Operations + OP.le, + OP.ge, + OP.eq, + OP.lt, + OP.gt, + OP.ne, + => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = self.stack.pop(); + const b = self.stack.items[self.stack.items.len - 1]; + + if (a == .generic and b == .generic) { + const a_int: isize = @bitCast(a.asIntegral() catch unreachable); + const b_int: isize = @bitCast(b.asIntegral() catch unreachable); + const result = @intFromBool(switch (opcode) { + OP.le => b_int < a_int, + OP.ge => b_int >= a_int, + OP.eq => b_int == a_int, + OP.lt => b_int < a_int, + OP.gt => b_int > a_int, + OP.ne => b_int != a_int, + else 
=> unreachable, + }); + + self.stack.items[self.stack.items.len - 1] = .{ .generic = result }; + } else { + // TODO: Load the types referenced by these values, find their comparison operator, and run it + return error.UnimplementedTypedComparison; + } + }, + OP.skip, OP.bra => { + const branch_offset = (try readOperand(stream, opcode)).?.branch_offset; + const condition = if (opcode == OP.bra) blk: { + if (self.stack.items.len == 0) return error.InvalidExpression; + break :blk try self.stack.pop().asIntegral() != 0; + } else true; + + if (condition) { + const new_pos = std.math.cast( + usize, + try std.math.add(isize, @as(isize, @intCast(stream.pos)), branch_offset), + ) orelse return error.InvalidExpression; + + if (new_pos < 0 or new_pos >= stream.buffer.len) return error.InvalidExpression; + stream.pos = new_pos; + } + }, + OP.call2, + OP.call4, + OP.call_ref, + => { + const debug_info_offset = (try readOperand(stream, opcode)).?.generic; + _ = debug_info_offset; + + // TODO: Load a DIE entry at debug_info_offset in a .debug_info section (the spec says that it + // can be in a separate exe / shared object from the one containing this expression). + // Transfer control to the DW_AT_location attribute, with the current stack as input. + + return error.UnimplementedExpressionCall; + }, + + // 2.5.1.6: Type Conversions + OP.convert => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const type_offset = (try readOperand(stream, opcode)).?.generic; + _ = type_offset; + + // TODO: Load the DW_TAG_base_type entry in context.compile_unit, find a conversion operator + // from the old type to the new type, run it. 
+ + return error.UnimplementedTypeConversion; + }, + OP.reinterpret => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const type_offset = (try readOperand(stream, opcode)).?.generic; + + // TODO: Load the DW_TAG_base_type entries in context.compile_unit and verify both types are the same size + const value = self.stack.items[self.stack.items.len - 1]; + if (type_offset == 0) { + self.stack.items[self.stack.items.len - 1] = .{ .generic = try value.asIntegral() }; + } else { + self.stack.items[self.stack.items.len - 1] = switch (value) { + .generic => |v| .{ + .regval_type = .{ + .type_offset = type_offset, + .type_size = @sizeOf(addr_type), + .value = v, + }, + }, + .regval_type => |r| .{ + .regval_type = .{ + .type_offset = type_offset, + .type_size = r.type_size, + .value = r.value, + }, + }, + .const_type => |c| .{ + .const_type = .{ + .type_offset = type_offset, + .value_bytes = c.value_bytes, + }, + }, + }; + } + }, + + // 2.5.1.7: Special Operations + OP.nop => {}, + OP.entry_value => { + const block = (try readOperand(stream, opcode)).?.block; + _ = block; + + // TODO: If block is an expression, run it on a new stack. Push the resulting value onto this stack. + // TODO: If block is a register location, push the value that location had before running this program onto this stack. + // This implies capturing all register values before executing this block, in case this program modifies them. 
+ // TODO: If the block contains, OP.push_object_address, treat it as OP.nop + + return error.UnimplementedSubExpression; }, // These have already been handled by readOperand OP.lo_user...OP.hi_user => unreachable, else => { - //std.debug.print("Unimplemented DWARF expression opcode: {x}\n", .{opcode}); + //std.debug.print("Unknown DWARF expression opcode: {x}\n", .{opcode}); return error.UnknownExpressionOpcode; }, } @@ -492,3 +726,232 @@ pub fn StackMachine(comptime options: StackMachineOptions) type { } }; } + +pub fn Writer(options: ExpressionOptions) type { + const addr_type = switch (options.addr_size) { + 2 => u16, + 4 => u32, + 8 => u64, + else => @compileError("Unsupported address size of " ++ options.addr_size), + }; + + return struct { + /// Zero-operand instructions + pub fn writeOpcode(writer: anytype, comptime opcode: u8) !void { + switch (opcode) { + OP.dup, + OP.drop, + OP.over, + OP.swap, + OP.rot, + OP.deref, + OP.xderef, + OP.push_object_address, + OP.form_tls_address, + OP.call_frame_cfa, + OP.abs, + OP.@"and", + OP.div, + OP.minus, + OP.mod, + OP.mul, + OP.neg, + OP.not, + OP.@"or", + OP.plus, + OP.shl, + OP.shr, + OP.shra, + OP.xor, + OP.le, + OP.ge, + OP.eq, + OP.lt, + OP.gt, + OP.ne, + OP.nop, + => try writer.writeByte(opcode), + else => @compileError("This opcode requires operands, use write() instead"), + } + } + + // 2.5.1.1: Literal Encodings + pub fn writeLiteral(writer: anytype, literal: u8) !void { + switch (literal) { + 0...31 => |n| try writer.writeByte(n + OP.lit0), + else => return error.InvalidLiteral, + } + } + + pub fn writeConst(writer: anytype, comptime T: type, value: T) !void { + if (@typeInfo(T) != .Int) @compileError("Constants must be integers"); + + switch (T) { + u8, i8, u16, i16, u32, i32, u64, i64 => { + try writer.writeByte(switch (T) { + u8 => OP.const1u, + i8 => OP.const1s, + u16 => OP.const2u, + i16 => OP.const2s, + u32 => OP.const4u, + i32 => OP.const4s, + u64 => OP.const8u, + i64 => OP.const8s, + }); + + try 
writer.writeInt(T, value, options.endian); + }, + else => switch (@typeInfo(T).Int.signedness) { + .unsigned => { + try writer.writeByte(OP.constu); + try leb.writeULEB128(writer, value); + }, + .signed => { + try writer.writeByte(OP.consts); + try leb.writeILEB128(writer, value); + }, + }, + } + } + + pub fn writeConstx(writer: anytype, debug_addr_offset: anytype) !void { + try writer.writeByte(OP.constx); + try leb.writeULEB128(writer, debug_addr_offset); + } + + pub fn writeConstType(writer: anytype, die_offset: anytype, size: u8, value_bytes: []const u8) !void { + if (size != value_bytes.len) return error.InvalidValueSize; + try writer.writeByte(OP.const_type); + try leb.writeULEB128(writer, die_offset); + try writer.writeByte(size); + try writer.writeAll(value_bytes); + } + + pub fn writeAddr(writer: anytype, value: addr_type) !void { + try writer.writeByte(OP.addr); + try writer.writeInt(addr_type, value, options.endian); + } + + pub fn writeAddrx(writer: anytype, debug_addr_offset: anytype) !void { + try writer.writeByte(OP.addrx); + try leb.writeULEB128(writer, debug_addr_offset); + } + + // 2.5.1.2: Register Values + pub fn writeFbreg(writer: anytype, offset: anytype) !void { + try writer.writeByte(OP.fbreg); + try leb.writeILEB128(writer, offset); + } + + pub fn writeBreg(writer: anytype, register: u8, offset: anytype) !void { + if (register > 31) return error.InvalidRegister; + try writer.writeByte(OP.reg0 + register); + try leb.writeILEB128(writer, offset); + } + + pub fn writeBregx(writer: anytype, register: anytype, offset: anytype) !void { + try writer.writeByte(OP.bregx); + try leb.writeULEB128(writer, register); + try leb.writeILEB128(writer, offset); + } + + pub fn writeRegvalType(writer: anytype, register: anytype, offset: anytype) !void { + try writer.writeByte(OP.bregx); + try leb.writeULEB128(writer, register); + try leb.writeULEB128(writer, offset); + } + + // 2.5.1.3: Stack Operations + pub fn writePick(writer: anytype, index: u8) !void { + 
try writer.writeByte(OP.pick); + try writer.writeByte(index); + } + + pub fn writeDerefSize(writer: anytype, size: u8) !void { + try writer.writeByte(OP.deref_size); + try writer.writeByte(size); + } + + pub fn writeXDerefSize(writer: anytype, size: u8) !void { + try writer.writeByte(OP.xderef_size); + try writer.writeByte(size); + } + + pub fn writeDerefType(writer: anytype, size: u8, die_offset: anytype) !void { + try writer.writeByte(OP.deref_type); + try writer.writeByte(size); + try leb.writeULEB128(writer, die_offset); + } + + pub fn writeXDerefType(writer: anytype, size: u8, die_offset: anytype) !void { + try writer.writeByte(OP.xderef_type); + try writer.writeByte(size); + try leb.writeULEB128(writer, die_offset); + } + + // 2.5.1.4: Arithmetic and Logical Operations + + pub fn writePlusUconst(writer: anytype, uint_value: anytype) !void { + try writer.writeByte(OP.plus_uconst); + try leb.writeULEB128(writer, uint_value); + } + + // 2.5.1.5: Control Flow Operations + + pub fn writeSkip(writer: anytype, offset: i16) !void { + try writer.writeByte(OP.skip); + try writer.writeInt(i16, offset, options.endian); + } + + pub fn writeBra(writer: anytype, offset: i16) !void { + try writer.writeByte(OP.bra); + try writer.writeInt(i16, offset, options.endian); + } + + pub fn writeCall(writer: anytype, comptime T: type, offset: T) !void { + switch (T) { + u16 => try writer.writeByte(OP.call2), + u32 => try writer.writeByte(OP.call4), + else => @compileError("Call operand must be a 2 or 4 byte offset"), + } + + try writer.writeInt(T, offset, options.endian); + } + + pub fn writeCallRef(writer: anytype, debug_info_offset: addr_type) !void { + try writer.writeByte(OP.call_ref); + try writer.writeInt(addr_type, debug_info_offset, options.endian); + } + + pub fn writeConvert(writer: anytype, die_offset: anytype) !void { + try writer.writeByte(OP.convert); + try leb.writeULEB128(writer, die_offset); + } + + pub fn writeReinterpret(writer: anytype, die_offset: anytype) !void { 
+ try writer.writeByte(OP.reinterpret); + try leb.writeULEB128(writer, die_offset); + } + + // 2.5.1.7: Special Operations + + pub fn writeEntryValue(writer: anytype, expression: []const u8) !void { + try writer.writeByte(OP.entry_value); + try leb.writeULEB128(writer, expression.len); + try writer.writeAll(expression); + } + + // 2.6: Location Descriptions + + // TODO + + }; +} + +test "DWARF expressions" { + const allocator = std.testing.allocator; + + const options = ExpressionOptions{}; + const stack_machine = StackMachine(options){}; + defer stack_machine.deinit(allocator); +} From 8547c42ba542f77c55ce50253446fd2bf4f3592b Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 6 Jul 2023 20:04:33 -0400 Subject: [PATCH 48/81] dwarf: expression fixups for non-64bit arches, check call_frame_context when writing expressions --- lib/std/dwarf.zig | 2 +- lib/std/dwarf/expressions.zig | 71 ++++++++++++++++++++--------------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 3a8ea847bf22..2d71885e9b55 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1783,7 +1783,7 @@ pub const UnwindContext = struct { reg_ctx: abi.RegisterContext, isValidMemory: *const fn (address: usize) bool, vm: call_frame.VirtualMachine = .{}, - stack_machine: expressions.StackMachine(.{ .call_frame_mode = true }) = .{}, + stack_machine: expressions.StackMachine(.{ .call_frame_context = true }) = .{}, pub fn init(allocator: mem.Allocator, ucontext: *const os.ucontext_t, isValidMemory: *const fn (address: usize) bool) !UnwindContext { const pc = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, abi.ipRegNum(), null)); diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index cc6b68793094..beff1e6754ac 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -33,7 +33,7 @@ pub const ExpressionOptions = struct { endian: std.builtin.Endian = .Little, /// Restrict the stack machine to a 
subset of opcodes used in call frame instructions - call_frame_mode: bool = false, + call_frame_context: bool = false, }; /// A stack machine that can decode and run DWARF expressions. @@ -111,13 +111,15 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // TODO: For these two prongs, look up the type and assert it's integral? .regval_type => |regval_type| regval_type.value, .const_type => |const_type| { - return switch (const_type.value_bytes.len) { + const value: u64 = switch (const_type.value_bytes.len) { 1 => mem.readIntSliceNative(u8, const_type.value_bytes), 2 => mem.readIntSliceNative(u16, const_type.value_bytes), 4 => mem.readIntSliceNative(u32, const_type.value_bytes), 8 => mem.readIntSliceNative(u64, const_type.value_bytes), - else => error.InvalidIntegralTypeLength, + else => return error.InvalidIntegralTypeSize, }; + + return std.math.cast(addr_type, value) orelse error.TruncatedIntegralType; }, }; } @@ -278,26 +280,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { @compileError("Execution of non-native address sizees / endianness is not supported"); const opcode = try stream.reader().readByte(); - if (options.call_frame_mode) { - // Certain opcodes are not allowed in a CFA context, see 6.4.2 - switch (opcode) { - OP.addrx, - OP.call2, - OP.call4, - OP.call_ref, - OP.const_type, - OP.constx, - OP.convert, - OP.deref_type, - OP.regval_type, - OP.reinterpret, - OP.push_object_address, - OP.call_frame_cfa, - => return error.InvalidCFAExpression, - else => {}, - } - } - + if (options.call_frame_context and !opcodeValidInCFA(opcode)) return error.InvalidCFAOpcode; switch (opcode) { // 2.5.1.1: Literal Encodings @@ -420,7 +403,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.xderef_type, => { if (self.stack.items.len == 0) return error.InvalidExpression; - var addr = try self.stack.pop().asIntegral(); + var addr = try self.stack.items[self.stack.items.len - 1].asIntegral(); const addr_space_identifier: 
?usize = switch (opcode) { OP.xderef, OP.xderef_size, @@ -447,24 +430,24 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { else => unreachable, }; - const value: u64 = switch (size) { + const value: addr_type = std.math.cast(addr_type, @as(u64, switch (size) { 1 => @as(*const u8, @ptrFromInt(addr)).*, 2 => @as(*const u16, @ptrFromInt(addr)).*, 4 => @as(*const u32, @ptrFromInt(addr)).*, 8 => @as(*const u64, @ptrFromInt(addr)).*, else => return error.InvalidExpression, - }; + })) orelse return error.InvalidExpression; if (opcode == OP.deref_type) { - try self.stack.append(allocator, .{ + self.stack.items[self.stack.items.len - 1] = .{ .regval_type = .{ .type_offset = operand.?.deref_type.type_offset, .type_size = operand.?.deref_type.size, .value = value, }, - }); + }; } else { - try self.stack.append(allocator, .{ .generic = value }); + self.stack.items[self.stack.items.len - 1] = .{ .generic = value }; } }, OP.push_object_address, @@ -738,6 +721,7 @@ pub fn Writer(options: ExpressionOptions) type { return struct { /// Zero-operand instructions pub fn writeOpcode(writer: anytype, comptime opcode: u8) !void { + if (options.call_frame_context and !comptime opcodeValidInCFA(opcode)) return error.InvalidCFAOpcode; switch (opcode) { OP.dup, OP.drop, @@ -820,6 +804,7 @@ pub fn Writer(options: ExpressionOptions) type { } pub fn writeConstType(writer: anytype, die_offset: anytype, size: u8, value_bytes: []const u8) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; if (size != value_bytes.len) return error.InvalidValueSize; try writer.writeByte(OP.const_type); try leb.writeULEB128(writer, die_offset); @@ -833,6 +818,7 @@ pub fn Writer(options: ExpressionOptions) type { } pub fn writeAddrx(writer: anytype, debug_addr_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; try writer.writeByte(OP.addrx); try leb.writeULEB128(writer, debug_addr_offset); } @@ -856,6 +842,7 @@ pub fn Writer(options: 
ExpressionOptions) type { } pub fn writeRegvalType(writer: anytype, register: anytype, offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; try writer.writeByte(OP.bregx); try leb.writeULEB128(writer, register); try leb.writeULEB128(writer, offset); @@ -878,6 +865,7 @@ pub fn Writer(options: ExpressionOptions) type { } pub fn writeDerefType(writer: anytype, size: u8, die_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; try writer.writeByte(OP.deref_type); try writer.writeByte(size); try leb.writeULEB128(writer, die_offset); @@ -909,6 +897,7 @@ pub fn Writer(options: ExpressionOptions) type { } pub fn writeCall(writer: anytype, comptime T: type, offset: T) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; switch (T) { u16 => try writer.writeByte(OP.call2), u32 => try writer.writeByte(OP.call4), @@ -919,16 +908,19 @@ pub fn Writer(options: ExpressionOptions) type { } pub fn writeCallRef(writer: anytype, debug_info_offset: addr_type) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; try writer.writeByte(OP.call_ref); try writer.writeInt(addr_type, debug_info_offset, options.endian); } pub fn writeConvert(writer: anytype, die_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; try writer.writeByte(OP.convert); try leb.writeULEB128(writer, die_offset); } pub fn writeReinterpret(writer: anytype, die_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; try writer.writeByte(OP.reinterpret); try leb.writeULEB128(writer, die_offset); } @@ -942,12 +934,31 @@ pub fn Writer(options: ExpressionOptions) type { } // 2.6: Location Descriptions - // TODO }; } +// Certain opcodes are not allowed in a CFA context, see 6.4.2 +fn opcodeValidInCFA(opcode: u8) bool { + return switch (opcode) { + OP.addrx, + OP.call2, + OP.call4, + OP.call_ref, + OP.const_type, + OP.constx, + OP.convert, + 
OP.deref_type, + OP.regval_type, + OP.reinterpret, + OP.push_object_address, + OP.call_frame_cfa, + => false, + else => true, + }; +} + test "DWARF expressions" { const allocator = std.testing.allocator; From 5f72c6508d78291d8f0358d06d407a8d6b4a28c9 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 6 Jul 2023 22:02:47 -0400 Subject: [PATCH 49/81] debug: rename StackTraceContext to ThreadContext dwarf: use ThreadContext instead of os.ucontext_t dwarf: add regBytes impl for windows dwarf: fixup expression types for non-native --- lib/std/debug.zig | 12 +-- lib/std/dwarf.zig | 46 ++++++----- lib/std/dwarf/abi.zig | 81 +++++++++++++++---- lib/std/dwarf/call_frame.zig | 12 +-- lib/std/dwarf/expressions.zig | 77 +++++++++++------- src/crash_report.zig | 2 +- .../dwarf_unwinding/shared_lib_unwind.zig | 2 +- .../standalone/dwarf_unwinding/zig_unwind.zig | 2 +- 8 files changed, 157 insertions(+), 77 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 38b2cfc59b5a..7fac16085576 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -133,7 +133,7 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { } } -pub const StackTraceContext = blk: { +pub const ThreadContext = blk: { if (native_os == .windows) { break :blk std.os.windows.CONTEXT; } else if (have_ucontext) { @@ -146,7 +146,7 @@ pub const StackTraceContext = blk: { /// Tries to print the stack trace starting from the supplied base pointer to stderr, /// unbuffered, and ignores any error returned. 
/// TODO multithreaded awareness -pub fn dumpStackTraceFromBase(context: *const StackTraceContext) void { +pub fn dumpStackTraceFromBase(context: *const ThreadContext) void { nosuspend { if (comptime builtin.target.isWasm()) { if (native_os == .wasi) { @@ -437,7 +437,7 @@ pub const have_ucontext = @hasDecl(os.system, "ucontext_t") and else => true, }); -pub inline fn getContext(context: *StackTraceContext) bool { +pub inline fn getContext(context: *ThreadContext) bool { if (native_os == .windows) { context.* = std.mem.zeroes(windows.CONTEXT); windows.ntdll.RtlCaptureContext(context); @@ -606,8 +606,8 @@ pub const StackIterator = struct { fn next_dwarf(self: *StackIterator) !usize { const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); if (try module.getDwarfInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc)) |di| { - self.dwarf_context.reg_ctx.eh_frame = true; - self.dwarf_context.reg_ctx.is_macho = di.is_macho; + self.dwarf_context.reg_context.eh_frame = true; + self.dwarf_context.reg_context.is_macho = di.is_macho; return di.unwindFrame(&self.dwarf_context, module.base_address); } else return error.MissingDebugInfo; } @@ -663,7 +663,7 @@ pub fn writeCurrentStackTrace( tty_config: io.tty.Config, start_addr: ?usize, ) !void { - var context: StackTraceContext = undefined; + var context: ThreadContext = undefined; const has_context = getContext(&context); if (native_os == .windows) { return writeStackTraceWindows(out_stream, debug_info, tty_config, &context, start_addr); diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 2d71885e9b55..0909b6eafb1a 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -146,11 +146,11 @@ pub const CC = enum(u8) { pass_by_reference = 0x4, pass_by_value = 0x5, - lo_user = 0x40, - hi_user = 0xff, - GNU_renesas_sh = 0x40, GNU_borland_fastcall_i386 = 0x41, + + pub const lo_user = 0x40; + pub const hi_user = 0xff; }; pub const Format = enum { @"32", @"64" }; @@ -1676,13 +1676,13 @@ pub 
const DwarfInfo = struct { var expression_context = .{ .isValidMemory = context.isValidMemory, .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, - .ucontext = &context.ucontext, - .reg_ctx = context.reg_ctx, + .thread_context = &context.thread_context, + .reg_context = context.reg_context, .cfa = context.cfa, }; context.vm.reset(); - context.reg_ctx.eh_frame = cie.version != 4; + context.reg_context.eh_frame = cie.version != 4; _ = try context.vm.runToNative(context.allocator, mapped_pc, cie, fde); const row = &context.vm.current_row; @@ -1690,7 +1690,7 @@ pub const DwarfInfo = struct { context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; - const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, register, context.reg_ctx)); + const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.thread_context, register, context.reg_context)); break :blk try call_frame.applyOffset(value, offset); }, .expression => |expression| blk: { @@ -1711,14 +1711,14 @@ pub const DwarfInfo = struct { if (!context.isValidMemory(context.cfa.?)) return error.InvalidCFA; expression_context.cfa = context.cfa; - // Buffering the modifications is done because copying the ucontext is not portable, + // Buffering the modifications is done because copying the thread context is not portable, // some implementations (ie. darwin) use internal pointers to the mcontext. 
var arena = std.heap.ArenaAllocator.init(context.allocator); defer arena.deinit(); const update_allocator = arena.allocator(); const RegisterUpdate = struct { - // Backed by ucontext + // Backed by thread_context old_value: []u8, // Backed by arena new_value: []const u8, @@ -1733,7 +1733,7 @@ pub const DwarfInfo = struct { has_next_ip = column.rule != .undefined; } - const old_value = try abi.regBytes(&context.ucontext, register, context.reg_ctx); + const old_value = try abi.regBytes(&context.thread_context, register, context.reg_context); const new_value = try update_allocator.alloc(u8, old_value.len); const prev = update_tail; @@ -1758,12 +1758,12 @@ pub const DwarfInfo = struct { } if (has_next_ip) { - context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.ucontext, comptime abi.ipRegNum(), context.reg_ctx)); + context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.thread_context, comptime abi.ipRegNum(), context.reg_context)); } else { context.pc = 0; } - mem.writeIntSliceNative(usize, try abi.regBytes(&context.ucontext, abi.spRegNum(context.reg_ctx), context.reg_ctx), context.cfa.?); + mem.writeIntSliceNative(usize, try abi.regBytes(&context.thread_context, abi.spRegNum(context.reg_context), context.reg_context), context.cfa.?); // The call instruction will have pushed the address of the instruction that follows the call as the return address // However, this return address may be past the end of the function if the caller was `noreturn`. 
@@ -1779,20 +1779,24 @@ pub const UnwindContext = struct { allocator: mem.Allocator, cfa: ?usize, pc: usize, - ucontext: os.ucontext_t, - reg_ctx: abi.RegisterContext, + thread_context: debug.ThreadContext, + reg_context: abi.RegisterContext, isValidMemory: *const fn (address: usize) bool, vm: call_frame.VirtualMachine = .{}, stack_machine: expressions.StackMachine(.{ .call_frame_context = true }) = .{}, - pub fn init(allocator: mem.Allocator, ucontext: *const os.ucontext_t, isValidMemory: *const fn (address: usize) bool) !UnwindContext { - const pc = mem.readIntSliceNative(usize, try abi.regBytes(ucontext, abi.ipRegNum(), null)); + pub fn init(allocator: mem.Allocator, thread_context: *const debug.ThreadContext, isValidMemory: *const fn (address: usize) bool) !UnwindContext { + const pc = mem.readIntSliceNative(usize, try abi.regBytes(thread_context, abi.ipRegNum(), null)); + + if (builtin.os.tag == .macos) @compileError("Fix below TODO"); + return .{ .allocator = allocator, .cfa = null, .pc = pc, - .ucontext = ucontext.*, - .reg_ctx = undefined, + // TODO: This is broken on macos, need a function that knows how to copy the OSs mcontext properly + .thread_context = thread_context.*, + .reg_context = undefined, .isValidMemory = isValidMemory, }; } @@ -1803,7 +1807,7 @@ pub const UnwindContext = struct { } pub fn getFp(self: *const UnwindContext) !usize { - return mem.readIntSliceNative(usize, try abi.regBytes(&self.ucontext, abi.fpRegNum(self.reg_ctx), self.reg_ctx)); + return mem.readIntSliceNative(usize, try abi.regBytes(&self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)); } }; @@ -2388,3 +2392,7 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { return math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } + +test { + std.testing.refAllDecls(@This()); +} diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 7b1418a293ae..1927e3df1ad0 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -3,11 
+3,6 @@ const std = @import("../std.zig"); const os = std.os; const mem = std.mem; -pub const RegisterContext = struct { - eh_frame: bool, - is_macho: bool, -}; - pub fn isSupportedArch(arch: std.Target.Cpu.Arch) bool { return switch (arch) { .x86, @@ -29,10 +24,10 @@ pub fn ipRegNum() u8 { }; } -pub fn fpRegNum(reg_ctx: RegisterContext) u8 { +pub fn fpRegNum(reg_context: RegisterContext) u8 { return switch (builtin.cpu.arch) { // GCC on OS X did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO - .x86 => if (reg_ctx.eh_frame and reg_ctx.is_macho) 4 else 5, + .x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5, .x86_64 => 6, .arm => 11, .aarch64 => 29, @@ -40,9 +35,9 @@ pub fn fpRegNum(reg_ctx: RegisterContext) u8 { }; } -pub fn spRegNum(reg_ctx: RegisterContext) u8 { +pub fn spRegNum(reg_context: RegisterContext) u8 { return switch (builtin.cpu.arch) { - .x86 => if (reg_ctx.eh_frame and reg_ctx.is_macho) 5 else 4, + .x86 => if (reg_context.eh_frame and reg_context.is_macho) 5 else 4, .x86_64 => 7, .arm => 13, .aarch64 => 31, @@ -52,21 +47,76 @@ pub fn spRegNum(reg_ctx: RegisterContext) u8 { fn RegBytesReturnType(comptime ContextPtrType: type) type { const info = @typeInfo(ContextPtrType); - if (info != .Pointer or info.Pointer.child != os.ucontext_t) { - @compileError("Expected a pointer to ucontext_t, got " ++ @typeName(@TypeOf(ContextPtrType))); + if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) { + @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType))); } return if (info.Pointer.is_const) return []const u8 else []u8; } +pub const RegisterContext = struct { + eh_frame: bool, + is_macho: bool, +}; + /// Returns a slice containing the backing storage for `reg_number`. 
/// -/// `reg_ctx` describes in what context the register number is used, as it can have different +/// `reg_context` describes in what context the register number is used, as it can have different /// meanings depending on the DWARF container. It is only required when getting the stack or /// frame pointer register on some architectures. -pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext) !RegBytesReturnType(@TypeOf(ucontext_ptr)) { - var m = &ucontext_ptr.mcontext; +pub fn regBytes(thread_context_ptr: anytype, reg_number: u8, reg_context: ?RegisterContext) !RegBytesReturnType(@TypeOf(thread_context_ptr)) { + if (builtin.os.tag == .windows) { + return switch (builtin.cpu.arch) { + .x86 => switch (reg_number) { + 0 => mem.asBytes(&thread_context_ptr.Eax), + 1 => mem.asBytes(&thread_context_ptr.Ecx), + 2 => mem.asBytes(&thread_context_ptr.Edx), + 3 => mem.asBytes(&thread_context_ptr.Ebx), + 4 => mem.asBytes(&thread_context_ptr.Esp), + 5 => mem.asBytes(&thread_context_ptr.Ebp), + 6 => mem.asBytes(&thread_context_ptr.Esi), + 7 => mem.asBytes(&thread_context_ptr.Edi), + 8 => mem.asBytes(&thread_context_ptr.Eip), + 9 => mem.asBytes(&thread_context_ptr.EFlags), + 10 => mem.asBytes(&thread_context_ptr.SegCs), + 11 => mem.asBytes(&thread_context_ptr.SegSs), + 12 => mem.asBytes(&thread_context_ptr.SegDs), + 13 => mem.asBytes(&thread_context_ptr.SegEs), + 14 => mem.asBytes(&thread_context_ptr.SegFs), + 15 => mem.asBytes(&thread_context_ptr.SegGs), + else => error.InvalidRegister, + }, + .x86_64 => switch (reg_number) { + 0 => mem.asBytes(&thread_context_ptr.Rax), + 1 => mem.asBytes(&thread_context_ptr.Rdx), + 2 => mem.asBytes(&thread_context_ptr.Rcx), + 3 => mem.asBytes(&thread_context_ptr.Rbx), + 4 => mem.asBytes(&thread_context_ptr.Rsi), + 5 => mem.asBytes(&thread_context_ptr.Rdi), + 6 => mem.asBytes(&thread_context_ptr.Rbp), + 7 => mem.asBytes(&thread_context_ptr.Rsp), + 8 => mem.asBytes(&thread_context_ptr.R8), + 9 => 
mem.asBytes(&thread_context_ptr.R9), + 10 => mem.asBytes(&thread_context_ptr.R10), + 11 => mem.asBytes(&thread_context_ptr.R11), + 12 => mem.asBytes(&thread_context_ptr.R12), + 13 => mem.asBytes(&thread_context_ptr.R13), + 14 => mem.asBytes(&thread_context_ptr.R14), + 15 => mem.asBytes(&thread_context_ptr.R15), + 16 => mem.asBytes(&thread_context_ptr.Rip), + else => error.InvalidRegister, + }, + .aarch64 => switch (reg_number) { + 0...30 => mem.asBytes(&thread_context_ptr.DUMMYUNIONNAME.X[reg_number]), + 31 => mem.asBytes(&thread_context_ptr.Sp), + 32 => mem.asBytes(&thread_context_ptr.Pc), + }, + else => error.UnimplementedArch, + }; + } + const ucontext_ptr = thread_context_ptr; + var m = &ucontext_ptr.mcontext; return switch (builtin.cpu.arch) { .x86 => switch (builtin.os.tag) { .linux, .netbsd, .solaris => switch (reg_number) { @@ -74,7 +124,7 @@ pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.ECX]), 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EDX]), 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBX]), - 4...5 => if (reg_ctx) |r| bytes: { + 4...5 => if (reg_context) |r| bytes: { if (reg_number == 4) { break :bytes if (r.eh_frame and r.is_macho) mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.EBP]) @@ -98,7 +148,6 @@ pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.FS]), 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.GS]), 16...23 => error.InvalidRegister, // TODO: Support loading ST0-ST7 from mcontext.fpregs - // TODO: Map TRAPNO, ERR, UESP 32...39 => error.InvalidRegister, // TODO: Support loading XMM0-XMM7 from mcontext.fpregs else => error.InvalidRegister, }, diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 45c947d88b4b..49772e2ae5a7 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -2,10 +2,10 @@ const 
builtin = @import("builtin"); const std = @import("../std.zig"); const mem = std.mem; const debug = std.debug; -const leb = @import("../leb128.zig"); -const abi = @import("abi.zig"); -const dwarf = @import("../dwarf.zig"); -const expressions = @import("expressions.zig"); +const leb = std.leb; +const dwarf = std.dwarf; +const abi = dwarf.abi; +const expressions = dwarf.expressions; const assert = std.debug.assert; const Opcode = enum(u8) { @@ -315,9 +315,9 @@ pub const VirtualMachine = struct { } else return error.InvalidCFA; }, .register => |register| { - const src = try abi.regBytes(&context.ucontext, register, context.reg_ctx); + const src = try abi.regBytes(&context.thread_context, register, context.reg_context); if (src.len != out.len) return error.RegisterTypeMismatch; - @memcpy(out, try abi.regBytes(&context.ucontext, register, context.reg_ctx)); + @memcpy(out, try abi.regBytes(&context.thread_context, register, context.reg_context)); }, .expression => |expression| { context.stack_machine.reset(); diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index beff1e6754ac..a617244db284 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -1,8 +1,8 @@ const std = @import("std"); const builtin = @import("builtin"); const OP = @import("OP.zig"); -const leb = @import("../leb128.zig"); -const dwarf = @import("../dwarf.zig"); +const leb = std.leb; +const dwarf = std.dwarf; const abi = dwarf.abi; const mem = std.mem; const assert = std.debug.assert; @@ -17,12 +17,12 @@ pub const ExpressionContext = struct { /// The compilation unit this expression relates to, if any compile_unit: ?*const dwarf.CompileUnit = null, - /// Register context - ucontext: ?*std.os.ucontext_t, - reg_ctx: ?abi.RegisterContext, + /// Thread context + thread_context: ?*std.debug.ThreadContext = null, + reg_context: ?abi.RegisterContext = null, /// Call frame address, if in a CFI context - cfa: ?usize, + cfa: ?usize = null, }; pub const 
ExpressionOptions = struct { @@ -344,13 +344,13 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.breg0...OP.breg31, OP.bregx, => { - if (context.ucontext == null) return error.IncompleteExpressionContext; + if (context.thread_context == null) return error.IncompleteExpressionContext; const base_register = (try readOperand(stream, opcode)).?.base_register; var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes( - context.ucontext.?, + context.thread_context.?, base_register.base_register, - context.reg_ctx, + context.reg_context, ))); value += base_register.offset; try self.stack.append(allocator, .{ .generic = @intCast(value) }); @@ -358,9 +358,9 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.regval_type => { const register_type = (try readOperand(stream, opcode)).?.register_type; const value = mem.readIntSliceNative(usize, try abi.regBytes( - context.ucontext.?, + context.thread_context.?, register_type.register, - context.reg_ctx, + context.reg_context, )); try self.stack.append(allocator, .{ .regval_type = .{ @@ -464,7 +464,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // 2.5.1.4: Arithmetic and Logical Operations OP.abs => { if (self.stack.items.len == 0) return error.InvalidExpression; - const value: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const value: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ .generic = std.math.absCast(value), }; @@ -478,10 +478,10 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, OP.div => { if (self.stack.items.len < 2) return error.InvalidExpression; - const a: isize = @bitCast(try self.stack.pop().asIntegral()); - const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const a: addr_type_signed = @bitCast(try self.stack.pop().asIntegral()); + const b: 
addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ - .generic = @bitCast(try std.math.divTrunc(isize, b, a)), + .generic = @bitCast(try std.math.divTrunc(addr_type_signed, b, a)), }; }, OP.minus => { @@ -493,16 +493,16 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, OP.mod => { if (self.stack.items.len < 2) return error.InvalidExpression; - const a: isize = @bitCast(try self.stack.pop().asIntegral()); - const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const a: addr_type_signed = @bitCast(try self.stack.pop().asIntegral()); + const b: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ .generic = @bitCast(@mod(b, a)), }; }, OP.mul => { if (self.stack.items.len < 2) return error.InvalidExpression; - const a: isize = @bitCast(try self.stack.pop().asIntegral()); - const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const a: addr_type_signed = @bitCast(try self.stack.pop().asIntegral()); + const b: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ .generic = @bitCast(@mulWithOverflow(a, b)[0]), }; @@ -512,7 +512,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { self.stack.items[self.stack.items.len - 1] = .{ .generic = @bitCast( try std.math.negate( - @as(isize, @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral())), + @as(addr_type_signed, @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral())), ), ), }; @@ -563,9 +563,9 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.shra => { if (self.stack.items.len < 2) return error.InvalidExpression; const a = try self.stack.pop().asIntegral(); - const b: isize = @bitCast(try 
self.stack.items[self.stack.items.len - 1].asIntegral()); + const b: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ - .generic = @bitCast(std.math.shr(isize, b, a)), + .generic = @bitCast(std.math.shr(addr_type_signed, b, a)), }; }, OP.xor => { @@ -589,8 +589,8 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { const b = self.stack.items[self.stack.items.len - 1]; if (a == .generic and b == .generic) { - const a_int: isize = @bitCast(a.asIntegral() catch unreachable); - const b_int: isize = @bitCast(b.asIntegral() catch unreachable); + const a_int: addr_type_signed = @bitCast(a.asIntegral() catch unreachable); + const b_int: addr_type_signed = @bitCast(b.asIntegral() catch unreachable); const result = @intFromBool(switch (opcode) { OP.le => b_int < a_int, OP.ge => b_int >= a_int, @@ -617,7 +617,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { if (condition) { const new_pos = std.math.cast( usize, - try std.math.add(isize, @as(isize, @intCast(stream.pos)), branch_offset), + try std.math.add(addr_type_signed, @as(addr_type_signed, @intCast(stream.pos)), branch_offset), ) orelse return error.InvalidExpression; if (new_pos < 0 or new_pos >= stream.buffer.len) return error.InvalidExpression; @@ -710,7 +710,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }; } -pub fn Writer(options: ExpressionOptions) type { +pub fn Builder(comptime options: ExpressionOptions) type { const addr_type = switch (options.addr_size) { 2 => u16, 4 => u32, @@ -959,10 +959,33 @@ fn opcodeValidInCFA(opcode: u8) bool { }; } +const testing = std.testing; test "DWARF expressions" { const allocator = std.testing.allocator; const options = ExpressionOptions{}; - const stack_machine = StackMachine(options){}; + var stack_machine = StackMachine(options){}; defer stack_machine.deinit(allocator); + + const b = Builder(options); + + var program = 
std.ArrayList(u8).init(allocator); + defer program.deinit(); + + const writer = program.writer(); + + // Literals + { + const context = ExpressionContext{}; + for (0..32) |i| { + try b.writeLiteral(writer, @intCast(i)); + } + + _ = try stack_machine.run(program.items, allocator, context, 0); + + for (0..32) |i| { + const expected = 31 - i; + try testing.expectEqual(expected, stack_machine.stack.popOrNull().?.generic); + } + } } diff --git a/src/crash_report.zig b/src/crash_report.zig index f09fce14f9a2..82be4211c7b0 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -271,7 +271,7 @@ const StackContext = union(enum) { current: struct { ret_addr: ?usize, }, - exception: *const debug.StackTraceContext, + exception: *const debug.ThreadContext, not_supported: void, pub fn dumpStackTrace(ctx: @This()) void { diff --git a/test/standalone/dwarf_unwinding/shared_lib_unwind.zig b/test/standalone/dwarf_unwinding/shared_lib_unwind.zig index 8f4219797295..543654d24f82 100644 --- a/test/standalone/dwarf_unwinding/shared_lib_unwind.zig +++ b/test/standalone/dwarf_unwinding/shared_lib_unwind.zig @@ -5,7 +5,7 @@ const testing = std.testing; noinline fn frame4(expected: *[4]usize, unwound: *[4]usize) void { expected[0] = @returnAddress(); - var context: debug.StackTraceContext = undefined; + var context: debug.ThreadContext = undefined; testing.expect(debug.getContext(&context)) catch @panic("failed to getContext"); var debug_info = debug.getSelfDebugInfo() catch @panic("failed to openSelfDebugInfo"); diff --git a/test/standalone/dwarf_unwinding/zig_unwind.zig b/test/standalone/dwarf_unwinding/zig_unwind.zig index 707c2b763211..3b13de24a1de 100644 --- a/test/standalone/dwarf_unwinding/zig_unwind.zig +++ b/test/standalone/dwarf_unwinding/zig_unwind.zig @@ -5,7 +5,7 @@ const testing = std.testing; noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { expected[0] = @returnAddress(); - var context: debug.StackTraceContext = undefined; + var context: 
debug.ThreadContext = undefined; testing.expect(debug.getContext(&context)) catch @panic("failed to getContext"); var debug_info = debug.getSelfDebugInfo() catch @panic("failed to openSelfDebugInfo"); From 463bbe7807b236e6e3493fb8551c585620ae266b Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 7 Jul 2023 01:03:43 -0400 Subject: [PATCH 50/81] dwarf: implement constx,addrx, begin adding DWARF expression tests --- lib/std/dwarf/expressions.zig | 142 +++++++++++++++++++++++++++------- 1 file changed, 114 insertions(+), 28 deletions(-) diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index a617244db284..98b06c0cd832 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -17,6 +17,9 @@ pub const ExpressionContext = struct { /// The compilation unit this expression relates to, if any compile_unit: ?*const dwarf.CompileUnit = null, + // .debug_addr section + debug_addr: ?[]const u8 = null, + /// Thread context thread_context: ?*std.debug.ThreadContext = null, reg_context: ?abi.RegisterContext = null, @@ -73,15 +76,15 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { block: []const u8, register_type: struct { register: u8, - type_offset: u64, + type_offset: addr_type, }, const_type: struct { - type_offset: u64, + type_offset: addr_type, value_bytes: []const u8, }, deref_type: struct { size: u8, - type_offset: u64, + type_offset: addr_type, }, }; @@ -91,7 +94,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // Typed value with a maximum size of a register regval_type: struct { // Offset of DW_TAG_base_type DIE - type_offset: u64, + type_offset: addr_type, type_size: u8, value: addr_type, }, @@ -99,7 +102,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // Typed value specified directly in the instruction stream const_type: struct { // Offset of DW_TAG_base_type DIE - type_offset: u64, + type_offset: addr_type, // Backed by the instruction stream value_bytes: 
[]const u8, }, @@ -202,7 +205,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, OP.regval_type => blk: { const register = try leb.readULEB128(u8, reader); - const type_offset = try leb.readULEB128(u64, reader); + const type_offset = try leb.readULEB128(addr_type, reader); break :blk .{ .register_type = .{ .register = register, .type_offset = type_offset, @@ -232,7 +235,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }; }, OP.const_type => blk: { - const type_offset = try leb.readULEB128(u8, reader); + const type_offset = try leb.readULEB128(addr_type, reader); const size = try reader.readByte(); if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; const value_bytes = stream.buffer[stream.pos..][0..size]; @@ -247,7 +250,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { => .{ .deref_type = .{ .size = try reader.readByte(), - .type_offset = try leb.readULEB128(u64, reader), + .type_offset = try leb.readULEB128(addr_type, reader), }, }, OP.lo_user...OP.hi_user => return error.UnimplementedUserOpcode, @@ -277,7 +280,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { context: ExpressionContext, ) !bool { if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian()) - @compileError("Execution of non-native address sizees / endianness is not supported"); + @compileError("Execution of non-native address sizes / endianness is not supported"); const opcode = try stream.reader().readByte(); if (options.call_frame_context and !opcodeValidInCFA(opcode)) return error.InvalidCFAOpcode; @@ -309,12 +312,13 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.addrx, OP.constx, => { + if (context.compile_unit == null) return error.ExpressionRequiresCompileUnit; + if (context.debug_addr == null) return error.ExpressionRequiresDebugAddr; const debug_addr_index = (try readOperand(stream, opcode)).?.generic; - - // TODO: Read 
item from .debug_addr, this requires need DW_AT_addr_base of the compile unit, push onto stack as generic - - _ = debug_addr_index; - unreachable; + const offset = context.compile_unit.?.addr_base + debug_addr_index; + if (offset >= context.debug_addr.?.len) return error.InvalidExpression; + const value = mem.readIntSliceNative(usize, context.debug_addr.?[offset..][0..@sizeOf(usize)]); + try self.stack.append(allocator, .{ .generic = value }); }, // 2.5.1.2: Register Values @@ -464,7 +468,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // 2.5.1.4: Arithmetic and Logical Operations OP.abs => { if (self.stack.items.len == 0) return error.InvalidExpression; - const value: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const value: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ .generic = std.math.absCast(value), }; @@ -478,10 +482,10 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, OP.div => { if (self.stack.items.len < 2) return error.InvalidExpression; - const a: addr_type_signed = @bitCast(try self.stack.pop().asIntegral()); - const b: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ - .generic = @bitCast(try std.math.divTrunc(addr_type_signed, b, a)), + .generic = @bitCast(try std.math.divTrunc(isize, b, a)), }; }, OP.minus => { @@ -493,16 +497,16 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, OP.mod => { if (self.stack.items.len < 2) return error.InvalidExpression; - const a: addr_type_signed = @bitCast(try self.stack.pop().asIntegral()); - const b: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); 
+ const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ .generic = @bitCast(@mod(b, a)), }; }, OP.mul => { if (self.stack.items.len < 2) return error.InvalidExpression; - const a: addr_type_signed = @bitCast(try self.stack.pop().asIntegral()); - const b: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ .generic = @bitCast(@mulWithOverflow(a, b)[0]), }; @@ -512,7 +516,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { self.stack.items[self.stack.items.len - 1] = .{ .generic = @bitCast( try std.math.negate( - @as(addr_type_signed, @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral())), + @as(isize, @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral())), ), ), }; @@ -563,9 +567,9 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.shra => { if (self.stack.items.len < 2) return error.InvalidExpression; const a = try self.stack.pop().asIntegral(); - const b: addr_type_signed = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); self.stack.items[self.stack.items.len - 1] = .{ - .generic = @bitCast(std.math.shr(addr_type_signed, b, a)), + .generic = @bitCast(std.math.shr(isize, b, a)), }; }, OP.xor => { @@ -589,8 +593,8 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { const b = self.stack.items[self.stack.items.len - 1]; if (a == .generic and b == .generic) { - const a_int: addr_type_signed = @bitCast(a.asIntegral() catch unreachable); - const b_int: addr_type_signed = @bitCast(b.asIntegral() catch 
unreachable); + const a_int: isize = @bitCast(a.asIntegral() catch unreachable); + const b_int: isize = @bitCast(b.asIntegral() catch unreachable); const result = @intFromBool(switch (opcode) { OP.le => b_int < a_int, OP.ge => b_int >= a_int, @@ -617,7 +621,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { if (condition) { const new_pos = std.math.cast( usize, - try std.math.add(addr_type_signed, @as(addr_type_signed, @intCast(stream.pos)), branch_offset), + try std.math.add(isize, @as(isize, @intCast(stream.pos)), branch_offset), ) orelse return error.InvalidExpression; if (new_pos < 0 or new_pos >= stream.buffer.len) return error.InvalidExpression; @@ -781,6 +785,7 @@ pub fn Builder(comptime options: ExpressionOptions) type { i32 => OP.const4s, u64 => OP.const8u, i64 => OP.const8s, + else => unreachable, }); try writer.writeInt(T, value, options.endian); @@ -988,4 +993,85 @@ test "DWARF expressions" { try testing.expectEqual(expected, stack_machine.stack.popOrNull().?.generic); } } + + // Constants + { + program.clearRetainingCapacity(); + + const expected = [_]comptime_int{ + 1, + -1, + 0x0fff, + -0x0fff, + 0x0fffffff, + -0x0fffffff, + 0x0fffffffffffffff, + -0x0fffffffffffffff, + 0x8000000, + -0x8000000, + @as(usize, @truncate(0x12345678_12345678)), + @as(usize, @truncate(0xffffffff_ffffffff)), + @as(usize, @truncate(0xeeeeeeee_eeeeeeee)), + }; + + try b.writeConst(writer, u8, expected[0]); + try b.writeConst(writer, i8, expected[1]); + try b.writeConst(writer, u16, expected[2]); + try b.writeConst(writer, i16, expected[3]); + try b.writeConst(writer, u32, expected[4]); + try b.writeConst(writer, i32, expected[5]); + try b.writeConst(writer, u64, expected[6]); + try b.writeConst(writer, i64, expected[7]); + try b.writeConst(writer, u28, expected[8]); + try b.writeConst(writer, i28, expected[9]); + try b.writeAddr(writer, expected[10]); + + var mock_compile_unit: dwarf.CompileUnit = undefined; + mock_compile_unit.addr_base = 1; + + var 
mock_debug_addr = std.ArrayList(u8).init(allocator); + defer mock_debug_addr.deinit(); + + try mock_debug_addr.writer().writeIntNative(u16, 0); + try mock_debug_addr.writer().writeIntNative(usize, expected[11]); + try mock_debug_addr.writer().writeIntNative(usize, expected[12]); + + const context = ExpressionContext{ + .compile_unit = &mock_compile_unit, + .debug_addr = mock_debug_addr.items, + }; + + try b.writeConstx(writer, @as(usize, 1)); + try b.writeAddrx(writer, @as(usize, 1 + @sizeOf(usize))); + + const die_offset: usize = @truncate(0xaabbccdd); + const type_bytes: []const u8 = &.{ 1, 2, 3, 4 }; + try b.writeConstType(writer, die_offset, type_bytes.len, type_bytes); + + _ = try stack_machine.run(program.items, allocator, context, 0); + + const const_type = stack_machine.stack.popOrNull().?.const_type; + try testing.expectEqual(die_offset, const_type.type_offset); + try testing.expectEqualSlices(u8, type_bytes, const_type.value_bytes); + + try testing.expectEqual(@as(usize, expected[12]), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(usize, expected[11]), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(usize, expected[10]), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(isize, @truncate(expected[9])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(usize, @truncate(expected[8])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(isize, @truncate(expected[7])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(usize, @truncate(expected[6])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(isize, @truncate(expected[5])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(usize, 
@truncate(expected[4])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(isize, @truncate(expected[3])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(usize, @truncate(expected[2])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(isize, @truncate(expected[1])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + try testing.expectEqual(@as(usize, @truncate(expected[0])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + } + + // Register values + var thread_context: std.debug.ThreadContext = undefined; + if (std.debug.getContext(&thread_context)) { + // TODO: Test fbreg, breg0..31, bregx, regval_type + } } From 5c0d4cef1afda3e01bded01636ef71846522909b Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 7 Jul 2023 10:13:48 -0400 Subject: [PATCH 51/81] debug: add dupeContext, store a pointer to a copy of ThreadContext on UnwindContext --- lib/std/debug.zig | 16 ++++++++++++++++ lib/std/dwarf.zig | 21 +++++++++++---------- lib/std/dwarf/call_frame.zig | 4 ++-- lib/std/dwarf/expressions.zig | 6 ++++-- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 7fac16085576..7e26b6a4b007 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -133,6 +133,9 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { } } +/// Platform-specific thread state. This contains register state, and on some platforms +/// information about the stack. This is not safe to trivially copy, because some platforms +/// use internal pointers within this structure. To make a copy, use `dupeContext`. 
pub const ThreadContext = blk: { if (native_os == .windows) { break :blk std.os.windows.CONTEXT; @@ -457,6 +460,19 @@ pub inline fn getContext(context: *ThreadContext) bool { return result; } +pub fn dupeContext(source: *const ThreadContext, dest: *ThreadContext) void { + if (native_os == .windows) dest.* = source.*; + if (!have_ucontext) return {}; + + return switch (native_os) { + .macos => { + dest.* = source.*; + dest.mcontext = &dest.__mcontext_data; + }, + else => dest.* = source.*, + }; +} + pub const UnwindError = if (have_ucontext) @typeInfo(@typeInfo(@TypeOf(StackIterator.next_dwarf)).Fn.return_type.?).ErrorUnion.error_set else diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 0909b6eafb1a..4b4b0587d540 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1676,7 +1676,7 @@ pub const DwarfInfo = struct { var expression_context = .{ .isValidMemory = context.isValidMemory, .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, - .thread_context = &context.thread_context, + .thread_context = context.thread_context, .reg_context = context.reg_context, .cfa = context.cfa, }; @@ -1690,7 +1690,7 @@ pub const DwarfInfo = struct { context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; - const value = mem.readIntSliceNative(usize, try abi.regBytes(&context.thread_context, register, context.reg_context)); + const value = mem.readIntSliceNative(usize, try abi.regBytes(context.thread_context, register, context.reg_context)); break :blk try call_frame.applyOffset(value, offset); }, .expression => |expression| blk: { @@ -1733,7 +1733,7 @@ pub const DwarfInfo = struct { has_next_ip = column.rule != .undefined; } - const old_value = try abi.regBytes(&context.thread_context, register, context.reg_context); + const old_value = try abi.regBytes(context.thread_context, register, context.reg_context); const new_value = try update_allocator.alloc(u8, old_value.len); const prev 
= update_tail; @@ -1758,12 +1758,12 @@ pub const DwarfInfo = struct { } if (has_next_ip) { - context.pc = mem.readIntSliceNative(usize, try abi.regBytes(&context.thread_context, comptime abi.ipRegNum(), context.reg_context)); + context.pc = mem.readIntSliceNative(usize, try abi.regBytes(context.thread_context, comptime abi.ipRegNum(), context.reg_context)); } else { context.pc = 0; } - mem.writeIntSliceNative(usize, try abi.regBytes(&context.thread_context, abi.spRegNum(context.reg_context), context.reg_context), context.cfa.?); + mem.writeIntSliceNative(usize, try abi.regBytes(context.thread_context, abi.spRegNum(context.reg_context), context.reg_context), context.cfa.?); // The call instruction will have pushed the address of the instruction that follows the call as the return address // However, this return address may be past the end of the function if the caller was `noreturn`. @@ -1779,7 +1779,7 @@ pub const UnwindContext = struct { allocator: mem.Allocator, cfa: ?usize, pc: usize, - thread_context: debug.ThreadContext, + thread_context: *debug.ThreadContext, reg_context: abi.RegisterContext, isValidMemory: *const fn (address: usize) bool, vm: call_frame.VirtualMachine = .{}, @@ -1788,14 +1788,14 @@ pub const UnwindContext = struct { pub fn init(allocator: mem.Allocator, thread_context: *const debug.ThreadContext, isValidMemory: *const fn (address: usize) bool) !UnwindContext { const pc = mem.readIntSliceNative(usize, try abi.regBytes(thread_context, abi.ipRegNum(), null)); - if (builtin.os.tag == .macos) @compileError("Fix below TODO"); + const context_copy = try allocator.create(debug.ThreadContext); + debug.dupeContext(thread_context, context_copy); return .{ .allocator = allocator, .cfa = null, .pc = pc, - // TODO: This is broken on macos, need a function that knows how to copy the OSs mcontext properly - .thread_context = thread_context.*, + .thread_context = context_copy, .reg_context = undefined, .isValidMemory = isValidMemory, }; @@ -1804,10 +1804,11 
@@ pub const UnwindContext = struct { pub fn deinit(self: *UnwindContext) void { self.vm.deinit(self.allocator); self.stack_machine.deinit(self.allocator); + self.allocator.destroy(self.thread_context); } pub fn getFp(self: *const UnwindContext) !usize { - return mem.readIntSliceNative(usize, try abi.regBytes(&self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)); + return mem.readIntSliceNative(usize, try abi.regBytes(self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)); } }; diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 49772e2ae5a7..c2c95b8104ff 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -315,9 +315,9 @@ pub const VirtualMachine = struct { } else return error.InvalidCFA; }, .register => |register| { - const src = try abi.regBytes(&context.thread_context, register, context.reg_context); + const src = try abi.regBytes(context.thread_context, register, context.reg_context); if (src.len != out.len) return error.RegisterTypeMismatch; - @memcpy(out, try abi.regBytes(&context.thread_context, register, context.reg_context)); + @memcpy(out, try abi.regBytes(context.thread_context, register, context.reg_context)); }, .expression => |expression| { context.stack_machine.reset(); diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index 98b06c0cd832..8f07b6f50092 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -1070,8 +1070,10 @@ test "DWARF expressions" { } // Register values - var thread_context: std.debug.ThreadContext = undefined; - if (std.debug.getContext(&thread_context)) { + if (@TypeOf(std.debug.ThreadContext) != void) { + var thread_context: std.debug.ThreadContext = undefined; + _ = thread_context; + // TODO: Test fbreg, breg0..31, bregx, regval_type } } From 54ca62fef4a621e0d64b6461706c8b5fe5a80348 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 7 Jul 2023 18:37:10 -0400 Subject: [PATCH 52/81] 
dwarf: fixup regBytes for the case where there is no context support expressions: add more tests, fix tests for mipsel debug: add lookupModuleName implementation for macos --- lib/std/debug.zig | 42 +++- lib/std/dwarf.zig | 8 +- lib/std/dwarf/abi.zig | 3 + lib/std/dwarf/call_frame.zig | 17 +- lib/std/dwarf/expressions.zig | 411 +++++++++++++++++++++++++++++----- 5 files changed, 412 insertions(+), 69 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 7e26b6a4b007..c60b9f87763f 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1518,10 +1518,10 @@ pub const DebugInfo = struct { // Returns the module name for a given address. // This can be called when getModuleForAddress fails, so implementations should provide - // a path that doesn't rely on any side-effects of successful module lookup. + // a path that doesn't rely on any side-effects of a prior successful module lookup. pub fn getModuleNameForAddress(self: *DebugInfo, address: usize) ?[]const u8 { if (comptime builtin.target.isDarwin()) { - return null; + return self.lookupModuleNameDyld(address); } else if (native_os == .windows) { return self.lookupModuleNameWin32(address); } else if (native_os == .haiku) { @@ -1590,6 +1590,44 @@ pub const DebugInfo = struct { return error.MissingDebugInfo; } + fn lookupModuleNameDyld(self: *DebugInfo, address: usize) ?[]const u8 { + _ = self; + const image_count = std.c._dyld_image_count(); + + var i: u32 = 0; + while (i < image_count) : (i += 1) { + const header = std.c._dyld_get_image_header(i) orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); + + var it = macho.LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = @alignCast(@as( + [*]u8, + @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), + )[0..header.sizeofcmds]), + }; + + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + const segment_cmd = 
cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const original_address = address - vmaddr_slide; + const seg_start = segment_cmd.vmaddr; + const seg_end = seg_start + segment_cmd.vmsize; + if (original_address >= seg_start and original_address < seg_end) { + return mem.sliceTo(std.c._dyld_get_image_name(i), 0); + } + }, + else => {}, + }; + } + + return null; + } + fn lookupModuleWin32(self: *DebugInfo, address: usize) !*ModuleDebugInfo { for (self.modules.items) |*module| { if (address >= module.base_address and address < module.base_address + module.size) { diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 4b4b0587d540..44c9a90e1b40 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1699,11 +1699,13 @@ pub const DwarfInfo = struct { expression, context.allocator, expression_context, - context.cfa orelse 0, + context.cfa, ); - if (value != .generic) return error.InvalidExpressionValue; - break :blk value.generic; + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; }, else => return error.InvalidCFARule, }; diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 1927e3df1ad0..d005e4982708 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -110,11 +110,14 @@ pub fn regBytes(thread_context_ptr: anytype, reg_number: u8, reg_context: ?Regis 0...30 => mem.asBytes(&thread_context_ptr.DUMMYUNIONNAME.X[reg_number]), 31 => mem.asBytes(&thread_context_ptr.Sp), 32 => mem.asBytes(&thread_context_ptr.Pc), + else => error.InvalidRegister, }, else => error.UnimplementedArch, }; } + if (!std.debug.have_ucontext) return error.ThreadContextNotSupported; + const ucontext_ptr = thread_context_ptr; var m = &ucontext_ptr.mcontext; return switch (builtin.cpu.arch) { diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index c2c95b8104ff..0b6f45d938bb 100644 --- 
a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -322,19 +322,22 @@ pub const VirtualMachine = struct { .expression => |expression| { context.stack_machine.reset(); const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); + const addr = if (value) |v| blk: { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; - if (value != .generic) return error.InvalidExpressionValue; - if (!context.isValidMemory(value.generic)) return error.InvalidExpressionAddress; - - const ptr: *usize = @ptrFromInt(value.generic); + if (!context.isValidMemory(addr)) return error.InvalidExpressionAddress; + const ptr: *usize = @ptrFromInt(addr); mem.writeIntSliceNative(usize, out, ptr.*); }, .val_expression => |expression| { context.stack_machine.reset(); const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - - if (value != .generic) return error.InvalidExpressionValue; - mem.writeIntSliceNative(usize, out, value.generic); + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + mem.writeIntSliceNative(usize, out, v.generic); + } else return error.NoExpressionValue; }, .architectural => return error.UnimplementedRegisterRule, } diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index 8f07b6f50092..dc3cc139b899 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -263,12 +263,12 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { expression: []const u8, allocator: std.mem.Allocator, context: ExpressionContext, - initial_value: usize, - ) !Value { - try self.stack.append(allocator, .{ .generic = initial_value }); + initial_value: ?usize, + ) !?Value { + if (initial_value) |i| try self.stack.append(allocator, .{ .generic = i }); var stream = std.io.fixedBufferStream(expression); while (try self.step(&stream, 
allocator, context)) {} - if (self.stack.items.len == 0) return error.InvalidExpression; + if (self.stack.items.len == 0) return null; return self.stack.items[self.stack.items.len - 1]; } @@ -412,7 +412,11 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.xderef, OP.xderef_size, OP.xderef_type, - => try self.stack.pop().asIntegral(), + => blk: { + _ = self.stack.pop(); + if (self.stack.items.len == 0) return error.InvalidExpression; + break :blk try self.stack.items[self.stack.items.len - 1].asIntegral(); + }, else => null, }; @@ -424,7 +428,9 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { const operand = try readOperand(stream, opcode); const size = switch (opcode) { - OP.deref => @sizeOf(addr_type), + OP.deref, + OP.xderef, + => @sizeOf(addr_type), OP.deref_size, OP.xderef_size, => operand.?.type_size, @@ -442,16 +448,21 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { else => return error.InvalidExpression, })) orelse return error.InvalidExpression; - if (opcode == OP.deref_type) { - self.stack.items[self.stack.items.len - 1] = .{ - .regval_type = .{ - .type_offset = operand.?.deref_type.type_offset, - .type_size = operand.?.deref_type.size, - .value = value, - }, - }; - } else { - self.stack.items[self.stack.items.len - 1] = .{ .generic = value }; + switch (opcode) { + OP.deref_type, + OP.xderef_type, + => { + self.stack.items[self.stack.items.len - 1] = .{ + .regval_type = .{ + .type_offset = operand.?.deref_type.type_offset, + .type_size = operand.?.deref_type.size, + .value = value, + }, + }; + }, + else => { + self.stack.items[self.stack.items.len - 1] = .{ .generic = value }; + }, } }, OP.push_object_address, @@ -759,7 +770,7 @@ pub fn Builder(comptime options: ExpressionOptions) type { OP.ne, OP.nop, => try writer.writeByte(opcode), - else => @compileError("This opcode requires operands, use write() instead"), + else => @compileError("This opcode requires operands, use `write()` instead"), } } @@ 
-836,7 +847,7 @@ pub fn Builder(comptime options: ExpressionOptions) type { pub fn writeBreg(writer: anytype, register: u8, offset: anytype) !void { if (register > 31) return error.InvalidRegister; - try writer.writeByte(OP.reg0 + register); + try writer.writeByte(OP.breg0 + register); try leb.writeILEB128(writer, offset); } @@ -848,7 +859,7 @@ pub fn Builder(comptime options: ExpressionOptions) type { pub fn writeRegvalType(writer: anytype, register: anytype, offset: anytype) !void { if (options.call_frame_context) return error.InvalidCFAOpcode; - try writer.writeByte(OP.bregx); + try writer.writeByte(OP.regval_type); try leb.writeULEB128(writer, register); try leb.writeULEB128(writer, offset); } @@ -996,35 +1007,36 @@ test "DWARF expressions" { // Constants { + stack_machine.reset(); program.clearRetainingCapacity(); - const expected = [_]comptime_int{ + const input = [_]comptime_int{ 1, -1, - 0x0fff, - -0x0fff, - 0x0fffffff, - -0x0fffffff, - 0x0fffffffffffffff, - -0x0fffffffffffffff, - 0x8000000, - -0x8000000, + @as(usize, @truncate(0x0fff)), + @as(isize, @truncate(-0x0fff)), + @as(usize, @truncate(0x0fffffff)), + @as(isize, @truncate(-0x0fffffff)), + @as(usize, @truncate(0x0fffffffffffffff)), + @as(isize, @truncate(-0x0fffffffffffffff)), + @as(usize, @truncate(0x8000000)), + @as(isize, @truncate(-0x8000000)), @as(usize, @truncate(0x12345678_12345678)), @as(usize, @truncate(0xffffffff_ffffffff)), @as(usize, @truncate(0xeeeeeeee_eeeeeeee)), }; - try b.writeConst(writer, u8, expected[0]); - try b.writeConst(writer, i8, expected[1]); - try b.writeConst(writer, u16, expected[2]); - try b.writeConst(writer, i16, expected[3]); - try b.writeConst(writer, u32, expected[4]); - try b.writeConst(writer, i32, expected[5]); - try b.writeConst(writer, u64, expected[6]); - try b.writeConst(writer, i64, expected[7]); - try b.writeConst(writer, u28, expected[8]); - try b.writeConst(writer, i28, expected[9]); - try b.writeAddr(writer, expected[10]); + try b.writeConst(writer, u8, 
input[0]); + try b.writeConst(writer, i8, input[1]); + try b.writeConst(writer, u16, input[2]); + try b.writeConst(writer, i16, input[3]); + try b.writeConst(writer, u32, input[4]); + try b.writeConst(writer, i32, input[5]); + try b.writeConst(writer, u64, input[6]); + try b.writeConst(writer, i64, input[7]); + try b.writeConst(writer, u28, input[8]); + try b.writeConst(writer, i28, input[9]); + try b.writeAddr(writer, input[10]); var mock_compile_unit: dwarf.CompileUnit = undefined; mock_compile_unit.addr_base = 1; @@ -1033,8 +1045,8 @@ test "DWARF expressions" { defer mock_debug_addr.deinit(); try mock_debug_addr.writer().writeIntNative(u16, 0); - try mock_debug_addr.writer().writeIntNative(usize, expected[11]); - try mock_debug_addr.writer().writeIntNative(usize, expected[12]); + try mock_debug_addr.writer().writeIntNative(usize, input[11]); + try mock_debug_addr.writer().writeIntNative(usize, input[12]); const context = ExpressionContext{ .compile_unit = &mock_compile_unit, @@ -1054,26 +1066,311 @@ test "DWARF expressions" { try testing.expectEqual(die_offset, const_type.type_offset); try testing.expectEqualSlices(u8, type_bytes, const_type.value_bytes); - try testing.expectEqual(@as(usize, expected[12]), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(usize, expected[11]), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(usize, expected[10]), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(isize, @truncate(expected[9])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(usize, @truncate(expected[8])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(isize, @truncate(expected[7])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(usize, @truncate(expected[6])), @as(usize, 
@bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(isize, @truncate(expected[5])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(usize, @truncate(expected[4])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(isize, @truncate(expected[3])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(usize, @truncate(expected[2])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(isize, @truncate(expected[1])), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); - try testing.expectEqual(@as(usize, @truncate(expected[0])), @as(usize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + const expected = .{ + .{ usize, input[12], usize }, + .{ usize, input[11], usize }, + .{ usize, input[10], usize }, + .{ isize, input[9], isize }, + .{ usize, input[8], usize }, + .{ isize, input[7], isize }, + .{ usize, input[6], usize }, + .{ isize, input[5], isize }, + .{ usize, input[4], usize }, + .{ isize, input[3], isize }, + .{ usize, input[2], usize }, + .{ isize, input[1], isize }, + .{ usize, input[0], usize }, + }; + + inline for (expected) |e| { + try testing.expectEqual(@as(e[0], e[1]), @as(e[2], @bitCast(stack_machine.stack.popOrNull().?.generic))); + } } // Register values - if (@TypeOf(std.debug.ThreadContext) != void) { + if (@sizeOf(std.debug.ThreadContext) != 0) { + stack_machine.reset(); + program.clearRetainingCapacity(); + + const reg_context = abi.RegisterContext{ + .eh_frame = true, + .is_macho = builtin.os.tag == .macos, + }; var thread_context: std.debug.ThreadContext = undefined; - _ = thread_context; + const context = ExpressionContext{ + .thread_context = &thread_context, + .reg_context = reg_context, + }; + + // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it + + 
mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, 0, reg_context), 0xee); + mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.fpRegNum(reg_context), reg_context), 1); + mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.spRegNum(reg_context), reg_context), 2); + mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.ipRegNum(), reg_context), 3); - // TODO: Test fbreg, breg0..31, bregx, regval_type + try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100)); + try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200)); + try b.writeBregx(writer, abi.ipRegNum(), @as(usize, 300)); + try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); + + _ = try stack_machine.run(program.items, allocator, context, 0); + + const regval_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(@as(usize, 400), regval_type.type_offset); + try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); + try testing.expectEqual(@as(usize, 0xee), regval_type.value); + + try testing.expectEqual(@as(usize, 303), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 202), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 101), stack_machine.stack.popOrNull().?.generic); + } + + // Stack operations + { + var context = ExpressionContext{}; + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 1); + try b.writeOpcode(writer, OP.dup); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 1), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 1), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 1); + try b.writeOpcode(writer, OP.drop); + _ = try stack_machine.run(program.items, allocator, context, null); + try 
testing.expect(stack_machine.stack.popOrNull() == null); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 4); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writePick(writer, 2); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 4), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 4); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writeOpcode(writer, OP.over); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writeOpcode(writer, OP.swap); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 6), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 4); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writeOpcode(writer, OP.rot); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 4), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 6), stack_machine.stack.popOrNull().?.generic); + + const deref_target: usize = @truncate(0xffeeffee_ffeeffee); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeOpcode(writer, OP.deref); + _ = try stack_machine.run(program.items, allocator, context, 
null); + try testing.expectEqual(deref_target, stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 0); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeOpcode(writer, OP.xderef); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(deref_target, stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeDerefSize(writer, 1); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 0); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeXDerefSize(writer, 1); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), stack_machine.stack.popOrNull().?.generic); + + const type_offset: usize = @truncate(0xaabbaabb_aabbaabb); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeDerefType(writer, 1, type_offset); + _ = try stack_machine.run(program.items, allocator, context, null); + const deref_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(type_offset, deref_type.type_offset); + try testing.expectEqual(@as(u8, 1), deref_type.type_size); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), deref_type.value); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 0); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeXDerefType(writer, 1, type_offset); + _ = try 
stack_machine.run(program.items, allocator, context, null); + const xderef_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(type_offset, xderef_type.type_offset); + try testing.expectEqual(@as(u8, 1), xderef_type.type_size); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), xderef_type.value); + + // TODO: Test OP.push_object_address + // TODO: Test OP.form_tls_address + + context.cfa = @truncate(0xccddccdd_ccddccdd); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeOpcode(writer, OP.call_frame_cfa); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(context.cfa.?, stack_machine.stack.popOrNull().?.generic); + } + + // Arithmetic and Logical Operations + { + var context = ExpressionContext{}; + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, i16, -4096); + try b.writeOpcode(writer, OP.abs); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 4096), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xff0f); + try b.writeConst(writer, u16, 0xf0ff); + try b.writeOpcode(writer, OP.@"and"); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0xf00f), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, i16, -404); + try b.writeConst(writer, i16, 100); + try b.writeOpcode(writer, OP.div); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(isize, -404 / 100), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 200); + try b.writeConst(writer, u16, 50); + try 
b.writeOpcode(writer, OP.minus); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 150), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 123); + try b.writeConst(writer, u16, 100); + try b.writeOpcode(writer, OP.mod); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 23), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xff); + try b.writeConst(writer, u16, 0xee); + try b.writeOpcode(writer, OP.mul); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0xed12), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 5); + try b.writeOpcode(writer, OP.neg); + try b.writeConst(writer, i16, -6); + try b.writeOpcode(writer, OP.neg); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 6), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(isize, -5), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xff0f); + try b.writeOpcode(writer, OP.not); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(~@as(usize, 0xff0f), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xff0f); + try b.writeConst(writer, u16, 0xf0ff); + try b.writeOpcode(writer, OP.@"or"); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0xffff), stack_machine.stack.popOrNull().?.generic); + + 
stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, i16, 402); + try b.writeConst(writer, i16, 100); + try b.writeOpcode(writer, OP.plus); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 502), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 4096); + try b.writePlusUconst(writer, @as(usize, 8192)); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 4096 + 8192), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xfff); + try b.writeConst(writer, u16, 1); + try b.writeOpcode(writer, OP.shl); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0xfff << 1), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xfff); + try b.writeConst(writer, u16, 1); + try b.writeOpcode(writer, OP.shr); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0xfff >> 1), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xfff); + try b.writeConst(writer, u16, 1); + try b.writeOpcode(writer, OP.shr); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, @bitCast(@as(isize, 0xfff) >> 1)), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xf0ff); + try b.writeConst(writer, u16, 0xff0f); + try b.writeOpcode(writer, OP.xor); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0x0ff0), 
stack_machine.stack.popOrNull().?.generic); } } From 021f5378630f8acb5525974305a52314e227c276 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 8 Jul 2023 00:20:37 -0400 Subject: [PATCH 53/81] dwarf: fixup default endianness in ExpressionOptions, add control flow tests --- lib/std/dwarf/expressions.zig | 84 +++++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 9 deletions(-) diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index dc3cc139b899..5b0e97cf4578 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -33,7 +33,7 @@ pub const ExpressionOptions = struct { addr_size: u8 = @sizeOf(usize), /// Endianess of the target architecture - endian: std.builtin.Endian = .Little, + endian: std.builtin.Endian = builtin.target.cpu.arch.endian(), /// Restrict the stack machine to a subset of opcodes used in call frame instructions call_frame_context: bool = false, @@ -272,7 +272,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { return self.stack.items[self.stack.items.len - 1]; } - /// Reads an opcode and its operands from the stream and executes it + /// Reads an opcode and its operands from `stream`, then executes it pub fn step( self: *Self, stream: *std.io.FixedBufferStream([]const u8), @@ -607,7 +607,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { const a_int: isize = @bitCast(a.asIntegral() catch unreachable); const b_int: isize = @bitCast(b.asIntegral() catch unreachable); const result = @intFromBool(switch (opcode) { - OP.le => b_int < a_int, + OP.le => b_int <= a_int, OP.ge => b_int >= a_int, OP.eq => b_int == a_int, OP.lt => b_int < a_int, @@ -635,7 +635,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { try std.math.add(isize, @as(isize, @intCast(stream.pos)), branch_offset), ) orelse return error.InvalidExpression; - if (new_pos < 0 or new_pos >= stream.buffer.len) return error.InvalidExpression; + if (new_pos < 0 or new_pos > 
stream.buffer.len) return error.InvalidExpression; stream.pos = new_pos; } }, @@ -657,12 +657,16 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.convert => { if (self.stack.items.len == 0) return error.InvalidExpression; const type_offset = (try readOperand(stream, opcode)).?.generic; - _ = type_offset; - // TODO: Load the DW_TAG_base_type entry in context.compile_unit, find a conversion operator - // from the old type to the new type, run it. - - return error.UnimplementedTypeConversion; + // TODO: Load the DW_TAG_base_type entries in context.compile_unit and verify both types are the same size + const value = self.stack.items[self.stack.items.len - 1]; + if (type_offset == 0) { + self.stack.items[self.stack.items.len - 1] = .{ .generic = try value.asIntegral() }; + } else { + // TODO: Load the DW_TAG_base_type entry in context.compile_unit, find a conversion operator + // from the old type to the new type, run it. + return error.UnimplementedTypeConversion; + } }, OP.reinterpret => { if (self.stack.items.len == 0) return error.InvalidExpression; @@ -1373,4 +1377,66 @@ test "DWARF expressions" { _ = try stack_machine.run(program.items, allocator, context, null); try testing.expectEqual(@as(usize, 0x0ff0), stack_machine.stack.popOrNull().?.generic); } + + + // Control Flow Operations + { + var context = ExpressionContext{}; + const expected = .{ + .{ OP.le, 1, 1, 0 }, + .{ OP.ge, 1, 0, 1 }, + .{ OP.eq, 1, 0, 0 }, + .{ OP.lt, 0, 1, 0 }, + .{ OP.gt, 0, 0, 1 }, + .{ OP.ne, 0, 1, 1 }, + }; + + inline for (expected) |e| { + stack_machine.reset(); + program.clearRetainingCapacity(); + + try b.writeConst(writer, u16, 0); + try b.writeConst(writer, u16, 0); + try b.writeOpcode(writer, e[0]); + try b.writeConst(writer, u16, 0); + try b.writeConst(writer, u16, 1); + try b.writeOpcode(writer, e[0]); + try b.writeConst(writer, u16, 1); + try b.writeConst(writer, u16, 0); + try b.writeOpcode(writer, e[0]); + _ = try stack_machine.run(program.items, 
allocator, context, null); + try testing.expectEqual(@as(usize, e[3]), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, e[2]), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, e[1]), stack_machine.stack.popOrNull().?.generic); + } + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 2); + try b.writeSkip(writer, 1); + try b.writeLiteral(writer, 3); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 2), stack_machine.stack.popOrNull().?.generic); + + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 2); + try b.writeBra(writer, 1); + try b.writeLiteral(writer, 3); + try b.writeLiteral(writer, 0); + try b.writeBra(writer, 1); + try b.writeLiteral(writer, 4); + try b.writeLiteral(writer, 5); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 4), stack_machine.stack.popOrNull().?.generic); + try testing.expect(stack_machine.stack.popOrNull() == null); + + // TODO: Test call2, call4, call_ref once implemented + + } + + } + From 21d0154139fbbbb218dc7734ba0cd44b5baf2876 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 8 Jul 2023 01:16:11 -0400 Subject: [PATCH 54/81] dwarf: skip register tests on unimplemented arch / os, add tests for type conversions debug: dupeContext -> copyContext --- lib/std/debug.zig | 53 ++++++++------ lib/std/dwarf.zig | 3 +- lib/std/dwarf/expressions.zig | 125 ++++++++++++++++++++++++++++------ 3 files changed, 139 insertions(+), 42 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index c60b9f87763f..1907fdc4a1e1 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -135,7 +135,7 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { /// Platform-specific thread state.
This contains register state, and on some platforms /// information about the stack. This is not safe to trivially copy, because some platforms -/// use internal pointers within this structure. To make a copy, use `dupeContext`. +/// use internal pointers within this structure. To make a copy, use `copyContext`. pub const ThreadContext = blk: { if (native_os == .windows) { break :blk std.os.windows.CONTEXT; @@ -460,7 +460,7 @@ pub inline fn getContext(context: *ThreadContext) bool { return result; } -pub fn dupeContext(source: *const ThreadContext, dest: *ThreadContext) void { +pub fn copyContext(source: *const ThreadContext, dest: *ThreadContext) void { if (native_os == .windows) dest.* = source.*; if (!have_ucontext) return {}; @@ -474,7 +474,7 @@ pub fn dupeContext(source: *const ThreadContext, dest: *ThreadContext) void { } pub const UnwindError = if (have_ucontext) - @typeInfo(@typeInfo(@TypeOf(StackIterator.next_dwarf)).Fn.return_type.?).ErrorUnion.error_set + @typeInfo(@typeInfo(@TypeOf(StackIterator.next_unwind)).Fn.return_type.?).ErrorUnion.error_set else void; @@ -619,11 +619,21 @@ pub const StackIterator = struct { } } - fn next_dwarf(self: *StackIterator) !usize { + fn next_unwind(self: *StackIterator) !usize { const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); + switch (native_os) { + .macos, .ios, .watchos, .tvos => { + const o_file_info = try module.getOFileInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc); + if (o_file_info.unwind_info == null) return error.MissingUnwindInfo; + + // TODO: Unwind using __unwind_info, + unreachable; + + }, + else => {}, + } + if (try module.getDwarfInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc)) |di| { - self.dwarf_context.reg_context.eh_frame = true; - self.dwarf_context.reg_context.is_macho = di.is_macho; return di.unwindFrame(&self.dwarf_context, module.base_address); } else return error.MissingDebugInfo; } @@ -631,7 +641,7 @@ pub const 
StackIterator = struct { fn next_internal(self: *StackIterator) ?usize { if (have_ucontext and self.debug_info != null) { if (self.dwarf_context.pc == 0) return null; - if (self.next_dwarf()) |return_address| { + if (self.next_unwind()) |return_address| { return return_address; } else |err| { self.last_error = err; @@ -1882,6 +1892,7 @@ pub const ModuleDebugInfo = switch (native_os) { const OFileInfo = struct { di: DW.DwarfInfo, addr_table: std.StringHashMap(u64), + unwind_info: ?[]const u8, }; fn deinit(self: *@This(), allocator: mem.Allocator) void { @@ -1939,21 +1950,24 @@ pub const ModuleDebugInfo = switch (native_os) { addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); } + var unwind_info: ?[]const u8 = null; var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; for (segcmd.?.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); + } else if (std.mem.eql(u8, "__DWARF", sect.segName())) { + var section_index: ?usize = null; + inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; + } + if (section_index == null) continue; - var section_index: ?usize = null; - inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; + const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index.?] = .{ + .data = section_bytes, + .owned = false, + }; } - if (section_index == null) continue; - - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] 
= .{ - .data = section_bytes, - .owned = false, - }; } const missing_debug_info = @@ -1973,6 +1987,7 @@ pub const ModuleDebugInfo = switch (native_os) { var info = OFileInfo{ .di = di, .addr_table = addr_table, + .unwind_info = unwind_info, }; // Add the debug info to the cache @@ -2030,7 +2045,7 @@ pub const ModuleDebugInfo = switch (native_os) { } } - fn getOFileInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !struct { + pub fn getOFileInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !struct { relocated_address: usize, symbol: ?*const MachoSymbol = null, o_file_info: ?*OFileInfo = null, diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 44c9a90e1b40..c8208140311a 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1683,6 +1683,7 @@ pub const DwarfInfo = struct { context.vm.reset(); context.reg_context.eh_frame = cie.version != 4; + context.reg_context.is_macho = di.is_macho; _ = try context.vm.runToNative(context.allocator, mapped_pc, cie, fde); const row = &context.vm.current_row; @@ -1791,7 +1792,7 @@ pub const UnwindContext = struct { const pc = mem.readIntSliceNative(usize, try abi.regBytes(thread_context, abi.ipRegNum(), null)); const context_copy = try allocator.create(debug.ThreadContext); - debug.dupeContext(thread_context, context_copy); + debug.copyContext(thread_context, context_copy); return .{ .allocator = allocator, diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index 5b0e97cf4578..38a70e3dbd1f 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -17,7 +17,10 @@ pub const ExpressionContext = struct { /// The compilation unit this expression relates to, if any compile_unit: ?*const dwarf.CompileUnit = null, - // .debug_addr section + /// When evaluating a user-presented expression, this is the address of the object being evaluated + object_address: ?*const anyopaque = null, + + /// .debug_addr section debug_addr: ?[]const u8 = null, 
/// Thread context @@ -465,9 +468,11 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, } }, - OP.push_object_address, - OP.form_tls_address, - => { + OP.push_object_address => { + if (context.object_address == null) return error.IncompleteExpressionContext; + try self.stack.append(allocator, .{ .generic = @intFromPtr(context.object_address.?) }); + }, + OP.form_tls_address => { return error.UnimplementedExpressionOpcode; }, OP.call_frame_cfa => { @@ -1106,28 +1111,34 @@ test "DWARF expressions" { .reg_context = reg_context, }; - // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it + // Only test register operations on arch / os that have them implemented + if (abi.regBytes(&thread_context, 0, reg_context)) |_| { - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, 0, reg_context), 0xee); - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.fpRegNum(reg_context), reg_context), 1); - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.spRegNum(reg_context), reg_context), 2); - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.ipRegNum(), reg_context), 3); + // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it - try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100)); - try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200)); - try b.writeBregx(writer, abi.ipRegNum(), @as(usize, 300)); - try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); + mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, 0, reg_context), 0xee); + mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.fpRegNum(reg_context), reg_context), 1); + mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.spRegNum(reg_context), reg_context), 2); + mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.ipRegNum(), reg_context), 3); - _ = try 
stack_machine.run(program.items, allocator, context, 0); + try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100)); + try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200)); + try b.writeBregx(writer, abi.ipRegNum(), @as(usize, 300)); + try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); - const regval_type = stack_machine.stack.popOrNull().?.regval_type; - try testing.expectEqual(@as(usize, 400), regval_type.type_offset); - try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); - try testing.expectEqual(@as(usize, 0xee), regval_type.value); + _ = try stack_machine.run(program.items, allocator, context, 0); - try testing.expectEqual(@as(usize, 303), stack_machine.stack.popOrNull().?.generic); - try testing.expectEqual(@as(usize, 202), stack_machine.stack.popOrNull().?.generic); - try testing.expectEqual(@as(usize, 101), stack_machine.stack.popOrNull().?.generic); + const regval_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(@as(usize, 400), regval_type.type_offset); + try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); + try testing.expectEqual(@as(usize, 0xee), regval_type.value); + + try testing.expectEqual(@as(usize, 303), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 202), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 101), stack_machine.stack.popOrNull().?.generic); + } else |err| { + if (err != error.UnimplementedArch and err != error.UnimplementedOs) return err; + } } // Stack operations @@ -1242,7 +1253,14 @@ test "DWARF expressions" { try testing.expectEqual(@as(u8, 1), xderef_type.type_size); try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), xderef_type.value); - // TODO: Test OP.push_object_address + context.object_address = &deref_target; + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeOpcode(writer, OP.push_object_address); 
+ _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, @intFromPtr(context.object_address.?)), stack_machine.stack.popOrNull().?.generic); + // TODO: Test OP.form_tls_address context.cfa = @truncate(0xccddccdd_ccddccdd); @@ -1437,6 +1455,69 @@ test "DWARF expressions" { } + // Type conversions + { + var context = ExpressionContext{}; + stack_machine.reset(); + program.clearRetainingCapacity(); + + // TODO: Test typed OP.convert once implemented + + const value: usize = @truncate(0xffeeffee_ffeeffee); + var value_bytes: [options.addr_size]u8 = undefined; + mem.writeIntSliceNative(usize, &value_bytes, value); + + // Convert to generic type + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConstType(writer, @as(usize, 0), options.addr_size, &value_bytes); + try b.writeConvert(writer, @as(usize, 0)); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(value, stack_machine.stack.popOrNull().?.generic); + + // Reinterpret to generic type + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConstType(writer, @as(usize, 0), options.addr_size, &value_bytes); + try b.writeReinterpret(writer, @as(usize, 0)); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(value, stack_machine.stack.popOrNull().?.generic); + + // Reinterpret to new type + const die_offset: usize = 0xffee; + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConstType(writer, @as(usize, 0), options.addr_size, &value_bytes); + try b.writeReinterpret(writer, die_offset); + _ = try stack_machine.run(program.items, allocator, context, null); + const const_type = stack_machine.stack.popOrNull().?.const_type; + try testing.expectEqual(die_offset, const_type.type_offset); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 0); + try b.writeReinterpret(writer, 
die_offset); + _ = try stack_machine.run(program.items, allocator, context, null); + const regval_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(die_offset, regval_type.type_offset); + } + + // Special operations + { + var context = ExpressionContext{}; + stack_machine.reset(); + program.clearRetainingCapacity(); + + try b.writeOpcode(writer, OP.nop); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expect(stack_machine.stack.popOrNull() == null); + + + + + } + } From d226b74ae8a408ca6d363295e00fdc2876d77fb0 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 8 Jul 2023 02:52:42 -0400 Subject: [PATCH 55/81] dwarf: add ExpressionError to work around the compiler not being able to infer it dwarf: implement OP.entry_value, add tests --- lib/std/debug.zig | 1 - lib/std/dwarf.zig | 13 +- lib/std/dwarf/abi.zig | 13 +- lib/std/dwarf/expressions.zig | 221 ++++++++++++++++++++++++++-------- 4 files changed, 190 insertions(+), 58 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 1907fdc4a1e1..4c5b3f312142 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -628,7 +628,6 @@ pub const StackIterator = struct { // TODO: Unwind using __unwind_info, unreachable; - }, else => {}, } diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index c8208140311a..b284ac44437a 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1592,6 +1592,7 @@ pub const DwarfInfo = struct { entry_header.entry_bytes, -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), true, + entry_header.is_64, frame_section, entry_header.length_offset, @sizeOf(usize), @@ -1674,6 +1675,7 @@ pub const DwarfInfo = struct { } var expression_context = .{ + .is_64 = cie.is_64, .isValidMemory = context.isValidMemory, .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, .thread_context = context.thread_context, @@ -2042,6 +2044,7 @@ pub const ExceptionFrameHeader = struct { cie_entry_header.entry_bytes, 0, true, + 
cie_entry_header.is_64, .eh_frame, cie_entry_header.length_offset, @sizeOf(usize), @@ -2135,8 +2138,8 @@ pub const CommonInformationEntry = struct { // This is the key that FDEs use to reference CIEs. length_offset: u64, version: u8, - address_size: u8, + is_64: bool, // Only present in version 4 segment_selector_size: ?u8, @@ -2175,11 +2178,12 @@ pub const CommonInformationEntry = struct { /// of `pc_rel_offset` and `is_runtime`. /// /// `length_offset` specifies the offset of this CIE's length field in the - /// .eh_frame / .debug_framesection. + /// .eh_frame / .debug_frame section. pub fn parse( cie_bytes: []const u8, pc_rel_offset: i64, is_runtime: bool, + is_64: bool, dwarf_section: DwarfSection, length_offset: u64, addr_size_bytes: u8, @@ -2280,6 +2284,7 @@ pub const CommonInformationEntry = struct { .length_offset = length_offset, .version = version, .address_size = address_size, + .is_64 = is_64, .segment_selector_size = segment_selector_size, .code_alignment_factor = code_alignment_factor, .data_alignment_factor = data_alignment_factor, @@ -2316,8 +2321,8 @@ pub const FrameDescriptionEntry = struct { /// where the section is currently stored in memory, to where it *would* be /// stored at runtime: section runtime offset - backing section data base ptr. /// - /// Similarly, `is_runtime` specifies this function is being called on a runtime section, and so - /// indirect pointers can be followed. + /// Similarly, `is_runtime` specifies this function is being called on a runtime + /// section, and so indirect pointers can be followed. 
pub fn parse( fde_bytes: []const u8, pc_rel_offset: i64, diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index d005e4982708..f8a434dd7e4d 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -59,12 +59,23 @@ pub const RegisterContext = struct { is_macho: bool, }; +pub const AbiError = error{ + InvalidRegister, + UnimplementedArch, + UnimplementedOs, + ThreadContextNotSupported, +}; + /// Returns a slice containing the backing storage for `reg_number`. /// /// `reg_context` describes in what context the register number is used, as it can have different /// meanings depending on the DWARF container. It is only required when getting the stack or /// frame pointer register on some architectures. -pub fn regBytes(thread_context_ptr: anytype, reg_number: u8, reg_context: ?RegisterContext) !RegBytesReturnType(@TypeOf(thread_context_ptr)) { +pub fn regBytes( + thread_context_ptr: anytype, + reg_number: u8, + reg_context: ?RegisterContext, +) AbiError!RegBytesReturnType(@TypeOf(thread_context_ptr)) { if (builtin.os.tag == .windows) { return switch (builtin.cpu.arch) { .x86 => switch (reg_number) { diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index 38a70e3dbd1f..5708f12dfd53 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -11,6 +11,9 @@ const assert = std.debug.assert; /// Callers should specify all the fields relevant to their context. If a field is required /// by the expression and it isn't in the context, error.IncompleteExpressionContext is returned. 
pub const ExpressionContext = struct { + /// This expression is from a DWARF64 section + is_64: bool = false, + /// If specified, any addresses will pass through this function before being accessed isValidMemory: ?*const fn (address: usize) bool = null, @@ -29,6 +32,9 @@ pub const ExpressionContext = struct { /// Call frame address, if in a CFI context cfa: ?usize = null, + + /// This expression is a sub-expression from an OP.entry_value instruction + entry_value_context: bool = false, }; pub const ExpressionOptions = struct { @@ -42,6 +48,28 @@ pub const ExpressionOptions = struct { call_frame_context: bool = false, }; +pub const ExpressionError = error{ + UnimplementedExpressionCall, + UnimplementedOpcode, + UnimplementedUserOpcode, + UnimplementedTypedComparison, + UnimplementedTypeConversion, + + UnknownExpressionOpcode, + + IncompleteExpressionContext, + + InvalidCFAOpcode, + InvalidExpression, + InvalidFrameBase, + InvalidIntegralTypeSize, + InvalidRegister, + InvalidSubExpression, + InvalidTypeLength, + + TruncatedIntegralType, +} || abi.AbiError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero }; + /// A stack machine that can decode and run DWARF expressions. /// Expressions can be decoded for non-native address size and endianness, /// but can only be executed if the current target matches the configuration.
@@ -156,12 +184,14 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { } } - pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8) !?Operand { + pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8, context: ExpressionContext) !?Operand { const reader = stream.reader(); return switch (opcode) { - OP.addr, - OP.call_ref, - => generic(try reader.readInt(addr_type, options.endian)), + OP.addr => generic(try reader.readInt(addr_type, options.endian)), + OP.call_ref => if (context.is_64) + generic(try reader.readInt(u64, options.endian)) + else + generic(try reader.readInt(u32, options.endian)), OP.const1u, OP.pick, => generic(try reader.readByte()), @@ -267,7 +297,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { allocator: std.mem.Allocator, context: ExpressionContext, initial_value: ?usize, - ) !?Value { + ) ExpressionError!?Value { if (initial_value) |i| try self.stack.append(allocator, .{ .generic = i }); var stream = std.io.fixedBufferStream(expression); while (try self.step(&stream, allocator, context)) {} @@ -281,12 +311,12 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { stream: *std.io.FixedBufferStream([]const u8), allocator: std.mem.Allocator, context: ExpressionContext, - ) !bool { + ) ExpressionError!bool { if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian()) @compileError("Execution of non-native address sizes / endianness is not supported"); const opcode = try stream.reader().readByte(); - if (options.call_frame_context and !opcodeValidInCFA(opcode)) return error.InvalidCFAOpcode; + if (options.call_frame_context and !isOpcodeValidInCFA(opcode)) return error.InvalidCFAOpcode; switch (opcode) { // 2.5.1.1: Literal Encodings @@ -302,10 +332,10 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.const8s, OP.constu, OP.consts, - => try self.stack.append(allocator, .{ .generic = (try 
readOperand(stream, opcode)).?.generic }), + => try self.stack.append(allocator, .{ .generic = (try readOperand(stream, opcode, context)).?.generic }), OP.const_type => { - const const_type = (try readOperand(stream, opcode)).?.const_type; + const const_type = (try readOperand(stream, opcode, context)).?.const_type; try self.stack.append(allocator, .{ .const_type = .{ .type_offset = const_type.type_offset, .value_bytes = const_type.value_bytes, @@ -315,9 +345,9 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.addrx, OP.constx, => { - if (context.compile_unit == null) return error.ExpressionRequiresCompileUnit; - if (context.debug_addr == null) return error.ExpressionRequiresDebugAddr; - const debug_addr_index = (try readOperand(stream, opcode)).?.generic; + if (context.compile_unit == null) return error.IncompleteExpressionContext; + if (context.debug_addr == null) return error.IncompleteExpressionContext; + const debug_addr_index = (try readOperand(stream, opcode, context)).?.generic; const offset = context.compile_unit.?.addr_base + debug_addr_index; if (offset >= context.debug_addr.?.len) return error.InvalidExpression; const value = mem.readIntSliceNative(usize, context.debug_addr.?[offset..][0..@sizeOf(usize)]); @@ -326,10 +356,10 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // 2.5.1.2: Register Values OP.fbreg => { - if (context.compile_unit == null) return error.ExpressionRequiresCompileUnit; - if (context.compile_unit.?.frame_base == null) return error.ExpressionRequiresFrameBase; + if (context.compile_unit == null) return error.IncompleteExpressionContext; + if (context.compile_unit.?.frame_base == null) return error.IncompleteExpressionContext; - const offset: i64 = @intCast((try readOperand(stream, opcode)).?.generic); + const offset: i64 = @intCast((try readOperand(stream, opcode, context)).?.generic); _ = offset; switch (context.compile_unit.?.frame_base.?.*) { @@ -353,7 +383,7 @@ pub fn StackMachine(comptime 
options: ExpressionOptions) type { => { if (context.thread_context == null) return error.IncompleteExpressionContext; - const base_register = (try readOperand(stream, opcode)).?.base_register; + const base_register = (try readOperand(stream, opcode, context)).?.base_register; var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes( context.thread_context.?, base_register.base_register, @@ -363,7 +393,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { try self.stack.append(allocator, .{ .generic = @intCast(value) }); }, OP.regval_type => { - const register_type = (try readOperand(stream, opcode)).?.register_type; + const register_type = (try readOperand(stream, opcode, context)).?.register_type; const value = mem.readIntSliceNative(usize, try abi.regBytes( context.thread_context.?, register_type.register, @@ -387,7 +417,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { _ = self.stack.pop(); }, OP.pick, OP.over => { - const stack_index = if (opcode == OP.over) 1 else (try readOperand(stream, opcode)).?.generic; + const stack_index = if (opcode == OP.over) 1 else (try readOperand(stream, opcode, context)).?.generic; if (stack_index >= self.stack.items.len) return error.InvalidExpression; try self.stack.append(allocator, self.stack.items[self.stack.items.len - 1 - stack_index]); }, @@ -429,7 +459,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { if (context.isValidMemory) |isValidMemory| if (!isValidMemory(addr)) return error.InvalidExpression; - const operand = try readOperand(stream, opcode); + const operand = try readOperand(stream, opcode, context); const size = switch (opcode) { OP.deref, OP.xderef, @@ -469,11 +499,15 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { } }, OP.push_object_address => { - if (context.object_address == null) return error.IncompleteExpressionContext; - try self.stack.append(allocator, .{ .generic = @intFromPtr(context.object_address.?) 
}); + // In sub-expressions, `push_object_address` is not meaningful (as per the + // spec), so treat it like a nop + if (!context.entry_value_context) { + if (context.object_address == null) return error.IncompleteExpressionContext; + try self.stack.append(allocator, .{ .generic = @intFromPtr(context.object_address.?) }); + } }, OP.form_tls_address => { - return error.UnimplementedExpressionOpcode; + return error.UnimplementedOpcode; }, OP.call_frame_cfa => { if (context.cfa) |cfa| { @@ -559,7 +593,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, OP.plus_uconst => { if (self.stack.items.len == 0) return error.InvalidExpression; - const constant = (try readOperand(stream, opcode)).?.generic; + const constant = (try readOperand(stream, opcode, context)).?.generic; self.stack.items[self.stack.items.len - 1] = .{ .generic = try std.math.add(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), constant), }; @@ -628,7 +662,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { } }, OP.skip, OP.bra => { - const branch_offset = (try readOperand(stream, opcode)).?.branch_offset; + const branch_offset = (try readOperand(stream, opcode, context)).?.branch_offset; const condition = if (opcode == OP.bra) blk: { if (self.stack.items.len == 0) return error.InvalidExpression; break :blk try self.stack.pop().asIntegral() != 0; @@ -648,7 +682,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { OP.call4, OP.call_ref, => { - const debug_info_offset = (try readOperand(stream, opcode)).?.generic; + const debug_info_offset = (try readOperand(stream, opcode, context)).?.generic; _ = debug_info_offset; // TODO: Load a DIE entry at debug_info_offset in a .debug_info section (the spec says that it @@ -661,7 +695,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // 2.5.1.6: Type Conversions OP.convert => { if (self.stack.items.len == 0) return error.InvalidExpression; - const type_offset = (try 
readOperand(stream, opcode)).?.generic; + const type_offset = (try readOperand(stream, opcode, context)).?.generic; // TODO: Load the DW_TAG_base_type entries in context.compile_unit and verify both types are the same size const value = self.stack.items[self.stack.items.len - 1]; @@ -675,7 +709,7 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { }, OP.reinterpret => { if (self.stack.items.len == 0) return error.InvalidExpression; - const type_offset = (try readOperand(stream, opcode)).?.generic; + const type_offset = (try readOperand(stream, opcode, context)).?.generic; // TODO: Load the DW_TAG_base_type entries in context.compile_unit and verify both types are the same size const value = self.stack.items[self.stack.items.len - 1]; @@ -710,15 +744,29 @@ pub fn StackMachine(comptime options: ExpressionOptions) type { // 2.5.1.7: Special Operations OP.nop => {}, OP.entry_value => { - const block = (try readOperand(stream, opcode)).?.block; - _ = block; + const block = (try readOperand(stream, opcode, context)).?.block; + if (block.len == 0) return error.InvalidSubExpression; + + // TODO: The spec states that this sub-expression needs to observe the state (ie. registers) + // as it was upon entering the current subprogram. If this isn't being called at the + // end of a frame unwind operation, an additional ThreadContext with this state will be needed. - // TODO: If block is an expression, run it on a new stack. Push the resulting value onto this stack. - // TODO: If block is a register location, push the value that location had before running this program onto this stack. - // This implies capturing all register values before executing this block, in case this program modifies them. 
- // TODO: If the block contains, OP.push_object_address, treat it as OP.nop + if (isOpcodeRegisterLocation(block[0])) { + if (context.thread_context == null) return error.IncompleteExpressionContext; - return error.UnimplementedSubExpression; + var block_stream = std.io.fixedBufferStream(block); + const register = (try readOperand(&block_stream, block[0], context)).?.register; + const value = mem.readIntSliceNative(usize, try abi.regBytes(context.thread_context.?, register, context.reg_context)); + try self.stack.append(allocator, .{ .generic = value }); + } else { + var stack_machine: Self = .{}; + defer stack_machine.deinit(allocator); + + var sub_context = context; + sub_context.entry_value_context = true; + const result = try stack_machine.run(block, allocator, sub_context, null); + try self.stack.append(allocator, result orelse return error.InvalidSubExpression); + } }, // These have already been handled by readOperand @@ -745,7 +793,7 @@ pub fn Builder(comptime options: ExpressionOptions) type { return struct { /// Zero-operand instructions pub fn writeOpcode(writer: anytype, comptime opcode: u8) !void { - if (options.call_frame_context and !comptime opcodeValidInCFA(opcode)) return error.InvalidCFAOpcode; + if (options.call_frame_context and !comptime isOpcodeValidInCFA(opcode)) return error.InvalidCFAOpcode; switch (opcode) { OP.dup, OP.drop, @@ -778,6 +826,7 @@ pub fn Builder(comptime options: ExpressionOptions) type { OP.gt, OP.ne, OP.nop, + OP.stack_value, => try writer.writeByte(opcode), else => @compileError("This opcode requires operands, use `write()` instead"), } @@ -828,12 +877,12 @@ pub fn Builder(comptime options: ExpressionOptions) type { try leb.writeULEB128(writer, debug_addr_offset); } - pub fn writeConstType(writer: anytype, die_offset: anytype, size: u8, value_bytes: []const u8) !void { + pub fn writeConstType(writer: anytype, die_offset: anytype, value_bytes: []const u8) !void { if (options.call_frame_context) return 
error.InvalidCFAOpcode; - if (size != value_bytes.len) return error.InvalidValueSize; + if (value_bytes.len > 0xff) return error.InvalidTypeLength; try writer.writeByte(OP.const_type); try leb.writeULEB128(writer, die_offset); - try writer.writeByte(size); + try writer.writeByte(@intCast(value_bytes.len)); try writer.writeAll(value_bytes); } @@ -932,10 +981,10 @@ pub fn Builder(comptime options: ExpressionOptions) type { try writer.writeInt(T, offset, options.endian); } - pub fn writeCallRef(writer: anytype, debug_info_offset: addr_type) !void { + pub fn writeCallRef(writer: anytype, comptime is_64: bool, value: if (is_64) u64 else u32) !void { if (options.call_frame_context) return error.InvalidCFAOpcode; try writer.writeByte(OP.call_ref); - try writer.writeInt(addr_type, debug_info_offset, options.endian); + try writer.writeInt(if (is_64) u64 else u32, value, options.endian); } pub fn writeConvert(writer: anytype, die_offset: anytype) !void { @@ -959,13 +1008,29 @@ pub fn Builder(comptime options: ExpressionOptions) type { } // 2.6: Location Descriptions - // TODO + pub fn writeReg(writer: anytype, register: u8) !void { + try writer.writeByte(OP.reg0 + register); + } + + pub fn writeRegx(writer: anytype, register: anytype) !void { + try writer.writeByte(OP.regx); + try leb.writeULEB128(writer, register); + } + + pub fn writeImplicitValue(writer: anytype, value_bytes: []const u8) !void { + try writer.writeByte(OP.implicit_value); + try leb.writeULEB128(writer, value_bytes.len); + try writer.writeAll(value_bytes); + } + + // pub fn writeImplicitPointer(writer: anytype, ) void { + // } }; } // Certain opcodes are not allowed in a CFA context, see 6.4.2 -fn opcodeValidInCFA(opcode: u8) bool { +fn isOpcodeValidInCFA(opcode: u8) bool { return switch (opcode) { OP.addrx, OP.call2, @@ -984,6 +1049,13 @@ fn opcodeValidInCFA(opcode: u8) bool { }; } +fn isOpcodeRegisterLocation(opcode: u8) bool { + return switch (opcode) { + OP.reg0...OP.reg31, OP.regx => true, + else => 
false, + }; +} + const testing = std.testing; test "DWARF expressions" { const allocator = std.testing.allocator; @@ -1067,7 +1139,7 @@ test "DWARF expressions" { const die_offset: usize = @truncate(0xaabbccdd); const type_bytes: []const u8 = &.{ 1, 2, 3, 4 }; - try b.writeConstType(writer, die_offset, type_bytes.len, type_bytes); + try b.writeConstType(writer, die_offset, type_bytes); _ = try stack_machine.run(program.items, allocator, context, 0); @@ -1137,7 +1209,13 @@ test "DWARF expressions" { try testing.expectEqual(@as(usize, 202), stack_machine.stack.popOrNull().?.generic); try testing.expectEqual(@as(usize, 101), stack_machine.stack.popOrNull().?.generic); } else |err| { - if (err != error.UnimplementedArch and err != error.UnimplementedOs) return err; + switch (err) { + error.UnimplementedArch, + error.UnimplementedOs, + error.ThreadContextNotSupported, + => {}, + else => return err, + } } } @@ -1396,7 +1474,6 @@ test "DWARF expressions" { try testing.expectEqual(@as(usize, 0x0ff0), stack_machine.stack.popOrNull().?.generic); } - // Control Flow Operations { var context = ExpressionContext{}; @@ -1436,7 +1513,6 @@ test "DWARF expressions" { _ = try stack_machine.run(program.items, allocator, context, null); try testing.expectEqual(@as(usize, 2), stack_machine.stack.popOrNull().?.generic); - stack_machine.reset(); program.clearRetainingCapacity(); try b.writeLiteral(writer, 2); @@ -1470,7 +1546,7 @@ test "DWARF expressions" { // Convert to generic type stack_machine.reset(); program.clearRetainingCapacity(); - try b.writeConstType(writer, @as(usize, 0), options.addr_size, &value_bytes); + try b.writeConstType(writer, @as(usize, 0), &value_bytes); try b.writeConvert(writer, @as(usize, 0)); _ = try stack_machine.run(program.items, allocator, context, null); try testing.expectEqual(value, stack_machine.stack.popOrNull().?.generic); @@ -1478,7 +1554,7 @@ test "DWARF expressions" { // Reinterpret to generic type stack_machine.reset(); 
program.clearRetainingCapacity(); - try b.writeConstType(writer, @as(usize, 0), options.addr_size, &value_bytes); + try b.writeConstType(writer, @as(usize, 0), &value_bytes); try b.writeReinterpret(writer, @as(usize, 0)); _ = try stack_machine.run(program.items, allocator, context, null); try testing.expectEqual(value, stack_machine.stack.popOrNull().?.generic); @@ -1488,7 +1564,7 @@ test "DWARF expressions" { stack_machine.reset(); program.clearRetainingCapacity(); - try b.writeConstType(writer, @as(usize, 0), options.addr_size, &value_bytes); + try b.writeConstType(writer, @as(usize, 0), &value_bytes); try b.writeReinterpret(writer, die_offset); _ = try stack_machine.run(program.items, allocator, context, null); const const_type = stack_machine.stack.popOrNull().?.const_type; @@ -1506,18 +1582,59 @@ test "DWARF expressions" { // Special operations { var context = ExpressionContext{}; + stack_machine.reset(); program.clearRetainingCapacity(); - try b.writeOpcode(writer, OP.nop); _ = try stack_machine.run(program.items, allocator, context, null); try testing.expect(stack_machine.stack.popOrNull() == null); + // Sub-expression + { + var sub_program = std.ArrayList(u8).init(allocator); + defer sub_program.deinit(); + const sub_writer = sub_program.writer(); + try b.writeLiteral(sub_writer, 3); + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeEntryValue(writer, sub_program.items); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 3), stack_machine.stack.popOrNull().?.generic); + } + // Register location description + const reg_context = abi.RegisterContext{ + .eh_frame = true, + .is_macho = builtin.os.tag == .macos, + }; + var thread_context: std.debug.ThreadContext = undefined; + context = ExpressionContext{ + .thread_context = &thread_context, + .reg_context = reg_context, + }; - } + if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| { + mem.writeIntSliceNative(usize, 
reg_bytes, 0xee); + var sub_program = std.ArrayList(u8).init(allocator); + defer sub_program.deinit(); + const sub_writer = sub_program.writer(); + try b.writeReg(sub_writer, 0); + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeEntryValue(writer, sub_program.items); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0xee), stack_machine.stack.popOrNull().?.generic); + } else |err| { + switch (err) { + error.UnimplementedArch, + error.UnimplementedOs, + error.ThreadContextNotSupported, + => {}, + else => return err, + } + } + } } - From 94354aa6aa16274070e49cc261778f1924432ecc Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 8 Jul 2023 16:39:38 -0400 Subject: [PATCH 56/81] macho: add unwindFrame which can unwind stack frames using the __unwind_info section dwarf: fixup missing error --- lib/std/debug.zig | 53 +-- lib/std/dwarf.zig | 16 +- lib/std/dwarf/abi.zig | 47 ++- lib/std/macho.zig | 312 ++++++++++++++++++ test/standalone/dwarf_unwinding/build.zig | 2 + .../standalone/dwarf_unwinding/zig_unwind.zig | 19 ++ 6 files changed, 409 insertions(+), 40 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4c5b3f312142..9c22ee7f1284 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -623,11 +623,15 @@ pub const StackIterator = struct { const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); switch (native_os) { .macos, .ios, .watchos, .tvos => { - const o_file_info = try module.getOFileInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc); - if (o_file_info.unwind_info == null) return error.MissingUnwindInfo; - - // TODO: Unwind using __unwind_info, - unreachable; + // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding + // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
+ if (module.unwind_info) |unwind_info| { + if (macho.unwindFrame(&self.dwarf_context, unwind_info, module.base_address)) |return_address| { + return return_address; + } else |err| { + if (err != error.RequiresDWARFUnwind) return err; + } + } else return error.MissingUnwindInfo; }, else => {}, } @@ -1236,7 +1240,16 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .ncmds = hdr.ncmds, .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; + var unwind_info: ?[]const u8 = null; const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + for (cmd.getSections()) |sect| { + if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); + break; + } + } + }, .SYMTAB => break cmd.cast(macho.symtab_command).?, else => {}, } else return error.MissingDebugInfo; @@ -1346,6 +1359,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .ofiles = ModuleDebugInfo.OFileTable.init(allocator), .symbols = symbols, .strings = strings, + .unwind_info = unwind_info, }; } @@ -1886,12 +1900,13 @@ pub const ModuleDebugInfo = switch (native_os) { symbols: []const MachoSymbol, strings: [:0]const u8, ofiles: OFileTable, + // Backed by mapped_memory + unwind_info: ?[]const u8, const OFileTable = std.StringHashMap(OFileInfo); const OFileInfo = struct { di: DW.DwarfInfo, addr_table: std.StringHashMap(u64), - unwind_info: ?[]const u8, }; fn deinit(self: *@This(), allocator: mem.Allocator) void { @@ -1949,24 +1964,21 @@ pub const ModuleDebugInfo = switch (native_os) { addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); } - var unwind_info: ?[]const u8 = null; var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; for (segcmd.?.getSections()) |sect| { - if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) { - 
unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); - } else if (std.mem.eql(u8, "__DWARF", sect.segName())) { - var section_index: ?usize = null; - inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; - } - if (section_index == null) continue; + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] = .{ - .data = section_bytes, - .owned = false, - }; + var section_index: ?usize = null; + inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; } + if (section_index == null) continue; + + const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index.?] = .{ + .data = section_bytes, + .owned = false, + }; } const missing_debug_info = @@ -1986,7 +1998,6 @@ pub const ModuleDebugInfo = switch (native_os) { var info = OFileInfo{ .di = di, .addr_table = addr_table, - .unwind_info = unwind_info, }; // Add the debug info to the cache diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index b284ac44437a..e51b883a999d 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1641,7 +1641,6 @@ pub const DwarfInfo = struct { // instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly // as pointers will be decoded relative to the already-mapped .eh_frame.
var mapped_pc: usize = undefined; - if (di.eh_frame_hdr) |header| { const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; mapped_pc = context.pc; @@ -1657,16 +1656,12 @@ pub const DwarfInfo = struct { mapped_pc = context.pc - module_base_address; const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct { pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { - if (pc < mid_item.pc_begin) { - return .lt; - } else { - const range_end = mid_item.pc_begin + mid_item.pc_range; - if (pc < range_end) { - return .eq; - } + if (pc < mid_item.pc_begin) return .lt; - return .gt; - } + const range_end = mid_item.pc_begin + mid_item.pc_range; + if (pc < range_end) return .eq; + + return .gt; } }.compareFn); @@ -2000,6 +1995,7 @@ pub const ExceptionFrameHeader = struct { } } + if (len == 0) return badDwarf(); try stream.seekTo(left * entry_size); // Read past the pc_begin field of the entry diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index f8a434dd7e4d..0857732e9bc0 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -45,15 +45,6 @@ pub fn spRegNum(reg_context: RegisterContext) u8 { }; } -fn RegBytesReturnType(comptime ContextPtrType: type) type { - const info = @typeInfo(ContextPtrType); - if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) { - @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType))); - } - - return if (info.Pointer.is_const) return []const u8 else []u8; -} - pub const RegisterContext = struct { eh_frame: bool, is_macho: bool, @@ -63,9 +54,47 @@ pub const AbiError = error{ InvalidRegister, UnimplementedArch, UnimplementedOs, + RegisterContextRequired, ThreadContextNotSupported, }; +fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type { + const reg_bytes_type = comptime RegBytesReturnType(ContextPtrType); + const info = 
@typeInfo(reg_bytes_type).Pointer; + return @Type(.{ + .Pointer = .{ + .size = .One, + .is_const = info.is_const, + .is_volatile = info.is_volatile, + .is_allowzero = info.is_allowzero, + .alignment = info.alignment, + .address_space = info.address_space, + .child = T, + .sentinel = null, + }, + }); +} + +pub fn regValueNative( + comptime T: type, + thread_context_ptr: anytype, + reg_number: u8, + reg_context: ?RegisterContext, +) !RegValueReturnType(@TypeOf(thread_context_ptr), T) { + const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); + if (@sizeOf(T) != reg_bytes.len) return error.IncompatibleRegisterSize; + return mem.bytesAsValue(T, reg_bytes[0..@sizeOf(T)]); +} + +fn RegBytesReturnType(comptime ContextPtrType: type) type { + const info = @typeInfo(ContextPtrType); + if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) { + @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType))); + } + + return if (info.Pointer.is_const) return []const u8 else []u8; +} + /// Returns a slice containing the backing storage for `reg_number`. 
/// /// `reg_context` describes in what context the register number is used, as it can have different diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 1b886e2d903a..03cb02e3e829 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -2064,3 +2064,315 @@ pub const UNWIND_ARM64_FRAME_D14_D15_PAIR: u32 = 0x00000800; pub const UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK: u32 = 0x00FFF000; pub const UNWIND_ARM64_DWARF_SECTION_OFFSET: u32 = 0x00FFFFFF; + +pub const CompactUnwindEncoding = packed struct(u32) { + value: packed union { + x86_64: packed union { + frame: packed struct(u24) { + reg4: u3, + reg3: u3, + reg2: u3, + reg1: u3, + reg0: u3, + unused: u1 = 0, + frame_offset: u8, + }, + frameless: packed struct(u24) { + stack_reg_permutation: u10, + stack_reg_count: u3, + stack_adjust: u3, + stack_size: u8, + }, + dwarf: u24, + }, + arm64: packed union { + frame: packed struct(u24) { + x_reg_pairs: packed struct { + x19_x20: u1, + x21_x22: u1, + x23_x24: u1, + x25_x26: u1, + x27_x28: u1, + }, + d_reg_pairs: packed struct { + d8_d9: u1, + d10_d11: u1, + d12_d13: u1, + d14_d15: u1, + }, + unused: u15, + }, + frameless: packed struct(u24) { + unused: u12 = 0, + stack_size: u12, + }, + dwarf: u24, + }, + }, + mode: packed union { + x86_64: UNWIND_X86_64_MODE, + arm64: UNWIND_ARM64_MODE, + }, + personality_index: u2, + has_lsda: u1, + start: u1, +}; + +/// Returns the DWARF register number for an x86_64 register number found in compact unwind info +fn dwarfRegNumber(unwind_reg_number: u3) !u8 { + return switch (unwind_reg_number) { + 1 => 3, // RBX + 2 => 12, // R12 + 3 => 13, // R13 + 4 => 14, // R14 + 5 => 15, // R15 + 6 => 6, // RBP + else => error.InvalidUnwindRegisterNumber, + }; +} + +const dwarf = std.dwarf; +const abi = dwarf.abi; + +pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, module_base_address: usize) !usize { + const header = mem.bytesAsValue( + unwind_info_section_header, + 
unwind_info[0..@sizeOf(unwind_info_section_header)], + ); + const indices = mem.bytesAsSlice( + unwind_info_section_header_index_entry, + unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(unwind_info_section_header_index_entry)], + ); + if (indices.len == 0) return error.MissingUnwindInfo; + + const mapped_pc = context.pc - module_base_address; + const second_level_index = blk: { + var left: usize = 0; + var len: usize = indices.len; + + while (len > 1) { + const mid = left + len / 2; + const offset = indices[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + // Last index is a sentinel containing the highest address as its functionOffset + if (len == 0 or indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; + break :blk &indices[left]; + }; + + const common_encodings = mem.bytesAsSlice( + compact_unwind_encoding_t, + unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(compact_unwind_encoding_t)], + ); + + const start_offset = second_level_index.secondLevelPagesSectionOffset; + const kind = mem.bytesAsValue( + UNWIND_SECOND_LEVEL, + unwind_info[start_offset..][0..@sizeOf(UNWIND_SECOND_LEVEL)], + ); + const raw_encoding = switch (kind.*) { + .REGULAR => blk: { + const page_header = mem.bytesAsValue( + unwind_info_regular_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(unwind_info_regular_second_level_page_header)], + ); + + const entries = mem.bytesAsSlice( + unwind_info_regular_second_level_entry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(unwind_info_regular_second_level_entry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = entries[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + if (len == 0) return error.InvalidUnwindInfo; + break :blk entries[left].encoding; + }, + .COMPRESSED => blk: { + const page_header = mem.bytesAsValue( + unwind_info_compressed_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(unwind_info_compressed_second_level_page_header)], + ); + + const entries = mem.bytesAsSlice( + UnwindInfoCompressedEntry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(UnwindInfoCompressedEntry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = second_level_index.functionOffset + entries[mid].funcOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + if (len == 0) return error.InvalidUnwindInfo; + const entry = entries[left]; + if (entry.encodingIndex < header.commonEncodingsArrayCount) { + if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; + break :blk common_encodings[entry.encodingIndex]; + } else { + const local_index = try std.math.sub( + u8, + entry.encodingIndex, + std.math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, + ); + const local_encodings = mem.bytesAsSlice( + compact_unwind_encoding_t, + unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. 
page_header.encodingsCount * @sizeOf(compact_unwind_encoding_t)], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :blk local_encodings[local_index]; + } + }, + else => return error.InvalidUnwindInfo, + }; + + if (raw_encoding == 0) return error.NoUnwindInfo; + const reg_context = dwarf.abi.RegisterContext{ + .eh_frame = false, + .is_macho = true, + }; + + const encoding: CompactUnwindEncoding = @bitCast(raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => blk: { + const regs: [5]u3 = .{ + encoding.value.x86_64.frame.reg0, + encoding.value.x86_64.frame.reg1, + encoding.value.x86_64.frame.reg2, + encoding.value.x86_64.frame.reg3, + encoding.value.x86_64.frame.reg4, + }; + + const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); + var max_reg: usize = 0; + inline for (regs, 0..) |reg, i| { + if (reg > 0) max_reg = i; + } + + const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + // Verify the stack range we're about to read register values from is valid + if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo; + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + for (regs, 0..) 
|reg, i| { + if (reg == 0) continue; + const addr = fp - frame_offset + i * @sizeOf(usize); + const reg_number = try dwarfRegNumber(reg); + (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :blk new_ip; + }, + .STACK_IMMD => blk: { + const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; + + // Decode Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = encoding.value.x86_64.frameless.stack_reg_count; + const ip_ptr = if (reg_count > 0) reg_blk: { + var digits: [6]u3 = undefined; + var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; + var registers: [reg_numbers.len]u3 = undefined; + var used_indices = [_]bool{false} ** reg_numbers.len; + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) 
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + + registers[i] = reg_numbers[unused_index]; + used_indices[unused_index] = true; + } + + var reg_addr = sp + @as(usize, (encoding.value.x86_64.frameless.stack_size - reg_count - 1)) * @sizeOf(usize); + if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo; + for (0..reg_count) |i| { + const reg_number = try dwarfRegNumber(registers[i]); + (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :reg_blk reg_addr; + } else sp + @as(usize, (encoding.value.x86_64.frameless.stack_size - 1)) * @sizeOf(usize); + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo; + + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + break :blk new_ip; + }, + .STACK_IND => { + return error.UnimplementedUnwindEncoding; // TODO + }, + .DWARF => return error.RequiresDWARFUnwind, + }, + .aarch64 => switch (encoding.mode.x86_64) { + .DWARF => return error.RequiresDWARFUnwind, + else => return error.UnimplementedUnwindEncoding, + }, + else => return error.UnimplementedArch, + }; + + context.pc = new_ip; + if (context.pc > 0) context.pc -= 1; + return new_ip; +} diff --git a/test/standalone/dwarf_unwinding/build.zig b/test/standalone/dwarf_unwinding/build.zig index c59effda9fde..885207f068a1 100644 --- a/test/standalone/dwarf_unwinding/build.zig +++ b/test/standalone/dwarf_unwinding/build.zig @@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + if (target.isDarwin()) exe.unwind_tables = true; exe.omit_frame_pointer = true; const 
run_cmd = b.addRunArtifact(exe); @@ -43,6 +44,7 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + if (target.isDarwin()) exe.unwind_tables = true; exe.omit_frame_pointer = true; exe.linkLibrary(c_shared_lib); diff --git a/test/standalone/dwarf_unwinding/zig_unwind.zig b/test/standalone/dwarf_unwinding/zig_unwind.zig index 3b13de24a1de..d82bdaa7db10 100644 --- a/test/standalone/dwarf_unwinding/zig_unwind.zig +++ b/test/standalone/dwarf_unwinding/zig_unwind.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const debug = std.debug; const testing = std.testing; @@ -18,6 +19,24 @@ noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { } noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { + if (builtin.os.tag == .macos) { + // Excercise different __unwind_info encodings by forcing some registers to be restored + switch (builtin.cpu.arch) { + .x86_64 => { + asm volatile ( + \\movq $3, %%rbx + \\movq $12, %%r12 + \\movq $13, %%r13 + \\movq $14, %%r14 + \\movq $15, %%r15 + \\movq $6, %%rbp + ::: "rbx", "r12", "r13", "r14", "r15", "rbp"); + }, + .aarch64 => {}, + else => {}, + } + } + expected[1] = @returnAddress(); frame3(expected, unwound); } From 203d96ae97682703f39d7d7ea4e45f971a3a6043 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 9 Jul 2023 14:45:34 -0400 Subject: [PATCH 57/81] debug: add relocateContext dwarf: fixup tests that used a ThreadContext --- lib/std/debug.zig | 96 ++++++++++--------- lib/std/dwarf/expressions.zig | 13 ++- .../standalone/dwarf_unwinding/zig_unwind.zig | 15 ++- 3 files changed, 69 insertions(+), 55 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 9c22ee7f1284..26ba36784a24 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -133,6 +133,12 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { } } +pub const have_ucontext = @hasDecl(os.system, "ucontext_t") and + (builtin.os.tag != .linux or switch (builtin.cpu.arch) { + .mips, 
.mipsel, .mips64, .mips64el, .riscv64 => false, + else => true, +}); + /// Platform-specific thread state. This contains register state, and on some platforms /// information about the stack. This is not safe to trivially copy, because some platforms /// use internal pointers within this structure. To make a copy, use `copyContext`. @@ -146,6 +152,47 @@ pub const ThreadContext = blk: { } }; +/// Copies one context to another, updating any internal pointers +pub fn copyContext(source: *const ThreadContext, dest: *ThreadContext) void { + if (!have_ucontext) return {}; + dest.* = source.*; + relocateContext(dest); +} + +/// Updates any internal points in the context to reflect its current location +pub fn relocateContext(context: *ThreadContext) void { + return switch (native_os) { + .macos => { + context.mcontext = &context.__mcontext_data; + }, + else => {}, + }; +} + +pub const have_getcontext = @hasDecl(os.system, "getcontext") and + (builtin.os.tag != .linux or switch (builtin.cpu.arch) { + .x86, .x86_64 => true, + else => false, +}); + +/// Capture the current context. The register values in the context will reflect the +/// state after the platform `getcontext` function returned. +/// +/// It is valid to call this if the platform doesn't have context capturing support, +/// in that case false will be returned. +pub inline fn getContext(context: *ThreadContext) bool { + if (native_os == .windows) { + context.* = std.mem.zeroes(windows.CONTEXT); + windows.ntdll.RtlCaptureContext(context); + return true; + } + + const result = have_getcontext and os.system.getcontext(context) == 0; + if (native_os == .macos) assert(context.mcsize == @sizeOf(std.c.mcontext_t)); + + return result; +} + /// Tries to print the stack trace starting from the supplied base pointer to stderr, /// unbuffered, and ignores any error returned. 
/// TODO multithreaded awareness @@ -428,51 +475,6 @@ pub fn writeStackTrace( } } -pub const have_getcontext = @hasDecl(os.system, "getcontext") and - (builtin.os.tag != .linux or switch (builtin.cpu.arch) { - .x86, .x86_64 => true, - else => false, -}); - -pub const have_ucontext = @hasDecl(os.system, "ucontext_t") and - (builtin.os.tag != .linux or switch (builtin.cpu.arch) { - .mips, .mipsel, .mips64, .mips64el, .riscv64 => false, - else => true, -}); - -pub inline fn getContext(context: *ThreadContext) bool { - if (native_os == .windows) { - context.* = std.mem.zeroes(windows.CONTEXT); - windows.ntdll.RtlCaptureContext(context); - return true; - } - - const result = have_getcontext and os.system.getcontext(context) == 0; - if (native_os == .macos) { - // TODO: Temp, to discover this size via aarch64 CI - if (context.mcsize != @sizeOf(std.c.mcontext_t)) { - print("context.mcsize does not match! {} vs {}\n", .{ context.mcsize, @sizeOf(std.c.mcontext_t) }); - } - - assert(context.mcsize == @sizeOf(std.c.mcontext_t)); - } - - return result; -} - -pub fn copyContext(source: *const ThreadContext, dest: *ThreadContext) void { - if (native_os == .windows) dest.* = source.*; - if (!have_ucontext) return {}; - - return switch (native_os) { - .macos => { - dest.* = source.*; - dest.mcontext = &dest.__mcontext_data; - }, - else => dest.* = source.*, - }; -} - pub const UnwindError = if (have_ucontext) @typeInfo(@typeInfo(@TypeOf(StackIterator.next_unwind)).Fn.return_type.?).ErrorUnion.error_set else @@ -855,7 +857,7 @@ fn printUnknownSource(debug_info: *DebugInfo, out_stream: anytype, address: usiz pub fn printUnwindError(debug_info: *DebugInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); - try out_stream.print("Unwind information for {s} was not available ({}), trace may be incomplete\n\n", .{ module_name, 
err }); + try out_stream.print("Unwind information for `{s}` was not available ({}), trace may be incomplete\n\n", .{ module_name, err }); try tty_config.setColor(out_stream, .reset); } @@ -1641,7 +1643,7 @@ pub const DebugInfo = struct { const seg_start = segment_cmd.vmaddr; const seg_end = seg_start + segment_cmd.vmsize; if (original_address >= seg_start and original_address < seg_end) { - return mem.sliceTo(std.c._dyld_get_image_name(i), 0); + return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); } }, else => {}, diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index 5708f12dfd53..a395c95a89af 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -48,6 +48,7 @@ pub const ExpressionOptions = struct { call_frame_context: bool = false, }; +// Explcitly defined to support executing sub-expressions pub const ExpressionError = error{ UnimplementedExpressionCall, UnimplementedOpcode, @@ -1178,20 +1179,21 @@ test "DWARF expressions" { .is_macho = builtin.os.tag == .macos, }; var thread_context: std.debug.ThreadContext = undefined; + std.debug.relocateContext(&thread_context); const context = ExpressionContext{ .thread_context = &thread_context, .reg_context = reg_context, }; // Only test register operations on arch / os that have them implemented - if (abi.regBytes(&thread_context, 0, reg_context)) |_| { + if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| { // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, 0, reg_context), 0xee); - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.fpRegNum(reg_context), reg_context), 1); - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.spRegNum(reg_context), reg_context), 2); - mem.writeIntSliceNative(usize, try abi.regBytes(&thread_context, abi.ipRegNum(), reg_context), 3); + mem.writeIntSliceNative(usize, 
reg_bytes, 0xee); + (try abi.regValueNative(usize, &thread_context, abi.fpRegNum(reg_context), reg_context)).* = 1; + (try abi.regValueNative(usize, &thread_context, abi.spRegNum(reg_context), reg_context)).* = 2; + (try abi.regValueNative(usize, &thread_context, abi.ipRegNum(), reg_context)).* = 3; try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100)); try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200)); @@ -1609,6 +1611,7 @@ test "DWARF expressions" { .is_macho = builtin.os.tag == .macos, }; var thread_context: std.debug.ThreadContext = undefined; + std.debug.relocateContext(&thread_context); context = ExpressionContext{ .thread_context = &thread_context, .reg_context = reg_context, diff --git a/test/standalone/dwarf_unwinding/zig_unwind.zig b/test/standalone/dwarf_unwinding/zig_unwind.zig index d82bdaa7db10..9ef1b5719754 100644 --- a/test/standalone/dwarf_unwinding/zig_unwind.zig +++ b/test/standalone/dwarf_unwinding/zig_unwind.zig @@ -19,9 +19,19 @@ noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { } noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { - if (builtin.os.tag == .macos) { - // Excercise different __unwind_info encodings by forcing some registers to be restored + // Excercise different __unwind_info / DWARF CFI encodings by forcing some registers to be restored + if (builtin.target.ofmt != .c) { switch (builtin.cpu.arch) { + .x86 => { + asm volatile ( + \\movl $3, %%ebx + \\movl $1, %%ecx + \\movl $2, %%edx + \\movl $7, %%edi + \\movl $6, %%esi + \\movl $5, %%ebp + ::: "ebx", "ecx", "edx", "edi", "esi", "ebp"); + }, .x86_64 => { asm volatile ( \\movq $3, %%rbx @@ -32,7 +42,6 @@ noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { \\movq $6, %%rbp ::: "rbx", "r12", "r13", "r14", "r15", "rbp"); }, - .aarch64 => {}, else => {}, } } From 5dfb159e15dc7c66118d47a06536d61f65522bb9 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 9 Jul 2023 19:32:29 -0400 Subject: [PATCH 58/81] 
macho: add aarch64 implementation to unwindFrame dwarf: map the V registers in abi.regBytes test: add test case that exercises the stack-indirect __unwind_info mode in x86_64 --- lib/std/debug.zig | 4 +- lib/std/dwarf/abi.zig | 2 + lib/std/macho.zig | 133 +++++++++++++++--- .../standalone/dwarf_unwinding/zig_unwind.zig | 6 + 4 files changed, 120 insertions(+), 25 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 26ba36784a24..caab6005721c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -517,9 +517,7 @@ pub const StackIterator = struct { } pub fn deinit(self: *StackIterator) void { - if (have_ucontext and self.debug_info != null) { - self.dwarf_context.deinit(); - } + if (have_ucontext and self.debug_info != null) self.dwarf_context.deinit(); } pub fn getLastError(self: *StackIterator) ?struct { diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 0857732e9bc0..34d9d3734fe8 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -312,6 +312,8 @@ pub fn regBytes( 30 => mem.asBytes(&ucontext_ptr.mcontext.ss.lr), 31 => mem.asBytes(&ucontext_ptr.mcontext.ss.sp), 32 => mem.asBytes(&ucontext_ptr.mcontext.ss.pc), + // V0-V31 + 64...95 => mem.asBytes(&ucontext_ptr.mcontext.ns.q[reg_number - 64]), else => error.InvalidRegister, }, .netbsd => switch (reg_number) { diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 03cb02e3e829..b5d58704eba5 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -2080,30 +2080,38 @@ pub const CompactUnwindEncoding = packed struct(u32) { frameless: packed struct(u24) { stack_reg_permutation: u10, stack_reg_count: u3, - stack_adjust: u3, - stack_size: u8, + stack: packed union { + direct: packed struct(u11) { + _: u3, + stack_size: u8, + }, + indirect: packed struct(u11) { + stack_adjust: u3, + sub_offset: u8, + }, + }, }, dwarf: u24, }, arm64: packed union { frame: packed struct(u24) { - x_reg_pairs: packed struct { + x_reg_pairs: packed struct(u5) { x19_x20: u1, x21_x22: u1, x23_x24: 
u1, x25_x26: u1, x27_x28: u1, }, - d_reg_pairs: packed struct { + d_reg_pairs: packed struct(u4) { d8_d9: u1, d10_d11: u1, d12_d13: u1, d14_d15: u1, }, - unused: u15, + _: u15, }, frameless: packed struct(u24) { - unused: u12 = 0, + _: u12 = 0, stack_size: u12, }, dwarf: u24, @@ -2177,7 +2185,11 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul UNWIND_SECOND_LEVEL, unwind_info[start_offset..][0..@sizeOf(UNWIND_SECOND_LEVEL)], ); - const raw_encoding = switch (kind.*) { + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { .REGULAR => blk: { const page_header = mem.bytesAsValue( unwind_info_regular_second_level_page_header, @@ -2205,7 +2217,10 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul } if (len == 0) return error.InvalidUnwindInfo; - break :blk entries[left].encoding; + break :blk .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; }, .COMPRESSED => blk: { const page_header = mem.bytesAsValue( @@ -2235,9 +2250,13 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul if (len == 0) return error.InvalidUnwindInfo; const entry = entries[left]; + const function_offset = second_level_index.functionOffset + entry.funcOffset; if (entry.encodingIndex < header.commonEncodingsArrayCount) { if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; - break :blk common_encodings[entry.encodingIndex]; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; } else { const local_index = try std.math.sub( u8, @@ -2249,19 +2268,22 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. 
page_header.encodingsCount * @sizeOf(compact_unwind_encoding_t)], ); if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :blk local_encodings[local_index]; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; } }, else => return error.InvalidUnwindInfo, }; - if (raw_encoding == 0) return error.NoUnwindInfo; + if (entry.raw_encoding == 0) return error.NoUnwindInfo; const reg_context = dwarf.abi.RegisterContext{ .eh_frame = false, .is_macho = true, }; - const encoding: CompactUnwindEncoding = @bitCast(raw_encoding); + const encoding: CompactUnwindEncoding = @bitCast(entry.raw_encoding); const new_ip = switch (builtin.cpu.arch) { .x86_64 => switch (encoding.mode.x86_64) { .OLD => return error.UnimplementedUnwindEncoding, @@ -2283,7 +2305,7 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; const new_sp = fp + 2 * @sizeOf(usize); - // Verify the stack range we're about to read register values from is valid + // Verify the stack range we're about to read register values from if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo; const ip_ptr = fp + @sizeOf(usize); @@ -2303,10 +2325,26 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul break :blk new_ip; }, - .STACK_IMMD => blk: { + .STACK_IMMD, + .STACK_IND, + => blk: { const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; + const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) + @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) + else stack_size: { + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
+ const sub_offset_addr = + module_base_address + + entry.function_offset + + encoding.value.x86_64.frameless.stack.indirect.sub_offset; + if (!context.isValidMemory(sub_offset_addr)) return error.InvalidUnwindInfo; + + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); + }; - // Decode Lehmer-coded sequence of registers. + // Decode the Lehmer-coded sequence of registers. // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h // Decode the variable-based permutation number into its digits. Each digit represents @@ -2340,7 +2378,7 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul used_indices[unused_index] = true; } - var reg_addr = sp + @as(usize, (encoding.value.x86_64.frameless.stack_size - reg_count - 1)) * @sizeOf(usize); + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo; for (0..reg_count) |i| { const reg_number = try dwarfRegNumber(registers[i]); @@ -2349,7 +2387,7 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul } break :reg_blk reg_addr; - } else sp + @as(usize, (encoding.value.x86_64.frameless.stack_size - 1)) * @sizeOf(usize); + } else sp + stack_size - @sizeOf(usize); const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; const new_sp = ip_ptr + @sizeOf(usize); @@ -2360,14 +2398,65 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul break :blk new_ip; }, - .STACK_IND => { - return error.UnimplementedUnwindEncoding; // TODO - }, .DWARF => return error.RequiresDWARFUnwind, }, - .aarch64 => switch (encoding.mode.x86_64) { + .aarch64 => switch (encoding.mode.arm64) { + .OLD => 
return error.UnimplementedUnwindEncoding, + .FRAMELESS => blk: { + const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try abi.regValueNative(usize, context.thread_context, 30, reg_context)).*; + if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo; + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + break :blk new_ip; + }, .DWARF => return error.RequiresDWARFUnwind, - else => return error.UnimplementedUnwindEncoding, + .FRAME => { + const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 16; + const ip_ptr = fp + @sizeOf(usize); + + const num_restored_pairs: usize = + @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + + @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); + const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); + + if (!context.isValidMemory(new_sp) or !context.isValidMemory(min_reg_addr)) return error.InvalidUnwindInfo; + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { + if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { + (try abi.regValueNative(usize, context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try abi.regValueNative(usize, context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) 
|field, i| { + if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + @memcpy( + try abi.regBytes(context.thread_context, 64 + 8 + i, context.reg_context), + mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + @memcpy( + try abi.regBytes(context.thread_context, 64 + 9 + i, context.reg_context), + mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + return error.UnimplementedUnwindEncoding; + }, }, else => return error.UnimplementedArch, }; diff --git a/test/standalone/dwarf_unwinding/zig_unwind.zig b/test/standalone/dwarf_unwinding/zig_unwind.zig index 9ef1b5719754..2d7e098eff47 100644 --- a/test/standalone/dwarf_unwinding/zig_unwind.zig +++ b/test/standalone/dwarf_unwinding/zig_unwind.zig @@ -52,6 +52,12 @@ noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { noinline fn frame1(expected: *[4]usize, unwound: *[4]usize) void { expected[2] = @returnAddress(); + + // Use a stack frame that is too big to encode in __unwind_info's stack-immediate encoding + // to exercise the stack-indirect encoding path + var pad: [std.math.maxInt(u8) * @sizeOf(usize) + 1]u8 = undefined; + _ = pad; + frame2(expected, unwound); } From 891fa3b8b54428ad8065de8660869cd38876e429 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 10 Jul 2023 18:43:19 -0400 Subject: [PATCH 59/81] debug: fix initialization of the optional fields on StackIterator dwarf: documentation fixups target: enable unwind tables on macho --- lib/std/debug.zig | 80 ++++++++++++++++++----------------- 
lib/std/dwarf.zig | 6 +-- lib/std/dwarf/abi.zig | 9 ++-- lib/std/dwarf/expressions.zig | 5 +-- lib/std/os/linux.zig | 2 +- src/target.zig | 2 +- 6 files changed, 53 insertions(+), 51 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index caab6005721c..e2f890b97b62 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -159,7 +159,7 @@ pub fn copyContext(source: *const ThreadContext, dest: *ThreadContext) void { relocateContext(dest); } -/// Updates any internal points in the context to reflect its current location +/// Updates any internal pointers in the context to reflect its current location pub fn relocateContext(context: *ThreadContext) void { return switch (native_os) { .macos => { @@ -176,7 +176,7 @@ pub const have_getcontext = @hasDecl(os.system, "getcontext") and }); /// Capture the current context. The register values in the context will reflect the -/// state after the platform `getcontext` function returned. +/// state after the platform `getcontext` function returns. /// /// It is valid to call this if the platform doesn't have context capturing support, /// in that case false will be returned. @@ -229,7 +229,7 @@ pub fn dumpStackTraceFromBase(context: *const ThreadContext) void { var it = StackIterator.initWithContext(null, debug_info, context) catch return; defer it.deinit(); - printSourceAtAddress(debug_info, stderr, it.dwarf_context.pc, tty_config) catch return; + printSourceAtAddress(debug_info, stderr, it.unwind_state.?.dwarf_context.pc, tty_config) catch return; while (it.next()) |return_address| { if (it.getLastError()) |unwind_error| @@ -487,11 +487,13 @@ pub const StackIterator = struct { fp: usize, // When DebugInfo and a register context is available, this iterator can unwind - // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer). 
- debug_info: ?*DebugInfo, - dwarf_context: if (have_ucontext) DW.UnwindContext else void = undefined, - last_error: if (have_ucontext) ?UnwindError else void = undefined, - last_error_address: if (have_ucontext) usize else void = undefined, + // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer), + // using DWARF and MachO unwind info. + unwind_state: if (have_ucontext) ?struct { + debug_info: *DebugInfo, + dwarf_context: DW.UnwindContext, + last_error: ?UnwindError = null, + } else void = if (have_ucontext) null else {}, pub fn init(first_address: ?usize, fp: ?usize) StackIterator { if (native_arch == .sparc64) { @@ -504,32 +506,33 @@ pub const StackIterator = struct { return StackIterator{ .first_address = first_address, .fp = fp orelse @frameAddress(), - .debug_info = null, }; } pub fn initWithContext(first_address: ?usize, debug_info: *DebugInfo, context: *const os.ucontext_t) !StackIterator { var iterator = init(first_address, null); - iterator.debug_info = debug_info; - iterator.dwarf_context = try DW.UnwindContext.init(debug_info.allocator, context, &isValidMemory); - iterator.last_error = null; + iterator.unwind_state = .{ + .debug_info = debug_info, + .dwarf_context = try DW.UnwindContext.init(debug_info.allocator, context, &isValidMemory), + }; + return iterator; } pub fn deinit(self: *StackIterator) void { - if (have_ucontext and self.debug_info != null) self.dwarf_context.deinit(); + if (have_ucontext and self.unwind_state != null) self.unwind_state.?.dwarf_context.deinit(); } pub fn getLastError(self: *StackIterator) ?struct { - address: usize, err: UnwindError, + address: usize, } { - if (have_ucontext) { - if (self.last_error) |err| { - self.last_error = null; + if (!have_ucontext) return null; + if (self.unwind_state) |*unwind_state| { + if (unwind_state.last_error) |err| { return .{ - .address = self.last_error_address, .err = err, + .address = unwind_state.dwarf_context.pc, }; } } @@ -620,13 +623,14 @@ pub const 
StackIterator = struct { } fn next_unwind(self: *StackIterator) !usize { - const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); + const unwind_state = &self.unwind_state.?; + const module = try unwind_state.debug_info.getModuleForAddress(unwind_state.dwarf_context.pc); switch (native_os) { .macos, .ios, .watchos, .tvos => { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. if (module.unwind_info) |unwind_info| { - if (macho.unwindFrame(&self.dwarf_context, unwind_info, module.base_address)) |return_address| { + if (macho.unwindFrame(&unwind_state.dwarf_context, unwind_info, module.base_address)) |return_address| { return return_address; } else |err| { if (err != error.RequiresDWARFUnwind) return err; @@ -636,23 +640,25 @@ pub const StackIterator = struct { else => {}, } - if (try module.getDwarfInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc)) |di| { - return di.unwindFrame(&self.dwarf_context, module.base_address); + if (try module.getDwarfInfoForAddress(unwind_state.debug_info.allocator, unwind_state.dwarf_context.pc)) |di| { + return di.unwindFrame(&unwind_state.dwarf_context, module.base_address); } else return error.MissingDebugInfo; } fn next_internal(self: *StackIterator) ?usize { - if (have_ucontext and self.debug_info != null) { - if (self.dwarf_context.pc == 0) return null; - if (self.next_unwind()) |return_address| { - return return_address; - } else |err| { - self.last_error = err; - self.last_error_address = self.dwarf_context.pc; - - // Fall back to fp unwinding on the first failure, as the register context won't have been updated - self.fp = self.dwarf_context.getFp() catch 0; - self.debug_info = null; + if (have_ucontext) { + if (self.unwind_state) |*unwind_state| { + if (unwind_state.dwarf_context.pc == 0) return null; + if (unwind_state.last_error == null) { + 
if (self.next_unwind()) |return_address| { + return return_address; + } else |err| { + unwind_state.last_error = err; + + // Fall back to fp-based unwinding on the first failure + self.fp = unwind_state.dwarf_context.getFp() catch 0; + } + } } } @@ -862,16 +868,12 @@ pub fn printUnwindError(debug_info: *DebugInfo, out_stream: anytype, address: us pub fn printSourceAtAddress(debug_info: *DebugInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), - else => { - return err; - }, + else => return err, }; const symbol_info = module.getSymbolAtAddress(debug_info.allocator, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), - else => { - return err; - }, + else => return err, }; defer symbol_info.deinit(debug_info.allocator); diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index e51b883a999d..67ea342cbfeb 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1639,7 +1639,7 @@ pub const DwarfInfo = struct { // In order to support reading .eh_frame from the ELF file (vs using the already-mapped section), // scanAllUnwindInfo has already mapped any pc-relative offsets such that they we be relative to zero // instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly - // as pointers will be decoded relative to the alreayd-mapped .eh_frame. + // as pointers will be decoded relative to the already-mapped .eh_frame. 
var mapped_pc: usize = undefined; if (di.eh_frame_hdr) |header| { const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; @@ -1766,8 +1766,8 @@ pub const DwarfInfo = struct { mem.writeIntSliceNative(usize, try abi.regBytes(context.thread_context, abi.spRegNum(context.reg_context), context.reg_context), context.cfa.?); // The call instruction will have pushed the address of the instruction that follows the call as the return address - // However, this return address may be past the end of the function if the caller was `noreturn`. - // TODO: Check this on non-x86_64 + // However, this return address may be past the end of the function if the caller was `noreturn`. By subtracting one, + // then `context.pc` will always point to an instruction within the FDE for the previous function. const return_address = context.pc; if (context.pc > 0) context.pc -= 1; diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 34d9d3734fe8..9b13db8535bd 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -26,7 +26,7 @@ pub fn ipRegNum() u8 { pub fn fpRegNum(reg_context: RegisterContext) u8 { return switch (builtin.cpu.arch) { - // GCC on OS X did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO + // GCC on OS X historicaly did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO .x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5, .x86_64 => 6, .arm => 11, @@ -75,6 +75,7 @@ fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type { }); } +/// Returns a pointer to a register stored in a ThreadContext, preserving the pointer attributes of the context. pub fn regValueNative( comptime T: type, thread_context_ptr: anytype, @@ -343,9 +344,11 @@ pub fn regBytes( /// Returns the ABI-defined default value this register has in the unwinding table /// before running any of the CIE instructions. 
The DWARF spec defines these values -// to be undefined, but allows ABI authors to override that default. +/// to be undefined, but allows ABI authors to override that default. pub fn getRegDefaultValue(reg_number: u8, out: []u8) void { - // TODO: Implement any ABI-specific rules for the default value for registers + + // Implement any ABI-specific rules here + _ = reg_number; @memset(out, undefined); } diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index a395c95a89af..708aa224f604 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -14,7 +14,7 @@ pub const ExpressionContext = struct { /// This expression is from a DWARF64 section is_64: bool = false, - /// If specified, any addresses will pass through this function before being + /// If specified, any addresses will pass through this function before being acccessed isValidMemory: ?*const fn (address: usize) bool = null, /// The compilation unit this expression relates to, if any @@ -1024,9 +1024,6 @@ pub fn Builder(comptime options: ExpressionOptions) type { try writer.writeAll(value_bytes); } - // pub fn writeImplicitPointer(writer: anytype, ) void { - // } - }; } diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 4f5cffc75303..bbcf649f5dd9 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -4695,7 +4695,7 @@ else /// processes. RTPRIO, - /// Maximum CPU time in µs that a process scheduled under a real-time + /// Maximum CPU time in µs that a process scheduled under a real-time /// scheduling policy may consume without making a blocking system /// call before being forcibly descheduled. 
RTTIME, diff --git a/src/target.zig b/src/target.zig index f07dcc43d21e..a7af9aef2231 100644 --- a/src/target.zig +++ b/src/target.zig @@ -510,7 +510,7 @@ pub fn clangAssemblerSupportsMcpuArg(target: std.Target) bool { } pub fn needUnwindTables(target: std.Target) bool { - return target.os.tag == .windows; + return target.os.tag == .windows or target.ofmt == .macho; } pub fn defaultAddressSpace( From e5aa2bb2246e79f47c39c281d51cc9b5a6d89d04 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 10 Jul 2023 20:18:58 -0400 Subject: [PATCH 60/81] debug: fixup last_error being printed too many times --- lib/std/debug.zig | 25 ++++++++++++++++--------- lib/std/dwarf/expressions.zig | 1 - 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index e2f890b97b62..e5be444c72f2 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -493,6 +493,7 @@ pub const StackIterator = struct { debug_info: *DebugInfo, dwarf_context: DW.UnwindContext, last_error: ?UnwindError = null, + failed: bool = false, } else void = if (have_ucontext) null else {}, pub fn init(first_address: ?usize, fp: ?usize) StackIterator { @@ -530,6 +531,7 @@ pub const StackIterator = struct { if (!have_ucontext) return null; if (self.unwind_state) |*unwind_state| { if (unwind_state.last_error) |err| { + unwind_state.last_error = null; return .{ .err = err, .address = unwind_state.dwarf_context.pc, @@ -648,15 +650,20 @@ pub const StackIterator = struct { fn next_internal(self: *StackIterator) ?usize { if (have_ucontext) { if (self.unwind_state) |*unwind_state| { - if (unwind_state.dwarf_context.pc == 0) return null; - if (unwind_state.last_error == null) { - if (self.next_unwind()) |return_address| { - return return_address; - } else |err| { - unwind_state.last_error = err; - - // Fall back to fp-based unwinding on the first failure - self.fp = unwind_state.dwarf_context.getFp() catch 0; + if (!unwind_state.failed) { + if (unwind_state.dwarf_context.pc == 0) return 
null; + if (unwind_state.last_error == null) { + if (self.next_unwind()) |return_address| { + return return_address; + } else |err| { + unwind_state.last_error = err; + unwind_state.failed = true; + + // Fall back to fp-based unwinding on the first failure. + // We can't attempt it for other modules later in the + // stack because the full register state won't be unwound. + self.fp = unwind_state.dwarf_context.getFp() catch 0; + } } } } diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig index 708aa224f604..88291eab0b8a 100644 --- a/lib/std/dwarf/expressions.zig +++ b/lib/std/dwarf/expressions.zig @@ -1023,7 +1023,6 @@ pub fn Builder(comptime options: ExpressionOptions) type { try leb.writeULEB128(writer, value_bytes.len); try writer.writeAll(value_bytes); } - }; } From 9b25bee42c9c86d47ff21580a0ee58e8e14d3989 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 11 Jul 2023 01:06:41 -0400 Subject: [PATCH 61/81] debug: fixup have_getcontext --- lib/std/debug.zig | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index e5be444c72f2..421c6f863e5c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -171,8 +171,10 @@ pub fn relocateContext(context: *ThreadContext) void { pub const have_getcontext = @hasDecl(os.system, "getcontext") and (builtin.os.tag != .linux or switch (builtin.cpu.arch) { - .x86, .x86_64 => true, - else => false, + .x86, + .x86_64, + => true, + else => builtin.link_libc and !builtin.target.isMusl(), }); /// Capture the current context. 
The register values in the context will reflect the @@ -652,18 +654,16 @@ pub const StackIterator = struct { if (self.unwind_state) |*unwind_state| { if (!unwind_state.failed) { if (unwind_state.dwarf_context.pc == 0) return null; - if (unwind_state.last_error == null) { - if (self.next_unwind()) |return_address| { - return return_address; - } else |err| { - unwind_state.last_error = err; - unwind_state.failed = true; - - // Fall back to fp-based unwinding on the first failure. - // We can't attempt it for other modules later in the - // stack because the full register state won't be unwound. - self.fp = unwind_state.dwarf_context.getFp() catch 0; - } + if (self.next_unwind()) |return_address| { + return return_address; + } else |err| { + unwind_state.last_error = err; + unwind_state.failed = true; + + // Fall back to fp-based unwinding on the first failure. + // We can't attempt it for other modules later in the + // stack because the full register state won't be unwound. + self.fp = unwind_state.dwarf_context.getFp() catch 0; } } } From b18031335a13d8f356c1764ebae83b5f93be8fa3 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 12 Jul 2023 02:02:21 -0400 Subject: [PATCH 62/81] dwarf: use cie.return_address_register instead of assuming it's in the IP register --- lib/std/debug.zig | 2 +- lib/std/dwarf.zig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 421c6f863e5c..6083939bde16 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -868,7 +868,7 @@ fn printUnknownSource(debug_info: *DebugInfo, out_stream: anytype, address: usiz pub fn printUnwindError(debug_info: *DebugInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); - try out_stream.print("Unwind information for `{s}` was not available ({}), trace may be incomplete\n\n", .{ 
module_name, err }); + try out_stream.print("Unwind information for `{s}:{}` was not available ({}), trace may be incomplete\n\n", .{ module_name, address, err }); try tty_config.setColor(out_stream, .reset); } diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 67ea342cbfeb..393973741374 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1758,7 +1758,7 @@ pub const DwarfInfo = struct { } if (has_next_ip) { - context.pc = mem.readIntSliceNative(usize, try abi.regBytes(context.thread_context, comptime abi.ipRegNum(), context.reg_context)); + context.pc = mem.readIntSliceNative(usize, try abi.regBytes(context.thread_context, cie.return_address_register, context.reg_context)); } else { context.pc = 0; } From 06bf2e048b24dcba28bcb7b00236b3cdb2504cc4 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 12 Jul 2023 18:41:53 -0400 Subject: [PATCH 63/81] tests: use a more portable way of determining the return address in the test code --- test/standalone/dwarf_unwinding/shared_lib.c | 6 ++---- .../dwarf_unwinding/shared_lib_unwind.zig | 17 +++++++++-------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/test/standalone/dwarf_unwinding/shared_lib.c b/test/standalone/dwarf_unwinding/shared_lib.c index 2329fedda9b6..c3170f2dc0f5 100644 --- a/test/standalone/dwarf_unwinding/shared_lib.c +++ b/test/standalone/dwarf_unwinding/shared_lib.c @@ -8,17 +8,15 @@ __attribute__((noinline)) void frame1( void** expected, void** unwound, void (*frame2)(void** expected, void** unwound)) { - expected[2] = &&frame_2_ret; + expected[3] = __builtin_extract_return_addr(__builtin_return_address(0)); frame2(expected, unwound); - frame_2_ret: } LIB_API void frame0( void** expected, void** unwound, void (*frame2)(void** expected, void** unwound)) { - expected[3] = &&frame_1_ret; + expected[4] = __builtin_extract_return_addr(__builtin_return_address(0)); frame1(expected, unwound, frame2); - frame_1_ret: } diff --git a/test/standalone/dwarf_unwinding/shared_lib_unwind.zig 
b/test/standalone/dwarf_unwinding/shared_lib_unwind.zig index 543654d24f82..22f008174429 100644 --- a/test/standalone/dwarf_unwinding/shared_lib_unwind.zig +++ b/test/standalone/dwarf_unwinding/shared_lib_unwind.zig @@ -2,7 +2,7 @@ const std = @import("std"); const debug = std.debug; const testing = std.testing; -noinline fn frame4(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame4(expected: *[5]usize, unwound: *[5]usize) void { expected[0] = @returnAddress(); var context: debug.ThreadContext = undefined; @@ -17,26 +17,27 @@ noinline fn frame4(expected: *[4]usize, unwound: *[4]usize) void { } } -noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame3(expected: *[5]usize, unwound: *[5]usize) void { expected[1] = @returnAddress(); frame4(expected, unwound); } -fn frame2(expected: *[4]usize, unwound: *[4]usize) callconv(.C) void { +fn frame2(expected: *[5]usize, unwound: *[5]usize) callconv(.C) void { + expected[2] = @returnAddress(); frame3(expected, unwound); } extern fn frame0( - expected: *[4]usize, - unwound: *[4]usize, - frame_2: *const fn (expected: *[4]usize, unwound: *[4]usize) callconv(.C) void, + expected: *[5]usize, + unwound: *[5]usize, + frame_2: *const fn (expected: *[5]usize, unwound: *[5]usize) callconv(.C) void, ) void; pub fn main() !void { if (!std.debug.have_ucontext or !std.debug.have_getcontext) return; - var expected: [4]usize = undefined; - var unwound: [4]usize = undefined; + var expected: [5]usize = undefined; + var unwound: [5]usize = undefined; frame0(&expected, &unwound, &frame2); try testing.expectEqual(expected, unwound); } From 9549b4acf67ec7dfcebf5373c11cdc7af3d41aae Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 12 Jul 2023 21:45:26 -0400 Subject: [PATCH 64/81] debug: fixup an inconsistency in the getcontext implementation on aarch64-macos --- lib/std/debug.zig | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 
6083939bde16..13dd03d16dcf 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -190,7 +190,15 @@ pub inline fn getContext(context: *ThreadContext) bool { } const result = have_getcontext and os.system.getcontext(context) == 0; - if (native_os == .macos) assert(context.mcsize == @sizeOf(std.c.mcontext_t)); + if (native_os == .macos) { + assert(context.mcsize == @sizeOf(std.c.mcontext_t)); + + // On aarch64-macos, the system getcontext doesn't write anything into the pc + // register slot, it only writes lr. This makes the context consistent with + // other aarch64 getcontext implementations which write the current lr + // (where getcontext will return to) into both the lr and pc slot of the context. + if (native_arch == .aarch64) context.mcontext.ss.pc = context.mcontext.ss.lr; + } return result; } From 7d8b4234774200ff071103399613ed444280a8d0 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 13 Jul 2023 00:02:35 -0400 Subject: [PATCH 65/81] macho: remove unnecessary checks --- lib/std/macho.zig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index b5d58704eba5..0433c8d692e7 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -2171,7 +2171,7 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul } // Last index is a sentinel containing the highest address as its functionOffset - if (len == 0 or indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; + if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; break :blk &indices[left]; }; @@ -2216,7 +2216,6 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul } } - if (len == 0) return error.InvalidUnwindInfo; break :blk .{ .function_offset = entries[left].functionOffset, .raw_encoding = entries[left].encoding, @@ -2248,7 +2247,6 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul } } - if (len == 0) 
return error.InvalidUnwindInfo; const entry = entries[left]; const function_offset = second_level_index.functionOffset + entry.funcOffset; if (entry.encodingIndex < header.commonEncodingsArrayCount) { From ec96095efd8671ae280df15eaf73f63bf029fbfa Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 13 Jul 2023 01:14:31 -0400 Subject: [PATCH 66/81] compilation: pass omit_frame_pointer through to builtin.zig Renamed dwarf_unwinding -> stack_iterator to better reflect that it's not just DWARF unwinding. Added a test for unwinding with a frame pointer. --- src/Compilation.zig | 2 + src/target.zig | 2 +- test/standalone.zig | 4 +- .../build.zig | 22 ++++++-- .../shared_lib.c | 0 .../shared_lib_unwind.zig | 0 .../zig_unwind.zig | 52 +++++++++++++------ 7 files changed, 60 insertions(+), 22 deletions(-) rename test/standalone/{dwarf_unwinding => stack_iterator}/build.zig (70%) rename test/standalone/{dwarf_unwinding => stack_iterator}/shared_lib.c (100%) rename test/standalone/{dwarf_unwinding => stack_iterator}/shared_lib_unwind.zig (100%) rename test/standalone/{dwarf_unwinding => stack_iterator}/zig_unwind.zig (55%) diff --git a/src/Compilation.zig b/src/Compilation.zig index 383b60a66dc6..f6abfae00f75 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -5288,6 +5288,7 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: Allocator) Alloca \\pub const position_independent_executable = {}; \\pub const strip_debug_info = {}; \\pub const code_model = std.builtin.CodeModel.{}; + \\pub const omit_frame_pointer = {}; \\ , .{ std.zig.fmtId(@tagName(target.ofmt)), @@ -5301,6 +5302,7 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: Allocator) Alloca comp.bin_file.options.pie, comp.bin_file.options.strip, std.zig.fmtId(@tagName(comp.bin_file.options.machine_code_model)), + comp.bin_file.options.omit_frame_pointer, }); if (target.os.tag == .wasi) { diff --git a/src/target.zig b/src/target.zig index a7af9aef2231..030cad6bdc7e 100644 --- 
a/src/target.zig +++ b/src/target.zig @@ -510,7 +510,7 @@ pub fn clangAssemblerSupportsMcpuArg(target: std.Target) bool { } pub fn needUnwindTables(target: std.Target) bool { - return target.os.tag == .windows or target.ofmt == .macho; + return target.os.tag == .windows or target.isDarwin(); } pub fn defaultAddressSpace( diff --git a/test/standalone.zig b/test/standalone.zig index c9277e26a872..f52705f05fe4 100644 --- a/test/standalone.zig +++ b/test/standalone.zig @@ -231,8 +231,8 @@ pub const build_cases = [_]BuildCase{ .import = @import("standalone/zerolength_check/build.zig"), }, .{ - .build_root = "test/standalone/dwarf_unwinding", - .import = @import("standalone/dwarf_unwinding/build.zig"), + .build_root = "test/standalone/stack_iterator", + .import = @import("standalone/stack_iterator/build.zig"), }, }; diff --git a/test/standalone/dwarf_unwinding/build.zig b/test/standalone/stack_iterator/build.zig similarity index 70% rename from test/standalone/dwarf_unwinding/build.zig rename to test/standalone/stack_iterator/build.zig index 885207f068a1..6a999aecffc9 100644 --- a/test/standalone/dwarf_unwinding/build.zig +++ b/test/standalone/stack_iterator/build.zig @@ -7,10 +7,26 @@ pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - // Test unwinding pure zig code (no libc) + // Unwinding pure zig code, with a frame pointer { const exe = b.addExecutable(.{ - .name = "zig_unwind", + .name = "zig_unwind_fp", + .root_source_file = .{ .path = "zig_unwind.zig" }, + .target = target, + .optimize = optimize, + }); + + if (target.isDarwin()) exe.unwind_tables = true; + exe.omit_frame_pointer = false; + + const run_cmd = b.addRunArtifact(exe); + test_step.dependOn(&run_cmd.step); + } + + // Unwinding pure zig code, without a frame pointer + { + const exe = b.addExecutable(.{ + .name = "zig_unwind_nofp", .root_source_file = .{ .path = "zig_unwind.zig" }, .target = target, .optimize = optimize, @@ 
-23,7 +39,7 @@ pub fn build(b: *std.Build) void { test_step.dependOn(&run_cmd.step); } - // Test unwinding through a C shared library + // Unwinding through a C shared library without a frame pointer (libc) { const c_shared_lib = b.addSharedLibrary(.{ .name = "c_shared_lib", diff --git a/test/standalone/dwarf_unwinding/shared_lib.c b/test/standalone/stack_iterator/shared_lib.c similarity index 100% rename from test/standalone/dwarf_unwinding/shared_lib.c rename to test/standalone/stack_iterator/shared_lib.c diff --git a/test/standalone/dwarf_unwinding/shared_lib_unwind.zig b/test/standalone/stack_iterator/shared_lib_unwind.zig similarity index 100% rename from test/standalone/dwarf_unwinding/shared_lib_unwind.zig rename to test/standalone/stack_iterator/shared_lib_unwind.zig diff --git a/test/standalone/dwarf_unwinding/zig_unwind.zig b/test/standalone/stack_iterator/zig_unwind.zig similarity index 55% rename from test/standalone/dwarf_unwinding/zig_unwind.zig rename to test/standalone/stack_iterator/zig_unwind.zig index 2d7e098eff47..5421ac052fd5 100644 --- a/test/standalone/dwarf_unwinding/zig_unwind.zig +++ b/test/standalone/stack_iterator/zig_unwind.zig @@ -23,24 +23,44 @@ noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { if (builtin.target.ofmt != .c) { switch (builtin.cpu.arch) { .x86 => { - asm volatile ( - \\movl $3, %%ebx - \\movl $1, %%ecx - \\movl $2, %%edx - \\movl $7, %%edi - \\movl $6, %%esi - \\movl $5, %%ebp - ::: "ebx", "ecx", "edx", "edi", "esi", "ebp"); + if (builtin.omit_frame_pointer) { + asm volatile ( + \\movl $3, %%ebx + \\movl $1, %%ecx + \\movl $2, %%edx + \\movl $7, %%edi + \\movl $6, %%esi + \\movl $5, %%ebp + ::: "ebx", "ecx", "edx", "edi", "esi", "ebp"); + } else { + asm volatile ( + \\movl $3, %%ebx + \\movl $1, %%ecx + \\movl $2, %%edx + \\movl $7, %%edi + \\movl $6, %%esi + ::: "ebx", "ecx", "edx", "edi", "esi"); + } }, .x86_64 => { - asm volatile ( - \\movq $3, %%rbx - \\movq $12, %%r12 - \\movq $13, %%r13 - \\movq 
$14, %%r14 - \\movq $15, %%r15 - \\movq $6, %%rbp - ::: "rbx", "r12", "r13", "r14", "r15", "rbp"); + if (builtin.omit_frame_pointer) { + asm volatile ( + \\movq $3, %%rbx + \\movq $12, %%r12 + \\movq $13, %%r13 + \\movq $14, %%r14 + \\movq $15, %%r15 + \\movq $6, %%rbp + ::: "rbx", "r12", "r13", "r14", "r15", "rbp"); + } else { + asm volatile ( + \\movq $3, %%rbx + \\movq $12, %%r12 + \\movq $13, %%r13 + \\movq $14, %%r14 + \\movq $15, %%r15 + ::: "rbx", "r12", "r13", "r14", "r15"); + } }, else => {}, } From ba813d00f57542483cac108223c7b2c4353c14e0 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Fri, 14 Jul 2023 02:50:43 -0400 Subject: [PATCH 67/81] dwarf: add abi.stripInstructionPtrAuthCode --- lib/std/dwarf.zig | 10 +++++++--- lib/std/dwarf/abi.zig | 28 ++++++++++++++++++++++++++-- lib/std/macho.zig | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 393973741374..f9b36f2aa2ad 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1758,12 +1758,16 @@ pub const DwarfInfo = struct { } if (has_next_ip) { - context.pc = mem.readIntSliceNative(usize, try abi.regBytes(context.thread_context, cie.return_address_register, context.reg_context)); + context.pc = abi.stripInstructionPtrAuthCode(mem.readIntSliceNative(usize, try abi.regBytes( + context.thread_context, + cie.return_address_register, + context.reg_context, + ))); } else { context.pc = 0; } - mem.writeIntSliceNative(usize, try abi.regBytes(context.thread_context, abi.spRegNum(context.reg_context), context.reg_context), context.cfa.?); + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; // The call instruction will have pushed the address of the instruction that follows the call as the return address // However, this return address may be past the end of the function if the caller was `noreturn`. 
By subtracting one, @@ -1786,7 +1790,7 @@ pub const UnwindContext = struct { stack_machine: expressions.StackMachine(.{ .call_frame_context = true }) = .{}, pub fn init(allocator: mem.Allocator, thread_context: *const debug.ThreadContext, isValidMemory: *const fn (address: usize) bool) !UnwindContext { - const pc = mem.readIntSliceNative(usize, try abi.regBytes(thread_context, abi.ipRegNum(), null)); + const pc = abi.stripInstructionPtrAuthCode((try abi.regValueNative(usize, thread_context, abi.ipRegNum(), null)).*); const context_copy = try allocator.create(debug.ThreadContext); debug.copyContext(thread_context, context_copy); diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index 9b13db8535bd..d56ae2733d06 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -45,6 +45,27 @@ pub fn spRegNum(reg_context: RegisterContext) u8 { }; } +/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. +/// This function clears these signature bits to make the pointer usable. 
+pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (builtin.cpu.arch == .aarch64) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : "x16" + ); + } + + return ptr; +} + pub const RegisterContext = struct { eh_frame: bool, is_macho: bool, @@ -160,7 +181,6 @@ pub fn regBytes( if (!std.debug.have_ucontext) return error.ThreadContextNotSupported; const ucontext_ptr = thread_context_ptr; - var m = &ucontext_ptr.mcontext; return switch (builtin.cpu.arch) { .x86 => switch (builtin.os.tag) { .linux, .netbsd, .solaris => switch (reg_number) { @@ -216,7 +236,7 @@ pub fn regBytes( 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R14]), 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.R15]), 16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[os.REG.RIP]), - 17...32 => |i| mem.asBytes(&m.fpregs.xmm[i - 17]), + 17...32 => |i| mem.asBytes(&ucontext_ptr.mcontext.fpregs.xmm[i - 17]), else => error.InvalidRegister, }, .freebsd => switch (reg_number) { @@ -313,6 +333,10 @@ pub fn regBytes( 30 => mem.asBytes(&ucontext_ptr.mcontext.ss.lr), 31 => mem.asBytes(&ucontext_ptr.mcontext.ss.sp), 32 => mem.asBytes(&ucontext_ptr.mcontext.ss.pc), + + // TODO: Find storage for this state + //34 => mem.asBytes(&ucontext_ptr.ra_sign_state), + // V0-V31 64...95 => mem.asBytes(&ucontext_ptr.mcontext.ns.q[reg_number - 64]), else => error.InvalidRegister, diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 0433c8d692e7..dbf49b5f3d92 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -2459,7 +2459,7 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul else => return error.UnimplementedArch, }; - context.pc = new_ip; + context.pc = dwarf.abi.stripInstructionPtrAuthCode(new_ip); if 
(context.pc > 0) context.pc -= 1; return new_ip; } From 5e399d97d7b8ed3fc4c5b3664acfa7c79e72136c Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sat, 15 Jul 2023 01:17:44 -0400 Subject: [PATCH 68/81] use eh_frame from the mapped binary if available --- lib/std/debug.zig | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 13dd03d16dcf..10480713135d 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1258,12 +1258,20 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { .SEGMENT_64 => { for (cmd.getSections()) |sect| { - if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); - break; + if (std.mem.eql(u8, "__TEXT", sect.segName())) { + if (mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); + continue; + } + + if (mem.eql(u8, "__eh_frame", sect.sectName())) { + eh_frame = try chopSlice(mapped_mem, sect.offset, sect.size); + continue; + } } } }, @@ -1377,6 +1385,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .symbols = symbols, .strings = strings, .unwind_info = unwind_info, + .eh_frame = eh_frame, }; } @@ -1919,6 +1928,7 @@ pub const ModuleDebugInfo = switch (native_os) { ofiles: OFileTable, // Backed by mapped_memory unwind_info: ?[]const u8, + eh_frame: ?[]const u8, const OFileTable = std.StringHashMap(OFileInfo); const OFileInfo = struct { @@ -1982,6 +1992,11 @@ pub const ModuleDebugInfo = switch (native_os) { } var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; + if (self.eh_frame) |eh_frame| 
sections[@intFromEnum(DW.DwarfSection.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + for (segcmd.?.getSections()) |sect| { if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; @@ -1989,7 +2004,7 @@ pub const ModuleDebugInfo = switch (native_os) { inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; } - if (section_index == null) continue; + if (section_index == null or sections[section_index.?] != null) continue; const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); sections[section_index.?] = .{ @@ -2011,6 +2026,8 @@ pub const ModuleDebugInfo = switch (native_os) { .is_macho = true, }; + // TODO: Don't actually need to scan unwind info in this case, since __unwind_info points us to the entries + try DW.openDwarfDebugInfo(&di, allocator, mapped_mem); var info = OFileInfo{ .di = di, From 618b0eb3d3ac5ecb84acb4296c591f53ba9c4298 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 16 Jul 2023 02:00:17 -0400 Subject: [PATCH 69/81] dwarf: fixup integer overflow in readEhPointer debug: handle the possibility of eh_frame / debug_frame being mapped in memory or loaded from disk --- lib/std/coff.zig | 8 +++---- lib/std/debug.zig | 37 ++++++++++++++++++------------ lib/std/dwarf.zig | 58 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 66 insertions(+), 37 deletions(-) diff --git a/lib/std/coff.zig b/lib/std/coff.zig index 706f888fbc60..55fe5aa2546f 100644 --- a/lib/std/coff.zig +++ b/lib/std/coff.zig @@ -1253,14 +1253,12 @@ pub const Coff = struct { return null; } - pub fn getSectionData(self: *const Coff, comptime name: []const u8) ![]const u8 { - const sec = self.getSectionByName(name) orelse return error.MissingCoffSection; + pub fn getSectionData(self: *const Coff, sec: *align(1) const SectionHeader) []const u8 { return self.data[sec.pointer_to_raw_data..][0..sec.virtual_size]; } - // Return an owned slice full of the 
section data - pub fn getSectionDataAlloc(self: *const Coff, comptime name: []const u8, allocator: mem.Allocator) ![]u8 { - const section_data = try self.getSectionData(name); + pub fn getSectionDataAlloc(self: *const Coff, sec: *align(1) const SectionHeader, allocator: mem.Allocator) ![]u8 { + const section_data = self.getSectionData(sec); return allocator.dupe(u8, section_data); } }; diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 10480713135d..131b13943cbd 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -987,23 +987,19 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_obj: *coff.Coff) !ModuleDebu .debug_data = undefined, }; - if (coff_obj.getSectionByName(".debug_info")) |sec| { + if (coff_obj.getSectionByName(".debug_info")) |_| { // This coff file has embedded DWARF debug info - _ = sec; - var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { - sections[i] = if (coff_obj.getSectionDataAlloc("." ++ section.name, allocator)) |data| blk: { + sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { break :blk .{ - .data = data, + .data = try coff_obj.getSectionDataAlloc(section_header, allocator), + .virtual_address = section_header.virtual_address, .owned = true, }; - } else |err| blk: { - if (err == error.MissingCoffSection) break :blk null; - return err; - }; + } else null; } var dwarf = DW.DwarfInfo{ @@ -1012,7 +1008,7 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_obj: *coff.Coff) !ModuleDebu .is_macho = false, }; - try DW.openDwarfDebugInfo(&dwarf, allocator, coff_obj.data); + try DW.openDwarfDebugInfo(&dwarf, allocator); di.debug_data = PdbOrDwarf{ .dwarf = dwarf }; return di; } @@ -1049,6 +1045,10 @@ fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 return ptr[start..end]; } +/// Reads debug info from an ELF file, or the current binary if none is specified. +/// If the required sections aren't present but a reference to external debug info is, +/// then this function will recurse to attempt to load the debug sections from +/// an external file.
pub fn readElfDebugInfo( allocator: mem.Allocator, elf_filename: ?[]const u8, @@ -1146,10 +1146,12 @@ pub fn readElfDebugInfo( break :blk .{ .data = decompressed_section, + .virtual_address = shdr.sh_addr, .owned = true, }; } else .{ .data = section_bytes, + .virtual_address = shdr.sh_addr, .owned = false, }; } @@ -1232,7 +1234,7 @@ pub fn readElfDebugInfo( .is_macho = false, }; - try DW.openDwarfDebugInfo(&di, allocator, parent_mapped_mem orelse mapped_mem); + try DW.openDwarfDebugInfo(&di, allocator); return ModuleDebugInfo{ .base_address = undefined, @@ -1900,6 +1902,10 @@ pub const DebugInfo = struct { obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, §ions, null); obj_di.base_address = ctx.base_address; + // TODO: Don't actually scan everything, search on demand + // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding + obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; + try self.address_map.putNoClobber(ctx.base_address, obj_di); return obj_di; @@ -2004,11 +2010,12 @@ pub const ModuleDebugInfo = switch (native_os) { inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; } - if (section_index == null or sections[section_index.?] != null) continue; + if (section_index == null) continue; const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); sections[section_index.?] 
= .{ .data = section_bytes, + .virtual_address = sect.addr, .owned = false, }; } @@ -2026,9 +2033,11 @@ pub const ModuleDebugInfo = switch (native_os) { .is_macho = true, }; - // TODO: Don't actually need to scan unwind info in this case, since __unwind_info points us to the entries + try DW.openDwarfDebugInfo(&di, allocator); + + // TODO: Don't actually scan everything, search on demand + di.scanAllUnwindInfo(allocator, self.base_address) catch {}; - try DW.openDwarfDebugInfo(&di, allocator, mapped_mem); var info = OFileInfo{ .di = di, .addr_table = addr_table, diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index f9b36f2aa2ad..d4a1c61611b2 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -663,7 +663,22 @@ pub const DwarfSection = enum { pub const DwarfInfo = struct { pub const Section = struct { data: []const u8, + // Module-relative virtual address. + // Only set if the section data was loaded from disk. + virtual_address: ?usize = null, + // If `data` is owned by this DwarfInfo. owned: bool, + + // For sections that are not memory mapped by the loader, this is an offset + // from `data.ptr` to where the section would have been mapped. Otherwise, + // `data` is directly backed by the section and the offset is zero. 
+ pub fn virtualOffset(self: Section, base_address: usize) i64 { + return if (self.virtual_address) |va| + @as(i64, @intCast(base_address + va)) - + @as(i64, @intCast(@intFromPtr(self.data.ptr))) + else + 0; + } }; const num_sections = std.enums.directEnumArrayLen(DwarfSection, 0); @@ -690,6 +705,10 @@ pub const DwarfInfo = struct { return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; } + pub fn sectionVirtualOffset(di: DwarfInfo, dwarf_section: DwarfSection, base_address: usize) ?i64 { + return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; + } + pub fn deinit(di: *DwarfInfo, allocator: mem.Allocator) void { for (di.sections) |opt_section| { if (opt_section) |s| if (s.owned) allocator.free(s.data); @@ -1540,7 +1559,12 @@ pub const DwarfInfo = struct { }; } - pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator, binary_mem: []const u8) !void { + /// If .eh_frame_hdr is present, then only the header needs to be parsed. + /// + /// Otherwise, .eh_frame and .debug_frame are scanned and a sorted list + /// of FDEs is built. In this case, the decoded PC ranges in the FDEs + /// are all normalized to be relative to the module's base. 
+ pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { var stream = io.fixedBufferStream(eh_frame_hdr); const reader = stream.reader(); @@ -1582,15 +1606,15 @@ pub const DwarfInfo = struct { const frame_sections = [2]DwarfSection{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { - if (di.section(frame_section)) |eh_frame| { - var stream = io.fixedBufferStream(eh_frame); + if (di.section(frame_section)) |section_data| { + var stream = io.fixedBufferStream(section_data); while (stream.pos < stream.buffer.len) { const entry_header = try EntryHeader.read(&stream, frame_section, di.endian); switch (entry_header.type) { .cie => { const cie = try CommonInformationEntry.parse( entry_header.entry_bytes, - -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), + di.sectionVirtualOffset(frame_section, base_address).?, true, entry_header.is_64, frame_section, @@ -1604,7 +1628,7 @@ pub const DwarfInfo = struct { const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); const fde = try FrameDescriptionEntry.parse( entry_header.entry_bytes, - -@as(i64, @intCast(@intFromPtr(binary_mem.ptr))), + di.sectionVirtualOffset(frame_section, base_address).?, true, cie, @sizeOf(usize), @@ -1637,7 +1661,7 @@ pub const DwarfInfo = struct { var fde: FrameDescriptionEntry = undefined; // In order to support reading .eh_frame from the ELF file (vs using the already-mapped section), - // scanAllUnwindInfo has already mapped any pc-relative offsets such that they we be relative to zero + // scanAllUnwindInfo has already mapped any pc-relative offsets such that they will be relative to zero // instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly // as pointers will be decoded relative to the already-mapped .eh_frame. 
var mapped_pc: usize = undefined; @@ -1653,7 +1677,8 @@ pub const DwarfInfo = struct { &fde, ); } else { - mapped_pc = context.pc - module_base_address; + //mapped_pc = context.pc - module_base_address; + mapped_pc = context.pc; const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct { pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { if (pc < mid_item.pc_begin) return .lt; @@ -1819,12 +1844,9 @@ pub const UnwindContext = struct { /// Initialize DWARF info. The caller has the responsibility to initialize most /// the DwarfInfo fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). -pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator, binary_mem: []const u8) !void { +pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator) !void { try di.scanAllFunctions(allocator); try di.scanAllCompileUnits(allocator); - - // Unwind info is not required - di.scanAllUnwindInfo(allocator, binary_mem) catch {}; } /// This function is to make it handy to comment out the return and make it @@ -1898,9 +1920,10 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo else => null, }; - const ptr = if (base) |b| switch (value) { - .signed => |s| @as(u64, @intCast(s + @as(i64, @intCast(b)))), - .unsigned => |u| u + b, + const ptr: u64 = if (base) |b| switch (value) { + .signed => |s| @intCast(try math.add(i64, s, @as(i64, @intCast(b)))), + // absptr can actually contain signed values in some cases (aarch64 MachO) + .unsigned => |u| u +% b, } else switch (value) { .signed => |s| @as(u64, @intCast(s)), .unsigned => |u| u, @@ -2311,15 +2334,14 @@ pub const FrameDescriptionEntry = struct { instructions: []const u8, /// This function expects to read the FDE starting at the PC Begin field. - /// The returned struct references memory backed by fde_bytes. 
+ /// The returned struct references memory backed by `fde_bytes`. /// /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values /// used when decoding pointers. This should be set to zero if fde_bytes is - /// backed by the memory of the .eh_frame section in the running executable. - /// + /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. /// Otherwise, it should be the relative offset to translate addresses from /// where the section is currently stored in memory, to where it *would* be - /// stored at runtime: section runtime offset - backing section data base ptr. + /// stored at runtime: section base addr - backing data base ptr. /// /// Similarly, `is_runtime` specifies this function is being called on a runtime /// section, and so indirect pointers can be followed. From 2c76020e772787c3bc8bf4bcf56e2e0f41016b26 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 16 Jul 2023 13:16:30 -0400 Subject: [PATCH 70/81] debug: load the macho unwind sections from the already-mapped image --- lib/std/debug.zig | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 131b13943cbd..e85c37cf0a5f 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1259,24 +1259,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .ncmds = hdr.ncmds, .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - for (cmd.getSections()) |sect| { - if (std.mem.eql(u8, "__TEXT", sect.segName())) { - if (mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); - continue; - } - - if (mem.eql(u8, "__eh_frame", sect.sectName())) { - eh_frame = try chopSlice(mapped_mem, 
sect.offset, sect.size); - continue; - } - } - } - }, .SYMTAB => break cmd.cast(macho.symtab_command).?, else => {}, } else return error.MissingDebugInfo; @@ -1386,8 +1369,6 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .ofiles = ModuleDebugInfo.OFileTable.init(allocator), .symbols = symbols, .strings = strings, - .unwind_info = unwind_info, - .eh_frame = eh_frame, }; } @@ -1602,19 +1583,28 @@ pub const DebugInfo = struct { )[0..header.sizeofcmds]), }; + var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; while (it.next()) |cmd| switch (cmd.cmd()) { .SEGMENT_64 => { const segment_cmd = cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - const original_address = address - vmaddr_slide; - const seg_start = segment_cmd.vmaddr; + const seg_start = segment_cmd.vmaddr + vmaddr_slide; const seg_end = seg_start + segment_cmd.vmsize; - if (original_address >= seg_start and original_address < seg_end) { + if (address >= seg_start and address < seg_end) { if (self.address_map.get(base_address)) |obj_di| { return obj_di; } + for (cmd.getSections()) |sect| { + if (mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; + } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { + eh_frame = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; + } + } + const obj_di = try self.allocator.create(ModuleDebugInfo); errdefer self.allocator.destroy(obj_di); @@ -1628,6 +1618,8 @@ pub const DebugInfo = struct { obj_di.* = try readMachODebugInfo(self.allocator, macho_file); obj_di.base_address = base_address; obj_di.vmaddr_slide = vmaddr_slide; + obj_di.unwind_info = unwind_info; + obj_di.eh_frame = eh_frame; try self.address_map.putNoClobber(base_address, obj_di); @@ -1932,9 +1924,10 @@ pub const ModuleDebugInfo = switch (native_os) { symbols: []const MachoSymbol, strings: [:0]const 
u8, ofiles: OFileTable, - // Backed by mapped_memory - unwind_info: ?[]const u8, - eh_frame: ?[]const u8, + + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8 = null, + eh_frame: ?[]const u8 = null, const OFileTable = std.StringHashMap(OFileInfo); const OFileInfo = struct { From bdb0a6fa77cc7eff7d03b022210328b24f9b6662 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 16 Jul 2023 16:44:00 -0400 Subject: [PATCH 71/81] test: add a test for dwarf embedded in coff --- test/standalone.zig | 4 +++ test/standalone/coff_dwarf/build.zig | 35 +++++++++++++++++++++++++ test/standalone/coff_dwarf/main.zig | 27 +++++++++++++++++++ test/standalone/coff_dwarf/shared_lib.c | 6 +++++ 4 files changed, 72 insertions(+) create mode 100644 test/standalone/coff_dwarf/build.zig create mode 100644 test/standalone/coff_dwarf/main.zig create mode 100644 test/standalone/coff_dwarf/shared_lib.c diff --git a/test/standalone.zig b/test/standalone.zig index f52705f05fe4..b7dfc9dc944b 100644 --- a/test/standalone.zig +++ b/test/standalone.zig @@ -234,6 +234,10 @@ pub const build_cases = [_]BuildCase{ .build_root = "test/standalone/stack_iterator", .import = @import("standalone/stack_iterator/build.zig"), }, + .{ + .build_root = "test/standalone/coff_dwarf", + .import = @import("standalone/coff_dwarf/build.zig"), + }, }; const std = @import("std"); diff --git a/test/standalone/coff_dwarf/build.zig b/test/standalone/coff_dwarf/build.zig new file mode 100644 index 000000000000..ffd7800a5b6c --- /dev/null +++ b/test/standalone/coff_dwarf/build.zig @@ -0,0 +1,35 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// This tests the path where DWARF information is embedded in a COFF binary +pub fn build(b: *std.Build) void { + const test_step = b.step("test", "Test it"); + b.default_step = test_step; + + const optimize: std.builtin.OptimizeMode = .Debug; + const target = b.standardTargetOptions(.{}); + + if (builtin.os.tag != .windows) return; + + 
const exe = b.addExecutable(.{ + .name = "main", + .root_source_file = .{ .path = "main.zig" }, + .optimize = optimize, + .target = target, + }); + + const lib = b.addSharedLibrary(.{ + .name = "shared_lib", + .optimize = optimize, + .target = target, + }); + lib.addCSourceFile("shared_lib.c", &.{"-gdwarf"}); + lib.linkLibC(); + exe.linkLibrary(lib); + + const run = b.addRunArtifact(exe); + run.expectExitCode(0); + run.skip_foreign_checks = true; + + test_step.dependOn(&run.step); +} diff --git a/test/standalone/coff_dwarf/main.zig b/test/standalone/coff_dwarf/main.zig new file mode 100644 index 000000000000..236aa1c5fa0b --- /dev/null +++ b/test/standalone/coff_dwarf/main.zig @@ -0,0 +1,27 @@ +const std = @import("std"); +const assert = std.debug.assert; +const testing = std.testing; + +extern fn add(a: u32, b: u32, addr: *usize) u32; + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer assert(gpa.deinit() == .ok); + const allocator = gpa.allocator(); + + var debug_info = try std.debug.openSelfDebugInfo(allocator); + defer debug_info.deinit(); + + var add_addr: usize = undefined; + _ = add(1, 2, &add_addr); + + const module = try debug_info.getModuleForAddress(add_addr); + const symbol = try module.getSymbolAtAddress(allocator, add_addr); + defer symbol.deinit(allocator); + + try testing.expectEqualStrings("add", symbol.symbol_name); + try testing.expect(symbol.line_info != null); + try testing.expectEqualStrings("shared_lib.c", std.fs.path.basename(symbol.line_info.?.file_name)); + try testing.expectEqual(@as(u64, 3), symbol.line_info.?.line); + try testing.expectEqual(@as(u64, 0), symbol.line_info.?.column); +} diff --git a/test/standalone/coff_dwarf/shared_lib.c b/test/standalone/coff_dwarf/shared_lib.c new file mode 100644 index 000000000000..0455a6a0ad3a --- /dev/null +++ b/test/standalone/coff_dwarf/shared_lib.c @@ -0,0 +1,6 @@ +#include + +__declspec(dllexport) uint32_t add(uint32_t a, uint32_t b, uintptr_t* addr) { + *addr 
= (uintptr_t)&add; + return a + b; +} From 774dc2fdb75dc7f6b7e0bd2cfe947fffb9829ba1 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Sun, 16 Jul 2023 22:07:20 -0400 Subject: [PATCH 72/81] dwarf: add explicit_fde_offset to support more optimal __unwind_info dwarf lookups --- lib/std/debug.zig | 3 +- lib/std/dwarf.zig | 66 ++++++++++++++++++------ test/standalone/stack_iterator/build.zig | 27 +++++++++- 3 files changed, 77 insertions(+), 19 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index e85c37cf0a5f..410f5db38019 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -653,7 +653,7 @@ pub const StackIterator = struct { } if (try module.getDwarfInfoForAddress(unwind_state.debug_info.allocator, unwind_state.dwarf_context.pc)) |di| { - return di.unwindFrame(&unwind_state.dwarf_context, module.base_address); + return di.unwindFrame(&unwind_state.dwarf_context, null); } else return error.MissingDebugInfo; } @@ -1894,7 +1894,6 @@ pub const DebugInfo = struct { obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, §ions, null); obj_di.base_address = ctx.base_address; - // TODO: Don't actually scan everything, search on demand // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index d4a1c61611b2..d4b7d6ba330c 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1562,8 +1562,7 @@ pub const DwarfInfo = struct { /// If .eh_frame_hdr is present, then only the header needs to be parsed. /// /// Otherwise, .eh_frame and .debug_frame are scanned and a sorted list - /// of FDEs is built. In this case, the decoded PC ranges in the FDEs - /// are all normalized to be relative to the module's base. + /// of FDEs is built for binary searching during unwinding. 
pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { var stream = io.fixedBufferStream(eh_frame_hdr); @@ -1650,7 +1649,14 @@ pub const DwarfInfo = struct { } } - pub fn unwindFrame(di: *const DwarfInfo, context: *UnwindContext, module_base_address: usize) !usize { + /// Unwind a stack frame using DWARF unwinding info, updating the register context. + /// + /// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE. + /// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. + /// + /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info + /// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. + pub fn unwindFrame(di: *const DwarfInfo, context: *UnwindContext, explicit_fde_offset: ?usize) !usize { if (!comptime abi.isSupportedArch(builtin.target.cpu.arch)) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return 0; @@ -1660,26 +1666,54 @@ pub const DwarfInfo = struct { var cie: CommonInformationEntry = undefined; var fde: FrameDescriptionEntry = undefined; - // In order to support reading .eh_frame from the ELF file (vs using the already-mapped section), - // scanAllUnwindInfo has already mapped any pc-relative offsets such that they will be relative to zero - // instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly - // as pointers will be decoded relative to the already-mapped .eh_frame. 
- var mapped_pc: usize = undefined; - if (di.eh_frame_hdr) |header| { + if (explicit_fde_offset) |fde_offset| { + const dwarf_section: DwarfSection = .eh_frame; + const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; + if (fde_offset >= frame_section.len) return error.MissingFDE; + + var stream = io.fixedBufferStream(frame_section); + const fde_entry_header = try EntryHeader.read(&stream, dwarf_section, di.endian); + if (fde_entry_header.type != .fde) return error.MissingFDE; + + const cie_offset = fde_entry_header.type.fde; + try stream.seekTo(cie_offset); + + const cie_entry_header = try EntryHeader.read(&stream, dwarf_section, builtin.cpu.arch.endian()); + if (cie_entry_header.type != .cie) return badDwarf(); + + cie = try CommonInformationEntry.parse( + cie_entry_header.entry_bytes, + 0, + true, + cie_entry_header.is_64, + dwarf_section, + cie_entry_header.length_offset, + @sizeOf(usize), + builtin.cpu.arch.endian(), + ); + + fde = try FrameDescriptionEntry.parse( + fde_entry_header.entry_bytes, + 0, + true, + cie, + @sizeOf(usize), + builtin.cpu.arch.endian(), + ); + } else if (di.eh_frame_hdr) |header| { + std.debug.print("EH_FRAME_HDR\n", .{}); + const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; - mapped_pc = context.pc; try header.findEntry( context.isValidMemory, eh_frame_len, @intFromPtr(di.section(.eh_frame_hdr).?.ptr), - mapped_pc, + context.pc, &cie, &fde, ); } else { - //mapped_pc = context.pc - module_base_address; - mapped_pc = context.pc; - const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct { + const index = std.sort.binarySearch(FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct { pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { if (pc < mid_item.pc_begin) return .lt; @@ -1707,7 +1741,7 @@ pub const DwarfInfo = struct { context.reg_context.eh_frame = cie.version != 4; 
context.reg_context.is_macho = di.is_macho; - _ = try context.vm.runToNative(context.allocator, mapped_pc, cie, fde); + _ = try context.vm.runToNative(context.allocator, context.pc, cie, fde); const row = &context.vm.current_row; context.cfa = switch (row.cfa.rule) { @@ -2056,7 +2090,7 @@ pub const ExceptionFrameHeader = struct { if (!self.isValidPtr(@intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf(); if (fde_entry_header.type != .fde) return badDwarf(); - // CIEs always come before FDEs (the offset is a subtration), so we can assume this memory is readable + // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable const cie_offset = fde_entry_header.type.fde; try eh_frame_stream.seekTo(cie_offset); const cie_entry_header = try EntryHeader.read(&eh_frame_stream, .eh_frame, builtin.cpu.arch.endian()); diff --git a/test/standalone/stack_iterator/build.zig b/test/standalone/stack_iterator/build.zig index 6a999aecffc9..f264777ae01f 100644 --- a/test/standalone/stack_iterator/build.zig +++ b/test/standalone/stack_iterator/build.zig @@ -8,6 +8,14 @@ pub fn build(b: *std.Build) void { const optimize = b.standardOptimizeOption(.{}); // Unwinding pure zig code, with a frame pointer + // + // getcontext version: zig std + // + // Unwind info type: + // - ELF: DWARF .debug_frame + // - MachO: __unwind_info encodings: + // - x86_64: RBP_FRAME + // - aarch64: FRAME, DWARF { const exe = b.addExecutable(.{ .name = "zig_unwind_fp", @@ -23,7 +31,15 @@ pub fn build(b: *std.Build) void { test_step.dependOn(&run_cmd.step); } - // Unwinding pure zig code, without a frame pointer + // Unwinding pure zig code, without a frame pointer. 
+ // + // getcontext version: zig std + // + // Unwind info type: + // - ELF: DWARF .eh_frame_hdr + .eh_frame + // - MachO: __unwind_info encodings: + // - x86_64: STACK_IMMD, STACK_IND + // - aarch64: FRAMELESS, DWARF { const exe = b.addExecutable(.{ .name = "zig_unwind_nofp", @@ -34,12 +50,21 @@ pub fn build(b: *std.Build) void { if (target.isDarwin()) exe.unwind_tables = true; exe.omit_frame_pointer = true; + exe.unwind_tables = true; const run_cmd = b.addRunArtifact(exe); test_step.dependOn(&run_cmd.step); } // Unwinding through a C shared library without a frame pointer (libc) + // + // getcontext version: libc + // + // Unwind info type: + // - ELF: DWARF .eh_frame + .debug_frame + // - MachO: __unwind_info encodings: + // - x86_64: STACK_IMMD, STACK_IND + // - aarch64: FRAMELESS, DWARF { const c_shared_lib = b.addSharedLibrary(.{ .name = "c_shared_lib", From 97bda56306622784b2f2e4f036d817bbe5f2025e Mon Sep 17 00:00:00 2001 From: kcbanner Date: Mon, 17 Jul 2023 10:22:01 -0400 Subject: [PATCH 73/81] macho: don't scan all eh_frame entries, instead follow the offset from the __unwind_info directly --- lib/std/debug.zig | 6 +----- lib/std/dwarf.zig | 9 ++++----- lib/std/macho.zig | 29 ++++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 410f5db38019..d2fc5b1338a7 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -642,7 +642,7 @@ pub const StackIterator = struct { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
if (module.unwind_info) |unwind_info| { - if (macho.unwindFrame(&unwind_state.dwarf_context, unwind_info, module.base_address)) |return_address| { + if (macho.unwindFrame(&unwind_state.dwarf_context, unwind_info, module.eh_frame, module.base_address)) |return_address| { return return_address; } else |err| { if (err != error.RequiresDWARFUnwind) return err; @@ -2026,10 +2026,6 @@ pub const ModuleDebugInfo = switch (native_os) { }; try DW.openDwarfDebugInfo(&di, allocator); - - // TODO: Don't actually scan everything, search on demand - di.scanAllUnwindInfo(allocator, self.base_address) catch {}; - var info = OFileInfo{ .di = di, .addr_table = addr_table, diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index d4b7d6ba330c..1721cc2d8852 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -686,7 +686,8 @@ pub const DwarfInfo = struct { pub const null_section_array = [_]?Section{null} ** num_sections; endian: std.builtin.Endian, - sections: SectionArray, + sections: SectionArray = null_section_array, + is_macho: bool, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(AbbrevTableHeader) = .{}, @@ -699,8 +700,6 @@ pub const DwarfInfo = struct { // Sorted by start_pc fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{}, - is_macho: bool, - pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 { return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; } @@ -1672,6 +1671,8 @@ pub const DwarfInfo = struct { if (fde_offset >= frame_section.len) return error.MissingFDE; var stream = io.fixedBufferStream(frame_section); + try stream.seekTo(fde_offset); + const fde_entry_header = try EntryHeader.read(&stream, dwarf_section, di.endian); if (fde_entry_header.type != .fde) return error.MissingFDE; @@ -1701,8 +1702,6 @@ pub const DwarfInfo = struct { builtin.cpu.arch.endian(), ); } else if (di.eh_frame_hdr) |header| { - std.debug.print("EH_FRAME_HDR\n", .{}); - const eh_frame_len = if 
(di.section(.eh_frame)) |eh_frame| eh_frame.len else null; try header.findEntry( context.isValidMemory, diff --git a/lib/std/macho.zig b/lib/std/macho.zig index dbf49b5f3d92..3c18e1f23c33 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -2142,7 +2142,7 @@ fn dwarfRegNumber(unwind_reg_number: u3) !u8 { const dwarf = std.dwarf; const abi = dwarf.abi; -pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, module_base_address: usize) !usize { +pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, eh_frame: ?[]const u8, module_base_address: usize) !usize { const header = mem.bytesAsValue( unwind_info_section_header, unwind_info[0..@sizeOf(unwind_info_section_header)], @@ -2396,7 +2396,9 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul break :blk new_ip; }, - .DWARF => return error.RequiresDWARFUnwind, + .DWARF => { + return unwindFrameDwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); + }, }, .aarch64 => switch (encoding.mode.arm64) { .OLD => return error.UnimplementedUnwindEncoding, @@ -2408,8 +2410,10 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; break :blk new_ip; }, - .DWARF => return error.RequiresDWARFUnwind, - .FRAME => { + .DWARF => { + return unwindFrameDwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); + }, + .FRAME => blk: { const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; const new_sp = fp + 16; const ip_ptr = fp + @sizeOf(usize); @@ -2453,7 +2457,7 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; (try abi.regValueNative(usize, 
context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - return error.UnimplementedUnwindEncoding; + break :blk new_ip; }, }, else => return error.UnimplementedArch, @@ -2463,3 +2467,18 @@ pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, modul if (context.pc > 0) context.pc -= 1; return new_ip; } + +fn unwindFrameDwarf(context: *dwarf.UnwindContext, eh_frame: []const u8, fde_offset: usize) !usize { + var di = dwarf.DwarfInfo{ + .endian = builtin.cpu.arch.endian(), + .is_macho = true, + }; + defer di.deinit(context.allocator); + + di.sections[@intFromEnum(dwarf.DwarfSection.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + return di.unwindFrame(context, fde_offset); +} From 1a2bb70956df44e9c1dd2f06e1e98b4b7ff265d6 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 18 Jul 2023 00:35:04 -0400 Subject: [PATCH 74/81] dwarf: write the CFA as SP before the register update, in case the SP itself is updated by a column rule --- lib/std/dwarf.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 1721cc2d8852..6681c5501cce 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1810,6 +1810,7 @@ pub const DwarfInfo = struct { } } + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; while (update_tail) |tail| { @memcpy(tail.old_value, tail.new_value); update_tail = tail.prev; From d99b40d38b48f046b8d7faa7698315d6c93bf685 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 18 Jul 2023 01:21:45 -0400 Subject: [PATCH 75/81] dwarf: fix the unwinder using the incorrect row from the FDE in certain cases --- lib/std/dwarf.zig | 18 +++++++++++++++--- lib/std/dwarf/call_frame.zig | 12 ++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 6681c5501cce..6dc317779ef4 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1740,8 +1740,20 @@ pub 
const DwarfInfo = struct { context.reg_context.eh_frame = cie.version != 4; context.reg_context.is_macho = di.is_macho; - _ = try context.vm.runToNative(context.allocator, context.pc, cie, fde); - const row = &context.vm.current_row; + if (comptime builtin.target.isDarwin()) { + std.debug.print(" state before:\n", .{}); + std.debug.print(" cfa {?x}:\n", .{context.cfa}); + for (context.thread_context.mcontext.ss.regs, 0..) |reg, i| { + std.debug.print(" {}:0x{x}\n", .{i, reg}); + } + std.debug.print(" fp:0x{x}\n", .{context.thread_context.mcontext.ss.fp}); + std.debug.print(" lr:0x{x}\n", .{context.thread_context.mcontext.ss.lr}); + std.debug.print(" sp:0x{x}\n", .{context.thread_context.mcontext.ss.sp}); + std.debug.print(" pc:0x{x}\n", .{context.thread_context.mcontext.ss.pc}); + } + + const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); + std.debug.print(" ran to 0x{x}\n", .{row.offset + fde.pc_begin}); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { @@ -1785,7 +1797,7 @@ pub const DwarfInfo = struct { var update_tail: ?*RegisterUpdate = null; var has_next_ip = false; - for (context.vm.rowColumns(row.*)) |column| { + for (context.vm.rowColumns(row)) |column| { if (column.register) |register| { if (register == cie.return_address_register) { has_next_ip = column.rule != .undefined; diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 0b6f45d938bb..1243673da631 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -395,9 +395,7 @@ pub const VirtualMachine = struct { } /// Runs the CIE instructions, then the FDE instructions. Execution halts - /// once the row that corresponds to `pc` is known (and set as `current_row`). - /// - /// The state of the row prior to the last execution step is returned. + /// once the row that corresponds to `pc` is known, and the row is returned. 
pub fn runTo( self: *VirtualMachine, allocator: std.mem.Allocator, @@ -419,17 +417,15 @@ pub const VirtualMachine = struct { &fde_stream, }; - outer: for (&streams, 0..) |stream, i| { + for (&streams, 0..) |stream, i| { while (stream.pos < stream.buffer.len) { const instruction = try dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); prev_row = try self.step(allocator, cie, i == 0, instruction); - if (pc < fde.pc_begin + self.current_row.offset) { - break :outer; - } + if (pc < fde.pc_begin + self.current_row.offset) return prev_row; } } - return prev_row; + return self.current_row; } pub fn runToNative( From 2bc2b01dbc0757c5381dd79c32c2f607a6bcc270 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 18 Jul 2023 02:04:48 -0400 Subject: [PATCH 76/81] dwarf: update the pc register --- lib/std/dwarf.zig | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 6dc317779ef4..08bc34759be3 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1796,12 +1796,12 @@ pub const DwarfInfo = struct { }; var update_tail: ?*RegisterUpdate = null; - var has_next_ip = false; for (context.vm.rowColumns(row)) |column| { if (column.register) |register| { if (register == cie.return_address_register) { has_next_ip = column.rule != .undefined; } + std.debug.print(" updated {}\n", .{register}); const old_value = try abi.regBytes(context.thread_context, register, context.reg_context); const new_value = try update_allocator.alloc(u8, old_value.len); @@ -1828,15 +1828,13 @@ pub const DwarfInfo = struct { update_tail = tail.prev; } - if (has_next_ip) { - context.pc = abi.stripInstructionPtrAuthCode(mem.readIntSliceNative(usize, try abi.regBytes( - context.thread_context, - cie.return_address_register, - context.reg_context, - ))); - } else { - context.pc = 0; - } + context.pc = abi.stripInstructionPtrAuthCode(mem.readIntSliceNative(usize, try abi.regBytes( + context.thread_context, + 
cie.return_address_register, + context.reg_context, + ))); + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; + std.debug.print(" new context.pc: 0x{x}\n", .{context.pc}); (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; From 4421b14878eb722da8a57069b2152f7b43ba7ffc Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 18 Jul 2023 14:02:09 -0400 Subject: [PATCH 77/81] dwarf: fixup rules for setting ip --- lib/std/dwarf.zig | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 08bc34759be3..7aee8e0aa38d 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1796,6 +1796,7 @@ pub const DwarfInfo = struct { }; var update_tail: ?*RegisterUpdate = null; + var has_next_ip = true; for (context.vm.rowColumns(row)) |column| { if (column.register) |register| { if (register == cie.return_address_register) { @@ -1828,11 +1829,15 @@ pub const DwarfInfo = struct { update_tail = tail.prev; } - context.pc = abi.stripInstructionPtrAuthCode(mem.readIntSliceNative(usize, try abi.regBytes( - context.thread_context, - cie.return_address_register, - context.reg_context, - ))); + if (has_next_ip) { + context.pc = abi.stripInstructionPtrAuthCode(mem.readIntSliceNative(usize, try abi.regBytes( + context.thread_context, + cie.return_address_register, + context.reg_context, + ))); + } else { + context.pc = 0; + } (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; std.debug.print(" new context.pc: 0x{x}\n", .{context.pc}); From 253e6971ad77a9665348fd6a2085b2ffd8c84219 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 18 Jul 2023 20:14:42 -0400 Subject: [PATCH 78/81] dwarf: implement aarch64 default register rules --- lib/std/dwarf.zig | 43 ++++++++++++------------------------ lib/std/dwarf/abi.zig | 21 +++++++++++++----- 
lib/std/dwarf/call_frame.zig | 12 +++++++--- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 7aee8e0aa38d..298f1f7ec492 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1740,21 +1740,7 @@ pub const DwarfInfo = struct { context.reg_context.eh_frame = cie.version != 4; context.reg_context.is_macho = di.is_macho; - if (comptime builtin.target.isDarwin()) { - std.debug.print(" state before:\n", .{}); - std.debug.print(" cfa {?x}:\n", .{context.cfa}); - for (context.thread_context.mcontext.ss.regs, 0..) |reg, i| { - std.debug.print(" {}:0x{x}\n", .{i, reg}); - } - std.debug.print(" fp:0x{x}\n", .{context.thread_context.mcontext.ss.fp}); - std.debug.print(" lr:0x{x}\n", .{context.thread_context.mcontext.ss.lr}); - std.debug.print(" sp:0x{x}\n", .{context.thread_context.mcontext.ss.sp}); - std.debug.print(" pc:0x{x}\n", .{context.thread_context.mcontext.ss.pc}); - } - const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); - std.debug.print(" ran to 0x{x}\n", .{row.offset + fde.pc_begin}); - context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; @@ -1789,47 +1775,48 @@ pub const DwarfInfo = struct { const RegisterUpdate = struct { // Backed by thread_context - old_value: []u8, + dest: []u8, // Backed by arena - new_value: []const u8, + src: []const u8, prev: ?*@This(), }; var update_tail: ?*RegisterUpdate = null; - var has_next_ip = true; + var has_return_address= true; for (context.vm.rowColumns(row)) |column| { if (column.register) |register| { if (register == cie.return_address_register) { - has_next_ip = column.rule != .undefined; + has_return_address = column.rule != .undefined; } - std.debug.print(" updated {}\n", .{register}); - const old_value = try abi.regBytes(context.thread_context, register, context.reg_context); - const new_value = try update_allocator.alloc(u8, old_value.len); + 
const dest = try abi.regBytes(context.thread_context, register, context.reg_context); + const src = try update_allocator.alloc(u8, dest.len); const prev = update_tail; update_tail = try update_allocator.create(RegisterUpdate); update_tail.?.* = .{ - .old_value = old_value, - .new_value = new_value, + .dest = dest, + .src = src, .prev = prev, }; try column.resolveValue( context, expression_context, - new_value, + src, ); } } + // On all implemented architectures, the CFA is defined as being the previous frame's SP (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; + while (update_tail) |tail| { - @memcpy(tail.old_value, tail.new_value); + @memcpy(tail.dest, tail.src); update_tail = tail.prev; } - if (has_next_ip) { + if (has_return_address) { context.pc = abi.stripInstructionPtrAuthCode(mem.readIntSliceNative(usize, try abi.regBytes( context.thread_context, cie.return_address_register, @@ -1838,10 +1825,8 @@ pub const DwarfInfo = struct { } else { context.pc = 0; } - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; - std.debug.print(" new context.pc: 0x{x}\n", .{context.pc}); - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; // The call instruction will have pushed the address of the instruction that follows the call as the return address // However, this return address may be past the end of the function if the caller was `noreturn`. 
By subtracting one, diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index d56ae2733d06..7f349d97ad33 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -367,12 +367,21 @@ pub fn regBytes( } /// Returns the ABI-defined default value this register has in the unwinding table -/// before running any of the CIE instructions. The DWARF spec defines these values -/// to be undefined, but allows ABI authors to override that default. -pub fn getRegDefaultValue(reg_number: u8, out: []u8) void { - - // Implement any ABI-specific rules here +/// before running any of the CIE instructions. The DWARF spec defines these as having +/// the .undefined rule by default, but allows ABI authors to override that. +pub fn getRegDefaultValue(reg_number: u8, context: *std.dwarf.UnwindContext, out: []u8) !void { + switch (builtin.cpu.arch) { + .aarch64 => { + // Callee-saved registers are initialized as if they had the .same_value rule + if (reg_number >= 19 and reg_number <= 28) { + const src = try regBytes(context.thread_context, reg_number, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + return; + } + }, + else => {}, + } - _ = reg_number; @memset(out, undefined); } diff --git a/lib/std/dwarf/call_frame.zig b/lib/std/dwarf/call_frame.zig index 1243673da631..c83cbad81578 100644 --- a/lib/std/dwarf/call_frame.zig +++ b/lib/std/dwarf/call_frame.zig @@ -295,12 +295,18 @@ pub const VirtualMachine = struct { switch (self.rule) { .default => { const register = self.register orelse return error.InvalidRegister; - abi.getRegDefaultValue(register, out); + try abi.getRegDefaultValue(register, context, out); }, .undefined => { @memset(out, undefined); }, - .same_value => {}, + .same_value => { + // TODO: This copy could be eliminated if callers always copy the state then call this function to update it + const register = self.register orelse return error.InvalidRegister; + const src = try 
abi.regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + }, .offset => |offset| { if (context.cfa) |cfa| { const addr = try applyOffset(cfa, offset); @@ -316,7 +322,7 @@ pub const VirtualMachine = struct { }, .register => |register| { const src = try abi.regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterTypeMismatch; + if (src.len != out.len) return error.RegisterSizeMismatch; @memcpy(out, try abi.regBytes(context.thread_context, register, context.reg_context)); }, .expression => |expression| { From 6d87bb370a6d9075b2b6628f5f8c09171f25e4e9 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Tue, 18 Jul 2023 22:51:52 -0400 Subject: [PATCH 79/81] debug: disable the new unwinder on aarch64-macos --- lib/std/debug.zig | 24 +++++++++++++++--------- lib/std/dwarf.zig | 4 ++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index d2fc5b1338a7..603d651e5c0d 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -521,13 +521,19 @@ pub const StackIterator = struct { } pub fn initWithContext(first_address: ?usize, debug_info: *DebugInfo, context: *const os.ucontext_t) !StackIterator { - var iterator = init(first_address, null); - iterator.unwind_state = .{ - .debug_info = debug_info, - .dwarf_context = try DW.UnwindContext.init(debug_info.allocator, context, &isValidMemory), - }; + // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that + // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. 
+ if (comptime builtin.target.isDarwin() and native_arch == .aarch64) { + return init(first_address, context.mcontext.ss.fp); + } else { + var iterator = init(first_address, null); + iterator.unwind_state = .{ + .debug_info = debug_info, + .dwarf_context = try DW.UnwindContext.init(debug_info.allocator, context, &isValidMemory), + }; - return iterator; + return iterator; + } } pub fn deinit(self: *StackIterator) void { @@ -663,15 +669,15 @@ pub const StackIterator = struct { if (!unwind_state.failed) { if (unwind_state.dwarf_context.pc == 0) return null; if (self.next_unwind()) |return_address| { + self.fp = unwind_state.dwarf_context.getFp() catch 0; return return_address; } else |err| { unwind_state.last_error = err; unwind_state.failed = true; // Fall back to fp-based unwinding on the first failure. - // We can't attempt it for other modules later in the - // stack because the full register state won't be unwound. - self.fp = unwind_state.dwarf_context.getFp() catch 0; + // We can't attempt it again for other modules higher in the + // stack because the full register state won't have been unwound. 
} } } diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 298f1f7ec492..968dab094d5f 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1782,7 +1782,7 @@ pub const DwarfInfo = struct { }; var update_tail: ?*RegisterUpdate = null; - var has_return_address= true; + var has_return_address = true; for (context.vm.rowColumns(row)) |column| { if (column.register) |register| { if (register == cie.return_address_register) { @@ -1871,7 +1871,7 @@ pub const UnwindContext = struct { } pub fn getFp(self: *const UnwindContext) !usize { - return mem.readIntSliceNative(usize, try abi.regBytes(self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)); + return (try abi.regValueNative(usize, self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)).*; } }; From 8e6a62ba10326e48eaefd40f89c9452d92f39c9d Mon Sep 17 00:00:00 2001 From: kcbanner Date: Wed, 19 Jul 2023 02:06:17 -0400 Subject: [PATCH 80/81] test: disable omit_frame_pointer unwinding tests on aarch64-macos dwarf: handle signal frame CIE flag --- lib/std/debug.zig | 2 +- lib/std/dwarf.zig | 15 +++++++++------ test/standalone/stack_iterator/build.zig | 13 ++++++------- .../stack_iterator/shared_lib_unwind.zig | 4 ++++ .../stack_iterator/{zig_unwind.zig => unwind.zig} | 3 +++ 5 files changed, 23 insertions(+), 14 deletions(-) rename test/standalone/stack_iterator/{zig_unwind.zig => unwind.zig} (94%) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 603d651e5c0d..178274f25fba 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -882,7 +882,7 @@ fn printUnknownSource(debug_info: *DebugInfo, out_stream: anytype, address: usiz pub fn printUnwindError(debug_info: *DebugInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); - try out_stream.print("Unwind information for `{s}:{}` was not available ({}), trace may be 
incomplete\n\n", .{ module_name, address, err }); + try out_stream.print("Unwind information for `{s}:0x{x}` was not available ({}), trace may be incomplete\n\n", .{ module_name, address, err }); try tty_config.setColor(out_stream, .reset); } diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 968dab094d5f..899976382325 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1659,8 +1659,6 @@ pub const DwarfInfo = struct { if (!comptime abi.isSupportedArch(builtin.target.cpu.arch)) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return 0; - // TODO: Handle unwinding from a signal frame (ie. use_prev_instr in libunwind) - // Find the FDE and CIE var cie: CommonInformationEntry = undefined; var fde: FrameDescriptionEntry = undefined; @@ -1828,11 +1826,16 @@ pub const DwarfInfo = struct { (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; - // The call instruction will have pushed the address of the instruction that follows the call as the return address - // However, this return address may be past the end of the function if the caller was `noreturn`. By subtracting one, - // then `context.pc` will always point to an instruction within the FDE for the previous function. + // The call instruction will have pushed the address of the instruction that follows the call as the return address. + // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in + // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up + // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, + // we subtract one so that the next lookup is guaranteed to land inside the FDE for the previous function. + // + // The exception to this rule is signal frames, where execution would be returned to the instruction + // that triggered the handler.
const return_address = context.pc; - if (context.pc > 0) context.pc -= 1; + if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; return return_address; } diff --git a/test/standalone/stack_iterator/build.zig b/test/standalone/stack_iterator/build.zig index f264777ae01f..1c5a9673ceca 100644 --- a/test/standalone/stack_iterator/build.zig +++ b/test/standalone/stack_iterator/build.zig @@ -7,7 +7,7 @@ pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - // Unwinding pure zig code, with a frame pointer + // Unwinding with a frame pointer // // getcontext version: zig std // @@ -18,8 +18,8 @@ pub fn build(b: *std.Build) void { // - aarch64: FRAME, DWARF { const exe = b.addExecutable(.{ - .name = "zig_unwind_fp", - .root_source_file = .{ .path = "zig_unwind.zig" }, + .name = "unwind_fp", + .root_source_file = .{ .path = "unwind.zig" }, .target = target, .optimize = optimize, }); @@ -31,7 +31,7 @@ pub fn build(b: *std.Build) void { test_step.dependOn(&run_cmd.step); } - // Unwinding pure zig code, without a frame pointer. 
+ // Unwinding without a frame pointer // // getcontext version: zig std // @@ -42,13 +42,12 @@ pub fn build(b: *std.Build) void { // - aarch64: FRAMELESS, DWARF { const exe = b.addExecutable(.{ - .name = "zig_unwind_nofp", - .root_source_file = .{ .path = "zig_unwind.zig" }, + .name = "unwind_nofp", + .root_source_file = .{ .path = "unwind.zig" }, .target = target, .optimize = optimize, }); - if (target.isDarwin()) exe.unwind_tables = true; exe.omit_frame_pointer = true; exe.unwind_tables = true; diff --git a/test/standalone/stack_iterator/shared_lib_unwind.zig b/test/standalone/stack_iterator/shared_lib_unwind.zig index 22f008174429..50e0421e2ac6 100644 --- a/test/standalone/stack_iterator/shared_lib_unwind.zig +++ b/test/standalone/stack_iterator/shared_lib_unwind.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const debug = std.debug; const testing = std.testing; @@ -34,6 +35,9 @@ extern fn frame0( ) void; pub fn main() !void { + // Disabled until the DWARF unwinder bugs on .aarch64 are solved + if (builtin.omit_frame_pointer and comptime builtin.target.isDarwin() and builtin.cpu.arch == .aarch64) return; + if (!std.debug.have_ucontext or !std.debug.have_getcontext) return; var expected: [5]usize = undefined; diff --git a/test/standalone/stack_iterator/zig_unwind.zig b/test/standalone/stack_iterator/unwind.zig similarity index 94% rename from test/standalone/stack_iterator/zig_unwind.zig rename to test/standalone/stack_iterator/unwind.zig index 5421ac052fd5..1280118173e2 100644 --- a/test/standalone/stack_iterator/zig_unwind.zig +++ b/test/standalone/stack_iterator/unwind.zig @@ -87,6 +87,9 @@ noinline fn frame0(expected: *[4]usize, unwound: *[4]usize) void { } pub fn main() !void { + // Disabled until the DWARF unwinder bugs on .aarch64 are solved + if (builtin.omit_frame_pointer and comptime builtin.target.isDarwin() and builtin.cpu.arch == .aarch64) return; + if (!std.debug.have_ucontext or !std.debug.have_getcontext) 
return; var expected: [4]usize = undefined; From b1d86db7b45c57b2a9d48655738bce8d77327438 Mon Sep 17 00:00:00 2001 From: kcbanner Date: Thu, 20 Jul 2023 22:57:21 -0400 Subject: [PATCH 81/81] dwarf: move macho unwind code from macho -> dwarf dwarf: fixup unchecked .eh_frame CIE offset subtraction --- lib/std/debug.zig | 2 +- lib/std/dwarf.zig | 361 +++++++++++++++++++++++++++++++++++++++++++++- lib/std/macho.zig | 357 --------------------------------------------- 3 files changed, 361 insertions(+), 359 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 178274f25fba..948a76c2df30 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -648,7 +648,7 @@ pub const StackIterator = struct { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. if (module.unwind_info) |unwind_info| { - if (macho.unwindFrame(&unwind_state.dwarf_context, unwind_info, module.eh_frame, module.base_address)) |return_address| { + if (DW.unwindFrameMachO(&unwind_state.dwarf_context, unwind_info, module.eh_frame, module.base_address)) |return_address| { return return_address; } else |err| { if (err != error.RequiresDWARFUnwind) return err; diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index 899976382325..639772cf6ed1 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1841,6 +1841,365 @@ pub const DwarfInfo = struct { } }; +/// Returns the DWARF register number for an x86_64 register number found in compact unwind info +fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { + return switch (unwind_reg_number) { + 1 => 3, // RBX + 2 => 12, // R12 + 3 => 13, // R13 + 4 => 14, // R14 + 5 => 15, // R15 + 6 => 6, // RBP + else => error.InvalidUnwindRegisterNumber, + }; +} + +const macho = std.macho; + +/// Unwind a frame using MachO compact unwind info (from __unwind_info). 
+/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. +pub fn unwindFrameMachO(context: *UnwindContext, unwind_info: []const u8, eh_frame: ?[]const u8, module_base_address: usize) !usize { + const header = mem.bytesAsValue( + macho.unwind_info_section_header, + unwind_info[0..@sizeOf(macho.unwind_info_section_header)], + ); + const indices = mem.bytesAsSlice( + macho.unwind_info_section_header_index_entry, + unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], + ); + if (indices.len == 0) return error.MissingUnwindInfo; + + const mapped_pc = context.pc - module_base_address; + const second_level_index = blk: { + var left: usize = 0; + var len: usize = indices.len; + + while (len > 1) { + const mid = left + len / 2; + const offset = indices[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + // Last index is a sentinel containing the highest address as its functionOffset + if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; + break :blk &indices[left]; + }; + + const common_encodings = mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[header.commonEncodingsArraySectionOffset..][0 .. 
header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + + const start_offset = second_level_index.secondLevelPagesSectionOffset; + const kind = mem.bytesAsValue( + macho.UNWIND_SECOND_LEVEL, + unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], + ); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => blk: { + const page_header = mem.bytesAsValue( + macho.unwind_info_regular_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], + ); + + const entries = mem.bytesAsSlice( + macho.unwind_info_regular_second_level_entry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = entries[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + break :blk .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => blk: { + const page_header = mem.bytesAsValue( + macho.unwind_info_compressed_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], + ); + + const entries = mem.bytesAsSlice( + macho.UnwindInfoCompressedEntry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = second_level_index.functionOffset + entries[mid].funcOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + const entry = entries[left]; + const function_offset = second_level_index.functionOffset + entry.funcOffset; + if (entry.encodingIndex < header.commonEncodingsArrayCount) { + if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } else { + const local_index = try std.math.sub( + u8, + entry.encodingIndex, + std.math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, + ); + const local_encodings = mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. 
page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + } + }, + else => return error.InvalidUnwindInfo, + }; + + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context = abi.RegisterContext{ + .eh_frame = false, + .is_macho = true, + }; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => blk: { + const regs: [5]u3 = .{ + encoding.value.x86_64.frame.reg0, + encoding.value.x86_64.frame.reg1, + encoding.value.x86_64.frame.reg2, + encoding.value.x86_64.frame.reg3, + encoding.value.x86_64.frame.reg4, + }; + + const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); + var max_reg: usize = 0; + inline for (regs, 0..) |reg, i| { + if (reg > 0) max_reg = i; + } + + const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + // Verify the stack range we're about to read register values from + if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo; + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + for (regs, 0..) 
|reg, i| { + if (reg == 0) continue; + const addr = fp - frame_offset + i * @sizeOf(usize); + const reg_number = try compactUnwindToDwarfRegNumber(reg); + (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :blk new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => blk: { + const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; + const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) + @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) + else stack_size: { + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + module_base_address + + entry.function_offset + + encoding.value.x86_64.frameless.stack.indirect.sub_offset; + if (!context.isValidMemory(sub_offset_addr)) return error.InvalidUnwindInfo; + + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. 
+ const reg_count = encoding.value.x86_64.frameless.stack_reg_count; + const ip_ptr = if (reg_count > 0) reg_blk: { + var digits: [6]u3 = undefined; + var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; + var registers: [reg_numbers.len]u3 = undefined; + var used_indices = [_]bool{false} ** reg_numbers.len; + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) |used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + + registers[i] = reg_numbers[unused_index]; + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo; + for (0..reg_count) |i| { + const reg_number = try compactUnwindToDwarfRegNumber(registers[i]); + (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :reg_blk reg_addr; + } else sp + stack_size - @sizeOf(usize); + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo; + + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); + }, + }, + 
.aarch64 => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => blk: { + const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try abi.regValueNative(usize, context.thread_context, 30, reg_context)).*; + if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo; + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); + }, + .FRAME => blk: { + const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 16; + const ip_ptr = fp + @sizeOf(usize); + + const num_restored_pairs: usize = + @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + + @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); + const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); + + if (!context.isValidMemory(new_sp) or !context.isValidMemory(min_reg_addr)) return error.InvalidUnwindInfo; + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { + if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { + (try abi.regValueNative(usize, context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try abi.regValueNative(usize, context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) 
|field, i| { + if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + @memcpy( + try abi.regBytes(context.thread_context, 64 + 8 + i, context.reg_context), + mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + @memcpy( + try abi.regBytes(context.thread_context, 64 + 9 + i, context.reg_context), + mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + break :blk new_ip; + }, + }, + else => return error.UnimplementedArch, + }; + + context.pc = abi.stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; +} + +fn unwindFrameMachODwarf(context: *UnwindContext, eh_frame: []const u8, fde_offset: usize) !usize { + var di = DwarfInfo{ + .endian = builtin.cpu.arch.endian(), + .is_macho = true, + }; + defer di.deinit(context.allocator); + + di.sections[@intFromEnum(DwarfSection.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + return di.unwindFrame(context, fde_offset); +} + pub const UnwindContext = struct { allocator: mem.Allocator, cfa: ?usize, @@ -2166,7 +2525,7 @@ pub const EntryHeader = struct { .is_64 = is_64, .type = if (id == cie_id) .{ .cie = {} } else .{ .fde = switch (dwarf_section) { - .eh_frame => stream.pos - id_len - id, + .eh_frame => try std.math.sub(u64, stream.pos - id_len, id), .debug_frame => id, else => unreachable, }, diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 3c18e1f23c33..d70e3448bd78 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -2125,360 +2125,3 @@ pub const 
CompactUnwindEncoding = packed struct(u32) { has_lsda: u1, start: u1, }; - -/// Returns the DWARF register number for an x86_64 register number found in compact unwind info -fn dwarfRegNumber(unwind_reg_number: u3) !u8 { - return switch (unwind_reg_number) { - 1 => 3, // RBX - 2 => 12, // R12 - 3 => 13, // R13 - 4 => 14, // R14 - 5 => 15, // R15 - 6 => 6, // RBP - else => error.InvalidUnwindRegisterNumber, - }; -} - -const dwarf = std.dwarf; -const abi = dwarf.abi; - -pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, eh_frame: ?[]const u8, module_base_address: usize) !usize { - const header = mem.bytesAsValue( - unwind_info_section_header, - unwind_info[0..@sizeOf(unwind_info_section_header)], - ); - const indices = mem.bytesAsSlice( - unwind_info_section_header_index_entry, - unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(unwind_info_section_header_index_entry)], - ); - if (indices.len == 0) return error.MissingUnwindInfo; - - const mapped_pc = context.pc - module_base_address; - const second_level_index = blk: { - var left: usize = 0; - var len: usize = indices.len; - - while (len > 1) { - const mid = left + len / 2; - const offset = indices[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - // Last index is a sentinel containing the highest address as its functionOffset - if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; - break :blk &indices[left]; - }; - - const common_encodings = mem.bytesAsSlice( - compact_unwind_encoding_t, - unwind_info[header.commonEncodingsArraySectionOffset..][0 .. 
header.commonEncodingsArrayCount * @sizeOf(compact_unwind_encoding_t)], - ); - - const start_offset = second_level_index.secondLevelPagesSectionOffset; - const kind = mem.bytesAsValue( - UNWIND_SECOND_LEVEL, - unwind_info[start_offset..][0..@sizeOf(UNWIND_SECOND_LEVEL)], - ); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => blk: { - const page_header = mem.bytesAsValue( - unwind_info_regular_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(unwind_info_regular_second_level_page_header)], - ); - - const entries = mem.bytesAsSlice( - unwind_info_regular_second_level_entry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(unwind_info_regular_second_level_entry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = entries[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - break :blk .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => blk: { - const page_header = mem.bytesAsValue( - unwind_info_compressed_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(unwind_info_compressed_second_level_page_header)], - ); - - const entries = mem.bytesAsSlice( - UnwindInfoCompressedEntry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(UnwindInfoCompressedEntry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = second_level_index.functionOffset + entries[mid].funcOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - const entry = entries[left]; - const function_offset = second_level_index.functionOffset + entry.funcOffset; - if (entry.encodingIndex < header.commonEncodingsArrayCount) { - if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } else { - const local_index = try std.math.sub( - u8, - entry.encodingIndex, - std.math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, - ); - const local_encodings = mem.bytesAsSlice( - compact_unwind_encoding_t, - unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. 
page_header.encodingsCount * @sizeOf(compact_unwind_encoding_t)], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - } - }, - else => return error.InvalidUnwindInfo, - }; - - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context = dwarf.abi.RegisterContext{ - .eh_frame = false, - .is_macho = true, - }; - - const encoding: CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => blk: { - const regs: [5]u3 = .{ - encoding.value.x86_64.frame.reg0, - encoding.value.x86_64.frame.reg1, - encoding.value.x86_64.frame.reg2, - encoding.value.x86_64.frame.reg3, - encoding.value.x86_64.frame.reg4, - }; - - const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); - var max_reg: usize = 0; - inline for (regs, 0..) |reg, i| { - if (reg > 0) max_reg = i; - } - - const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - // Verify the stack range we're about to read register values from - if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo; - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - for (regs, 0..) 
|reg, i| { - if (reg == 0) continue; - const addr = fp - frame_offset + i * @sizeOf(usize); - const reg_number = try dwarfRegNumber(reg); - (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :blk new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => blk: { - const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; - const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) - @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) - else stack_size: { - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. - const sub_offset_addr = - module_base_address + - entry.function_offset + - encoding.value.x86_64.frameless.stack.indirect.sub_offset; - if (!context.isValidMemory(sub_offset_addr)) return error.InvalidUnwindInfo; - - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. 
- const reg_count = encoding.value.x86_64.frameless.stack_reg_count; - const ip_ptr = if (reg_count > 0) reg_blk: { - var digits: [6]u3 = undefined; - var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; - var registers: [reg_numbers.len]u3 = undefined; - var used_indices = [_]bool{false} ** reg_numbers.len; - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) |used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - - registers[i] = reg_numbers[unused_index]; - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo; - for (0..reg_count) |i| { - const reg_number = try dwarfRegNumber(registers[i]); - (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :reg_blk reg_addr; - } else sp + stack_size - @sizeOf(usize); - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo; - - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - break :blk new_ip; - }, - .DWARF => { - return unwindFrameDwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); - }, - }, - .aarch64 => switch 
(encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => blk: { - const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try abi.regValueNative(usize, context.thread_context, 30, reg_context)).*; - if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo; - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - break :blk new_ip; - }, - .DWARF => { - return unwindFrameDwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); - }, - .FRAME => blk: { - const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 16; - const ip_ptr = fp + @sizeOf(usize); - - const num_restored_pairs: usize = - @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + - @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); - const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); - - if (!context.isValidMemory(new_sp) or !context.isValidMemory(min_reg_addr)) return error.InvalidUnwindInfo; - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { - (try abi.regValueNative(usize, context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try abi.regValueNative(usize, context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) 
|field, i| { - if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - @memcpy( - try abi.regBytes(context.thread_context, 64 + 8 + i, context.reg_context), - mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - @memcpy( - try abi.regBytes(context.thread_context, 64 + 9 + i, context.reg_context), - mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - break :blk new_ip; - }, - }, - else => return error.UnimplementedArch, - }; - - context.pc = dwarf.abi.stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; -} - -fn unwindFrameDwarf(context: *dwarf.UnwindContext, eh_frame: []const u8, fde_offset: usize) !usize { - var di = dwarf.DwarfInfo{ - .endian = builtin.cpu.arch.endian(), - .is_macho = true, - }; - defer di.deinit(context.allocator); - - di.sections[@intFromEnum(dwarf.DwarfSection.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; - - return di.unwindFrame(context, fde_offset); -}