From ea9453e1b6fe5969080a219c47b27884960a1902 Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Wed, 10 Apr 2024 16:43:22 -0400 Subject: [PATCH] support darwin/arm64 This CL adds rudimentary support for reading MachO files containing 64-bit ARM code. Static symbols only; no relocations yet. --- arch/arch.go | 1 + asm/arm.go | 84 ++++++++++++++++ asm/asm.go | 2 + obj/elf.go | 4 +- obj/macho.go | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++ obj/obj.go | 10 +- 6 files changed, 366 insertions(+), 7 deletions(-) create mode 100644 asm/arm.go create mode 100644 obj/macho.go diff --git a/arch/arch.go b/arch/arch.go index a7c2415..8bf7895 100644 --- a/arch/arch.go +++ b/arch/arch.go @@ -23,6 +23,7 @@ type Arch struct { var ( AMD64 = &Arch{Layout{0, 8}, "amd64", 0} + ARM64 = &Arch{Layout{0, 8}, "arm64", 0} I386 = &Arch{Layout{0, 4}, "386", 0} ) diff --git a/asm/arm.go b/asm/arm.go new file mode 100644 index 0000000..444720c --- /dev/null +++ b/asm/arm.go @@ -0,0 +1,84 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asm + +import ( + "io" + + "golang.org/x/arch/arm64/arm64asm" +) + +func disasmARM64(text []byte, pc uint64) Seq { + var out arm64Seq + for len(text) > 0 { + inst, err := arm64asm.Decode(text) + if err != nil || inst.Op == 0 { + inst = arm64asm.Inst{} + } + out = append(out, arm64Inst{inst, pc}) + + const size = 4 + text = text[size:] + pc += uint64(size) + } + return out + +} + +type arm64Seq []arm64Inst + +func (s arm64Seq) Len() int { + return len(s) +} + +func (s arm64Seq) Get(i int) Inst { + return &s[i] +} + +type arm64Inst struct { + arm64asm.Inst + pc uint64 +} + +func (i *arm64Inst) GoSyntax(symname func(uint64) (string, uint64)) string { + if i.Op == 0 { + return "?" + } + + var text io.ReaderAt = nil // TODO: populate + return arm64asm.GoSyntax(i.Inst, i.pc, symname, text) +} + +func (i *arm64Inst) PC() uint64 { + return i.pc +} + +func (i *arm64Inst) Len() int { return 4 } + +func (i *arm64Inst) Control() Control { + var c Control + c.TargetPC = ^uint64(0) + + // Handle explicit control flow instructions. + switch i.Op { + case arm64asm.B: + c.Type = ControlJump + case arm64asm.BL, arm64asm.SYSL, arm64asm.SYS: + c.Type = ControlCall + case arm64asm.RET, arm64asm.ERET: + c.Type = ControlRet + } + + for _, arg := range i.Args { + switch arg := arg.(type) { + case arm64asm.Cond: + c.Conditional = true + case arm64asm.PCRel: + c.TargetPC = uint64(int64(i.pc) + int64(arg)) + } + } + + return c +} diff --git a/asm/asm.go b/asm/asm.go index 5210b3f..284541d 100644 --- a/asm/asm.go +++ b/asm/asm.go @@ -20,6 +20,8 @@ func Disasm(arch *arch.Arch, text []byte, pc uint64) (Seq, error) { return disasmX86(text, pc, 64), nil case "386": return disasmX86(text, pc, 32), nil + case "arm64": + return disasmARM64(text, pc), nil } return nil, fmt.Errorf("unsupported assembly architecture: %s", arch) } diff --git a/obj/elf.go b/obj/elf.go index 60d5640..1ce3c27 100644 --- a/obj/elf.go +++ b/obj/elf.go @@ -67,7 +67,7 @@ var elfArches = map[elf.Machine]elfArch{ elf.EM_386: {arch.I386, rcElf386}, } -func openElf(r io.ReaderAt) (bool, File, error) { +func openELF(r io.ReaderAt) (bool, File, error) { // Is this an ELF file? var magic [4]uint8 if _, err := r.ReadAt(magic[0:], 0); err != nil { @@ -422,7 +422,7 @@ func (f *elfFile) sectionBytesUncached(s *elfSection) (data []byte, mmaped []byt // avoid bloating the Go heap. size := roundUp2(es.Size, f.pageSize) if size > 0 { - data, err = syscall.Mmap(-1, 0, int(size), syscall.PROT_READ, syscall.MAP_SHARED|syscall.MAP_ANONYMOUS) + data, err = syscall.Mmap(-1, 0, int(size), syscall.PROT_READ, syscall.MAP_SHARED|syscall.MAP_ANON) if err == nil { if testMmapSection != nil { testMmapSection(true) diff --git a/obj/macho.go b/obj/macho.go new file mode 100644 index 0000000..8d261b4 --- /dev/null +++ b/obj/macho.go @@ -0,0 +1,272 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "debug/dwarf" + "debug/macho" + "fmt" + "io" + "log" + "sort" + "sync" + "syscall" + + "github.com/aclements/go-obj/arch" +) + +// TODO: support relocations. + +type machoFile struct { + f *macho.File + arch *arch.Arch + sections []*machoSection + symbols []Sym +} + +func (f *machoFile) Sym(i SymID) Sym { return f.symbols[i] } +func (f *machoFile) NumSyms() SymID { return SymID(len(f.symbols)) } + +func openMachO(r io.ReaderAt) (bool, File, error) { + // Is this a MachO file? + var magic [4]uint8 // MachO 64 = 0xFEEDFACF (LE) + if _, err := r.ReadAt(magic[0:], 0); err != nil { + return false, nil, err // file too short + } + if magic[3] != '\xFE' || magic[2] != '\xED' || magic[1] != '\xFA' || magic[0] != '\xCF' { + return false, nil, nil // not MachO + } + + // All errors after this point should return (true, _, err). + + // Parse MachO. + ff, err := macho.NewFile(r) + if err != nil { + return true, nil, err + } + f := &machoFile{f: ff, arch: arch.ARM64} + + // Read section table. + for rawID, machoSect := range ff.Sections { + s := &Section{ + File: f, + Name: machoSect.Name, + ID: SectionID(len(f.sections)), // 0-based + RawID: rawID, // 0-based + Addr: machoSect.Addr, + Size: machoSect.Size, + } + + ms := &machoSection{Section: s, macho: machoSect} + f.sections = append(f.sections, ms) + } + + // Read symbol table. + if ff.Symtab != nil { + const stabTypeMask = 0xE0 + + // Build list of symbols, sort by Addr, + // compute sizes by subtracting each previous Addr. + var addrs []uint64 + for _, s := range f.f.Symtab.Syms { + if s.Type&stabTypeMask != 0 { + continue // Skip stab debug info. + } + addrs = append(addrs, s.Value) + } + sort.Slice(addrs, func(i, j int) bool { return addrs[i] < addrs[j] }) + + var syms []Sym + for _, s := range f.f.Symtab.Syms { + if s.Type&stabTypeMask != 0 { + continue // Skip stab debug info. + } + + sym := Sym{ + Name: s.Name, + Value: s.Value, + Kind: SymUnknown, // (initially) + } + + i := sort.Search(len(addrs), func(x int) bool { return addrs[x] > s.Value }) + if i < len(addrs) { + sym.Size = uint64(addrs[i] - s.Value) + } + + if s.Sect == 0 { + sym.Kind = SymUndef + } else if int(s.Sect) <= len(f.f.Sections) { + sect := f.f.Sections[s.Sect-1] + sym.Section = f.sections[s.Sect-1].Section + switch sect.Seg { + case "__TEXT": + if sect.Name == "__rodata" { + sym.Kind = SymData // (nm: R) + } else { + sym.Kind = SymText + } + + case "__DATA": + // section names: + // __bss (nm: B) + // __noptrbss (nm: B) + // __data + // __noptrdata + // __go_buildinfo (nm: R) + sym.Kind = SymData + + case "__DATA_CONST": + // section names: + // __rodata + // __gopclntab + // __gosymtab + // __itablink + // __typelink + sym.Kind = SymData + } + if sym.Kind == SymUnknown { + log.Printf("unknown symbol %s (Section.{Seg=%s,Name=%s})", + s.Name, sect.Seg, sect.Name) + } + } + syms = append(syms, sym) + } + f.symbols = syms + } + + return true, f, nil +} + +func (f *machoFile) Close() { + // Release mmaps. + for _, s := range f.sections { + if s.mmapped != nil { + mmapped := s.mmapped + s.data = nil + s.mmapped = nil + syscall.Munmap(mmapped) + } + } +} + +func (f *machoFile) Info() FileInfo { + return FileInfo{Arch: f.arch} +} + +func (f *machoFile) AsDebugDwarf() (*dwarf.Data, error) { + return f.f.DWARF() +} + +// Assert that machoFile implements AsDebugDwarf. +var _ AsDebugDwarf = (*machoFile)(nil) + +// AsDebugMacho is implemented by File types that can return an underlying +// *debug/macho.File for format-specific access. AsDebugMacho may return +// nil, so the caller must both check that the type implements +// AsDebugMacho and check the result of calling AsDebugMacho. +type AsDebugMacho interface { + File + AsDebugMacho() *macho.File +} + +func (f *machoFile) AsDebugMacho() *macho.File { + return f.f +} + +// Assert that machoFile implements AsDebugMacho. +var _ AsDebugMacho = (*machoFile)(nil) + +type machoSection struct { + // These fields are populated on loading. + + *Section + + macho *macho.Section + + dataOnce sync.Once + data []byte + dataErr error + mmapped []byte // if non-nil, original mmap of this section +} + +func (s *machoSection) String() string { + return fmt.Sprintf("%s [%d]", s.Name, s.RawID) +} + +func (f *machoFile) Sections() []*Section { + out := make([]*Section, len(f.sections)) + for i, ms := range f.sections { + out[i] = ms.Section + } + return out +} + +func (f *machoFile) Section(i SectionID) *Section { + return f.sections[i].Section +} + +func (f *machoFile) sectionData(s *Section, addr, size uint64, d *Data) (*Data, error) { + err := f.machoSectionData(f.sections[s.ID], addr, size, d) + if err != nil { + return nil, err + } + return d, nil +} + +func (f *machoFile) machoSectionData(s *machoSection, addr, size uint64, d *Data) error { + ms := s.macho + + // Validate requested range. + if addr+size < addr { + panic("address overflow") + } + if addr < ms.Addr || addr+size > ms.Addr+ms.Size { + panic(fmt.Sprintf("requested data [0x%x, 0x%x) is outside section [0x%x, 0x%x)", addr, addr+size, ms.Addr, ms.Addr+ms.Size)) + } + + // Read the section. + bytes, err := f.sectionBytes(s) + if err != nil { + return s.dataErr + } + + // Construct data. + *d = Data{ + Addr: addr, + B: bytes[addr-ms.Addr:][:size], + Layout: f.arch.Layout, + } + + return nil +} + +func (f *machoFile) sectionBytes(s *machoSection) (data []byte, err error) { + s.dataOnce.Do(func() { + s.data, s.mmapped, s.dataErr = f.sectionBytesUncached(s) + }) + return s.data, s.dataErr +} + +func (f *machoFile) sectionBytesUncached(s *machoSection) (data []byte, mmapped []byte, err error) { + // TODO: do the same mmap optimizations as ELF. + ms := s.macho + data, err = io.ReadAll(ms.Open()) + if err != nil { + return nil, nil, err + } + if uint64(len(data)) != ms.Size { + log.Fatalf("reading section got %d bytes, want %d", len(data), ms.Size) + } + return data, nil, nil +} + +func (f *machoFile) ResolveAddr(addr uint64) *Section { + for _, ms := range f.sections { + if ms.Addr <= addr && addr-ms.Addr < ms.Size { + return ms.Section + } + } + return nil +} diff --git a/obj/obj.go b/obj/obj.go index 185fcfe..8f614b4 100644 --- a/obj/obj.go +++ b/obj/obj.go @@ -18,13 +18,13 @@ import ( // Open attempts to open r as a known object file format. func Open(r io.ReaderAt) (File, error) { - if isElf, f, err := openElf(r); isElf { + if isELF, f, err := openELF(r); isELF { return f, err } - // if isPE, f, err := openPE(r); isPE { - // return f, err - // } - return nil, fmt.Errorf("unrecognized object file format") + if isMachO, f, err := openMachO(r); isMachO { + return f, err + } + return nil, fmt.Errorf("unrecognized object file format (must be ELF or MachO, no PE yet)") } // A File represents an object file.