blob: d2ed9f84cdd486bd7a51dd4a959d5333aaed1d6f [file] [log] [blame]
From 6c9f6812523a706c11a12e6cb4119b0cf67bbb21 Mon Sep 17 00:00:00 2001
From: Jez Ng <jezng@fb.com>
Date: Sun, 31 Jul 2022 20:16:08 -0400
Subject: [PATCH] [lld-macho] Support EH frame pointer encodings that use
sdata4
Previously we only supported using the system pointer size (aka the
`absptr` encoding) because `llvm-mc`'s CFI directives always generate EH
frames with that encoding. But libffi uses 4-byte-encoded, hand-rolled
EH frames, so this patch adds support for it.
Fixes #56576.
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D130804
---
lld/MachO/EhFrame.cpp | 10 ++--
lld/MachO/EhFrame.h | 8 ++--
lld/MachO/InputFiles.cpp | 44 ++++++++++++------
lld/test/MachO/eh-frame-sdata4.s | 80 ++++++++++++++++++++++++++++++++
4 files changed, 117 insertions(+), 25 deletions(-)
create mode 100644 lld/test/MachO/eh-frame-sdata4.s
diff --git a/lld/MachO/EhFrame.cpp b/lld/MachO/EhFrame.cpp
index 50d8accc0596..55a85f316cdd 100644
--- a/lld/MachO/EhFrame.cpp
+++ b/lld/MachO/EhFrame.cpp
@@ -58,17 +58,17 @@ uint32_t EhReader::readU32(size_t *off) const {
return v;
}
-uint64_t EhReader::readPointer(size_t *off) const {
- if (*off + wordSize > data.size())
+uint64_t EhReader::readPointer(size_t *off, uint8_t size) const {
+ if (*off + size > data.size())
failOn(*off, "unexpected end of CIE/FDE");
uint64_t v;
- if (wordSize == 8)
+ if (size == 8)
v = read64le(data.data() + *off);
else {
- assert(wordSize == 4);
+ assert(size == 4);
v = read32le(data.data() + *off);
}
- *off += wordSize;
+ *off += size;
return v;
}
diff --git a/lld/MachO/EhFrame.h b/lld/MachO/EhFrame.h
index c8269b941bcf..609a3bb8b1fe 100644
--- a/lld/MachO/EhFrame.h
+++ b/lld/MachO/EhFrame.h
@@ -55,9 +55,8 @@ namespace macho {
class EhReader {
public:
- EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
- size_t wordSize)
- : file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
+ EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
+ : file(file), data(data), dataOff(dataOff) {}
size_t size() const { return data.size(); }
// Read and validate the length field.
uint64_t readLength(size_t *off) const;
@@ -65,7 +64,7 @@ public:
void skipValidLength(size_t *off) const;
uint8_t readByte(size_t *off) const;
uint32_t readU32(size_t *off) const;
- uint64_t readPointer(size_t *off) const;
+ uint64_t readPointer(size_t *off, uint8_t size) const;
StringRef readString(size_t *off) const;
void skipLeb128(size_t *off) const;
void failOn(size_t errOff, const Twine &msg) const;
@@ -76,7 +75,6 @@ private:
// The offset of the data array within its section. Used only for error
// reporting.
const size_t dataOff;
- size_t wordSize;
};
// The EH frame format, when emitted by llvm-mc, consists of a number of
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 5bd15d6baac8..8a414691ea6c 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -385,7 +385,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
}
void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
- EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
+ EhReader reader(this, data, /*dataOff=*/0);
size_t off = 0;
while (off < reader.size()) {
uint64_t frameOff = off;
@@ -1293,10 +1293,25 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
struct CIE {
macho::Symbol *personalitySymbol = nullptr;
- bool fdesHaveLsda = false;
bool fdesHaveAug = false;
+ uint8_t lsdaPtrSize = 0; // 0 => no LSDA
+ uint8_t funcPtrSize = 0;
};
+static uint8_t pointerEncodingToSize(uint8_t enc) {
+ switch (enc & 0xf) {
+ case dwarf::DW_EH_PE_absptr:
+ return target->wordSize;
+ case dwarf::DW_EH_PE_sdata4:
+ return 4;
+ case dwarf::DW_EH_PE_sdata8:
+ // ld64 doesn't actually support sdata8, but this seems simple enough...
+ return 8;
+ default:
+ return 0;
+ };
+}
+
static CIE parseCIE(const InputSection *isec, const EhReader &reader,
size_t off) {
// Handling the full generality of possible DWARF encodings would be a major
@@ -1304,8 +1319,6 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
// DWARF and handle just that.
constexpr uint8_t expectedPersonalityEnc =
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
- constexpr uint8_t expectedPointerEnc =
- dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
CIE cie;
uint8_t version = reader.readByte(&off);
@@ -1332,16 +1345,17 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
break;
}
case 'L': {
- cie.fdesHaveLsda = true;
uint8_t lsdaEnc = reader.readByte(&off);
- if (lsdaEnc != expectedPointerEnc)
+ cie.lsdaPtrSize = pointerEncodingToSize(lsdaEnc);
+ if (cie.lsdaPtrSize == 0)
reader.failOn(off, "unexpected LSDA encoding 0x" +
Twine::utohexstr(lsdaEnc));
break;
}
case 'R': {
uint8_t pointerEnc = reader.readByte(&off);
- if (pointerEnc != expectedPointerEnc)
+ cie.funcPtrSize = pointerEncodingToSize(pointerEnc);
+ if (cie.funcPtrSize == 0 || !(pointerEnc & dwarf::DW_EH_PE_pcrel))
reader.failOn(off, "unexpected pointer encoding 0x" +
Twine::utohexstr(pointerEnc));
break;
@@ -1471,7 +1485,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
else if (isec->symbols[0]->value != 0)
fatal("found symbol at unexpected offset in __eh_frame");
- EhReader reader(this, isec->data, subsec.offset, target->wordSize);
+ EhReader reader(this, isec->data, subsec.offset);
size_t dataOff = 0; // Offset from the start of the EH frame.
reader.skipValidLength(&dataOff); // readLength() already validated this.
// cieOffOff is the offset from the start of the EH frame to the cieOff
@@ -1510,20 +1524,20 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
continue;
}
+ assert(cieMap.count(cieIsec));
+ const CIE &cie = cieMap[cieIsec];
// Offset of the function address within the EH frame.
const size_t funcAddrOff = dataOff;
- uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
- isecOff + funcAddrOff;
- uint32_t funcLength = reader.readPointer(&dataOff);
+ uint64_t funcAddr = reader.readPointer(&dataOff, cie.funcPtrSize) +
+ ehFrameSection.addr + isecOff + funcAddrOff;
+ uint32_t funcLength = reader.readPointer(&dataOff, cie.funcPtrSize);
size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
- assert(cieMap.count(cieIsec));
- const CIE &cie = cieMap[cieIsec];
Optional<uint64_t> lsdaAddrOpt;
if (cie.fdesHaveAug) {
reader.skipLeb128(&dataOff);
lsdaAddrOff = dataOff;
- if (cie.fdesHaveLsda) {
- uint64_t lsdaOff = reader.readPointer(&dataOff);
+ if (cie.lsdaPtrSize != 0) {
+ uint64_t lsdaOff = reader.readPointer(&dataOff, cie.lsdaPtrSize);
if (lsdaOff != 0) // FIXME possible to test this?
lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
}
diff --git a/lld/test/MachO/eh-frame-sdata4.s b/lld/test/MachO/eh-frame-sdata4.s
new file mode 100644
index 000000000000..20eb6cfd6e0e
--- /dev/null
+++ b/lld/test/MachO/eh-frame-sdata4.s
@@ -0,0 +1,80 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+## Test that we correctly handle the sdata4 DWARF pointer encoding. llvm-mc's
+## CFI directives always generate EH frames using the absptr (i.e. system
+## pointer size) encoding, but it is possible to hand-roll your own EH frames
+## that use the sdata4 encoding. For instance, libffi does this.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/sdata4.s -o %t/sdata4.o
+# RUN: %lld -lSystem %t/sdata4.o -o %t/sdata4
+# RUN: llvm-objdump --macho --syms --dwarf=frames %t/sdata4 | FileCheck %s
+
+# CHECK: SYMBOL TABLE:
+# CHECK: [[#%.16x,MAIN:]] g F __TEXT,__text _main
+
+# CHECK: .eh_frame contents:
+# CHECK: 00000000 00000010 00000000 CIE
+# CHECK: Format: DWARF32
+# CHECK: Version: 1
+# CHECK: Augmentation: "zR"
+# CHECK: Code alignment factor: 1
+# CHECK: Data alignment factor: 1
+# CHECK: Return address column: 1
+# CHECK: Augmentation data: 1B
+# CHECK: DW_CFA_def_cfa: reg7 +8
+# CHECK: CFA=reg7+8
+
+# CHECK: 00000014 00000010 00000018 FDE cie=00000000 pc=[[#%x,MAIN]]...[[#%x,MAIN+1]]
+# CHECK: Format: DWARF32
+# CHECK: DW_CFA_GNU_args_size: +16
+# CHECK: DW_CFA_nop:
+# CHECK: 0x[[#%x,MAIN]]: CFA=reg7+8
+
+#--- sdata4.s
+.globl _main
+_main:
+ retq
+LmainEnd:
+
+.balign 4
+.section __TEXT,__eh_frame
+# Although we don't reference this EhFrame symbol directly, we must have at
+# least one non-local symbol in this section, otherwise llvm-mc generates bogus
+# subtractor relocations.
+EhFrame:
+LCieHdr:
+ .long LCieEnd - LCieStart
+LCieStart:
+ .long 0 # CIE ID
+ .byte 1 # CIE version
+ .ascii "zR\0"
+ .byte 1 # Code alignment
+ .byte 1 # Data alignment
+ .byte 1 # RA column
+ .byte 1 # Augmentation size
+ .byte 0x1b # FDE pointer encoding (pcrel | sdata4)
+ .byte 0xc, 7, 8 # DW_CFA_def_cfa reg7 +8
+ .balign 4
+LCieEnd:
+
+LFdeHdr:
+ .long LFdeEnd - LFdeStart
+LFdeStart:
+ .long LFdeStart - LCieHdr
+ # The next two fields are longs instead of quads because of the sdata4
+ # encoding.
+ .long _main - . # Function address
+ .long LmainEnd - _main # Function length
+ .byte 0
+ ## Insert DW_CFA_GNU_args_size to prevent ld64 from creating a compact unwind
+ ## entry to replace this FDE. Makes it easier for us to cross-check behavior
+ ## across the two linkers (LLD never bothers trying to synthesize compact
+ ## unwind if it is not already present).
+ .byte 0x2e, 0x10 # DW_CFA_GNU_args_size
+ .balign 4
+LFdeEnd:
+
+ .long 0 # terminator
+
+.subsections_via_symbols
--
2.37.3.998.g577e59143f-goog