blob: c7c70df3181e9671b15a8aaab7e0ec44c416b03c [file] [log] [blame]
commit 5d9f419a2e01c437a35899952d3342cc96a7d9e1
Author: Fangrui Song <maskray@google.com>
Date: Fri Sep 6 15:57:24 2019 +0000
Revert "Revert r370635, it caused PR43241."
This reverts commit 50d2dca22b3b05d0ee4883b0cbf93d7d15f241fc.
llvm-svn: 371215
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 8996386e99a..36cda3a1e3d 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1901,6 +1901,26 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
markLive<ELFT>();
demoteSharedSymbols();
mergeSections();
+
+ // Make copies of any input sections that need to be copied into each
+ // partition.
+ copySectionsIntoPartitions();
+
+ // Create synthesized sections such as .got and .plt. This is called before
+ // processSectionCommands() so that they can be placed by SECTIONS commands.
+ createSyntheticSections<ELFT>();
+
+ // Some input sections that are used for exception handling need to be moved
+ // into synthetic sections. Do that now so that they aren't assigned to
+ // output sections in the usual way.
+ if (!config->relocatable)
+ combineEhSections();
+
+ // Create output sections described by SECTIONS commands.
+ script->processSectionCommands();
+
+ // Two input sections with different output sections should not be folded.
+ // ICF runs after processSectionCommands() so that we know the output sections.
if (config->icf != ICFLevel::None) {
findKeepUniqueSections<ELFT>(args);
doIcf<ELFT>();
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 8b01d06b024..a780ee12448 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -74,6 +74,8 @@
#include "ICF.h"
#include "Config.h"
+#include "LinkerScript.h"
+#include "OutputSections.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
@@ -304,10 +306,8 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
return false;
// If two sections have different output sections, we cannot merge them.
- // FIXME: This doesn't do the right thing in the case where there is a linker
- // script. We probably need to move output section assignment before ICF to
- // get the correct behaviour here.
- if (getOutputSectionName(a) != getOutputSectionName(b))
+ if (getOutputSectionName(a) != getOutputSectionName(b) ||
+ a->getParent() != b->getParent())
return false;
if (a->areRelocsRela)
@@ -499,6 +499,15 @@ template <class ELFT> void ICF<ELFT>::run() {
isec->markDead();
}
});
+
+ // InputSectionDescription::sections is populated by processSectionCommands().
+ // ICF may fold some input sections assigned to output sections. Remove them.
+ for (BaseCommand *base : script->sectionCommands)
+ if (auto *sec = dyn_cast<OutputSection>(base))
+ for (BaseCommand *sub_base : sec->sectionCommands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(sub_base))
+ llvm::erase_if(isd->sections,
+ [](InputSection *isec) { return !isec->isLive(); });
}
// ICF entry point function.
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 2fc9b0b091d..dbf705dc7f0 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -48,24 +48,22 @@ using namespace lld::elf;
LinkerScript *elf::script;
-static uint64_t getOutputSectionVA(SectionBase *inputSec, StringRef loc) {
- if (OutputSection *os = inputSec->getOutputSection())
- return os->addr;
- error(loc + ": unable to evaluate expression: input section " +
- inputSec->name + " has no output section assigned");
- return 0;
+static uint64_t getOutputSectionVA(SectionBase *sec) {
+ OutputSection *os = sec->getOutputSection();
+ assert(os && "input section has no output section assigned");
+ return os ? os->addr : 0;
}
uint64_t ExprValue::getValue() const {
if (sec)
- return alignTo(sec->getOffset(val) + getOutputSectionVA(sec, loc),
+ return alignTo(sec->getOffset(val) + getOutputSectionVA(sec),
alignment);
return alignTo(val, alignment);
}
uint64_t ExprValue::getSecAddr() const {
if (sec)
- return sec->getOffset(0) + getOutputSectionVA(sec, loc);
+ return sec->getOffset(0) + getOutputSectionVA(sec);
return 0;
}
@@ -73,7 +71,7 @@ uint64_t ExprValue::getSectionOffset() const {
// If the alignment is trivial, we don't have to compute the full
// value to know the offset. This allows this function to succeed in
// cases where the output section is not yet known.
- if (alignment == 1 && (!sec || !sec->getOutputSection()))
+ if (alignment == 1 && !sec)
return val;
return getValue() - getSecAddr();
}
@@ -157,8 +155,8 @@ static bool shouldDefineSym(SymbolAssignment *cmd) {
return false;
}
-// This function is called from processSectionCommands,
-// while we are fixing the output section layout.
+// Called by processSymbolAssignments() to assign definitions to
+// linker-script-defined symbols.
void LinkerScript::addSymbol(SymbolAssignment *cmd) {
if (!shouldDefineSym(cmd))
return;
@@ -478,36 +476,10 @@ LinkerScript::createInputSectionList(OutputSection &outCmd) {
return ret;
}
+// Create output sections described by SECTIONS commands.
void LinkerScript::processSectionCommands() {
- // A symbol can be assigned before any section is mentioned in the linker
- // script. In an DSO, the symbol values are addresses, so the only important
- // section values are:
- // * SHN_UNDEF
- // * SHN_ABS
- // * Any value meaning a regular section.
- // To handle that, create a dummy aether section that fills the void before
- // the linker scripts switches to another section. It has an index of one
- // which will map to whatever the first actual section is.
- aether = make<OutputSection>("", 0, SHF_ALLOC);
- aether->sectionIndex = 1;
-
- // Ctx captures the local AddressState and makes it accessible deliberately.
- // This is needed as there are some cases where we cannot just
- // thread the current state through to a lambda function created by the
- // script parser.
- auto deleter = std::make_unique<AddressState>();
- ctx = deleter.get();
- ctx->outSec = aether;
-
size_t i = 0;
- // Add input sections to output sections.
for (BaseCommand *base : sectionCommands) {
- // Handle symbol assignments outside of any output section.
- if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
- addSymbol(cmd);
- continue;
- }
-
if (auto *sec = dyn_cast<OutputSection>(base)) {
std::vector<InputSection *> v = createInputSectionList(*sec);
@@ -533,12 +505,6 @@ void LinkerScript::processSectionCommands() {
continue;
}
- // A directive may contain symbol definitions like this:
- // ".foo : { ...; bar = .; }". Handle them.
- for (BaseCommand *base : sec->sectionCommands)
- if (auto *outCmd = dyn_cast<SymbolAssignment>(base))
- addSymbol(outCmd);
-
// Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
// is given, input sections are aligned to that value, whether the
// given value is larger or smaller than the original section alignment.
@@ -548,7 +514,7 @@ void LinkerScript::processSectionCommands() {
s->alignment = subalign;
}
- // Add input sections to an output section.
+ // Some input sections may be removed from the list after ICF.
for (InputSection *s : v)
sec->addSection(s);
@@ -559,6 +525,32 @@ void LinkerScript::processSectionCommands() {
sec->flags &= ~(uint64_t)SHF_ALLOC;
}
}
+}
+
+void LinkerScript::processSymbolAssignments() {
+ // Dot outside an output section still represents a relative address, whose
+ // sh_shndx should not be SHN_UNDEF or SHN_ABS. Create a dummy aether section
+ // that fills the void outside a section. It has an index of one, which is
+ // indistinguishable from any other regular section index.
+ aether = make<OutputSection>("", 0, SHF_ALLOC);
+ aether->sectionIndex = 1;
+
+ // ctx captures the local AddressState and makes it accessible deliberately.
+ // This is needed as there are some cases where we cannot just thread the
+ // current state through to a lambda function created by the script parser.
+ AddressState state;
+ ctx = &state;
+ ctx->outSec = aether;
+
+ for (BaseCommand *base : sectionCommands) {
+ if (auto *cmd = dyn_cast<SymbolAssignment>(base))
+ addSymbol(cmd);
+ else
+ for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands)
+ if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base))
+ addSymbol(cmd);
+ }
+
ctx = nullptr;
}
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 5607b313031..4ac9e2a3909 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -274,6 +274,7 @@ public:
const Defined *assignAddresses();
void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
void processSectionCommands();
+ void processSymbolAssignments();
void declareSymbols();
// Used to handle INSERT AFTER statements.
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index b9f6aee701d..5c6fcc8532c 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -152,7 +152,7 @@ static void removeEmptyPTLoad(std::vector<PhdrEntry *> &phdrs) {
});
}
-static void copySectionsIntoPartitions() {
+void elf::copySectionsIntoPartitions() {
std::vector<InputSectionBase *> newSections;
for (unsigned part = 2; part != partitions.size() + 1; ++part) {
for (InputSectionBase *s : inputSections) {
@@ -308,8 +308,7 @@ static OutputSection *findSection(StringRef name, unsigned partition = 1) {
return nullptr;
}
-// Initialize Out members.
-template <class ELFT> static void createSyntheticSections() {
+template <class ELFT> void elf::createSyntheticSections() {
// Initialize all pointers with NULL. This is needed because
// you can call lld::elf::main more than once as a library.
memset(&Out::first, 0, sizeof(Out));
@@ -535,24 +534,6 @@ template <class ELFT> static void createSyntheticSections() {
// The main function of the writer.
template <class ELFT> void Writer<ELFT>::run() {
- // Make copies of any input sections that need to be copied into each
- // partition.
- copySectionsIntoPartitions();
-
- // Create linker-synthesized sections such as .got or .plt.
- // Such sections are of type input section.
- createSyntheticSections<ELFT>();
-
- // Some input sections that are used for exception handling need to be moved
- // into synthetic sections. Do that now so that they aren't assigned to
- // output sections in the usual way.
- if (!config->relocatable)
- combineEhSections();
-
- // We want to process linker script commands. When SECTIONS command
- // is given we let it create sections.
- script->processSectionCommands();
-
// Linker scripts controls how input sections are assigned to output sections.
// Input sections that were not handled by scripts are called "orphans", and
// they are assigned to output sections by the default rule. Process that.
@@ -1737,8 +1718,14 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
symtab->forEachSymbol(
[](Symbol *s) { s->isPreemptible = computeIsPreemptible(*s); });
+ // Change values of linker-script-defined symbols from placeholders (assigned
+ // by declareSymbols) to actual definitions.
+ script->processSymbolAssignments();
+
// Scan relocations. This must be done after every symbol is declared so that
- // we can correctly decide if a dynamic relocation is needed.
+ // we can correctly decide if a dynamic relocation is needed. This is called
+ // after processSymbolAssignments() because it needs to know whether a
+ // linker-script-defined symbol is absolute.
if (!config->relocatable) {
forEachRelSec(scanRelocations<ELFT>);
reportUndefinedSymbols<ELFT>();
@@ -2731,6 +2718,11 @@ template <class ELFT> void Writer<ELFT>::writeBuildId() {
part.buildId->writeBuildId(buildId);
}
+template void elf::createSyntheticSections<ELF32LE>();
+template void elf::createSyntheticSections<ELF32BE>();
+template void elf::createSyntheticSections<ELF64LE>();
+template void elf::createSyntheticSections<ELF64BE>();
+
template void elf::writeResult<ELF32LE>();
template void elf::writeResult<ELF32BE>();
template void elf::writeResult<ELF64LE>();
diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h
index 18c0ccfa183..3698544d977 100644
--- a/lld/ELF/Writer.h
+++ b/lld/ELF/Writer.h
@@ -20,6 +20,8 @@ namespace elf {
class InputFile;
class OutputSection;
class InputSectionBase;
+void copySectionsIntoPartitions();
+template <class ELFT> void createSyntheticSections();
void combineEhSections();
template <class ELFT> void writeResult();
diff --git a/lld/test/ELF/linkerscript/early-assign-symbol.s b/lld/test/ELF/linkerscript/early-assign-symbol.s
index 5f611786366..7271204221a 100644
--- a/lld/test/ELF/linkerscript/early-assign-symbol.s
+++ b/lld/test/ELF/linkerscript/early-assign-symbol.s
@@ -1,12 +1,15 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
-# RUN: echo "SECTIONS { aaa = foo | 1; .text : { *(.text*) } }" > %t3.script
-# RUN: not ld.lld -o %t --script %t3.script %t.o 2>&1 | FileCheck %s
+## The definitions of symbol assignments may reference other symbols.
+## Test we can handle them.
-# CHECK: error: {{.*}}.script:1: unable to evaluate expression: input section .text has no output section assigned
+# RUN: echo "SECTIONS { aaa = foo | 1; .text : { *(.text*) } }" > %t3.script
+# RUN: ld.lld -o %t --script %t3.script %t.o
+# RUN: llvm-objdump -t %t | FileCheck --check-prefix=VAL1 %s
-# Simple cases that we can handle.
+# VAL1: 0000000000000000 .text 00000000 foo
+# VAL1: 0000000000000001 .text 00000000 aaa
# RUN: echo "SECTIONS { aaa = ABSOLUTE(foo - 1) + 1; .text : { *(.text*) } }" > %t.script
# RUN: ld.lld -o %t --script %t.script %t.o
diff --git a/lld/test/ELF/linkerscript/icf-output-sections.s b/lld/test/ELF/linkerscript/icf-output-sections.s
new file mode 100644
index 00000000000..f23d7fff06b
--- /dev/null
+++ b/lld/test/ELF/linkerscript/icf-output-sections.s
@@ -0,0 +1,46 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo 'SECTIONS { .text : { *(.text*) } }' > %t1.script
+
+## Sections within the same output section can be freely folded.
+# RUN: ld.lld %t.o --script %t1.script --icf=all --print-icf-sections -o %t | FileCheck --check-prefix=ICF1 %s
+# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SEC1 %s --implicit-check-not=.text
+
+# ICF1: selected section {{.*}}.o:(.text.foo0)
+# ICF1-NEXT: removing identical section {{.*}}.o:(.text.foo1)
+# ICF1-NEXT: removing identical section {{.*}}.o:(.text.bar0)
+# ICF1-NEXT: removing identical section {{.*}}.o:(.text.bar1)
+
+# SEC1: .text PROGBITS 0000000000000000 001000 000001
+
+## Sections with different output sections cannot be folded. Without the
+## linker script, .text.foo* and .text.bar* go to the same output section
+## .text and they will be folded.
+# RUN: echo 'SECTIONS { .text.foo : {*(.text.foo*)} .text.bar : {*(.text.bar*)} }' > %t2.script
+# RUN: ld.lld %t.o --script %t2.script --icf=all --print-icf-sections -o %t | FileCheck --check-prefix=ICF2 %s
+# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SEC2 %s
+
+# ICF2: selected section {{.*}}.o:(.text.foo0)
+# ICF2-NEXT: removing identical section {{.*}}.o:(.text.foo1)
+# ICF2-NEXT: selected section {{.*}}.o:(.text.bar0)
+# ICF2-NEXT: removing identical section {{.*}}.o:(.text.bar1)
+
+# SEC2: .text.foo PROGBITS 0000000000000000 001000 000001
+# SEC2-NEXT: .text.bar PROGBITS 0000000000000001 001001 000001
+
+## .text.bar* are orphans that get assigned to .text.
+# RUN: echo 'SECTIONS { .text.foo : {*(.text.foo*)} }' > %t3.script
+# RUN: ld.lld %t.o --script %t3.script --icf=all --print-icf-sections -o %t | FileCheck --check-prefix=ICF2 %s
+# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SEC3 %s
+
+# SEC3: .text.foo PROGBITS 0000000000000000 001000 000001
+# SEC3-NEXT: .text PROGBITS 0000000000000004 001004 000001
+
+.section .text.foo0,"ax"
+ret
+.section .text.foo1,"ax"
+ret
+.section .text.bar0,"ax"
+ret
+.section .text.bar1,"ax"
+ret
diff --git a/lld/test/ELF/linkerscript/subalign.s b/lld/test/ELF/linkerscript/subalign.s
index 99cb3f19a99..8d8420c0f77 100644
--- a/lld/test/ELF/linkerscript/subalign.s
+++ b/lld/test/ELF/linkerscript/subalign.s
@@ -23,11 +23,11 @@
# SUBALIGN: 03000000 00000000 04000000 00000000
## Test we do not assert or crash when dot(.) is used inside SUBALIGN.
-## ld.bfd does not allow to use dot in such expressions, our behavior is
-## different for simplicity of implementation. Value of dot is undefined.
+## Value of dot is undefined. Some versions of ld.bfd do not allow to use dot
+## in such expressions.
# RUN: echo "SECTIONS { . = 0x32; .aaa : SUBALIGN(.) { *(.aaa*) } }" > %t3.script
-# RUN: ld.lld %t1.o --script %t3.script -o %t3
-# RUN: llvm-objdump -s %t3 > /dev/null
+# RUN: not ld.lld %t1.o --script %t3.script -o /dev/null 2>&1 | FileCheck --check-prefix=ERR1 %s
+# ERR1: {{.*}}.script:1: unable to get location counter value
## Test we are able to link with zero alignment, this is consistent with bfd 2.26.1.
# RUN: echo "SECTIONS { .aaa : SUBALIGN(0) { *(.aaa*) } }" > %t4.script
@@ -36,8 +36,8 @@
## Test we fail gracefuly when alignment value is not a power of 2.
# RUN: echo "SECTIONS { .aaa : SUBALIGN(3) { *(.aaa*) } }" > %t5.script
-# RUN: not ld.lld %t1.o --script %t5.script -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
-# ERR: {{.*}}.script:1: alignment must be power of 2
+# RUN: not ld.lld %t1.o --script %t5.script -o /dev/null 2>&1 | FileCheck --check-prefix=ERR2 %s
+# ERR2: {{.*}}.script:1: alignment must be power of 2
.global _start
_start: