Skip to content

Commit a4bd043

Browse files
MaskRaylravenclaw
authored andcommitted
[ELF] Make start/stop symbols retain associated discardable output sections
An empty output section specified in the `SECTIONS` command (e.g. `empty : { *(empty) }`) may be discarded. Due to phase ordering, we might define `__start_empty`/`__stop_empty` symbols with incorrect section indexes (usually benign, but could go out of bounds and cause `readelf -s` to print `BAD`). ``` finalizeSections addStartStopSymbols // __start_empty is defined // __start_empty is added to .symtab sortSections adjustOutputSections // `empty` is discarded writeSections // __start_empty is Defined with an invalid section index ``` Loaders use `st_value` members of the start/stop symbols and expect no "undefined symbol" linker error, but do not particularly care whether the symbols are defined or undefined. Let's retain the associated empty output section so that start/stop symbols will have correct section indexes. The approach allows us to remove `LinkerScript::isDiscarded` (https://reviews.llvm.org/D114179). Also delete the `findSection(".text")` special case from https://reviews.llvm.org/D46200, which is unnecessary even before this patch (`elfHeader` would be fine even with very large executables). Note: we should be careful not to unnecessarily retain .ARM.exidx, which would create an empty PT_ARM_EXIDX. ~40 tests would need to be updated. --- An alternative is to discard the empty output section and keep the start/stop symbols undefined. This approach needs more code and requires `LinkerScript::isDiscarded` before we discard empty sections in ``adjustOutputSections`. Pull Request: llvm#96343
1 parent abe0e2a commit a4bd043

7 files changed

+78
-54
lines changed

lld/ELF/LinkerScript.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -1184,11 +1184,6 @@ static bool isDiscardable(const OutputSection &sec) {
11841184
return true;
11851185
}
11861186

1187-
bool LinkerScript::isDiscarded(const OutputSection *sec) const {
1188-
return hasSectionsCommand && (getFirstInputSection(sec) == nullptr) &&
1189-
isDiscardable(*sec);
1190-
}
1191-
11921187
static void maybePropagatePhdrs(OutputSection &sec,
11931188
SmallVector<StringRef, 0> &phdrs) {
11941189
if (sec.phdrs.empty()) {

lld/ELF/LinkerScript.h

-2
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,6 @@ class LinkerScript final {
342342
void processSymbolAssignments();
343343
void declareSymbols();
344344

345-
bool isDiscarded(const OutputSection *sec) const;
346-
347345
// Used to handle INSERT AFTER statements.
348346
void processInsertCommands();
349347

lld/ELF/Writer.cpp

+21-29
Original file line numberDiff line numberDiff line change
@@ -2064,40 +2064,30 @@ template <class ELFT> void Writer<ELFT>::checkExecuteOnly() {
20642064
// The linker is expected to define SECNAME_start and SECNAME_end
20652065
// symbols for a few sections. This function defines them.
20662066
template <class ELFT> void Writer<ELFT>::addStartEndSymbols() {
2067-
// If a section does not exist, there's ambiguity as to how we
2068-
// define _start and _end symbols for an init/fini section. Since
2069-
// the loader assume that the symbols are always defined, we need to
2070-
// always define them. But what value? The loader iterates over all
2071-
// pointers between _start and _end to run global ctors/dtors, so if
2072-
// the section is empty, their symbol values don't actually matter
2073-
// as long as _start and _end point to the same location.
2074-
//
2075-
// That said, we don't want to set the symbols to 0 (which is
2076-
// probably the simplest value) because that could cause some
2077-
// program to fail to link due to relocation overflow, if their
2078-
// program text is above 2 GiB. We use the address of the .text
2079-
// section instead to prevent that failure.
2080-
//
2081-
// In rare situations, the .text section may not exist. If that's the
2082-
// case, use the image base address as a last resort.
2083-
OutputSection *Default = findSection(".text");
2084-
if (!Default)
2085-
Default = Out::elfHeader;
2086-
2067+
// If the associated output section does not exist, there is ambiguity as to
2068+
// how we define _start and _end symbols for an init/fini section. Users
2069+
// expect no "undefined symbol" linker errors and loaders expect equal
2070+
// st_value but do not particularly care whether the symbols are defined or
2071+
// not. We retain the output section so that the section indexes will be
2072+
// correct.
20872073
auto define = [=](StringRef start, StringRef end, OutputSection *os) {
2088-
if (os && !script->isDiscarded(os)) {
2089-
addOptionalRegular(start, os, 0);
2090-
addOptionalRegular(end, os, -1);
2074+
if (os) {
2075+
Defined *startSym = addOptionalRegular(start, os, 0);
2076+
Defined *stopSym = addOptionalRegular(end, os, -1);
2077+
if (startSym || stopSym)
2078+
os->usedInExpression = true;
20912079
} else {
2092-
addOptionalRegular(start, Default, 0);
2093-
addOptionalRegular(end, Default, 0);
2080+
addOptionalRegular(start, Out::elfHeader, 0);
2081+
addOptionalRegular(end, Out::elfHeader, 0);
20942082
}
20952083
};
20962084

20972085
define("__preinit_array_start", "__preinit_array_end", Out::preinitArray);
20982086
define("__init_array_start", "__init_array_end", Out::initArray);
20992087
define("__fini_array_start", "__fini_array_end", Out::finiArray);
21002088

2089+
// As a special case, don't unnecessarily retain .ARM.exidx, which would
2090+
// create an empty PT_ARM_EXIDX.
21012091
if (OutputSection *sec = findSection(".ARM.exidx"))
21022092
define("__exidx_start", "__exidx_end", sec);
21032093
}
@@ -2112,10 +2102,12 @@ void Writer<ELFT>::addStartStopSymbols(OutputSection &osec) {
21122102
StringRef s = osec.name;
21132103
if (!isValidCIdentifier(s))
21142104
return;
2115-
addOptionalRegular(saver().save("__start_" + s), &osec, 0,
2116-
config->zStartStopVisibility);
2117-
addOptionalRegular(saver().save("__stop_" + s), &osec, -1,
2118-
config->zStartStopVisibility);
2105+
Defined *startSym = addOptionalRegular(saver().save("__start_" + s), &osec, 0,
2106+
config->zStartStopVisibility);
2107+
Defined *stopSym = addOptionalRegular(saver().save("__stop_" + s), &osec, -1,
2108+
config->zStartStopVisibility);
2109+
if (startSym || stopSym)
2110+
osec.usedInExpression = true;
21192111
}
21202112

21212113
static bool needsPtLoad(OutputSection *sec) {

lld/test/ELF/linkerscript/preinit-array-empty.test renamed to lld/test/ELF/linkerscript/empty-preinit-array-start-stop.test

+7-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/t.s -o %t.o
44

55
## PR52534: https://bugs.llvm.org/show_bug.cgi?id=52534
6-
## Check case where .preinit_array is discarded.
6+
## Check case where .preinit_array would be discarded in the absence of the
7+
## start/stop symbols.
78
## Link should succeed without causing an out of range relocation error.
89
# RUN: ld.lld -T %t/discarded.script %t.o -o %t1 --image-base=0x80000000
910
# RUN: llvm-readelf -s %t1 | FileCheck --check-prefixes=CHECK,DISCARDED %s
@@ -15,7 +16,7 @@
1516
# CHECK: [[#%x,ADDR:]] 0 NOTYPE LOCAL HIDDEN [[#]] __preinit_array_start
1617
# CHECK-NEXT: [[#ADDR]] 0 NOTYPE LOCAL HIDDEN [[#]] __preinit_array_end
1718

18-
# DISCARDED-NEXT: [[#ADDR]] 0 NOTYPE GLOBAL DEFAULT [[#]] _start
19+
# DISCARDED-NEXT: {{0*}}[[#ADDR-14]] 0 NOTYPE GLOBAL DEFAULT [[#]] _start
1920

2021
# EMPTY-NOT: [[#ADDR]] 0 NOTYPE GLOBAL DEFAULT [[#]] _start
2122
# EMPTY: [[#ADDR]] 0 NOTYPE GLOBAL DEFAULT [[#]] ADDR
@@ -26,8 +27,12 @@ _start:
2627
movq __preinit_array_start@GOTPCREL(%rip),%rax
2728
movq __preinit_array_end@GOTPCREL(%rip),%rax
2829

30+
.section .rodata,"a"
31+
.byte 0
32+
2933
#--- discarded.script
3034
SECTIONS {
35+
.rodata : { *(.rodata); }
3136
.text : { *(.text); }
3237
.preinit_array : { *(.preinit_array); }
3338
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# REQUIRES: x86
2+
## __start/__stop symbols retain the associated empty sections with C identifier names.
3+
4+
# RUN: rm -rf %t && split-file %s %t
5+
# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/test.s -o %t.o
6+
# RUN: ld.lld -T %t/ldscript -o %t.out %t.o -z start-stop-visibility=default
7+
# RUN: llvm-objdump -h -t %t.out | FileCheck %s
8+
9+
# CHECK: .text
10+
# CHECK-NEXT: empty1
11+
# CHECK-NEXT: empty2
12+
# CHECK-NEXT: empty3
13+
14+
# CHECK: [[#%x,ADDR:]] l empty1 0000000000000000 .hidden __start_empty1
15+
# CHECK-NEXT: {{0*}}[[#ADDR]] g empty2 0000000000000000 .protected __stop_empty2
16+
# CHECK-NEXT: {{0*}}[[#ADDR]] g empty3 0000000000000000 __stop_empty3
17+
18+
#--- ldscript
19+
SECTIONS {
20+
.text : { *(.text .text.*) }
21+
empty0 : { *(empty0) }
22+
empty1 : { *(empty1) }
23+
empty2 : { *(empty2) }
24+
empty3 : { *(empty3) }
25+
}
26+
27+
#--- test.s
28+
.weak __start_empty1, __stop_empty2, __stop_empty3
29+
.hidden __start_empty1
30+
.protected __stop_empty2
31+
32+
.globl _start
33+
_start:
34+
movq __start_empty1@GOTPCREL(%rip),%rax
35+
movq __stop_empty2@GOTPCREL(%rip),%rax
36+
movq __stop_empty3@GOTPCREL(%rip),%rax

lld/test/ELF/linkerscript/sections-gc2.s

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# CHECK: Idx Name Size VMA Type
1212
# CHECK-NEXT: 0
1313
# CHECK-NEXT: used_in_reloc
14+
# CHECK-NEXT: used_in_script
1415
# CHECK-NEXT: .text
1516
# CHECK-NEXT: .comment
1617
# CHECK-NEXT: .symtab

lld/test/ELF/pre_init_fini_array_missing.s

+13-16
Original file line numberDiff line numberDiff line change
@@ -14,29 +14,26 @@ _start:
1414
call __fini_array_start
1515
call __fini_array_end
1616

17-
// With no .init_array section the symbols resolve to .text.
18-
// 0x201120 - (0x201120 + 5) = -5
19-
// 0x201120 - (0x201125 + 5) = -10
20-
// ...
17+
/// Due to __init_array_start/__init_array_end, .init_array is retained.
2118

2219
// CHECK: Disassembly of section .text:
2320
// CHECK-EMPTY:
2421
// CHECK-NEXT: <_start>:
25-
// CHECK-NEXT: 201120: callq 0x201120
26-
// CHECK-NEXT: callq 0x201120
27-
// CHECK-NEXT: callq 0x201120
28-
// CHECK-NEXT: callq 0x201120
29-
// CHECK-NEXT: callq 0x201120
30-
// CHECK-NEXT: callq 0x201120
22+
// CHECK-NEXT: 201120: callq 0x200000
23+
// CHECK-NEXT: callq 0x200000
24+
// CHECK-NEXT: callq 0x200000
25+
// CHECK-NEXT: callq 0x200000
26+
// CHECK-NEXT: callq 0x200000
27+
// CHECK-NEXT: callq 0x200000
3128

3229
// In position-independent binaries, they resolve to .text too.
3330

3431
// PIE: Disassembly of section .text:
3532
// PIE-EMPTY:
3633
// PIE-NEXT: <_start>:
37-
// PIE-NEXT: 1210: callq 0x1210
38-
// PIE-NEXT: callq 0x1210
39-
// PIE-NEXT: callq 0x1210
40-
// PIE-NEXT: callq 0x1210
41-
// PIE-NEXT: callq 0x1210
42-
// PIE-NEXT: callq 0x1210
34+
// PIE-NEXT: 1210: callq 0x0
35+
// PIE-NEXT: callq 0x0
36+
// PIE-NEXT: callq 0x0
37+
// PIE-NEXT: callq 0x0
38+
// PIE-NEXT: callq 0x0
39+
// PIE-NEXT: callq 0x0

0 commit comments

Comments
 (0)