Skip to content

Commit 1297d89

Browse files
author
Tristan Ravitch
committed
Initial commit
0 parents  commit 1297d89

File tree

7 files changed

+440
-0
lines changed

7 files changed

+440
-0
lines changed

README.md

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
= Introduction =
2+
3+
This is a small Python-based wrapper around a GCC-compatible compiler
4+
to make it easy to build whole-program (or whole-library) LLVM bitcode
5+
files. The idea is that it first invokes the compiler as normal to
6+
build a real object file. It then invokes a bitcode compiler to
7+
generate the corresponding bitcode, recording the location of the
8+
bitcode file in an ELF section of the actual object file.
9+
10+
When object files are linked together, the contents of non-special ELF
11+
sections are just concatenated (so we don't lose the locations of any
12+
of the constituent bitcode files). This package contains an extra
13+
utility, extract-bc, to read the contents of this ELF section and link
14+
all of the bitcode into a single whole-program bitcode file.
15+
16+
This two-phase build process is slower and more elaborate than normal,
17+
but in practice is necessary to be a drop-in replacement for gcc in
18+
any build system. Approaches using the LTO framework in gcc and the
19+
gold linker plugin work for many cases, but fail in the presence of
20+
static libraries in builds. This approach has the distinct advantage
21+
of generating working binaries, in case some part of a build process
22+
actually requires that.
23+
24+
Currently, this package only works using the dragonegg plugin and gcc
25+
4.5 (with the required patch for dragonegg). Support can be extended
26+
to clang (and the legacy llvm-gcc) if absolutely necessary.
27+
28+
= Usage =
29+
30+
There are three environment variables that must be set to use this
31+
wrapper script:
32+
33+
* LLVM_COMPILER should be set to 'dragonegg' (clang will be supported eventually).
34+
* LLVM_GCC_PREFIX should be set to the prefix for the version of gcc that should
35+
be used with dragonegg. This can be empty if there is no prefix.
36+
* LLVM_DRAGONEGG_PLUGIN should be the full path to the dragonegg plugin.
37+
38+
Once the environment is set up, just use wllvm and wllvm++ as your C
39+
and C++ compilers, respectively.
40+
41+
= Example =
42+
43+
export LLVM_COMPILER=dragonegg
44+
export LLVM_GCC_PREFIX=llvm-
45+
export LLVM_DRAGONEGG_PLUGIN=/unsup/llvm-2.9/lib/dragonegg.so
46+
47+
tar xf pkg-config-0.26.tar.gz
48+
cd pkg-config-0.26
49+
CC=wllvm ./configure
50+
make
51+
52+
extract-bc pkg-config # Produces pkg-config.bc
53+

driver/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

driver/as

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/usr/bin/env python
2+
#
3+
# This is the assembler phase. This variant is only invoked during
4+
# the second compilation where we are building bitcode. The compiler
5+
# has already been instructed to generate LLVM IR; the compiler then
6+
# tries to assemble it into an object file. The standard assembler
7+
# doesn't understand LLVM bitcode, so we interpose and use the llvm-as
8+
# command to build a bitcode file. We leave the bitcode in place, but
9+
# record its full absolute path in the corresponding object file
10+
# (which was created in the first compilation phase by the real
11+
# compiler). We'll link this together at a later stage.
12+
13+
import os
14+
import subprocess, sys
15+
from utils import *
16+
import tempfile
17+
18+
19+
class BCFilter(ArgumentListFilter):
    """Argument filter for the fake assembler.

    Replaces the inherited '-o' handler so that the object file name is
    recorded in ``outFileName``.  The flag and its value are not added to
    the filtered argument list.
    """

    def __init__(self, arglist):
        self.bcName = None
        # Must exist before the base constructor runs, because the base
        # constructor is what parses the argument list and fires the
        # callbacks.  Without this default, a command line lacking '-o'
        # left the attribute undefined.
        self.outFileName = None
        localCallbacks = {'-o': (1, BCFilter.outFileCallback)}
        super(BCFilter, self).__init__(arglist, exactMatches=localCallbacks)

    def outFileCallback(self, flag, name):
        """Remember the object file named by '-o'."""
        self.outFileName = name
27+
28+
argFilter = BCFilter(sys.argv[1:])
# Since this is just the assembler, there should only ever be one file
[infile] = argFilter.inputFiles

# The object file produced by the first (real) compilation.  If no -o
# was given, fall back to a.out, the assembler's conventional default.
outFileName = getattr(argFilter, 'outFileName', None) or 'a.out'

# Now compile this llvm assembly file into a bitcode file.  The bitcode
# lives next to the object file: same directory, with the base name
# prefixed by '.' and suffixed by '.bc'.  The directory has to be split
# off first; prefixing the whole path would turn 'dir/foo.o' into
# '.dir/foo.o.bc', which points into a different directory.
outDir, outBase = os.path.split(outFileName)
bcname = os.path.join(outDir, '.{0}.bc'.format(outBase))
fakeAssembler = ['llvm-as', infile, '-o', bcname]
asmProc = subprocess.Popen(fakeAssembler)
realRet = asmProc.wait()

if realRet != 0:
    sys.exit(realRet)

# Now just build a temporary text file with the full path to the
# bitcode file that we'll write into the object file.  Plain text mode
# ('w') is used because 'rw+b' is not a valid mode string.
f = tempfile.NamedTemporaryFile(mode='w', delete=False)
orc = 0
try:
    f.write(os.path.abspath(bcname))
    f.write('\n')
    # Ensure buffers are flushed so that objcopy doesn't read an empty
    # file
    f.flush()
    os.fsync(f.fileno())
    f.close()

    # Now write our .llvm_bc section
    objcopyCmd = ['objcopy', '-v', '--add-section',
                  '.llvm_bc={0}'.format(f.name)]
    objcopyCmd.append(outFileName)

    try:
        # An empty object file has nothing to attach a section to.
        if os.path.getsize(outFileName) > 0:
            objProc = subprocess.Popen(objcopyCmd)
            orc = objProc.wait()
    except OSError:
        # configure loves to immediately delete things, causing issues
        # for us here.  Just ignore it
        sys.exit(0)
finally:
    # Runs on every path, including the sys.exit above, so the
    # temporary file is never left behind.
    f.close()
    os.remove(f.name)

if orc != 0:
    sys.stderr.write('objcopy failed with {0}\n'.format(orc))
    sys.exit(-1)

sys.exit(realRet)
76+
77+
78+
79+

driver/utils.py

+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
from subprocess import *
2+
import collections
3+
import errno
4+
import os
5+
import re
6+
import sys
7+
8+
fullSelfPath = os.path.realpath(__file__)
9+
prefix = os.path.dirname(fullSelfPath)
10+
driverDir = prefix
11+
12+
# This class applies filters to GCC argument lists. It has a few
13+
# default arguments that it records, but does not modify the argument
14+
# list at all. It can be subclassed to change this behavior.
15+
#
16+
# The idea is that all flags accepting a parameter must be specified
17+
# so that they know to consume an extra token from the input stream.
18+
# Flags and arguments can be recorded in any way desired by providing
19+
# a callback. Each callback/flag has an arity specified - zero arity
20+
# flags (such as -v) are provided to their callback as-is. Higher
21+
# arities remove the appropriate number of arguments from the list and
22+
# pass them to the callback with the flag.
23+
#
24+
# Most flags can be handled with a simple lookup in a table - these
25+
# are exact matches. Other flags are more complex and can be
26+
# recognized by regular expressions. All regular expressions must be
27+
# tried, obviously. The first one that matches is taken, and no order
28+
# is specified. Try to avoid overlapping patterns.
29+
class ArgumentListFilter(object):
    """Scan a GCC-style argument list and record what it contains.

    The constructor consumes ``inputList`` left to right.  Each token is
    looked up in a table of exact flag matches first and, failing that,
    tested against a table of regular expressions.  Table entries are
    ``(arity, handler)`` pairs: ``arity`` further tokens are removed from
    the input and passed to ``handler(self, token, *extra)``.  Tokens that
    match nothing are kept unchanged.

    Parameters:
      inputList      -- iterable of command-line tokens (without argv[0])
      exactMatches   -- optional dict of extra/overriding exact-match entries
      patternMatches -- optional dict of extra/overriding regex entries

    Attributes set during parsing:
      filteredArgs     -- tokens kept by the handlers, in order
      inputFiles       -- tokens recognised as source/assembly inputs
      outputFilename   -- value given to '-o', or None
      isVerbose        -- True if '--verbose' was seen
      isPreprocessOnly -- True if '-E' was seen
    """

    def __init__(self, inputList, exactMatches=None, patternMatches=None):
        defaultArgExactMatches = {
            '-o' : (1, ArgumentListFilter.outputFileCallback),
            '-E' : (0, ArgumentListFilter.preprocessOnlyCallback),
            '--verbose' : (0, ArgumentListFilter.verboseFlagCallback),
            '--param' : (1, ArgumentListFilter.defaultOneArgument),
            '-aux-info' : (1, ArgumentListFilter.defaultOneArgument),
            # Preprocessor assertion
            '-A' : (1, ArgumentListFilter.defaultOneArgument),
            '-D' : (1, ArgumentListFilter.defaultOneArgument),
            '-U' : (1, ArgumentListFilter.defaultOneArgument),
            # Dependency generation
            '-MT' : (1, ArgumentListFilter.defaultOneArgument),
            '-MQ' : (1, ArgumentListFilter.defaultOneArgument),
            '-MF' : (1, ArgumentListFilter.defaultOneArgument),
            # -MD and -MMD take no argument at the gcc driver level;
            # giving them arity 1 swallowed the following token (often
            # the input file itself).
            '-MD' : (0, ArgumentListFilter.defaultNoArgument),
            '-MMD' : (0, ArgumentListFilter.defaultNoArgument),
            # Include
            '-I' : (1, ArgumentListFilter.defaultOneArgument),
            '-idirafter' : (1, ArgumentListFilter.defaultOneArgument),
            '-include' : (1, ArgumentListFilter.defaultOneArgument),
            '-imacros' : (1, ArgumentListFilter.defaultOneArgument),
            '-iprefix' : (1, ArgumentListFilter.defaultOneArgument),
            '-iwithprefix' : (1, ArgumentListFilter.defaultOneArgument),
            '-iwithprefixbefore' : (1, ArgumentListFilter.defaultOneArgument),
            '-isystem' : (1, ArgumentListFilter.defaultOneArgument),
            '-isysroot' : (1, ArgumentListFilter.defaultOneArgument),
            '-iquote' : (1, ArgumentListFilter.defaultOneArgument),
            '-imultilib' : (1, ArgumentListFilter.defaultOneArgument),
            # Language
            '-x' : (1, ArgumentListFilter.defaultOneArgument),
            # Component-specifiers
            '-Xpreprocessor' : (1, ArgumentListFilter.defaultOneArgument),
            '-Xassembler' : (1, ArgumentListFilter.defaultOneArgument),
            '-Xlinker' : (1, ArgumentListFilter.defaultOneArgument),
            # Linker
            '-l' : (1, ArgumentListFilter.defaultOneArgument),
            '-L' : (1, ArgumentListFilter.defaultOneArgument),
            '-T' : (1, ArgumentListFilter.defaultOneArgument),
            '-u' : (1, ArgumentListFilter.defaultOneArgument),
        }

        # The default pattern only recognizes input filenames.  Flags can
        # also be recognized here.
        defaultArgPatterns = {
            r'^.+\.(c|cc|cpp|C|cxx|i|s)$' : (0, ArgumentListFilter.inputFileCallback),
        }

        self.filteredArgs = []
        self.inputFiles = []
        self.outputFilename = None
        self.isVerbose = False
        self.isPreprocessOnly = False

        # Caller-supplied entries override the defaults.  None is used as
        # the "nothing supplied" default to avoid shared mutable defaults.
        argExactMatches = dict(defaultArgExactMatches)
        argExactMatches.update(exactMatches or {})
        argPatterns = dict(defaultArgPatterns)
        argPatterns.update(patternMatches or {})

        self._inputArgs = collections.deque(inputList)
        while self._inputArgs:
            # Get the next argument
            currentItem = self._inputArgs.popleft()

            # First, see if this exact flag has a handler in the table.
            # This is a cheap test.  Otherwise, see if the input matches
            # some pattern with a handler that we recognize.
            entry = argExactMatches.get(currentItem)
            if entry is None:
                for pattern, candidate in argPatterns.items():
                    if re.match(pattern, currentItem):
                        entry = candidate
                        break

            # If no action has been specified, this is a zero-argument
            # flag that we should just keep.
            if entry is None:
                self.keepArgument(currentItem)
                continue

            arity, handler = entry
            if len(self._inputArgs) < arity:
                # The flag is missing its argument(s).  Keep it untouched
                # rather than crashing here; the real compiler is in a
                # better position to report the malformed command line.
                self.keepArgument(currentItem)
                continue

            flagArgs = self._shiftArgs(arity)
            handler(self, currentItem, *flagArgs)

    def _shiftArgs(self, nargs):
        """Remove and return the next ``nargs`` tokens from the input."""
        return [self._inputArgs.popleft() for _ in range(nargs)]

    def keepArgument(self, arg):
        """Append ``arg`` to the filtered argument list."""
        self.filteredArgs.append(arg)

    def outputFileCallback(self, flag, filename):
        """Record the '-o' target and keep both the flag and its value."""
        self.outputFilename = filename
        self.keepArgument(flag)
        self.keepArgument(filename)

    def preprocessOnlyCallback(self, flag):
        """Note that '-E' was given and keep the flag."""
        self.isPreprocessOnly = True
        self.keepArgument(flag)

    def verboseFlagCallback(self, flag):
        """Note that '--verbose' was given; the flag itself is not kept."""
        self.isVerbose = True

    def inputFileCallback(self, infile):
        """Record ``infile`` as an input file and keep it."""
        self.inputFiles.append(infile)
        self.keepArgument(infile)

    def defaultOneArgument(self, flag, arg):
        """Keep a flag together with its single argument."""
        self.keepArgument(flag)
        self.keepArgument(arg)

    def defaultNoArgument(self, flag):
        """Keep a flag that takes no argument."""
        self.keepArgument(flag)
146+
147+
def getCompiler(isCxx):
    """Return the native compiler command as a one-element list.

    The compiler family is read from the LLVM_COMPILER environment
    variable ('clang' or 'dragonegg').  For 'dragonegg' the gcc/g++ name
    is prefixed with LLVM_GCC_PREFIX when that variable is set.

    isCxx selects the C++ driver (clang++ / g++) over the C driver.

    If LLVM_COMPILER holds any other value (or is unset), an error is
    written to stderr and the process exits with status -1.
    """
    cstring = os.getenv('LLVM_COMPILER')

    if cstring == 'clang':
        return ['clang++'] if isCxx else ['clang']

    if cstring == 'dragonegg':
        # An unset prefix is treated the same as an empty one.
        pfx = os.getenv('LLVM_GCC_PREFIX') or ''
        driver = 'g++' if isCxx else 'gcc'
        return ['{0}{1}'.format(pfx, driver)]

    # Diagnostics go to stderr: this runs inside a compiler wrapper, and
    # build systems routinely capture a compiler's stdout.
    sys.stderr.write('Error: invalid LLVM_COMPILER: {0}\n'.format(cstring))
    sys.exit(-1)
164+
165+
def getBitcodeCompiler(isCxx):
    """Return the command prefix (a list) that makes the compiler emit LLVM IR.

    Starts from getCompiler(isCxx) and appends the flags needed for the
    compiler family named by LLVM_COMPILER:

      clang     -- '-emit-llvm'
      dragonegg -- '-B <driver dir>' plus the dragonegg plugin flags; the
                   plugin path is read from LLVM_DRAGONEGG_PLUGIN

    Writes an error to stderr and exits with status -1 if the dragonegg
    plugin path is not set or LLVM_COMPILER is not recognised.
    """
    cc = getCompiler(isCxx)
    cstring = os.getenv('LLVM_COMPILER')
    if cstring == 'clang':
        return cc + ['-emit-llvm']
    elif cstring == 'dragonegg':
        pth = os.getenv('LLVM_DRAGONEGG_PLUGIN')
        if not pth:
            # Without this check the command would contain the literal
            # '-fplugin=None', producing a confusing error from gcc.
            sys.stderr.write('Error: LLVM_DRAGONEGG_PLUGIN must be set '
                             'to the full path of the dragonegg plugin\n')
            sys.exit(-1)
        # Pass -B here so that, when gcc calls as to assemble the
        # output, it invokes llvm-as instead of the system assembler
        # (which does not understand llvm assembly)
        return cc + ['-B', driverDir,  # '-specs=llvm-specs',
                     '-fplugin={0}'.format(pth),
                     '-fplugin-arg-dragonegg-emit-ir']

    # Exit instead of falling through and returning None, which callers
    # would then try to extend as a list.
    sys.stderr.write('Error: invalid LLVM_COMPILER: {0}\n'.format(cstring))
    sys.exit(-1)
179+
180+
181+
def buildObject(cmd, isCxx):
    """Run the native compiler on ``cmd`` to produce the real object file.

    ``cmd`` is the argument list without the executable; the compiler
    chosen by getCompiler(isCxx) is placed in front of it.  Returns
    normally when the compiler succeeds; otherwise the process exits with
    the compiler's status.
    """
    fullCommand = getCompiler(isCxx)
    fullCommand.extend(cmd)
    status = Popen(fullCommand).wait()
    if status != 0:
        sys.exit(status)
188+
189+
# This command does not have the executable with it
190+
def buildAndAttachBitcode(cmd, isCxx):
    """Re-run the compilation in bitcode mode and exit with its status.

    ``cmd`` is the argument list without the executable.  When it names
    no input files there is nothing to compile and the function simply
    returns.  Otherwise the bitcode compiler is invoked with ``cmd`` plus
    '-c', and the process exits with that compiler's return code.
    """
    argFilter = ArgumentListFilter(cmd)
    if not argFilter.inputFiles:
        return

    bitcodeCommand = getBitcodeCompiler(isCxx)
    bitcodeCommand.extend(cmd)
    bitcodeCommand.append('-c')
    bitcodeProc = Popen(bitcodeCommand)
    # FIXME: if clang, attach bitcode (dragonegg does it in as)
    sys.exit(bitcodeProc.wait())
200+
201+

0 commit comments

Comments
 (0)