Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Be sure to have jemalloc installed, as it is used to improve llvm-bolt's memory

To build the toolchain, follow these steps:

Clone the repository: `git clone <https://github.com/ptr1337/llvm-bolt-scripts.git>`
Clone the repository: `git clone --branch=release/18.x --depth=1 <https://github.com/ptr1337/llvm-bolt-scripts.git>`
Navigate to the repository directory: `cd llvm-bolt-scripts`
Run the full workflow script: `./full_workflow.bash`
This process should give you a faster LLVM toolchain. You can experiment with different technologies (e.g. ThinLTO vs FullLTO) and measure the performance gains to determine if it is worth the effort.
Expand Down
61 changes: 28 additions & 33 deletions bolt-anything.bash
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
STAGE=

## File or binary you want to instrument and then bolt
: ${BINARY:=libLLVM-14.so}
: "${BINARY:=libLLVM-14.so}"

## PATH to the target
BINARYPATH=/usr/lib
Expand All @@ -18,10 +18,10 @@ BOLTPATH=~/toolchain/llvm/llvm-bolt/bin
## BASEDIR for data
TOPLEV=~/toolchain/bolt

## Here can be the optimized binarys, merged fdata and your original binary/file as backup
## Here can be the optimized binaries, merged fdata and your original binary/file as backup
BOLTBIN=${TOPLEV}/bin

## PATH FOR INTRUMENTED DATA
## PATH FOR INSTRUMENTED DATA
## Use a dedicated PATH for it since it creates a lot of files
FDATA=${TOPLEV}/fdata

Expand All @@ -39,59 +39,54 @@ create_path() {
}

instrument() {

echo "Instrument binary with llvm-bolt"
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt \
--instrument \
--instrumentation-file-append-pid \
--instrumentation-file=${FDATA}/${BINARY}.fdata \
${BINARYPATH}/${BINARY} \
-o ${BOLTBIN}/${BINARY} || (echo "Could not create instrumented binary"; exit 1)
--instrumentation-file="${FDATA}/${BINARY}.fdata" \
"${BINARYPATH}/${BINARY}" \
-o "${BOLTBIN}/${BINARY}" || (echo "Could not create instrumented binary"; exit 1)
## Backup original file
sudo cp ${BINARYPATH}/${BINARY} ${BOLTBIN}/${BINARY}.org
sudo cp ${BINARYPATH}/${BINARY} ${BINARYPATH}/${BINARY}.org
sudo cp "${BINARYPATH}/${BINARY}" "${BOLTBIN}/${BINARY}.org"
sudo cp "${BINARYPATH}/${BINARY}" "${BINARYPATH}/${BINARY}.org"
## Replace the original with the instrumented binary to make gathering a profile easier
sudo cp ${BOLTBIN}/${BINARY} ${BINARYPATH}/${BINARY}
sudo cp "${BOLTBIN}/${BINARY}" "${BINARYPATH}/${BINARY}"
}

merge_fdata() {

echo "Merging generated profiles"
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/merge-fdata ${FDATA}/${BINARY}*.fdata > ${BOLTBIN}/${BINARY}-combined.fdata || (echo "Could not merge fdate"; exit 1)
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/merge-fdata "${FDATA}/${BINARY}"*.fdata > "${BOLTBIN}/${BINARY}-combined.fdata" || (echo "Could not merge fdata"; exit 1)
## Remove the bloated fdata files that are no longer needed
rm -rf ${FDATA}/${BINARY}*.fdata
rm -rf "${FDATA}/${BINARY}"*.fdata
}

optimize() {

echo "Optimizing binary with generated profile"
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${BOLTBIN}/${BINARY}.org \
--data ${BOLTBIN}/${BINARY}-combined.fdata \
-o ${BOLTBIN}/${BINARY}.bolt \
-reorder-blocks=ext-tsp
-reorder-functions=cdsort
-split-functions
-split-all-cold
-split-eh
-dyno-stats
-icf=1
-use-gnu-stack
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt "${BOLTBIN}/${BINARY}.org" \
--data "${BOLTBIN}/${BINARY}-combined.fdata" \
-o "${BOLTBIN}/${BINARY}.bolt" \
-reorder-blocks=ext-tsp \
-reorder-functions=cdsort \
-split-functions \
-split-all-cold \
-split-eh \
-dyno-stats \
-icf=1 \
-use-gnu-stack \
-plt=hot || (echo "Could not optimize the binary"; exit 1)
}

move_binary() {

echo "You can find now your optimzed binary at ${BOLTBIN}"
sudo rm -rf ${FDATA}/${BINARY}.fdata*
sudo cp ${BOLTBIN}/${BINARY}.bolt ${BINARYPATH}/${BINARY}
echo "You can find now your optimized binary at ${BOLTBIN}"
sudo rm -rf "${FDATA}/${BINARY}.fdata"*
sudo cp "${BOLTBIN}/${BINARY}.bolt" "${BINARYPATH}/${BINARY}"
}

build_llvm_bolt () {

TOPLEV=~/toolchain/llvm
mkdir -p ${TOPLEV}
cd ${TOPLEV} || (echo "Could not enter ${TOPLEV} directory"; exit 1)
git clone --depth=1 https://github.com/llvm/llvm-project.git
[ -d llvm-project ] || git clone --branch=release/18.x --depth=1 https://github.com/llvm/llvm-project.git

mkdir -p stage1 || (echo "Could not create stage1 directory"; exit 1)
cd stage1 || (echo "Could not enter stage 1 directory"; exit 1)
Expand All @@ -115,13 +110,13 @@ build_llvm_bolt () {
-DLLVM_ENABLE_PROJECTS="clang;lld;bolt;compiler-rt" \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DCMAKE_EXE_LINKER_FLAGS="-Wl,--push-state -Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl" \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_BUILD_UTILS=OFF \
-DLLVM_ENABLE_BACKTRACES=OFF \
-DLLVM_ENABLE_WARNINGS=OFF \
-DLLVM_INCLUDE_TESTS=OFF \
-DLLVM_ENABLE_TERMINFO=OFF \
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/llvm-bolt || (echo "Could not configure project!"; exit 1)
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/llvm-bolt \
|| (echo "Could not configure project!"; exit 1)

echo "== Start Build"
ninja install || (echo "Could not build project!"; exit 1)
Expand Down
38 changes: 16 additions & 22 deletions bolt-gcc.bash
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,10 @@ PERFDATA=/home/foo/perf.data
## Stage 2: here we use llvm-bolt to optimize the binary
STAGE=


mkdir -p ${DATA}/cc1
mkdir -p ${DATA}/cc1plus



if [ ${STAGE} = 1 ]; then
if [ "${STAGE}" = 1 ]; then
echo "Instrument clang with llvm-bolt"

LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt \
Expand All @@ -41,24 +38,22 @@ if [ ${STAGE} = 1 ]; then
--instrumentation-file=${DATA}/cc1plus/cc1plus.fdata \
${GCCPATH}/cc1plus \
-o ${DATA}/cc1plus/cc1plus
#echo "mooving instrumented binary"
#echo "moving instrumented binary"
sudo mv ${GCCPATH}/cc1 ${GCCPATH}/cc1.org
sudo mv ${DATA}/cc1/cc1 ${GCCPATH}/cc1
#echo "mooving instrumented binary"
#echo "moving instrumented binary"
sudo mv ${GCCPATH}/cc1plus ${GCCPATH}/cc1plus.org
sudo mv ${DATA}/cc1plus/cc1plus ${GCCPATH}/cc1plus

echo "Now move the binarys to the gcc path"
echo "Now move the binaries to the gcc path"
echo "now do some instrument compiles for example compiling a kernel or GCC"
fi

if [ ${STAGE} = 2 ]; then
if [ "${STAGE}" = 2 ]; then
echo "Instrument clang with llvm-bolt"

## Check if perf is available
perf record -e cycles:u -j any,u -- sleep 1 &>/dev/null;

if [[ $? == "0" ]]; then
if perf record -e cycles:u -j any,u -- sleep 1 &>/dev/null; then
echo "BOLTING with Profile!"

LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/perf2bolt ${GCCPATH}/cc1.org \
Expand All @@ -70,7 +65,7 @@ if [ ${STAGE} = 2 ]; then
-o ${DATA}/cc1plus.fdata || (echo "Could not convert perf-data to bolt for gcc"; exit 1)

echo "Optimizing cc1 with the generated profile"
cd ${TOPLEV}
cd ${TOPLEV} || exit 1
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${GCCPATH}/cc1.org \
--data ${DATA}/cc1.fdata \
-o ${TOPLEV}/cc1 \
Expand All @@ -84,7 +79,7 @@ if [ ${STAGE} = 2 ]; then
-use-gnu-stack \
-plt=hot || (echo "Could not optimize binary for cc1"; exit 1)

cd ${TOPLEV}
cd ${TOPLEV} || exit 1
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${GCCPATH}/cc1plus.org \
--data ${DATA}/cc1plus.fdata \
-o ${TOPLEV}/cc1plus \
Expand All @@ -99,13 +94,13 @@ if [ ${STAGE} = 2 ]; then
-plt=hot || (echo "Could not optimize binary for cc1plus"; exit 1)
else
echo "Merging generated profiles"
cd ${DATA}/cc1
${BOLTPATH}/merge-fdata *.fdata > cc1-combined.fdata
cd ${DATA}/cc1plus
${BOLTPATH}/merge-fdata *.fdata > cc1plus-combined.fdata
cd ${DATA}/cc1 || exit 1
${BOLTPATH}/merge-fdata ./*.fdata > cc1-combined.fdata
cd ${DATA}/cc1plus || exit 1
${BOLTPATH}/merge-fdata ./*.fdata > cc1plus-combined.fdata

echo "Optimizing cc1 with the generated profile"
cd ${TOPLEV}
cd ${TOPLEV} || exit 1
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${GCCPATH}/cc1.org \
--data ${DATA}/cc1/cc1-combined.fdata \
-o ${TOPLEV}/cc1 \
Expand All @@ -119,7 +114,7 @@ if [ ${STAGE} = 2 ]; then
-use-gnu-stack \
-plt=hot || (echo "Could not optimize binary for cc1"; exit 1)

cd ${TOPLEV}
cd ${TOPLEV} || exit 1
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${GCCPATH}/cc1plus.org \
--data ${DATA}/cc1plus/cc1plus-combined.fdata \
-o ${TOPLEV}/cc1plus \
Expand All @@ -134,10 +129,9 @@ if [ ${STAGE} = 2 ]; then
-plt=hot || (echo "Could not optimize binary for cc1plus"; exit 1)


echo "mooving bolted binary"
echo "moving bolted binary"
sudo mv ${TOPLEV}/cc1plus ${GCCPATH}/cc1plus
sudo mv ${TOPLEV}/cc1 ${GCCPATH}/cc1
echo "Now you can move the bolted binarys to your ${GCCPATH}"
echo "Now you can move the bolted binaries to your ${GCCPATH}"
fi

fi
6 changes: 3 additions & 3 deletions build_stage1.bash
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
TOPLEV=~/toolchain/llvm
mkdir -p ${TOPLEV}
cd ${TOPLEV} || (echo "Could not enter ${TOPLEV} directory"; exit 1)
git clone https://github.com/llvm/llvm-project.git
[ -d llvm-project ] || git clone --branch=release/18.x --depth=1 https://github.com/llvm/llvm-project.git

mkdir -p stage1 || (echo "Could not create stage1 directory"; exit 1)
cd stage1 || (echo "Could not enter stage 1 directory"; exit 1)
Expand All @@ -27,13 +27,13 @@ cmake -G Ninja ${TOPLEV}/llvm-project/llvm \
-DLLVM_ENABLE_PROJECTS="clang;lld;bolt;compiler-rt;llvm" \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DCMAKE_EXE_LINKER_FLAGS="-Wl,--push-state -Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl" \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_BUILD_UTILS=OFF \
-DLLVM_ENABLE_BACKTRACES=OFF \
-DLLVM_ENABLE_WARNINGS=OFF \
-DLLVM_INCLUDE_TESTS=OFF \
-DLLVM_ENABLE_TERMINFO=OFF \
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/llvm-bolt || (echo "Could not configure project!"; exit 1)
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/llvm-bolt \
|| (echo "Could not configure project!"; exit 1)

echo "== Start Build"
ninja install || (echo "Could not build project!"; exit 1)
11 changes: 5 additions & 6 deletions build_stage2-prof-generate.bash
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/bin/bash

TOPLEV=~/toolchain/llvm
cd ${TOPLEV}
cd ${TOPLEV} || exit 1

mkdir ${TOPLEV}/stage2-prof-gen || (echo "Could not create stage2-prof-generate directory"; exit 1)
cd ${TOPLEV}/stage2-prof-gen
cd ${TOPLEV}/stage2-prof-gen || exit 1
CPATH=${TOPLEV}/llvm-bolt/bin

echo "== Configure Build"
Expand All @@ -21,19 +21,18 @@ cmake -G Ninja ${TOPLEV}/llvm-project/llvm \
-DLLVM_INCLUDE_EXAMPLES=OFF \
-DCMAKE_C_COMPILER=${CPATH}/clang \
-DCMAKE_CXX_COMPILER=${CPATH}/clang++ \
-DLLVM_USE_LINKER=${CPATH}/ld.lld \
-DLLVM_USE_LINKER=lld \
-DLLVM_ENABLE_PROJECTS="clang;lld" \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_ENABLE_WARNINGS=OFF \
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage2-prof-gen/install \
-DLLVM_BUILD_INSTRUMENTED=IR \
-DLLVM_BUILD_RUNTIME=OFF \
-DLLVM_LINK_LLVM_DYLIB=ON \
-DLLVM_VP_COUNTERS_PER_SITE=6 \
-DLLVM_BUILD_INSTRUMENTED=IR \
-DLLVM_ENABLE_PLUGINS=ON \
-DLLVM_BUILD_RUNTIME=No || (echo "Could not configure project!"; exit 1)
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage2-prof-gen/install \
|| (echo "Could not configure project!"; exit 1)

echo "== Start Build"
ninja || (echo "Could not build project!"; exit 1)
20 changes: 11 additions & 9 deletions build_stage2-prof-use-lto.bash
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
#!/bin/bash
TOPLEV=~/toolchain/llvm
cd ${TOPLEV}
cd ${TOPLEV} || exit 1

echo "Building Clang with PGO and LTO"

mkdir ${TOPLEV}/stage2-prof-use-lto
cd ${TOPLEV}/stage2-prof-use-lto
cd ${TOPLEV}/stage2-prof-use-lto || exit 1
CPATH=${TOPLEV}/llvm-bolt/bin

echo "== Configure Build"
echo "== Build with stage1-tools -- $CPATH"

COPT="-O3 -march=x86-64-v3 -mtune=haswell -ffunction-sections -fdata-sections"
cmake -G Ninja ${TOPLEV}/llvm-project/llvm \
-DLLVM_BINUTILS_INCDIR=/usr/include \
-DCLANG_ENABLE_ARCMT=OFF \
Expand All @@ -22,21 +23,22 @@ cmake -G Ninja ${TOPLEV}/llvm-project/llvm \
-DLLVM_INCLUDE_EXAMPLES=OFF \
-DCMAKE_C_COMPILER=${CPATH}/clang \
-DCMAKE_CXX_COMPILER=${CPATH}/clang++ \
-DLLVM_USE_LINKER=${CPATH}/ld.lld \
-DLLVM_USE_LINKER=lld \
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt;polly" \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DCMAKE_BUILD_TYPE=Release \
-DCLANG_VENDOR="CachyOS - LLVM 19 BOLT" \
-DCLANG_VENDOR="CachyOS - LLVM 18 BOLT" \
-DLLVM_ENABLE_WARNINGS=OFF \
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage2-prof-use-lto/install \
-DLLVM_PROFDATA_FILE=${TOPLEV}/stage2-prof-gen/profiles/clang.profdata \
-DLLVM_ENABLE_LTO=Thin \
-DCMAKE_C_FLAGS="-O3 -march=x86-64-v3 -mtune=haswell -ffunction-sections -fdata-sections" \
-DCMAKE_ASM_FLAGS="-O3 -march=x86-64-v3 -mtune=haswell -ffunction-sections -fdata-sections" \
-DCMAKE_CXX_FLAGS="-O3 -march=x86-64-v3 -mtune=haswell -ffunction-sections -fdata-sections" \
-DCMAKE_C_FLAGS="$COPT" \
-DCMAKE_ASM_FLAGS="$COPT" \
-DCMAKE_CXX_FLAGS="$COPT" \
-DCMAKE_EXE_LINKER_FLAGS="-Wl,-znow -Wl,--emit-relocs" \
-DLLVM_ENABLE_PLUGINS=ON \
-DLLVM_ENABLE_TERMINFO=OFF || (echo "Could not configure project!"; exit 1)
-DLLVM_ENABLE_TERMINFO=OFF \
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage2-prof-use-lto/install \
|| (echo "Could not configure project!"; exit 1)

echo "== Start Build"
ninja install || (echo "Could not build project!"; exit 1)
16 changes: 8 additions & 8 deletions build_stage3-bolt-without-sampling.bash
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/bin/bash

TOPLEV=~/toolchain/llvm
cd ${TOPLEV}
cd ${TOPLEV} || exit 1

mkdir -p ${TOPLEV}/stage3-without-sampling/intrumentdata || (echo "Could not create stage3-bolt directory"; exit 1)
cd ${TOPLEV}/stage3-without-sampling
cd ${TOPLEV}/stage3-without-sampling || exit 1
CPATH=${TOPLEV}/stage2-prof-use-lto/install/bin
BOLTPATH=${TOPLEV}/llvm-bolt/bin

Expand All @@ -18,30 +18,30 @@ ${BOLTPATH}/llvm-bolt \
${CPATH}/clang-18 \
-o ${CPATH}/clang-18.inst

echo "mooving instrumented binary"
echo "moving instrumented binary"
mv ${CPATH}/clang-18 ${CPATH}/clang-18.org
mv ${CPATH}/clang-18.inst ${CPATH}/clang-18

echo "== Configure Build"
echo "== Build with stage2-prof-use-lto instrumented clang -- $CPATH"

cmake -G Ninja ../llvm-project/llvm \
cmake -G Ninja ${TOPLEV}/llvm-project/llvm \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_ENABLE_PROJECTS="clang" \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DCMAKE_AR=${CPATH}/llvm-ar \
-DCMAKE_C_COMPILER=${CPATH}/clang-18 \
-DCMAKE_CXX_COMPILER=${CPATH}/clang++ \
-DLLVM_USE_LINKER=${CPATH}/ld.lld \
-DLLVM_USE_LINKER=lld \
-DCMAKE_RANLIB=${CPATH}/llvm-ranlib \
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage3-without-sampling/install

echo "== Start Training Build"
ninja & read -t 100 || kill $!
ninja & read -rt 100 || kill $!

echo "Merging generated profiles"
cd ${TOPLEV}/stage3-without-sampling/intrumentdata
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/merge-fdata *.fdata > combined.fdata
cd ${TOPLEV}/stage3-without-sampling/intrumentdata || exit 1
LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/merge-fdata ./*.fdata > combined.fdata
echo "Optimizing Clang with the generated profile"

LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${CPATH}/clang-18.org \
Expand Down
Loading