Skip to content

Commit 0fba87c

Browse files
committed
[llvm-advisor] Add support for collecting extra build outputs
Adds helpers to run the compilation with extra flags to collect IR, assembly, AST dumps, include trees, debug info, and other data.
1 parent 8b2f6e9 commit 0fba87c

File tree

2 files changed

+407
-0
lines changed

2 files changed

+407
-0
lines changed
Lines changed: 367 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,367 @@
1+
#include "DataExtractor.h"
2+
#include "../Utils/ProcessRunner.h"
3+
#include "llvm/Support/FileSystem.h"
4+
#include "llvm/Support/Path.h"
5+
#include "llvm/Support/raw_ostream.h"
6+
#include <algorithm>
7+
8+
namespace llvm {
9+
namespace advisor {
10+
11+
/// Builds an extractor whose `config_` member is initialized from \p config.
/// The extraction helpers in this file query it for the verbose flag, tool
/// paths and the process timeout.
DataExtractor::DataExtractor(const AdvisorConfig &config)
    : config_(config) {
}
12+
13+
/// Runs every extraction step for \p unit, writing results below \p tempDir.
///
/// The per-category output directories are created first. A directory that
/// cannot be created is reported to the caller as an Error instead of being
/// ignored, because every step that follows writes into those directories.
/// The steps then run in a fixed order and the first Error returned by any of
/// them is propagated unchanged.
///
/// \param unit    Compilation unit whose sources are processed and which
///                receives the generated-file registrations.
/// \param tempDir Root directory for all extraction output.
/// \returns Error::success() when the directories exist and no step returned
///          an error.
Error DataExtractor::extractAllData(CompilationUnit &unit,
                                    const std::string &tempDir) {
  if (config_.getVerbose()) {
    outs() << "Extracting data for unit: " << unit.getName() << "\n";
  }

  // Create extraction subdirectories
  static const char *const kSubdirs[] = {
      "/ir",           "/assembly", "/ast",           "/preprocessed",
      "/include-tree", "/debug",    "/static-analyzer"};
  for (const char *subdir : kSubdirs) {
    const std::string dir = tempDir + subdir;
    if (std::error_code EC = sys::fs::create_directories(dir))
      return createStringError(EC, "Failed to create directory '%s': %s",
                               dir.c_str(), EC.message().c_str());
  }

  if (auto Err = extractIR(unit, tempDir))
    return Err;
  if (auto Err = extractAssembly(unit, tempDir))
    return Err;
  if (auto Err = extractAST(unit, tempDir))
    return Err;
  if (auto Err = extractPreprocessed(unit, tempDir))
    return Err;
  if (auto Err = extractIncludeTree(unit, tempDir))
    return Err;
  if (auto Err = extractDebugInfo(unit, tempDir))
    return Err;
  if (auto Err = extractStaticAnalysis(unit, tempDir))
    return Err;
  if (auto Err = extractMacroExpansion(unit, tempDir))
    return Err;
  if (auto Err = extractCompilationPhases(unit, tempDir))
    return Err;

  return Error::success();
}
49+
50+
std::vector<std::string>
51+
DataExtractor::getBaseCompilerArgs(const CompilationUnitInfo &unitInfo) const {
52+
std::vector<std::string> baseArgs;
53+
54+
// Copy include paths and defines
55+
for (const auto &arg : unitInfo.compileFlags) {
56+
if (StringRef(arg).starts_with("-I") || StringRef(arg).starts_with("-D") ||
57+
StringRef(arg).starts_with("-U") ||
58+
StringRef(arg).starts_with("-std=") ||
59+
StringRef(arg).starts_with("-m") || StringRef(arg).starts_with("-f") ||
60+
StringRef(arg).starts_with("-W") || StringRef(arg).starts_with("-O")) {
61+
// Skip problematic flags for extraction
62+
if (StringRef(arg).starts_with("-fsave-optimization-record") ||
63+
StringRef(arg).starts_with("-fprofile-instr-generate") ||
64+
StringRef(arg).starts_with("-fcoverage-mapping") ||
65+
StringRef(arg).starts_with("-foptimization-record-file")) {
66+
continue;
67+
}
68+
baseArgs.push_back(arg);
69+
}
70+
}
71+
72+
return baseArgs;
73+
}
74+
75+
/// Emits textual LLVM IR for every non-header source of \p unit.
///
/// Each source is compiled with the forwarded flags plus `-emit-llvm -S` into
/// `<tempDir>/ir/<stem>.ll`; files that exist afterwards are registered on
/// \p unit under the "ir" category. Only the stem of the source path is used,
/// so sources sharing a stem map to the same output file.
///
/// Extraction is best-effort: a failing compiler run is skipped (and reported
/// on stderr in verbose mode) rather than propagated.
///
/// \returns Always Error::success().
Error DataExtractor::extractIR(CompilationUnit &unit,
                               const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string outputFile =
        tempDir + "/ir/" + sys::path::stem(source.path).str() + ".ll";

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("-emit-llvm");
    baseArgs.push_back("-S");
    baseArgs.push_back("-o");
    baseArgs.push_back(outputFile);
    baseArgs.push_back(source.path);

    if (auto Err = runCompilerWithFlags(baseArgs)) {
      // A failed llvm::Error must be handled before it is destroyed; dropping
      // it unhandled aborts in builds with error checking enabled.
      consumeError(std::move(Err));
      if (config_.getVerbose()) {
        errs() << "Failed to extract IR for " << source.path << "\n";
      }
      continue;
    }

    if (sys::fs::exists(outputFile)) {
      unit.addGeneratedFile("ir", outputFile);
    }
  }
  return Error::success();
}
104+
105+
/// Generates an assembly listing for every non-header source of \p unit.
///
/// Each source is compiled with the forwarded flags plus `-S` into
/// `<tempDir>/assembly/<stem>.s`; files that exist afterwards are registered
/// on \p unit under the "assembly" category. Only the stem of the source path
/// is used, so sources sharing a stem map to the same output file.
///
/// Extraction is best-effort: a failing compiler run is skipped (and reported
/// on stderr in verbose mode) rather than propagated.
///
/// \returns Always Error::success().
Error DataExtractor::extractAssembly(CompilationUnit &unit,
                                     const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string outputFile =
        tempDir + "/assembly/" + sys::path::stem(source.path).str() + ".s";

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("-S");
    baseArgs.push_back("-o");
    baseArgs.push_back(outputFile);
    baseArgs.push_back(source.path);

    if (auto Err = runCompilerWithFlags(baseArgs)) {
      // A failed llvm::Error must be handled before it is destroyed; dropping
      // it unhandled aborts in builds with error checking enabled.
      consumeError(std::move(Err));
      if (config_.getVerbose()) {
        errs() << "Failed to extract assembly for " << source.path << "\n";
      }
      continue;
    }

    if (sys::fs::exists(outputFile)) {
      unit.addGeneratedFile("assembly", outputFile);
    }
  }
  return Error::success();
}
133+
134+
/// Captures a Clang AST dump for every non-header source of \p unit.
///
/// The compiler is run with `-Xclang -ast-dump -fsyntax-only`; on a zero exit
/// code its standard output is written to `<tempDir>/ast/<stem>.ast` and
/// registered on \p unit under the "ast" category.
///
/// Extraction is best-effort: sources whose compiler run or output file fails
/// are skipped silently.
///
/// \returns Always Error::success().
Error DataExtractor::extractAST(CompilationUnit &unit,
                                const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string outputFile =
        tempDir + "/ast/" + sys::path::stem(source.path).str() + ".ast";

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    // -ast-dump is a cc1 (frontend) option; the clang driver only accepts it
    // when it is passed through with -Xclang.
    baseArgs.push_back("-Xclang");
    baseArgs.push_back("-ast-dump");
    baseArgs.push_back("-fsyntax-only");
    baseArgs.push_back(source.path);

    auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs,
                                     config_.getTimeout());
    if (!result || result->exitCode != 0)
      continue;

    std::error_code EC;
    raw_fd_ostream OS(outputFile, EC);
    if (EC)
      continue;

    OS << result->stdout;
    // Close explicitly so write errors surface here. An error left pending on
    // the stream makes raw_fd_ostream's destructor report a fatal error.
    OS.close();
    if (OS.has_error()) {
      OS.clear_error();
      continue;
    }
    unit.addGeneratedFile("ast", outputFile);
  }
  return Error::success();
}
161+
162+
/// Produces preprocessed output for every non-header source of \p unit.
///
/// Each source is run through the compiler with `-E` into
/// `<tempDir>/preprocessed/<stem>.ii` when the source language is "C++" and
/// `<stem>.i` otherwise; files that exist afterwards are registered on
/// \p unit under the "preprocessed" category.
///
/// Extraction is best-effort: a failing compiler run is skipped (and reported
/// on stderr in verbose mode) rather than propagated.
///
/// \returns Always Error::success().
Error DataExtractor::extractPreprocessed(CompilationUnit &unit,
                                         const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string ext = (source.language == "C++") ? ".ii" : ".i";
    std::string outputFile =
        tempDir + "/preprocessed/" + sys::path::stem(source.path).str() + ext;

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("-E");
    baseArgs.push_back("-o");
    baseArgs.push_back(outputFile);
    baseArgs.push_back(source.path);

    if (auto Err = runCompilerWithFlags(baseArgs)) {
      // A failed llvm::Error must be handled before it is destroyed; dropping
      // it unhandled aborts in builds with error checking enabled.
      consumeError(std::move(Err));
      if (config_.getVerbose()) {
        errs() << "Failed to extract preprocessed for " << source.path << "\n";
      }
      continue;
    }

    if (sys::fs::exists(outputFile)) {
      unit.addGeneratedFile("preprocessed", outputFile);
    }
  }
  return Error::success();
}
191+
192+
/// Records the header include tree for every non-header source of \p unit.
///
/// The compiler is run with `-H -fsyntax-only`. Whenever the process result
/// is available and its standard error output is non-empty, that output is
/// written to `<tempDir>/include-tree/<stem>.include.txt` and registered on
/// \p unit under the "include-tree" category. The exit code is not consulted.
///
/// Extraction is best-effort: sources whose output file cannot be written are
/// skipped silently.
///
/// \returns Always Error::success().
Error DataExtractor::extractIncludeTree(CompilationUnit &unit,
                                        const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string outputFile = tempDir + "/include-tree/" +
                             sys::path::stem(source.path).str() +
                             ".include.txt";

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("-H");
    baseArgs.push_back("-fsyntax-only");
    baseArgs.push_back(source.path);

    auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs,
                                     config_.getTimeout());
    if (!result || result->stderr.empty())
      continue;

    std::error_code EC;
    raw_fd_ostream OS(outputFile, EC);
    if (EC)
      continue;

    OS << result->stderr; // Include tree goes to stderr
    // Close explicitly so write errors surface here. An error left pending on
    // the stream makes raw_fd_ostream's destructor report a fatal error.
    OS.close();
    if (OS.has_error()) {
      OS.clear_error();
      continue;
    }
    unit.addGeneratedFile("include-tree", outputFile);
  }
  return Error::success();
}
220+
221+
/// Dumps DWARF debug information for every non-header source of \p unit.
///
/// Each source is compiled with `-g -c` into `<tempDir>/debug/<stem>.o`. If
/// that object exists, `llvm-dwarfdump` is run on it, and on a zero exit code
/// its standard output is written to `<tempDir>/debug/<stem>.debug.txt` and
/// registered on \p unit under the "debug" category.
///
/// Extraction is best-effort: a failing compiler run is reported on stderr in
/// verbose mode and skipped; dump or write failures are skipped silently.
///
/// \returns Always Error::success().
Error DataExtractor::extractDebugInfo(CompilationUnit &unit,
                                      const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    const std::string stem = sys::path::stem(source.path).str();
    std::string outputFile = tempDir + "/debug/" + stem + ".debug.txt";
    std::string objectFile = tempDir + "/debug/" + stem + ".o";

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("-g");
    baseArgs.push_back("-c");
    baseArgs.push_back("-o");
    baseArgs.push_back(objectFile);
    baseArgs.push_back(source.path);

    if (auto Err = runCompilerWithFlags(baseArgs)) {
      // A failed llvm::Error must be handled before it is destroyed; dropping
      // it unhandled aborts in builds with error checking enabled.
      consumeError(std::move(Err));
      if (config_.getVerbose()) {
        errs() << "Failed to extract debug info for " << source.path << "\n";
      }
      continue;
    }

    // Extract DWARF info using llvm-dwarfdump
    if (!sys::fs::exists(objectFile))
      continue;

    // NOTE(review): unlike clang, llvm-dwarfdump is not resolved through
    // config_.getToolPath() - confirm whether that is intentional.
    std::vector<std::string> dwarfArgs = {objectFile};
    auto result =
        ProcessRunner::run("llvm-dwarfdump", dwarfArgs, config_.getTimeout());
    if (!result || result->exitCode != 0)
      continue;

    std::error_code EC;
    raw_fd_ostream OS(outputFile, EC);
    if (EC)
      continue;

    OS << result->stdout;
    // Close explicitly so write errors surface here. An error left pending on
    // the stream makes raw_fd_ostream's destructor report a fatal error.
    OS.close();
    if (OS.has_error()) {
      OS.clear_error();
      continue;
    }
    unit.addGeneratedFile("debug", outputFile);
  }
  return Error::success();
}
263+
264+
/// Runs the Clang static analyzer on every non-header source of \p unit.
///
/// The compiler is invoked with `--analyze -Xanalyzer -analyzer-output=text`.
/// Whenever the process result is available, its standard output and standard
/// error are written (under "STDOUT:" / "STDERR:" headings) to
/// `<tempDir>/static-analyzer/<stem>.analysis.txt` and registered on \p unit
/// under the "static-analyzer" category. The exit code is not consulted.
///
/// Extraction is best-effort: sources whose output file cannot be written are
/// skipped silently.
///
/// \returns Always Error::success().
Error DataExtractor::extractStaticAnalysis(CompilationUnit &unit,
                                           const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string outputFile = tempDir + "/static-analyzer/" +
                             sys::path::stem(source.path).str() +
                             ".analysis.txt";

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("--analyze");
    baseArgs.push_back("-Xanalyzer");
    baseArgs.push_back("-analyzer-output=text");
    baseArgs.push_back(source.path);

    auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs,
                                     config_.getTimeout());
    if (!result)
      continue;

    std::error_code EC;
    raw_fd_ostream OS(outputFile, EC);
    if (EC)
      continue;

    OS << "STDOUT:\n" << result->stdout << "\nSTDERR:\n" << result->stderr;
    // Close explicitly so write errors surface here. An error left pending on
    // the stream makes raw_fd_ostream's destructor report a fatal error.
    OS.close();
    if (OS.has_error()) {
      OS.clear_error();
      continue;
    }
    unit.addGeneratedFile("static-analyzer", outputFile);
  }
  return Error::success();
}
293+
294+
/// Dumps preprocessor macro definitions for every non-header source of
/// \p unit.
///
/// Each source is run through the compiler with `-E -dM` into
/// `<tempDir>/preprocessed/<stem>.macro-expanded.ii` when the source language
/// is "C++" and `.macro-expanded.i` otherwise; files that exist afterwards
/// are registered on \p unit under the "macro-expansion" category. Despite
/// the naming, `-dM` makes the output a list of macro definitions rather than
/// macro-expanded source text.
///
/// Extraction is best-effort: a failing compiler run is skipped (and reported
/// on stderr in verbose mode) rather than propagated.
///
/// \returns Always Error::success().
Error DataExtractor::extractMacroExpansion(CompilationUnit &unit,
                                           const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string outputFile =
        tempDir + "/preprocessed/" + sys::path::stem(source.path).str() +
        ".macro-expanded" + ((source.language == "C++") ? ".ii" : ".i");

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("-E");
    baseArgs.push_back("-dM"); // Show macro definitions
    baseArgs.push_back("-o");
    baseArgs.push_back(outputFile);
    baseArgs.push_back(source.path);

    if (auto Err = runCompilerWithFlags(baseArgs)) {
      // A failed llvm::Error must be handled before it is destroyed; dropping
      // it unhandled aborts in builds with error checking enabled.
      consumeError(std::move(Err));
      if (config_.getVerbose()) {
        errs() << "Failed to extract macro expansion for " << source.path
               << "\n";
      }
      continue;
    }

    if (sys::fs::exists(outputFile)) {
      unit.addGeneratedFile("macro-expansion", outputFile);
    }
  }
  return Error::success();
}
325+
326+
/// Captures the compiler's verbose output for every non-header source of
/// \p unit.
///
/// The compiler is run with `-v -fsyntax-only`. Whenever the process result
/// is available, its standard error output is written (under a
/// "COMPILATION PHASES:" heading) to `<tempDir>/debug/<stem>.phases.txt` and
/// registered on \p unit under the "compilation-phases" category. The exit
/// code is not consulted.
///
/// Extraction is best-effort: sources whose output file cannot be written are
/// skipped silently.
///
/// \returns Always Error::success().
Error DataExtractor::extractCompilationPhases(CompilationUnit &unit,
                                              const std::string &tempDir) {
  for (const auto &source : unit.getInfo().sources) {
    if (source.isHeader)
      continue;

    std::string outputFile = tempDir + "/debug/" +
                             sys::path::stem(source.path).str() + ".phases.txt";

    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
    baseArgs.push_back("-v"); // Verbose compilation phases
    baseArgs.push_back("-fsyntax-only");
    baseArgs.push_back(source.path);

    auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs,
                                     config_.getTimeout());
    if (!result)
      continue;

    std::error_code EC;
    raw_fd_ostream OS(outputFile, EC);
    if (EC)
      continue;

    OS << "COMPILATION PHASES:\n"
       << result->stderr; // Verbose output goes to stderr
    // Close explicitly so write errors surface here. An error left pending on
    // the stream makes raw_fd_ostream's destructor report a fatal error.
    OS.close();
    if (OS.has_error()) {
      OS.clear_error();
      continue;
    }
    unit.addGeneratedFile("compilation-phases", outputFile);
  }
  return Error::success();
}
354+
355+
/// Invokes the configured clang tool with \p args and the configured timeout.
///
/// \param args Complete argument list handed to the process runner.
/// \returns Error::success() when the process result is available and its
///          exit code is zero; otherwise an io_error string error reading
///          "Compiler failed".
Error DataExtractor::runCompilerWithFlags(
    const std::vector<std::string> &args) {
  // NOTE(review): if ProcessRunner::run returns llvm::Expected, the error it
  // carries on failure is discarded here without being consumed - confirm
  // against ProcessRunner.h.
  auto outcome = ProcessRunner::run(config_.getToolPath("clang"), args,
                                    config_.getTimeout());

  const bool succeeded = outcome && outcome->exitCode == 0;
  if (succeeded)
    return Error::success();

  return createStringError(std::make_error_code(std::errc::io_error),
                           "Compiler failed");
}
365+
366+
} // namespace advisor
367+
} // namespace llvm

0 commit comments

Comments
 (0)