Skip to content

Commit 1367104

Browse files
committed
Add tool src/tools/coverage-dump for use by some new coverage tests
1 parent 04374cd commit 1367104

File tree

11 files changed

+562
-1
lines changed

11 files changed

+562
-1
lines changed

Cargo.lock

+18
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,18 @@ version = "0.8.4"
722722
source = "registry+https://github.com/rust-lang/crates.io-index"
723723
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
724724

725+
[[package]]
726+
name = "coverage-dump"
727+
version = "0.1.0"
728+
dependencies = [
729+
"anyhow",
730+
"leb128",
731+
"md-5",
732+
"miniz_oxide",
733+
"regex",
734+
"rustc-demangle",
735+
]
736+
725737
[[package]]
726738
name = "coverage_test_macros"
727739
version = "0.0.0"
@@ -2041,6 +2053,12 @@ version = "1.3.0"
20412053
source = "registry+https://github.com/rust-lang/crates.io-index"
20422054
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
20432055

2056+
[[package]]
2057+
name = "leb128"
2058+
version = "0.2.5"
2059+
source = "registry+https://github.com/rust-lang/crates.io-index"
2060+
checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
2061+
20442062
[[package]]
20452063
name = "levenshtein"
20462064
version = "1.0.5"

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ members = [
4343
"src/tools/generate-windows-sys",
4444
"src/tools/rustdoc-gui-test",
4545
"src/tools/opt-dist",
46+
"src/tools/coverage-dump",
4647
]
4748

4849
exclude = [

src/bootstrap/builder.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,8 @@ impl<'a> Builder<'a> {
703703
llvm::Lld,
704704
llvm::CrtBeginEnd,
705705
tool::RustdocGUITest,
706-
tool::OptimizedDist
706+
tool::OptimizedDist,
707+
tool::CoverageDump,
707708
),
708709
Kind::Check | Kind::Clippy | Kind::Fix => describe!(
709710
check::Std,

src/bootstrap/tool.rs

+1
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ bootstrap_tool!(
306306
GenerateWindowsSys, "src/tools/generate-windows-sys", "generate-windows-sys";
307307
RustdocGUITest, "src/tools/rustdoc-gui-test", "rustdoc-gui-test", is_unstable_tool = true, allow_features = "test";
308308
OptimizedDist, "src/tools/opt-dist", "opt-dist";
309+
CoverageDump, "src/tools/coverage-dump", "coverage-dump";
309310
);
310311

311312
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]

src/tools/coverage-dump/Cargo.toml

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[package]
2+
name = "coverage-dump"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7+
8+
[dependencies]
9+
anyhow = "1.0.71"
10+
leb128 = "0.2.5"
11+
md5 = { package = "md-5" , version = "0.10.5" }
12+
miniz_oxide = "0.7.1"
13+
regex = "1.8.4"
14+
rustc-demangle = "0.1.23"

src/tools/coverage-dump/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
This tool extracts coverage mapping information from an LLVM IR assembly file
2+
(`.ll`), and prints it in a more human-readable form that can be used for
3+
snapshot tests.
4+
5+
The output format is mostly arbitrary, so it's OK to change the output as long
6+
as any affected tests are also re-blessed. However, the output should be
7+
consistent across different executions on different platforms, so avoid
8+
printing any information that is platform-specific or non-deterministic.

src/tools/coverage-dump/src/covfun.rs

+296
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
use crate::parser::{unescape_llvm_string_contents, Parser};
2+
use anyhow::{anyhow, Context};
3+
use regex::Regex;
4+
use std::collections::HashMap;
5+
use std::fmt::{self, Debug, Write as _};
6+
use std::sync::OnceLock;
7+
8+
pub(crate) fn dump_covfun_mappings(
9+
llvm_ir: &str,
10+
function_names: &HashMap<u64, String>,
11+
) -> anyhow::Result<()> {
12+
// Extract function coverage entries from the LLVM IR assembly, and associate
13+
// each entry with its (demangled) name.
14+
let mut covfun_entries = llvm_ir
15+
.lines()
16+
.filter_map(covfun_line_data)
17+
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
18+
.collect::<Vec<_>>();
19+
covfun_entries.sort_by(|a, b| {
20+
// Sort entries primarily by name, to help make the order consistent
21+
// across platforms and relatively insensitive to changes.
22+
// (Sadly we can't use `sort_by_key` because we would need to return references.)
23+
Ord::cmp(&a.0, &b.0)
24+
.then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used))
25+
.then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice()))
26+
});
27+
28+
for (name, line_data) in &covfun_entries {
29+
let name = name.unwrap_or("(unknown)");
30+
let unused = if line_data.is_used { "" } else { " (unused)" };
31+
println!("Function name: {name}{unused}");
32+
33+
let payload: &[u8] = &line_data.payload;
34+
println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len());
35+
36+
let mut parser = Parser::new(payload);
37+
38+
let num_files = parser.read_uleb128_u32()?;
39+
println!("Number of files: {num_files}");
40+
41+
for i in 0..num_files {
42+
let global_file_id = parser.read_uleb128_u32()?;
43+
println!("- file {i} => global file {global_file_id}");
44+
}
45+
46+
let num_expressions = parser.read_uleb128_u32()?;
47+
println!("Number of expressions: {num_expressions}");
48+
49+
let mut expression_resolver = ExpressionResolver::new();
50+
for i in 0..num_expressions {
51+
let lhs = parser.read_simple_term()?;
52+
let rhs = parser.read_simple_term()?;
53+
println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}");
54+
expression_resolver.push_operands(lhs, rhs);
55+
}
56+
57+
for i in 0..num_files {
58+
let num_mappings = parser.read_uleb128_u32()?;
59+
println!("Number of file {i} mappings: {num_mappings}");
60+
61+
for _ in 0..num_mappings {
62+
let (kind, region) = parser.read_mapping_kind_and_region()?;
63+
println!("- {kind:?} at {region:?}");
64+
65+
match kind {
66+
// Also print expression mappings in resolved form.
67+
MappingKind::Code(term @ CovTerm::Expression { .. })
68+
| MappingKind::Gap(term @ CovTerm::Expression { .. }) => {
69+
println!(" = {}", expression_resolver.format_term(term));
70+
}
71+
// If the mapping is a branch region, print both of its arms
72+
// in resolved form (even if they aren't expressions).
73+
MappingKind::Branch { r#true, r#false } => {
74+
println!(" true = {}", expression_resolver.format_term(r#true));
75+
println!(" false = {}", expression_resolver.format_term(r#false));
76+
}
77+
_ => (),
78+
}
79+
}
80+
}
81+
82+
parser.ensure_empty()?;
83+
println!();
84+
}
85+
Ok(())
86+
}
87+
88+
struct CovfunLineData {
89+
name_hash: u64,
90+
is_used: bool,
91+
payload: Vec<u8>,
92+
}
93+
94+
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
95+
/// entry, and if so extracts relevant data in a `CovfunLineData`.
96+
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
97+
let re = {
98+
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
99+
// rather than the section name, because the section name is harder to
100+
// extract and differs across Linux/Windows/macOS. We also extract the
101+
// symbol name hash from the variable name rather than the data, since
102+
// it's easier and both should match.
103+
static RE: OnceLock<Regex> = OnceLock::new();
104+
RE.get_or_init(|| {
105+
Regex::new(
106+
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
107+
)
108+
.unwrap()
109+
})
110+
};
111+
112+
let captures = re.captures(line)?;
113+
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
114+
let is_used = captures.name("is_used").is_some();
115+
let payload = unescape_llvm_string_contents(&captures["payload"]);
116+
117+
Some(CovfunLineData { name_hash, is_used, payload })
118+
}
119+
120+
// Extra parser methods only needed when parsing `covfun` payloads.
121+
impl<'a> Parser<'a> {
122+
fn read_simple_term(&mut self) -> anyhow::Result<CovTerm> {
123+
let raw_term = self.read_uleb128_u32()?;
124+
CovTerm::decode(raw_term).context("decoding term")
125+
}
126+
127+
fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> {
128+
let mut kind = self.read_raw_mapping_kind()?;
129+
let mut region = self.read_raw_mapping_region()?;
130+
131+
const HIGH_BIT: u32 = 1u32 << 31;
132+
if region.end_column & HIGH_BIT != 0 {
133+
region.end_column &= !HIGH_BIT;
134+
kind = match kind {
135+
MappingKind::Code(term) => MappingKind::Gap(term),
136+
// LLVM's coverage mapping reader will actually handle this
137+
// case without complaint, but the result is almost certainly
138+
// a meaningless implementation artifact.
139+
_ => return Err(anyhow!("unexpected base kind for gap region: {kind:?}")),
140+
}
141+
}
142+
143+
Ok((kind, region))
144+
}
145+
146+
fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> {
147+
let raw_mapping_kind = self.read_uleb128_u32()?;
148+
if let Some(term) = CovTerm::decode(raw_mapping_kind) {
149+
return Ok(MappingKind::Code(term));
150+
}
151+
152+
assert_eq!(raw_mapping_kind & 0b11, 0);
153+
assert_ne!(raw_mapping_kind, 0);
154+
155+
let (high, is_expansion) = (raw_mapping_kind >> 3, raw_mapping_kind & 0b100 != 0);
156+
if is_expansion {
157+
Ok(MappingKind::Expansion(high))
158+
} else {
159+
match high {
160+
0 => unreachable!("zero kind should have already been handled as a code mapping"),
161+
2 => Ok(MappingKind::Skip),
162+
4 => {
163+
let r#true = self.read_simple_term()?;
164+
let r#false = self.read_simple_term()?;
165+
Ok(MappingKind::Branch { r#true, r#false })
166+
}
167+
_ => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")),
168+
}
169+
}
170+
}
171+
172+
fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> {
173+
let start_line_offset = self.read_uleb128_u32()?;
174+
let start_column = self.read_uleb128_u32()?;
175+
let end_line_offset = self.read_uleb128_u32()?;
176+
let end_column = self.read_uleb128_u32()?;
177+
Ok(MappingRegion { start_line_offset, start_column, end_line_offset, end_column })
178+
}
179+
}
180+
181+
/// Enum that can hold a constant zero value, the ID of a physical coverage
/// counter, or the ID (and operation) of a coverage-counter expression.
///
/// Terms are used as the operands of coverage-counter expressions, as the arms
/// of branch mappings, and as the value of code/gap mappings.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum CovTerm {
    Zero,
    Counter(u32),
    Expression(u32, Op),
}

/// Operator (addition or subtraction) used by an expression.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum Op {
    Sub,
    Add,
}

impl CovTerm {
    /// Decodes a raw term value.
    ///
    /// The low two bits are a tag and the remaining bits are the ID:
    /// - tag `0b00` with a zero ID is [`CovTerm::Zero`],
    /// - tag `0b01` is a counter,
    /// - tag `0b10` is a subtraction expression,
    /// - tag `0b11` is an addition expression.
    ///
    /// Returns `None` for tag `0b00` with a non-zero ID.
    pub(crate) fn decode(input: u32) -> Option<Self> {
        let (high, tag) = (input >> 2, input & 0b11);
        match tag {
            0b00 if high == 0 => Some(Self::Zero),
            0b01 => Some(Self::Counter(high)),
            0b10 => Some(Self::Expression(high, Op::Sub)),
            0b11 => Some(Self::Expression(high, Op::Add)),
            // When reading expression operands or branch arms, the LLVM coverage
            // mapping reader will always interpret a `0b00` tag as a zero
            // term, even when the high bits are non-zero.
            // We treat that case as failure instead, so that this code can be
            // shared by the full mapping-kind reader as well.
            _ => None,
        }
    }
}
217+
218+
#[derive(Debug)]
219+
enum MappingKind {
220+
Code(CovTerm),
221+
Gap(CovTerm),
222+
Expansion(u32),
223+
Skip,
224+
// Using raw identifiers here makes the dump output a little bit nicer
225+
// (via the derived Debug), at the expense of making this tool's source
226+
// code a little bit uglier.
227+
Branch { r#true: CovTerm, r#false: CovTerm },
228+
}
229+
230+
/// Source region covered by a mapping, stored in its encoded (relative) form.
struct MappingRegion {
    /// Offset of this region's start line, relative to the *start line* of
    /// the *previous mapping* (or 0). Line numbers are 1-based.
    start_line_offset: u32,
    /// This region's start column, absolute and 1-based.
    start_column: u32,
    /// Offset of this region's end line, relative to *this mapping's*
    /// start line. Line numbers are 1-based.
    end_line_offset: u32,
    /// This region's end column, absolute, 1-based, and exclusive.
    ///
    /// If the highest bit is set, that bit is cleared and the associated
    /// mapping becomes a gap region mapping.
    end_column: u32,
}

/// Formats the region as `(prev + L, C) to (start + L, C)`, making it explicit
/// that both line values are offsets rather than absolute line numbers.
impl Debug for MappingRegion {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start_line_offset, start_column, end_line_offset, end_column } = self;
        write!(
            f,
            "(prev + {}, {}) to (start + {}, {})",
            start_line_offset, start_column, end_line_offset, end_column
        )
    }
}
255+
256+
/// Helper type that prints expressions in a "resolved" form, so that
257+
/// developers reading the dump don't need to resolve expressions by hand.
258+
struct ExpressionResolver {
259+
operands: Vec<(CovTerm, CovTerm)>,
260+
}
261+
262+
impl ExpressionResolver {
263+
fn new() -> Self {
264+
Self { operands: Vec::new() }
265+
}
266+
267+
fn push_operands(&mut self, lhs: CovTerm, rhs: CovTerm) {
268+
self.operands.push((lhs, rhs));
269+
}
270+
271+
fn format_term(&self, term: CovTerm) -> String {
272+
let mut output = String::new();
273+
self.write_term(&mut output, term);
274+
output
275+
}
276+
277+
fn write_term(&self, output: &mut String, term: CovTerm) {
278+
match term {
279+
CovTerm::Zero => output.push_str("Zero"),
280+
CovTerm::Counter(id) => write!(output, "c{id}").unwrap(),
281+
CovTerm::Expression(id, op) => {
282+
let (lhs, rhs) = self.operands[id as usize];
283+
let op = match op {
284+
Op::Sub => "-",
285+
Op::Add => "+",
286+
};
287+
288+
output.push('(');
289+
self.write_term(output, lhs);
290+
write!(output, " {op} ").unwrap();
291+
self.write_term(output, rhs);
292+
output.push(')');
293+
}
294+
}
295+
}
296+
}

src/tools/coverage-dump/src/main.rs

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
mod covfun;
2+
mod parser;
3+
mod prf_names;
4+
5+
fn main() -> anyhow::Result<()> {
6+
use anyhow::Context as _;
7+
8+
let args = std::env::args().collect::<Vec<_>>();
9+
10+
let llvm_ir_path = args.get(1).context("LLVM IR file not specified")?;
11+
let llvm_ir = std::fs::read_to_string(llvm_ir_path).context("couldn't read LLVM IR file")?;
12+
13+
let function_names = crate::prf_names::make_function_names_table(&llvm_ir)?;
14+
crate::covfun::dump_covfun_mappings(&llvm_ir, &function_names)?;
15+
16+
Ok(())
17+
}

0 commit comments

Comments
 (0)