|
| 1 | +use crate::parser::{unescape_llvm_string_contents, Parser}; |
| 2 | +use anyhow::{anyhow, Context}; |
| 3 | +use regex::Regex; |
| 4 | +use std::collections::HashMap; |
| 5 | +use std::fmt::{self, Debug, Write as _}; |
| 6 | +use std::sync::OnceLock; |
| 7 | + |
| 8 | +pub(crate) fn dump_covfun_mappings( |
| 9 | + llvm_ir: &str, |
| 10 | + function_names: &HashMap<u64, String>, |
| 11 | +) -> anyhow::Result<()> { |
| 12 | + // Extract function coverage entries from the LLVM IR assembly, and associate |
| 13 | + // each entry with its (demangled) name. |
| 14 | + let mut covfun_entries = llvm_ir |
| 15 | + .lines() |
| 16 | + .filter_map(covfun_line_data) |
| 17 | + .map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data)) |
| 18 | + .collect::<Vec<_>>(); |
| 19 | + covfun_entries.sort_by(|a, b| { |
| 20 | + // Sort entries primarily by name, to help make the order consistent |
| 21 | + // across platforms and relatively insensitive to changes. |
| 22 | + // (Sadly we can't use `sort_by_key` because we would need to return references.) |
| 23 | + Ord::cmp(&a.0, &b.0) |
| 24 | + .then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used)) |
| 25 | + .then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice())) |
| 26 | + }); |
| 27 | + |
| 28 | + for (name, line_data) in &covfun_entries { |
| 29 | + let name = name.unwrap_or("(unknown)"); |
| 30 | + let unused = if line_data.is_used { "" } else { " (unused)" }; |
| 31 | + println!("Function name: {name}{unused}"); |
| 32 | + |
| 33 | + let payload: &[u8] = &line_data.payload; |
| 34 | + println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len()); |
| 35 | + |
| 36 | + let mut parser = Parser::new(payload); |
| 37 | + |
| 38 | + let num_files = parser.read_uleb128_u32()?; |
| 39 | + println!("Number of files: {num_files}"); |
| 40 | + |
| 41 | + for i in 0..num_files { |
| 42 | + let global_file_id = parser.read_uleb128_u32()?; |
| 43 | + println!("- file {i} => global file {global_file_id}"); |
| 44 | + } |
| 45 | + |
| 46 | + let num_expressions = parser.read_uleb128_u32()?; |
| 47 | + println!("Number of expressions: {num_expressions}"); |
| 48 | + |
| 49 | + let mut expression_resolver = ExpressionResolver::new(); |
| 50 | + for i in 0..num_expressions { |
| 51 | + let lhs = parser.read_simple_term()?; |
| 52 | + let rhs = parser.read_simple_term()?; |
| 53 | + println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}"); |
| 54 | + expression_resolver.push_operands(lhs, rhs); |
| 55 | + } |
| 56 | + |
| 57 | + for i in 0..num_files { |
| 58 | + let num_mappings = parser.read_uleb128_u32()?; |
| 59 | + println!("Number of file {i} mappings: {num_mappings}"); |
| 60 | + |
| 61 | + for _ in 0..num_mappings { |
| 62 | + let (kind, region) = parser.read_mapping_kind_and_region()?; |
| 63 | + println!("- {kind:?} at {region:?}"); |
| 64 | + |
| 65 | + match kind { |
| 66 | + // Also print expression mappings in resolved form. |
| 67 | + MappingKind::Code(term @ CovTerm::Expression { .. }) |
| 68 | + | MappingKind::Gap(term @ CovTerm::Expression { .. }) => { |
| 69 | + println!(" = {}", expression_resolver.format_term(term)); |
| 70 | + } |
| 71 | + // If the mapping is a branch region, print both of its arms |
| 72 | + // in resolved form (even if they aren't expressions). |
| 73 | + MappingKind::Branch { r#true, r#false } => { |
| 74 | + println!(" true = {}", expression_resolver.format_term(r#true)); |
| 75 | + println!(" false = {}", expression_resolver.format_term(r#false)); |
| 76 | + } |
| 77 | + _ => (), |
| 78 | + } |
| 79 | + } |
| 80 | + } |
| 81 | + |
| 82 | + parser.ensure_empty()?; |
| 83 | + println!(); |
| 84 | + } |
| 85 | + Ok(()) |
| 86 | +} |
| 87 | + |
/// Data extracted from one line of LLVM IR assembly that declares a
/// `@__covrec_…` function coverage record.
struct CovfunLineData {
    /// Symbol-name hash, parsed as hexadecimal from the variable name.
    name_hash: u64,
    /// True when the variable name carries the trailing `u` marker.
    is_used: bool,
    /// Unescaped bytes of the record's `c"…"` string constant.
    payload: Vec<u8>,
}
| 93 | + |
| 94 | +/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun` |
| 95 | +/// entry, and if so extracts relevant data in a `CovfunLineData`. |
| 96 | +fn covfun_line_data(line: &str) -> Option<CovfunLineData> { |
| 97 | + let re = { |
| 98 | + // We cheat a little bit and match variable names `@__covrec_[HASH]u` |
| 99 | + // rather than the section name, because the section name is harder to |
| 100 | + // extract and differs across Linux/Windows/macOS. We also extract the |
| 101 | + // symbol name hash from the variable name rather than the data, since |
| 102 | + // it's easier and both should match. |
| 103 | + static RE: OnceLock<Regex> = OnceLock::new(); |
| 104 | + RE.get_or_init(|| { |
| 105 | + Regex::new( |
| 106 | + r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#, |
| 107 | + ) |
| 108 | + .unwrap() |
| 109 | + }) |
| 110 | + }; |
| 111 | + |
| 112 | + let captures = re.captures(line)?; |
| 113 | + let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap(); |
| 114 | + let is_used = captures.name("is_used").is_some(); |
| 115 | + let payload = unescape_llvm_string_contents(&captures["payload"]); |
| 116 | + |
| 117 | + Some(CovfunLineData { name_hash, is_used, payload }) |
| 118 | +} |
| 119 | + |
| 120 | +// Extra parser methods only needed when parsing `covfun` payloads. |
| 121 | +impl<'a> Parser<'a> { |
| 122 | + fn read_simple_term(&mut self) -> anyhow::Result<CovTerm> { |
| 123 | + let raw_term = self.read_uleb128_u32()?; |
| 124 | + CovTerm::decode(raw_term).context("decoding term") |
| 125 | + } |
| 126 | + |
| 127 | + fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> { |
| 128 | + let mut kind = self.read_raw_mapping_kind()?; |
| 129 | + let mut region = self.read_raw_mapping_region()?; |
| 130 | + |
| 131 | + const HIGH_BIT: u32 = 1u32 << 31; |
| 132 | + if region.end_column & HIGH_BIT != 0 { |
| 133 | + region.end_column &= !HIGH_BIT; |
| 134 | + kind = match kind { |
| 135 | + MappingKind::Code(term) => MappingKind::Gap(term), |
| 136 | + // LLVM's coverage mapping reader will actually handle this |
| 137 | + // case without complaint, but the result is almost certainly |
| 138 | + // a meaningless implementation artifact. |
| 139 | + _ => return Err(anyhow!("unexpected base kind for gap region: {kind:?}")), |
| 140 | + } |
| 141 | + } |
| 142 | + |
| 143 | + Ok((kind, region)) |
| 144 | + } |
| 145 | + |
| 146 | + fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> { |
| 147 | + let raw_mapping_kind = self.read_uleb128_u32()?; |
| 148 | + if let Some(term) = CovTerm::decode(raw_mapping_kind) { |
| 149 | + return Ok(MappingKind::Code(term)); |
| 150 | + } |
| 151 | + |
| 152 | + assert_eq!(raw_mapping_kind & 0b11, 0); |
| 153 | + assert_ne!(raw_mapping_kind, 0); |
| 154 | + |
| 155 | + let (high, is_expansion) = (raw_mapping_kind >> 3, raw_mapping_kind & 0b100 != 0); |
| 156 | + if is_expansion { |
| 157 | + Ok(MappingKind::Expansion(high)) |
| 158 | + } else { |
| 159 | + match high { |
| 160 | + 0 => unreachable!("zero kind should have already been handled as a code mapping"), |
| 161 | + 2 => Ok(MappingKind::Skip), |
| 162 | + 4 => { |
| 163 | + let r#true = self.read_simple_term()?; |
| 164 | + let r#false = self.read_simple_term()?; |
| 165 | + Ok(MappingKind::Branch { r#true, r#false }) |
| 166 | + } |
| 167 | + _ => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")), |
| 168 | + } |
| 169 | + } |
| 170 | + } |
| 171 | + |
| 172 | + fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> { |
| 173 | + let start_line_offset = self.read_uleb128_u32()?; |
| 174 | + let start_column = self.read_uleb128_u32()?; |
| 175 | + let end_line_offset = self.read_uleb128_u32()?; |
| 176 | + let end_column = self.read_uleb128_u32()?; |
| 177 | + Ok(MappingRegion { start_line_offset, start_column, end_line_offset, end_column }) |
| 178 | + } |
| 179 | +} |
| 180 | + |
/// Enum that can hold a constant zero value, the ID of a physical coverage
/// counter, or the ID (and operation) of a coverage-counter expression.
///
/// Terms are used as the operands of coverage-counter expressions, as the arms
/// of branch mappings, and as the value of code/gap mappings.
#[derive(Clone, Copy, Debug)]
pub(crate) enum CovTerm {
    /// The constant zero.
    Zero,
    /// A physical counter, identified by its ID.
    Counter(u32),
    /// An expression, identified by its ID, together with its operator.
    Expression(u32, Op),
}

/// Operator (addition or subtraction) used by an expression.
#[derive(Clone, Copy, Debug)]
pub(crate) enum Op {
    Sub,
    Add,
}

impl CovTerm {
    /// Decodes a raw term: the low two bits are a tag and the remaining
    /// bits are the counter/expression ID.
    ///
    /// Returns `None` when the tag is `0b00` but the ID bits are non-zero.
    pub(crate) fn decode(input: u32) -> Option<Self> {
        let id = input >> 2;
        let term = match input & 0b11 {
            0b01 => Self::Counter(id),
            0b10 => Self::Expression(id, Op::Sub),
            0b11 => Self::Expression(id, Op::Add),
            // Only the `0b00` tag reaches the arms below.
            _ if id == 0 => Self::Zero,
            // When reading expression operands or branch arms, the LLVM coverage
            // mapping reader will always interpret a `0b00` tag as a zero
            // term, even when the high bits are non-zero.
            // We treat that case as failure instead, so that this code can be
            // shared by the full mapping-kind reader as well.
            _ => return None,
        };
        Some(term)
    }
}
| 217 | + |
| 218 | +#[derive(Debug)] |
| 219 | +enum MappingKind { |
| 220 | + Code(CovTerm), |
| 221 | + Gap(CovTerm), |
| 222 | + Expansion(u32), |
| 223 | + Skip, |
| 224 | + // Using raw identifiers here makes the dump output a little bit nicer |
| 225 | + // (via the derived Debug), at the expense of making this tool's source |
| 226 | + // code a little bit uglier. |
| 227 | + Branch { r#true: CovTerm, r#false: CovTerm }, |
| 228 | +} |
| 229 | + |
/// Source region covered by a mapping, kept in the relative form in which it
/// is encoded.
struct MappingRegion {
    /// Offset of this region's start line, relative to the *start line* of
    /// the *previous mapping* (or 0). Line numbers are 1-based.
    start_line_offset: u32,
    /// This region's start column, absolute and 1-based.
    start_column: u32,
    /// Offset of this region's end line, relative to *this mapping's*
    /// start line. Line numbers are 1-based.
    end_line_offset: u32,
    /// This region's end column, absolute, 1-based, and exclusive.
    ///
    /// If the highest bit is set, that bit is cleared and the associated
    /// mapping becomes a gap region mapping.
    end_column: u32,
}

impl Debug for MappingRegion {
    /// Formats the region as `(prev + L, C) to (start + L, C)`, keeping the
    /// line offsets visibly relative.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start_line_offset, start_column, end_line_offset, end_column } = self;
        write!(
            f,
            "(prev + {}, {}) to (start + {}, {})",
            start_line_offset, start_column, end_line_offset, end_column
        )
    }
}
| 255 | + |
| 256 | +/// Helper type that prints expressions in a "resolved" form, so that |
| 257 | +/// developers reading the dump don't need to resolve expressions by hand. |
| 258 | +struct ExpressionResolver { |
| 259 | + operands: Vec<(CovTerm, CovTerm)>, |
| 260 | +} |
| 261 | + |
| 262 | +impl ExpressionResolver { |
| 263 | + fn new() -> Self { |
| 264 | + Self { operands: Vec::new() } |
| 265 | + } |
| 266 | + |
| 267 | + fn push_operands(&mut self, lhs: CovTerm, rhs: CovTerm) { |
| 268 | + self.operands.push((lhs, rhs)); |
| 269 | + } |
| 270 | + |
| 271 | + fn format_term(&self, term: CovTerm) -> String { |
| 272 | + let mut output = String::new(); |
| 273 | + self.write_term(&mut output, term); |
| 274 | + output |
| 275 | + } |
| 276 | + |
| 277 | + fn write_term(&self, output: &mut String, term: CovTerm) { |
| 278 | + match term { |
| 279 | + CovTerm::Zero => output.push_str("Zero"), |
| 280 | + CovTerm::Counter(id) => write!(output, "c{id}").unwrap(), |
| 281 | + CovTerm::Expression(id, op) => { |
| 282 | + let (lhs, rhs) = self.operands[id as usize]; |
| 283 | + let op = match op { |
| 284 | + Op::Sub => "-", |
| 285 | + Op::Add => "+", |
| 286 | + }; |
| 287 | + |
| 288 | + output.push('('); |
| 289 | + self.write_term(output, lhs); |
| 290 | + write!(output, " {op} ").unwrap(); |
| 291 | + self.write_term(output, rhs); |
| 292 | + output.push(')'); |
| 293 | + } |
| 294 | + } |
| 295 | + } |
| 296 | +} |
0 commit comments