Skip to content

Commit f61d0ab

Browse files
authored
Try to make the nom errors better! (#22)
1 parent 7d73af0 commit f61d0ab

15 files changed

+297
-114
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77
## [Unreleased]
88
### Added
99
- `InstrumenationProfile::is_empty` to detect when there are no records
10+
- Fuzzing module for profile files
1011

1112
### Changed
1213
- Added anyhow and use in place of `Result<T, Box<dyn Error>>`
14+
- Make error type for profiles `VerboseError`
1315

1416
### Fixed
1517
- Handle merging of completely disjoint records - now profiles generated from multiple
1618
applications are accurately merged
19+
- Handle invalid Hash enum variant in `IndexedProfile`
1720

1821
## [0.2.0] - 2022-09-11
1922
### Changed

README.md

+12-2
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,22 @@
44
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
55
[![Coverage Status](https://coveralls.io/repos/github/xd009642/llvm-profparser/badge.svg?branch=master)](https://coveralls.io/github/xd009642/llvm-profparser?branch=master)
66

7-
This is a WIP to parse the llvm instrumentation profraw file format and avoid
8-
the need to install and use the llvm-profdata binary.
7+
This is a reasonably complete to parse the llvm instrumentation profraw file
8+
format and avoid the need to install and use the llvm-profdata/llvm-cov
9+
binaries. It aims to be backwards compatible with as many llvm versions that
10+
could be used for coverage data in Rust projects and currently supports the
11+
following llvm versions: 11, 12, 13, 14, 15.
912

1013
**This project is not affilated with the llvm-project in anyway!** It is merely
1114
a parser for some of their file formats to aid interoperability in Rust.
1215

16+
## Contributing
17+
18+
All of the functionality required has been implemented, however there are areas
19+
to improve in handling unexpected or invalid files. To start fining issues
20+
there's a fuzz directory which will undoubtedly reveal some issues that can be
21+
fixed. Go into the fuzz directory for guides on how to run.
22+
1323
## License
1424

1525
llvm\_profparser is currently licensed under the terms of the Apache License

fuzz/.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
target
2+
corpus
3+
artifacts

fuzz/Cargo.toml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[package]
2+
name = "llvm_profparser-fuzz"
3+
version = "0.0.0"
4+
authors = ["Automatically generated"]
5+
publish = false
6+
edition = "2018"
7+
8+
[package.metadata]
9+
cargo-fuzz = true
10+
11+
[dependencies]
12+
libfuzzer-sys = "0.4"
13+
14+
[dependencies.llvm_profparser]
15+
path = ".."
16+
17+
# Prevent this from interfering with workspaces
18+
[workspace]
19+
members = ["."]
20+
21+
[[bin]]
22+
name = "profile_data"
23+
path = "fuzz_targets/profile_data.rs"
24+
test = false
25+
doc = false

fuzz/README.md

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# llvm-profparser fuzz
2+
3+
This requires cargo-fuzz to be installed and a nightly compiler, to install:
4+
5+
```
6+
cargo install -f cargo-fuzz
7+
```
8+
9+
And then to run for the first time:
10+
11+
```
12+
./setup_corpus.sh
13+
cargo +nightly fuzz run profile_data
14+
```
15+
16+
The script `setup_corpus.sh` copies the test files into the corpus directory in
17+
order to give the fuzzer a good place to start from.

fuzz/fuzz_targets/profile_data.rs

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#![no_main]
2+
use libfuzzer_sys::fuzz_target;
3+
use llvm_profparser::parse_bytes;
4+
5+
fuzz_target!(|data: &[u8]| {
6+
// fuzzed code goes here
7+
let _ = parse_bytes(data);
8+
});

fuzz/setup_corpus.sh

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
2+
3+
# Here we'll just create an empty corpus folder and copy all the profile data
4+
# files into it.
5+
6+
mkdir -p corpus/profile_data/
7+
8+
cp ../tests/data/profdata/llvm-11/* corpus/profile_data/
9+
cp ../tests/data/profdata/llvm-12/* corpus/profile_data/
10+
cp ../tests/data/profdata/llvm-13/* corpus/profile_data/
11+
cp ../tests/data/profdata/llvm-14/* corpus/profile_data/
12+
cp ../tests/data/profdata/llvm-15/* corpus/profile_data/

src/coverage/coverage_mapping.rs

+8-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::coverage::*;
33
use crate::instrumentation_profile::types::*;
44
use crate::util::*;
55
use anyhow::{bail, Result};
6+
use nom::error::Error as NomError;
67
use object::{Endian, Endianness, Object, ObjectSection, Section};
78
use std::collections::HashMap;
89
use std::convert::TryInto;
@@ -263,8 +264,8 @@ fn parse_coverage_mapping(
263264

264265
//let bytes = &data[16..(16 + filename_data_len as usize)];
265266
let bytes = &data[16..];
266-
let (bytes, file_strings) =
267-
parse_path_list(bytes, version).map_err(|_| SectionReadError::InvalidPathList)?;
267+
let (bytes, file_strings) = parse_path_list(bytes, version)
268+
.map_err(|_: nom::Err<NomError<_>>| SectionReadError::InvalidPathList)?;
268269
result.insert(hash, file_strings);
269270
let read_len = data_len - bytes.len();
270271
let padding = if !bytes.is_empty() && (read_len & 0x07) != 0 {
@@ -305,21 +306,21 @@ fn parse_coverage_functions(
305306
let _start_len = bytes[28..].len();
306307
bytes = &bytes[28..];
307308

308-
let (data, id_len) = parse_leb128(bytes).unwrap();
309+
let (data, id_len) = parse_leb128::<NomError<_>>(bytes).unwrap();
309310
bytes = data;
310311
let mut filename_indices = vec![];
311312
for _ in 0..id_len {
312-
let (data, id) = parse_leb128(bytes).unwrap(); // Issue
313+
let (data, id) = parse_leb128::<NomError<_>>(bytes).unwrap(); // Issue
313314
filename_indices.push(id);
314315
bytes = data;
315316
}
316-
let (data, expr_len) = parse_leb128(bytes).unwrap();
317+
let (data, expr_len) = parse_leb128::<NomError<_>>(bytes).unwrap();
317318
let expr_len = expr_len as usize;
318319
bytes = data;
319320
let mut exprs = vec![Expression::default(); expr_len];
320321
for i in 0..expr_len {
321-
let (data, lhs) = parse_leb128(bytes).unwrap();
322-
let (data, rhs) = parse_leb128(data).unwrap();
322+
let (data, lhs) = parse_leb128::<NomError<_>>(bytes).unwrap();
323+
let (data, rhs) = parse_leb128::<NomError<_>>(data).unwrap();
323324
let lhs = parse_counter(lhs, &mut exprs);
324325
let rhs = parse_counter(rhs, &mut exprs);
325326
exprs[i].lhs = lhs;

src/hash_table.rs

+32-12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
use crate::instrumentation_profile::types::*;
1+
use crate::instrumentation_profile::{types::*, ParseResult};
22
use indexmap::IndexMap;
3-
use nom::{number::complete::*, IResult};
3+
use nom::{
4+
error::{ErrorKind, ParseError, VerboseError, VerboseErrorKind},
5+
number::complete::*,
6+
};
47
use std::borrow::Cow;
58

69
#[derive(Copy, Clone, Debug)]
@@ -12,26 +15,43 @@ struct KeyDataLen {
1215
#[derive(Clone, Debug)]
1316
pub(crate) struct HashTable(pub IndexMap<(u64, String), InstrProfRecord>);
1417

15-
fn read_key_data_len(input: &[u8]) -> IResult<&[u8], KeyDataLen> {
18+
fn read_key_data_len(input: &[u8]) -> ParseResult<KeyDataLen> {
1619
let (bytes, key_len) = le_u64(input)?;
1720
let (bytes, data_len) = le_u64(bytes)?;
1821
let res = KeyDataLen { key_len, data_len };
1922
Ok((bytes, res))
2023
}
2124

22-
fn read_key(input: &[u8], key_len: usize) -> IResult<&[u8], Cow<'_, str>> {
23-
let res = String::from_utf8_lossy(&input[..key_len]);
24-
Ok((&input[key_len..], res))
25+
fn read_key(input: &[u8], key_len: usize) -> ParseResult<Cow<'_, str>> {
26+
if key_len > input.len() {
27+
Err(nom::Err::Failure(VerboseError::from_error_kind(
28+
&input[input.len()..],
29+
ErrorKind::Eof,
30+
)))
31+
} else {
32+
let res = String::from_utf8_lossy(&input[..key_len]);
33+
Ok((&input[key_len..], res))
34+
}
2535
}
2636

2737
fn read_value(
2838
version: u64,
2939
mut input: &[u8],
3040
data_len: usize,
31-
) -> IResult<&[u8], (u64, InstrProfRecord)> {
41+
) -> ParseResult<(u64, InstrProfRecord)> {
3242
if data_len % 8 != 0 {
3343
// Element is corrupted, it should be aligned
34-
todo!();
44+
let errors = vec![(
45+
input,
46+
VerboseErrorKind::Context("table data length is not 8 byte aligned"),
47+
)];
48+
return Err(nom::Err::Failure(VerboseError { errors }));
49+
}
50+
if input.len() < data_len {
51+
return Err(nom::Err::Failure(VerboseError::from_error_kind(
52+
&input[input.len()..],
53+
ErrorKind::Eof,
54+
)));
3555
}
3656
let mut result = vec![];
3757
let end_len = input.len() - data_len;
@@ -102,12 +122,12 @@ impl HashTable {
102122
/// buckets is the data the hash table buckets start at - the start of the `HashTable` in memory.
103123
/// hash. offset shows the offset from the base address to the start of the `HashTable` as this
104124
/// will be used to correct any offsets
105-
pub(crate) fn parse(
125+
pub(crate) fn parse<'a>(
106126
version: u64,
107-
input: &[u8],
127+
input: &'a [u8],
108128
_offset: usize,
109129
bucket_start: usize,
110-
) -> IResult<&[u8], Self> {
130+
) -> ParseResult<'a, Self> {
111131
assert!(bucket_start > 0);
112132
let (bytes, _num_buckets) = le_u64(&input[bucket_start..])?;
113133
let (_bytes, mut num_entries) = le_u64(bytes)?;
@@ -126,7 +146,7 @@ impl HashTable {
126146
version: u64,
127147
input: &'a [u8],
128148
mut num_entries: u64,
129-
) -> IResult<&'a [u8], u64> {
149+
) -> ParseResult<'a, u64> {
130150
let (bytes, num_items_in_bucket) = le_u16(input)?;
131151
let mut remaining = bytes;
132152
for _i in 0..num_items_in_bucket {

src/instrumentation_profile/indexed_profile.rs

+12-5
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ use crate::instrumentation_profile::*;
33
use crate::summary::*;
44
use anyhow::bail;
55
use nom::{
6+
error::{ContextError, ErrorKind, ParseError, VerboseError},
67
number::{complete::*, Endianness},
7-
IResult,
88
};
99
use std::collections::HashMap;
1010
use std::convert::TryFrom;
@@ -82,7 +82,7 @@ fn parse_summary<'a>(
8282
mut input: &'a [u8],
8383
header: &Header,
8484
use_cs: bool,
85-
) -> IResult<&'a [u8], Option<ProfileSummary>> {
85+
) -> ParseResult<'a, Option<ProfileSummary>> {
8686
if header.version() >= 4 {
8787
let (bytes, n_fields) = le_u64(input)?;
8888
let (bytes, n_entries) = le_u64(bytes)?;
@@ -154,7 +154,7 @@ fn parse_summary<'a>(
154154
impl InstrProfReader for IndexedInstrProf {
155155
type Header = Header;
156156

157-
fn parse_bytes(mut input: &[u8]) -> IResult<&[u8], InstrumentationProfile> {
157+
fn parse_bytes(mut input: &[u8]) -> ParseResult<InstrumentationProfile> {
158158
let (bytes, header) = Self::parse_header(input)?;
159159
let (bytes, _summary) = parse_summary(bytes, &header, false)?;
160160
let (bytes, _cs_summary) = if header.is_csir_prof() {
@@ -192,13 +192,20 @@ impl InstrProfReader for IndexedInstrProf {
192192
Ok((input, profile))
193193
}
194194

195-
fn parse_header(input: &[u8]) -> IResult<&[u8], Self::Header> {
195+
fn parse_header(input: &[u8]) -> ParseResult<Self::Header> {
196196
if Self::has_format(input) {
197197
let (bytes, version) = le_u64(&input[8..])?;
198198
let (bytes, _) = le_u64(bytes)?;
199199
let (bytes, hash_type) = le_u64(bytes)?;
200+
let hash_type = HashType::try_from(hash_type).map_err(|_e| {
201+
let error = VerboseError::from_error_kind(bytes, ErrorKind::Satisfy);
202+
nom::Err::Failure(VerboseError::add_context(
203+
bytes,
204+
"invalid enum variant for profile hash",
205+
error,
206+
))
207+
})?;
200208
let (bytes, hash_offset) = le_u64(bytes)?;
201-
let hash_type = HashType::try_from(hash_type).expect("BAD ENUM BRUH");
202209
Ok((
203210
bytes,
204211
Self::Header {

src/instrumentation_profile/mod.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::instrumentation_profile::indexed_profile::*;
22
use crate::instrumentation_profile::raw_profile::*;
33
use crate::instrumentation_profile::text_profile::*;
44
use crate::instrumentation_profile::types::*;
5-
use nom::IResult;
5+
use nom::{error::VerboseError, IResult};
66
use std::fs::File;
77
use std::io;
88
use std::io::prelude::*;
@@ -16,6 +16,8 @@ pub mod summary;
1616
pub mod text_profile;
1717
pub mod types;
1818

19+
pub type ParseResult<'a, T> = IResult<&'a [u8], T, VerboseError<&'a [u8]>>;
20+
1921
pub const fn get_num_padding_bytes(len: u64) -> u8 {
2022
7 & (8 - (len % 8) as u8)
2123
}
@@ -47,18 +49,19 @@ pub fn parse_bytes(data: &[u8]) -> io::Result<InstrumentationProfile> {
4749
"Unsupported instrumentation profile format",
4850
));
4951
};
50-
nom_res.map(|(_bytes, res)| res).map_err(|e| {
51-
println!("Parsing failed: {}", e);
52+
nom_res.map(|(_bytes, res)| res).map_err(|_e| {
53+
#[cfg(test)]
54+
println!("{}", _e);
5255
io::Error::new(io::ErrorKind::Other, "Parsing failed")
5356
})
5457
}
5558

5659
pub trait InstrProfReader {
5760
type Header;
5861
/// Parse the profile no lazy parsing here!
59-
fn parse_bytes(input: &[u8]) -> IResult<&[u8], InstrumentationProfile>;
62+
fn parse_bytes(input: &[u8]) -> ParseResult<InstrumentationProfile>;
6063
/// Parses a header
61-
fn parse_header(input: &[u8]) -> IResult<&[u8], Self::Header>;
64+
fn parse_header(input: &[u8]) -> ParseResult<Self::Header>;
6265
/// Detects that the bytes match the current reader format if it can't read the format it will
6366
/// return false
6467
fn has_format(input: impl Read) -> bool;

0 commit comments

Comments
 (0)