Skip to content

Commit 8a5f0c4

Browse files
committed
Add paired context for preprocess
1 parent 0f2e2cf commit 8a5f0c4

2 files changed

Lines changed: 39 additions & 16 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ is roughly based on [Keep a Changelog], and this project tries to adhere to
1313

1414
### Changed
1515

16-
- Adds more context to paired read errors in `sampler`, `xleave`, and `trimmer`
16+
- Adds more context to paired read errors
1717

1818
## [0.9.0] - 2026-03-06
1919

src/processes/integrated/preprocess.rs

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,17 @@ pub fn preprocess_process(args: PreprocessArgs) -> Result<(), std::io::Error> {
103103
Ok(())
104104
}
105105

106+
/// The type preprocess uses for input, along with the input path for error
107+
/// context.
108+
struct Reader {
109+
path: PathBuf,
110+
iter: IterWithContext<FastQReader<ReadFileZipPipe>>,
111+
}
112+
106113
struct ParsedPreprocessIoArgs {
107114
table_writer: BufWriter<WriterWithContext<File>>,
108-
reader1: IterWithContext<FastQReader<ReadFileZipPipe>>,
109-
reader2: Option<IterWithContext<FastQReader<ReadFileZipPipe>>>,
115+
reader1: Reader,
116+
reader2: Option<Reader>,
110117
log_writer: Option<BufWriter<WriterWithContext<File>>>,
111118
log_file: Option<PathBuf>,
112119
}
@@ -147,6 +154,12 @@ fn parse_preprocess_args(args: PreprocessArgs) -> std::io::Result<ParsedPreproce
147154

148155
let RecordReaders { reader1, reader2 } = readers;
149156

157+
let reader1 = Reader {
158+
path: fastq_input,
159+
iter: reader1,
160+
};
161+
let reader2 = fastq_input2.zip(reader2).map(|(path, iter)| Reader { path, iter });
162+
150163
let log_writer = match log_file {
151164
Some(ref file_path) => Some(OutputOptions::new_from_path(file_path).use_file().open()?),
152165
None => None,
@@ -185,11 +198,20 @@ fn parse_preprocess_args(args: PreprocessArgs) -> std::io::Result<ParsedPreproce
185198
fn trim_and_deflate(
186199
options: &ParsedPreprocessOptions, io_args: &mut ParsedPreprocessIoArgs,
187200
) -> std::io::Result<(DeflatedSequences, FastQMetadata)> {
188-
let reader1 = &mut io_args.reader1;
201+
let Reader {
202+
path: input_path1,
203+
iter: reader1,
204+
} = &mut io_args.reader1;
205+
189206
let mut deflated = DeflatedSequences::with_hasher(get_hasher());
190207
let mut metadata = FastQMetadata::default();
191208

192209
if let Some(reader2) = &mut io_args.reader2 {
210+
let Reader {
211+
path: input_path2,
212+
iter: reader2,
213+
} = reader2;
214+
193215
if options.filter_widows {
194216
let result = reader1.by_ref().zip_paired_reads(reader2.by_ref()).try_for_each(|pair| {
195217
preprocess_pair(pair?, &mut metadata, &mut deflated, options);
@@ -200,9 +222,10 @@ fn trim_and_deflate(
200222
Ok(()) => {}
201223
Err(ZipPairedReadsError::IoError(e)) => return Err(e),
202224
Err(ZipPairedReadsError::MismatchedHeaders([r1, r2])) => {
203-
let err = ZipPairedReadsError::MismatchedHeaders([r1.as_view(), r2.as_view()]);
225+
let err = ZipPairedReadsError::MismatchedHeaders([r1.as_view(), r2.as_view()])
226+
.add_path_context(input_path1, input_path2);
204227
eprintln!(
205-
"{MODULE} WARNING! {err} `--filter-widows or -f` is being disabled for the remainder of the processing. Consider rerunning with corrected inputs."
228+
"{MODULE} WARNING! {err}\n\n`--filter-widows` or `-f` is being disabled for the remainder of the processing. Consider rerunning with corrected inputs."
206229
);
207230

208231
std::iter::once(Ok(r1)).chain(reader1).try_for_each(|read| {
@@ -216,14 +239,22 @@ fn trim_and_deflate(
216239
})?;
217240
}
218241
Err(ZipPairedReadsError::ExtraFirstRead(r1)) => {
219-
eprintln!("{}", extra_read_warning());
242+
eprintln!(
243+
"{MODULE} WARNING! An extra read was found in file: '{input_path1}'\n → Unexpected read found with header: {header1}\n\n`--filter-widows` or `-f` is being disabled for the remainder of the processing. Consider rerunning with corrected inputs.",
244+
input_path1 = input_path1.display(),
245+
header1 = r1.header
246+
);
220247
std::iter::once(Ok(r1)).chain(reader1).try_for_each(|read| {
221248
preprocess_seq(&mut read?, ReadSide::R1, &mut metadata, &mut deflated, options);
222249
std::io::Result::Ok(())
223250
})?;
224251
}
225252
Err(ZipPairedReadsError::ExtraSecondRead(r2)) => {
226-
eprintln!("{}", extra_read_warning());
253+
eprintln!(
254+
"{MODULE} WARNING! An extra read was found in file: '{input_path2}'\n → Unexpected read found with header: {header2}\n\n`--filter-widows` or `-f` is being disabled for the remainder of the processing. Consider rerunning with corrected inputs.",
255+
input_path2 = input_path2.display(),
256+
header2 = r2.header
257+
);
227258
std::iter::once(Ok(r2)).chain(reader2).try_for_each(|read| {
228259
preprocess_seq(&mut read?, ReadSide::R2, &mut metadata, &mut deflated, options);
229260
std::io::Result::Ok(())
@@ -366,14 +397,6 @@ fn diagnose_none_passing(metadata: &FastQMetadata, paired_reads: bool, options:
366397
}
367398
}
368399

369-
#[inline]
370-
#[must_use]
371-
fn extra_read_warning() -> String {
372-
format!(
373-
"{MODULE} WARNING! Extra unpaired read(s) found at end of first FASTQ file. `--filter-widows or -f` is being disabled for the remainder of the processing. Consider rerunning with corrected inputs."
374-
)
375-
}
376-
377400
/// Trims a read and tallies its metadata. `Some` is returned if it passes all
378401
/// quality filters.
379402
fn trim_filter_tally<'a>(

0 commit comments

Comments
 (0)