Skip to content

Commit 7dc5dae

Browse files
committed
feat (parseFusionCatcher): added --skip-failed
1 parent 03b00b3 commit 7dc5dae

File tree

5 files changed

+86
-13
lines changed

5 files changed

+86
-13
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
1212

1313
## [1.4.6-rc3] - 2025-03-10
1414

15-
- Added --skip-failed flag to callVariant, parseArriba, parserSTARFusion.
15+
- Added --skip-failed flag to callVariant, parseArriba, parseSTARFusion, parseFusionCatcher.
1616

1717
## [1.4.6-rc2] - 2025-03-03
1818

moPepGen/cli/parse_arriba.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@ def parse_arriba(args:argparse.Namespace) -> None:
146146
if args.skip_failed:
147147
tally.skipped.invalid_position += 1
148148
tally.skipped.total += 1
149-
else:
150-
raise
149+
continue
150+
raise
151151

152152
logger.info('Arriba output %s loaded.', fusion)
153153

moPepGen/cli/parse_fusion_catcher.py

+55-5
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,18 @@
22
[FusionCatcher](https://github.com/ndaniel/fusioncatcher) and save as a
33
GVF file. The GVF file can be later used to call variant peptides using
44
[callVariant](call-variant.md)."""
5-
from typing import List
5+
from __future__ import annotations
6+
from typing import TYPE_CHECKING
67
from pathlib import Path
78
import argparse
89
from moPepGen import get_logger, seqvar, parser, err
910
from moPepGen.cli import common
1011

1112

13+
if TYPE_CHECKING:
14+
from typing import List
15+
from logging import Logger
16+
1217
INPUT_FILE_FORMATS = ['.tsv', '.txt']
1318
OUTPUT_FILE_FORMATS = ['.gvf']
1419

@@ -42,16 +47,48 @@ def add_subparser_parse_fusion_catcher(subparsers:argparse._SubParsersAction):
4247
default=5,
4348
metavar='<number>'
4449
)
50+
common.add_args_skip_failed(p)
4551
common.add_args_source(p)
4652
common.add_args_reference(p, proteome=False)
4753
common.add_args_debug_level(p)
4854
p.set_defaults(func=parse_fusion_catcher)
4955
common.print_help_if_missing_args(p)
5056
return p
5157

58+
class TallyTable():
    """ Tally table

    Holds counters for records read, processed successfully, and skipped.
    The counters are plain attributes that the caller increments; this
    class only stores them and reports them through `log`.
    """
    def __init__(self, logger:Logger):
        """ Constructor

        Args:
            logger: Logger that `log` writes the summary to.
        """
        # Reported by `log` as "Totally records read".
        self.total:int = 0
        # Reported by `log` as "Records successfully processed".
        self.succeed:int = 0
        # Per-reason skip counters plus their own running total.
        self.skipped:TallyTableSkipped = TallyTableSkipped()
        self.logger = logger

    def log(self):
        """ Show tally results

        Writes the summary at INFO level. The per-reason breakdown is only
        written when at least one record was skipped.
        """
        self.logger.info("Summary:")
        self.logger.info("Totally records read: %i", self.total)
        self.logger.info("Records successfully processed: %i", self.succeed)
        self.logger.info("Records skipped: %i", self.skipped.total)
        if self.skipped.total > 0:
            self.logger.info("Out of those skipped,")
            self.logger.info(" Invalid gene ID: %i", self.skipped.invalid_gene_id)
            self.logger.info(" Invalid position: %i", self.skipped.invalid_position)
            self.logger.info(" Insufficient evidence: %i", self.skipped.insufficient_evidence)
78+
79+
class TallyTableSkipped():
    """ Tally table for failed ones

    Plain counters, one per skip reason, plus a running total. Nothing here
    keeps `total` in sync with the per-reason counters; the caller has to
    increment both.
    """
    def __init__(self):
        """ constructor """
        # All counters start at zero.
        self.invalid_gene_id:int = 0
        self.invalid_position:int = 0
        self.insufficient_evidence:int = 0
        self.total:int = 0
87+
5288
def parse_fusion_catcher(args:argparse.Namespace) -> None:
5389
""" Parse FusionCatcher output and save it in GVF format. """
5490
logger = get_logger()
91+
tally = TallyTable(logger)
5592
# unpack args
5693
fusion = args.input_path
5794
output_path:Path = args.output_path
@@ -69,15 +106,26 @@ def parse_fusion_catcher(args:argparse.Namespace) -> None:
69106
variants:List[seqvar.VariantRecord] = []
70107

71108
for record in parser.FusionCatcherParser.parse(fusion):
72-
if record.counts_of_common_mapping_reads > args.max_common_mapping:
73-
continue
74-
if record.spanning_unique_reads < args.min_spanning_unique:
109+
tally.total += 1
110+
if record.counts_of_common_mapping_reads > args.max_common_mapping \
111+
or record.spanning_unique_reads < args.min_spanning_unique:
112+
tally.skipped.insufficient_evidence += 1
113+
tally.skipped.total += 1
75114
continue
76115
try:
77116
var_records = record.convert_to_variant_records(anno, genome)
117+
variants.extend(var_records)
118+
tally.succeed += 1
78119
except err.GeneNotFoundError:
120+
tally.skipped.invalid_gene_id += 1
121+
tally.skipped.total += 1
79122
continue
80-
variants.extend(var_records)
123+
except:
124+
if args.skip_failed:
125+
tally.skipped.total += 1
126+
tally.skipped.invalid_position += 1
127+
continue
128+
raise
81129

82130
logger.info('FusionCatcher output %s loaded.', fusion)
83131

@@ -95,3 +143,5 @@ def parse_fusion_catcher(args:argparse.Namespace) -> None:
95143
seqvar.io.write(variants, output_path, metadata)
96144

97145
logger.info("Variants written to disk.")
146+
147+
tally.log()

moPepGen/cli/parse_star_fusion.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -103,20 +103,22 @@ def parse_star_fusion(args:argparse.Namespace) -> None:
103103
tally.total += 1
104104
if record.est_j < args.min_est_j:
105105
tally.skipped.insufficient_evidence += 1
106+
tally.skipped.total += 1
106107
continue
107108
try:
108109
var_records = record.convert_to_variant_records(anno, genome)
109110
variants.extend(var_records)
110111
tally.succeed += 1
111112
except err.GeneNotFoundError:
112113
tally.skipped.invalid_gene_id += 1
114+
tally.skipped.total += 1
113115
continue
114116
except:
115117
if args.skip_failed:
116118
tally.skipped.invalid_position += 1
119+
tally.skipped.total += 1
117120
continue
118-
else:
119-
raise
121+
raise
120122

121123
logger.info('STAR-Fusion output %s loaded.', fusion)
122124

test/integration/test_parse_fusion_catcher.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from pathlib import Path
44
import subprocess as sp
55
import sys
6+
from unittest.mock import Mock
67
from test.unit import load_references
78
from test.integration import TestCaseIntegration
89
from moPepGen import cli, parser
@@ -54,11 +55,9 @@ def test_fusioncatcher_parser(self):
5455
right_seq = gene2_seq.seq[_start2:_end2]
5556
self.assertEqual(str(right_seq), fusion_seq[1])
5657

57-
def test_parse_fusion_catcher(self):
58-
""" Test parseFusionCatcher """
58+
def create_base_args(self) -> argparse.Namespace:
5959
args = argparse.Namespace()
6060
args.command = 'parseFusionCatcher'
61-
args.input_path = self.data_dir/'fusion/fusion_catcher.txt'
6261
args.source = 'Fusion'
6362
args.index_dir = None
6463
args.genome_fasta = self.data_dir/'genome.fasta'
@@ -68,9 +67,31 @@ def test_parse_fusion_catcher(self):
6867
args.output_path = self.work_dir/'fusion_catcher.gvf'
6968
args.max_common_mapping = 0
7069
args.min_spanning_unique = 5
70+
args.skip_failed = False
7171
args.quiet = True
72+
return args
73+
74+
def test_parse_fusion_catcher(self):
    """ Test parseFusionCatcher """
    args = self.create_base_args()
    args.input_path = self.data_dir/'fusion/fusion_catcher.txt'
    cli.parse_fusion_catcher(args)
    # The run must leave exactly one file, the GVF output, in the work dir.
    files = {str(file.name) for file in self.work_dir.glob('*')}
    expected = {'fusion_catcher.gvf'}
    self.assertEqual(files, expected)
    self.assert_gvf_order(args.output_path, args.annotation_gtf)
83+
84+
def test_parse_fusion_catcher_skip_failed(self):
    """ Test parseFusionCatcher with --skip-failed

    Forces every record conversion to raise ValueError, then checks that
    the error propagates without --skip-failed and that the run completes
    with it.
    """
    from unittest.mock import patch

    args = self.create_base_args()
    args.input_path = self.data_dir/'fusion/fusion_catcher.txt'

    # Scope the patch to this test. Assigning the Mock directly on the
    # class would leave it in place for every test that runs afterwards.
    with patch.object(
        parser.FusionCatcherParser.FusionCatcherRecord,
        'convert_to_variant_records',
        new=Mock(side_effect=ValueError())
    ):
        with self.assertRaises(ValueError):
            cli.parse_fusion_catcher(args)

        args.skip_failed = True
        cli.parse_fusion_catcher(args)

0 commit comments

Comments
 (0)