Skip to content

Commit 03b00b3

Browse files
committed
feat (parseSTARFusion): added --skip-failed
1 parent 8e30c97 commit 03b00b3

File tree

3 files changed

+74
-15
lines changed

3 files changed

+74
-15
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
1212

1313
## [1.4.6-rc3] - 2025-03-10
1414

15-
- Added --skip-failed flag to callVariant, parseArriba.
15+
- Added --skip-failed flag to callVariant, parseArriba, parseSTARFusion.
1616

1717
## [1.4.6-rc2] - 2025-03-03
1818

moPepGen/cli/parse_star_fusion.py

+49-1
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
GVF file. The GVF file can be later used to call variant peptides using
44
[callVariant](call-variant.md)."""
55
from __future__ import annotations
6+
from typing import TYPE_CHECKING
67
import argparse
78
from typing import List
89
from moPepGen import get_logger, seqvar, parser, err
910
from moPepGen.cli import common
1011

1112

13+
if TYPE_CHECKING:
14+
from logging import Logger
15+
1216
INPUT_FILE_FORMATS = ['.tsv', '.txt']
1317
OUTPUT_FILE_FORMATS = ['.gvf']
1418

@@ -37,16 +41,48 @@ def add_subparser_parse_star_fusion(subparsers:argparse._SubParsersAction):
3741
default=5.0,
3842
metavar='<number>'
3943
)
44+
common.add_args_skip_failed(p)
4045
common.add_args_source(p)
4146
common.add_args_reference(p, proteome=False)
4247
common.add_args_debug_level(p)
4348
p.set_defaults(func=parse_star_fusion)
4449
common.print_help_if_missing_args(p)
4550
return p
4651

52+
class TallyTable():
    """ Counters describing how the input records were handled, together
    with the logger used to report them. """
    def __init__(self, logger:Logger):
        """ Start every counter at zero and keep `logger` for `log()`. """
        self.logger = logger
        # Number of records read / converted without error.
        self.total:int = 0
        self.succeed:int = 0
        # Per-reason counters for the records that were skipped.
        self.skipped:TallyTableSkipped = TallyTableSkipped()

    def log(self):
        """ Report the tally through the logger at INFO level. The
        per-reason breakdown is only written when the skipped total is
        greater than zero. """
        emit = self.logger.info
        skipped = self.skipped

        emit("Summary:")
        emit("Totally records read: %i", self.total)
        emit("Records successfully processed: %i", self.succeed)
        emit("Records skipped: %i", skipped.total)

        # Nothing was skipped, so there is no breakdown to show.
        if skipped.total <= 0:
            return

        emit("Out of those skipped,")
        breakdown = (
            (" Invalid gene ID: %i", skipped.invalid_gene_id),
            (" Invalid position: %i", skipped.invalid_position),
            (" Insufficient evidence: %i", skipped.insufficient_evidence),
        )
        for template, count in breakdown:
            emit(template, count)
72+
73+
class TallyTableSkipped():
    """ Tally of the skipped records, broken down by the reason they were
    skipped. Callers increment the per-reason counters; `total` is derived
    from them. """
    def __init__(self):
        """ Start every per-reason counter at zero. """
        self.invalid_gene_id:int = 0
        self.invalid_position:int = 0
        self.insufficient_evidence:int = 0

    @property
    def total(self) -> int:
        """ Number of skipped records across all reasons.

        This is computed from the per-reason counters rather than stored,
        so it cannot drift out of sync with them. Previously it was a plain
        attribute that nothing incremented, so the summary always reported
        zero skipped records. It is read-only by design. """
        return (
            self.invalid_gene_id
            + self.invalid_position
            + self.insufficient_evidence
        )
81+
4782
def parse_star_fusion(args:argparse.Namespace) -> None:
4883
""" Parse the STAR-Fusion's output and save it in GVF format. """
4984
logger = get_logger()
85+
tally = TallyTable(logger)
5086
# unpack args
5187
fusion = args.input_path
5288
common.validate_file_format(
@@ -64,13 +100,23 @@ def parse_star_fusion(args:argparse.Namespace) -> None:
64100
variants:List[seqvar.VariantRecord] = []
65101

66102
for record in parser.STARFusionParser.parse(fusion):
103+
tally.total += 1
67104
if record.est_j < args.min_est_j:
105+
tally.skipped.insufficient_evidence += 1
68106
continue
69107
try:
70108
var_records = record.convert_to_variant_records(anno, genome)
109+
variants.extend(var_records)
110+
tally.succeed += 1
71111
except err.GeneNotFoundError:
112+
tally.skipped.invalid_gene_id += 1
72113
continue
73-
variants.extend(var_records)
114+
except:
115+
if args.skip_failed:
116+
tally.skipped.invalid_position += 1
117+
continue
118+
else:
119+
raise
74120

75121
logger.info('STAR-Fusion output %s loaded.', fusion)
76122

@@ -88,3 +134,5 @@ def parse_star_fusion(args:argparse.Namespace) -> None:
88134
seqvar.io.write(variants, output_path, metadata)
89135

90136
logger.info('Variant info written to disk.')
137+
138+
tally.log()

test/integration/test_parse_star_fusion.py

+24-13
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import argparse
33
import subprocess as sp
44
import sys
5+
from unittest.mock import Mock
56
from test.integration import TestCaseIntegration
67
from moPepGen import cli, seqvar
78
from moPepGen.cli.common import load_references
@@ -27,19 +28,25 @@ def test_parse_star_fusion_cli(self):
2728
print(res.stderr.decode('utf-8'))
2829
raise
2930

30-
def test_star_fusion_record_case1(self):
31-
""" Test parseSTARFusion """
31+
def create_base_args(self) -> argparse.Namespace:
32+
""" Create base args """
3233
args = argparse.Namespace()
3334
args.command = 'parseSTARFusion'
3435
args.source = 'Fusion'
35-
args.input_path = self.data_dir/'fusion/star_fusion.txt'
3636
args.index_dir = None
3737
args.genome_fasta = self.data_dir/'genome.fasta'
3838
args.annotation_gtf = self.data_dir/'annotation.gtf'
3939
args.reference_source = None
4040
args.output_path = self.work_dir/'star_fusion.gvf'
4141
args.min_est_j = 3.0
4242
args.quiet = True
43+
args.skip_failed =False
44+
return args
45+
46+
def test_star_fusion_record_case1(self):
47+
""" Test parseSTARFusion """
48+
args = self.create_base_args()
49+
args.input_path = self.data_dir/'fusion/star_fusion.txt'
4350
cli.parse_star_fusion(args)
4451
files = {str(file.name) for file in self.work_dir.glob('*')}
4552
expected = {'star_fusion.gvf'}
@@ -64,19 +71,23 @@ def test_star_fusion_record_case1(self):
6471

6572
def test_parse_star_fusion_case1(self):
6673
""" test parseSTARFusion case1 """
67-
args = argparse.Namespace()
68-
args.command = 'parseSTARFusion'
74+
args = self.create_base_args()
6975
args.input_path = self.data_dir/'fusion/star_fusion.txt'
70-
args.source = 'Fusion'
71-
args.index_dir = None
72-
args.genome_fasta = self.data_dir/'genome.fasta'
73-
args.annotation_gtf = self.data_dir/'annotation.gtf'
74-
args.reference_source = None
75-
args.output_path = self.work_dir/'star_fusion.gvf'
76-
args.min_est_j = 3.0
77-
args.quiet = True
7876
cli.parse_star_fusion(args)
7977
files = {str(file.name) for file in self.work_dir.glob('*')}
8078
expected = {'star_fusion.gvf'}
8179
self.assertEqual(files, expected)
8280
self.assert_gvf_order(args.output_path, args.annotation_gtf)
81+
82+
def test_parse_star_fusion_skip_failed(self):
    """ Test parseSTARFusion with --skip-failed: a record conversion error
    is raised by default and tolerated when skip_failed is set. """
    from unittest.mock import patch
    from moPepGen import parser

    args = self.create_base_args()
    args.input_path = self.data_dir/'fusion/star_fusion.txt'

    # Patch inside a context manager so the real method is restored when
    # the block exits. Assigning the mock directly to the class would leak
    # the failure into every test that runs afterwards.
    with patch.object(
        parser.STARFusionParser.STARFusionRecord,
        'convert_to_variant_records',
        side_effect=ValueError()
    ):
        # Without --skip-failed the conversion error must propagate.
        with self.assertRaises(ValueError):
            cli.parse_star_fusion(args)

        # With --skip-failed the same error is tolerated and the run
        # completes.
        args.skip_failed = True
        cli.parse_star_fusion(args)

0 commit comments

Comments
 (0)