3
3
GVF file. The GVF file can be later used to call variant peptides using
4
4
[callVariant](call-variant.md)."""
5
5
from __future__ import annotations
6
+ from typing import TYPE_CHECKING
6
7
import argparse
7
8
from typing import List
8
9
from moPepGen import get_logger , seqvar , parser , err
9
10
from moPepGen .cli import common
10
11
11
12
13
+ if TYPE_CHECKING :
14
+ from logging import Logger
15
+
12
16
INPUT_FILE_FORMATS = ['.tsv' , '.txt' ]
13
17
OUTPUT_FILE_FORMATS = ['.gvf' ]
14
18
@@ -37,16 +41,48 @@ def add_subparser_parse_star_fusion(subparsers:argparse._SubParsersAction):
37
41
default = 5.0 ,
38
42
metavar = '<number>'
39
43
)
44
+ common .add_args_skip_failed (p )
40
45
common .add_args_source (p )
41
46
common .add_args_reference (p , proteome = False )
42
47
common .add_args_debug_level (p )
43
48
p .set_defaults (func = parse_star_fusion )
44
49
common .print_help_if_missing_args (p )
45
50
return p
46
51
52
class TallyTable:
    """ Run-level counters for parsed records, plus a logger to report them.

    Attributes:
        total: Number of records read.
        succeed: Number of records processed successfully.
        skipped: Per-reason tally of the records that were skipped.
        logger: Logger that `log()` writes the summary to.
    """
    def __init__(self, logger:Logger):
        """ Start all counters at zero and remember the logger. """
        self.logger = logger
        self.skipped:TallyTableSkipped = TallyTableSkipped()
        self.succeed:int = 0
        self.total:int = 0

    def log(self):
        """ Write the tally summary to the logger at INFO level.

        The per-reason breakdown is only written when at least one record
        was counted as skipped.
        """
        info = self.logger.info
        skipped = self.skipped

        info("Summary:")
        info("Totally records read: %i", self.total)
        info("Records successfully processed: %i", self.succeed)
        info("Records skipped: %i", skipped.total)

        # Nothing was skipped, so there is no breakdown to show.
        if not skipped.total > 0:
            return

        info("Out of those skipped,")
        breakdown = (
            (" Invalid gene ID: %i", skipped.invalid_gene_id),
            (" Invalid position: %i", skipped.invalid_position),
            (" Insufficient evidence: %i", skipped.insufficient_evidence),
        )
        for template, count in breakdown:
            info(template, count)
72
+
73
class TallyTableSkipped():
    """ Tally of skipped records, broken down by the reason for skipping.

    Attributes:
        invalid_gene_id: Records skipped for an invalid gene ID.
        invalid_position: Records skipped for an invalid position.
        insufficient_evidence: Records skipped for insufficient evidence.
    """
    def __init__(self):
        """ Start every per-reason counter at zero. """
        self.invalid_gene_id:int = 0
        self.invalid_position:int = 0
        self.insufficient_evidence:int = 0

    @property
    def total(self) -> int:
        """ Number of skipped records across all reasons.

        Derived from the per-reason counters rather than stored, so it
        cannot drift out of sync when a caller increments only a per-reason
        counter. It is read-only: update the per-reason counters instead.
        """
        return (
            self.invalid_gene_id
            + self.invalid_position
            + self.insufficient_evidence
        )
81
+
47
82
def parse_star_fusion (args :argparse .Namespace ) -> None :
48
83
""" Parse the STAR-Fusion's output and save it in GVF format. """
49
84
logger = get_logger ()
85
+ tally = TallyTable (logger )
50
86
# unpack args
51
87
fusion = args .input_path
52
88
common .validate_file_format (
@@ -64,13 +100,23 @@ def parse_star_fusion(args:argparse.Namespace) -> None:
64
100
variants :List [seqvar .VariantRecord ] = []
65
101
66
102
for record in parser .STARFusionParser .parse (fusion ):
103
+ tally .total += 1
67
104
if record .est_j < args .min_est_j :
105
+ tally .skipped .insufficient_evidence += 1
68
106
continue
69
107
try :
70
108
var_records = record .convert_to_variant_records (anno , genome )
109
+ variants .extend (var_records )
110
+ tally .succeed += 1
71
111
except err .GeneNotFoundError :
112
+ tally .skipped .invalid_gene_id += 1
72
113
continue
73
- variants .extend (var_records )
114
+ except :
115
+ if args .skip_failed :
116
+ tally .skipped .invalid_position += 1
117
+ continue
118
+ else :
119
+ raise
74
120
75
121
logger .info ('STAR-Fusion output %s loaded.' , fusion )
76
122
@@ -88,3 +134,5 @@ def parse_star_fusion(args:argparse.Namespace) -> None:
88
134
seqvar .io .write (variants , output_path , metadata )
89
135
90
136
logger .info ('Variant info written to disk.' )
137
+
138
+ tally .log ()
0 commit comments