4
4
[callVariant](call-variant.md). Noted that only known circRNA is supported (
5
5
\*_circular_known.txt) """
6
6
from __future__ import annotations
7
+ from typing import TYPE_CHECKING
7
8
import argparse
8
- from typing import List , Dict
9
9
from pathlib import Path
10
10
from moPepGen import get_logger , circ , err
11
11
from moPepGen .parser import CIRCexplorerParser
12
12
from moPepGen .cli import common
13
13
14
14
15
+ if TYPE_CHECKING :
16
+ from typing import List , Dict
17
+ from logging import Logger
18
+
15
19
INPUT_FILE_FORMATS = ['.tsv' , '.txt' ]
16
20
OUTPUT_FILE_FORMATS = ['.gvf' ]
17
21
@@ -74,16 +78,48 @@ def add_subparser_parse_circexplorer(subparsers:argparse._SubParsersAction):
74
78
default = '-100,5' ,
75
79
metavar = '<number>'
76
80
)
81
+ common .add_args_skip_failed (p )
77
82
common .add_args_source (p )
78
83
common .add_args_reference (p , genome = False , proteome = False )
79
84
common .add_args_debug_level (p )
80
85
p .set_defaults (func = parse_circexplorer )
81
86
common .print_help_if_missing_args (p )
82
87
return p
83
88
89
+ class TallyTable ():
90
+ """ Tally table """
91
+ def __init__ (self , logger :Logger ):
92
+ """ Constructor """
93
+ self .total :int = 0
94
+ self .succeed :int = 0
95
+ self .skipped :TallyTableSkipped = TallyTableSkipped ()
96
+ self .logger = logger
97
+
98
+ def log (self ):
99
+ """ Show tally results """
100
+ self .logger .info ("Summary:" )
101
+ self .logger .info ("Totally records read: %i" , self .total )
102
+ self .logger .info ("Records successfully processed: %i" , self .succeed )
103
+ self .logger .info ("Records skipped: %i" , self .skipped .total )
104
+ if self .skipped .total > 0 :
105
+ self .logger .info ("Out of those skipped," )
106
+ self .logger .info (" Invalid circRNA record: %i" , self .skipped .invalid_record )
107
+ self .logger .info (" Insufficient evidence: %i" , self .skipped .insufficient_evidence )
108
+
109
+ class TallyTableSkipped ():
110
+ """ Tally table for failed ones """
111
+ def __init__ (self ):
112
+ """ constructor """
113
+ self .invalid_gene_id :int = 0
114
+ self .invalid_position :int = 0
115
+ self .insufficient_evidence :int = 0
116
+ self .invalid_record :int = 0
117
+ self .total :int = 0
118
+
84
119
def parse_circexplorer (args :argparse .Namespace ):
85
120
""" Parse circexplorer known circRNA results. """
86
121
logger = get_logger ()
122
+ tally = TallyTable (logger )
87
123
88
124
input_path :Path = args .input_path
89
125
output_path :Path = args .output_path
@@ -104,11 +140,16 @@ def parse_circexplorer(args:argparse.Namespace):
104
140
circ_records :Dict [str , List [circ .CircRNAModel ]] = {}
105
141
106
142
for record in CIRCexplorerParser .parse (input_path , args .circexplorer3 ):
143
+ tally .total += 1
107
144
if not args .circexplorer3 :
108
145
if not record .is_valid (args .min_read_number ):
146
+ tally .skipped .total += 1
147
+ tally .skipped .insufficient_evidence += 1
109
148
continue
110
149
elif not record .is_valid (args .min_read_number , args .min_fbr_circ , \
111
150
args .min_circ_score ):
151
+ tally .skipped .total += 1
152
+ tally .skipped .insufficient_evidence += 1
112
153
continue
113
154
try :
114
155
circ_record = record .convert_to_circ_rna (anno , intron_start_range ,
@@ -119,13 +160,17 @@ def parse_circexplorer(args:argparse.Namespace):
119
160
" Skipping it from parsing." ,
120
161
record .name , record .isoform_name
121
162
)
163
+ tally .skipped .invalid_record += 1
164
+ tally .skipped .total += 1
122
165
continue
123
166
except err .IntronNotFoundError :
124
167
logger .warning (
125
168
"The CIRCexplorer record %s from transcript %s contains an unknown"
126
169
" intron. Skipping it from parsing." ,
127
170
record .name , record .isoform_name
128
171
)
172
+ tally .skipped .invalid_record += 1
173
+ tally .skipped .total += 1
129
174
continue
130
175
except :
131
176
logger .error ('Exception raised from record: %s' , record .name )
@@ -135,21 +180,20 @@ def parse_circexplorer(args:argparse.Namespace):
135
180
circ_records [gene_id ] = []
136
181
circ_records [gene_id ].append (circ_record )
137
182
138
- if not circ_records :
139
- logger . warning ( 'No variant record is saved.' )
140
- return
183
+ if circ_records :
184
+ genes_rank = anno . get_genes_rank ( )
185
+ ordered_keys = sorted ( circ_records . keys (), key = lambda x : genes_rank [ x ])
141
186
142
- genes_rank = anno .get_genes_rank ()
143
- ordered_keys = sorted (circ_records .keys (), key = lambda x :genes_rank [x ])
187
+ records = []
188
+ for key in ordered_keys :
189
+ val = circ_records [key ]
190
+ records .extend (val )
144
191
145
- records = []
146
- for key in ordered_keys :
147
- val = circ_records [key ]
148
- records .extend (val )
192
+ metadata = common .generate_metadata (args )
149
193
150
- metadata = common .generate_metadata (args )
194
+ with open (output_path , 'w' ) as handle :
195
+ circ .io .write (records , metadata , handle )
151
196
152
- with open (output_path , 'w' ) as handle :
153
- circ .io .write (records , metadata , handle )
197
+ logger .info ("CircRNA records written to disk." )
154
198
155
- logger . info ( "CircRNA records written to disk." )
199
+ tally . log ( )
0 commit comments