Skip to content

Commit e607218

Browse files
author
Dominik R Laetsch
committed Jul 13, 2017
Improvements to bamfilter
- blazing fast
1 parent 0d4d90e commit e607218

File tree

2 files changed

+10
-11
lines changed

2 files changed

+10
-11
lines changed
 

‎bloblib/BtIO.py

+10-10
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,8 @@ def write_read_pair_seqs(pair_count_by_type, pair_seqs_by_type, out_fs_by_type):
255255
out_f = out_fs_by_type[pair_type]
256256
with open(out_f, 'w') as out_fh:
257257
print BtLog.status_d['24'] % out_f
258-
out_fh.write("\n".join(pair_seqs_by_type[pair_type]) + "\n")
258+
#out_fh.write("\n".join(pair_seqs_by_type[pair_type]) + "\n")
259+
out_fh.write("\n".join([pair for pair in pair_seqs_by_type[pair_type]]) + "\n")
259260

260261

261262
def get_read_pair_seqs(read1, read2):
@@ -278,7 +279,8 @@ def get_read_pair_seqs(read1, read2):
278279
#fq[5] = "".join([COMPLEMENT.get(nt.upper(), '') for nt in fq[5][::-1]])
279280
fq[5] = "".join([COMPLEMENT.get(nt, '') for nt in fq[5][::-1]])
280281
fq[7] = fq[7][::-1]
281-
return tuple(fq)
282+
#return tuple(fq)
283+
return "\n".join(fq)
282284

283285

284286
def init_read_pairs(outfile, include_unmapped, include, exclude):
@@ -291,17 +293,14 @@ def init_read_pairs(outfile, include_unmapped, include, exclude):
291293
read_pair_types.append('UnUn')
292294
pair_count_by_type = {read_pair_type : 0 for read_pair_type in read_pair_types}
293295
# initialise read_pair tuples
294-
read_pair_seqs = {read_pair_type : tuple() for read_pair_type in read_pair_types}
296+
# read_pair_seqs = {read_pair_type : tuple() for read_pair_type in read_pair_types}
297+
read_pair_seqs = {read_pair_type : [] for read_pair_type in read_pair_types}
295298
# initialise read_pair files
296299
read_pair_out_fs = {}
297300
for read_pair_type in read_pair_types:
298301
read_pair_out_fs[read_pair_type] = getOutFile(outfile, None, read_pair_type + ".fq")
299302
return pair_count_by_type, read_pair_seqs, read_pair_out_fs
300303

301-
def close_fhs(used_fhs):
302-
for fh in used_fhs.values():
303-
fh.close()
304-
305304
def print_bam(read_pair_out_fs, read_pair_type, read1, read2):
306305
with open(read_pair_out_fs[read_pair_type] + ".txt", 'a') as fh:
307306
fh.write("\t".join(read1) + "\n")
@@ -356,10 +355,11 @@ def parseBamForFilter(infile, progress_flag, include_unmapped, outfile, include,
356355
read2 = sam_lines[i+1].split()
357356
read_pair_type = "".join(sorted([sequence_to_type_dict[read1[2]], sequence_to_type_dict[read2[2]]]))
358357
if read_pair_type in pair_seqs_by_type:
359-
pair_seqs_by_type[read_pair_type] += get_read_pair_seqs(read1, read2)
358+
#pair_seqs_by_type[read_pair_type] += get_read_pair_seqs(read1, read2)
359+
pair_seqs_by_type[read_pair_type].append(get_read_pair_seqs(read1, read2))
360360
pair_count_by_type[read_pair_type] += 1
361-
BtLog.progress(seen_reads, progress_unit, reads_total)
362-
except StopIteration:
361+
BtLog.progress(seen_reads, progress_unit, reads_total)
362+
except IndexError:
363363
print BtLog.warn_d['11']
364364
#print_bam(read_pair_out_fs, read_pair_type, read1, read2) # this prints SAM files for debugging
365365
if progress_flag:

‎bloblib/bamfilter.py

-1
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@ def main():
5050
do_sort = args['--sort']
5151
keep_sorted = args['--keep']
5252
sort_threads = int(args['--threads'])
53-
5453
out_f = BtIO.getOutFile(bam_f, out_prefix, None)
5554
if include_f and exclude_f:
5655
print BtLog.error('43')

0 commit comments

Comments
 (0)
Please sign in to comment.