Skip to content

Commit f4b95a4

Browse files
Dom Laetsch authored and committed
getting ready for release
1 parent d54b7e8 commit f4b95a4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+234
-626654
lines changed

.gitignore

+2 -6
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,3 @@
1-
testing/
2-
testing_fly/
3-
*.json
1+
nodesDB.txt
42
*.pyc
5-
*.txt
6-
*.png
7-
*.tsv
3+
*.png

lib/BtCore.py → bloblib/BtCore.py

+17 -8
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,15 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33

4+
"""
5+
File : BtCore.py
6+
Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
7+
"""
8+
49
from __future__ import division
5-
import lib.BtLog as BtLog
6-
import lib.BtIO as BtIO
7-
import lib.BtTax as BtTax
10+
import bloblib.BtLog as BtLog
11+
import bloblib.BtIO as BtIO
12+
import bloblib.BtTax as BtTax
813
from os.path import abspath, isfile, basename, isdir, join
914
from os import getcwd, mkdir
1015
import json
@@ -39,7 +44,6 @@ def view(self, **kwargs):
3944
seqs = kwargs['seqs']
4045
cov_libs = kwargs['cov_libs']
4146
progress_bar = kwargs['progressbar']
42-
4347
# Default sequences if no subset
4448
if not (seqs):
4549
seqs = self.order_of_blobs
@@ -219,9 +223,6 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
219223
read_cov_dict = {}
220224
max_cov = 0.0
221225

222-
if taxrule not in self.taxrules:
223-
BtLog.error('11', taxrule, self.taxrules)
224-
225226
cov_lib_dict = self.covLibs
226227
cov_lib_names_l = self.covLibs.keys() # does not include cov_sum
227228
if len(cov_lib_names_l) > 1:
@@ -236,8 +237,13 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
236237
if (catcolour_dict): # annotation with categories specified in catcolour
237238
group = str(catcolour_dict[name])
238239
elif (c_index): # annotation with c_index instead of taxonomic group
239-
group = str(blob['taxonomy'][taxrule][rank]['c_index'])
240+
if taxrule not in self.taxrules:
241+
BtLog.error('11', taxrule, self.taxrules)
242+
else:
243+
group = str(blob['taxonomy'][taxrule][rank]['c_index'])
240244
else: # annotation with taxonomic group
245+
if taxrule not in self.taxrules:
246+
BtLog.warn_d['9'] % (taxrule, self.taxrules)
241247
group = str(blob['taxonomy'][taxrule][rank]['tax'])
242248
if not group in data_dict:
243249
data_dict[group] = {
@@ -360,6 +366,9 @@ def parseCoverage(self, **kwargs):
360366
covLib.cov_sum += cov
361367
self.dict_of_blobs[name].addCov(covLib.name, cov)
362368
self.dict_of_blobs[name].addReadCov(covLib.name, read_cov_dict[name])
369+
out_f = BtIO.getOutFile(covLib.f, None, None)
370+
covView = ViewObj(name="covlib", out_f=out_f, suffix="cov", header="", body=[])
371+
self.view(viewObjs=[covView], ranks=None, taxrule=None, hits_flag=None, seqs=None, cov_libs=[covLib.name], progressbar=False)
363372

364373
elif covLib.fmt == 'cov':
365374
base_cov_dict, covLib.reads_total, covLib.reads_mapped, covLib.reads_unmapped, read_cov_dict = BtIO.parseCov(covLib.f, set(self.dict_of_blobs))

lib/BtIO.py → bloblib/BtIO.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,7 @@
33

44
"""
55
File : BtIO.py
6-
Version : 0.1
76
Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
8-
Bugs : ?
9-
To do : ?
107
"""
118

129
from __future__ import division
@@ -16,7 +13,7 @@
1613
import shutil
1714
import os
1815
import sys
19-
import lib.BtLog as BtLog
16+
import bloblib.BtLog as BtLog
2017
from collections import deque
2118

2219

lib/BtLog.py → bloblib/BtLog.py

+11 -10
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,9 @@
33

44
"""
55
File : BtLog.py
6-
Version : 0.1
76
Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
8-
Bugs : ?
9-
To do : ?
107
"""
8+
119
from __future__ import division
1210
import sys
1311
from os.path import basename
@@ -60,13 +58,13 @@ def progress(iteration, steps, max_value, no_limit=False):
6058
'22' : '[ERROR:22]\t: Tax file %s seems to have no taxids.',
6159
'23' : '[ERROR:23]\t: Catcolour file %s does not seem to have the right format.',
6260
'24' : '[ERROR:24]\t: Catcolour file incompatible with c-index colouring.',
63-
'25' : '[ERROR:25]\t: Cov file %s does not seem to have the right format.',
61+
'25' : '[ERROR:25]\t: COV file %s does not seem to have the right format.',
6462
'26' : '[ERROR:26]\t: Please specify type similarity search result (--rnacentral FILE or --diamond FILE) or a single TAXID (--taxid INT)',
6563
'27' : '[ERROR:27]\t: nodesDB ("--db") %s could not be read.',
6664
'28' : '[ERROR:28]\t: Please specify "--names" and "--nodes", or "--db"',
6765
'29' : '[ERROR:29]\t: No mapping reads found in %s',
6866
'30' : '[ERROR:30]\t: The module docopt is not installed. Please install it to run blobtools\n\tpip install docopt',
69-
'31' : '[ERROR:31]\t: Please specify a read mapping file',
67+
'31' : '[ERROR:31]\t: Please specify a read mapping file (BAM/SAM/CAS)',
7068
'32' : '[ERROR:32]\t: Choose either --cumulative or --multiplot',
7169
'33' : '[ERROR:33] : CovLib(s) not found. The available covlibs are: \n%s',
7270
'34' : '[ERROR:34] : Invalid plot type : %s',
@@ -75,6 +73,7 @@ def progress(iteration, steps, max_value, no_limit=False):
7573
'37' : '[ERROR:37] : %s does not seem to be a valid %s file',
7674
'38' : '[ERROR:38] : %s is not an integer',
7775
'39' : '[ERROR:39] : Please specify a taxid file (mapping subjects to taxids)',
76+
'40' : '[ERROR:40] : CovLib \'%s\' not specified in refcov file'
7877
}
7978

8079
warn_d = {
@@ -86,7 +85,9 @@ def progress(iteration, steps, max_value, no_limit=False):
8685
'5' : '[WARN]\t\t: Hit for sequence %s in tax file %s has multiple taxIds, only first one is used.',
8786
'6' : '[WARN]\t\t: Sum of coverage in cov lib %s is 0.0. Please ignore this warning if "--no_base_cov" was specified.',
8887
'7' : '[WARN]\t\t: No taxonomy information found.',
89-
'8' : '[WARN]\t\t: Duplicated sequences found :\n\t\t\t%s'
88+
'8' : '[WARN]\t\t: Duplicated sequences found :\n\t\t\t%s',
89+
'9' : '[WARN]\t\t: Taxrule "%s" was not computed for this BlobDb. Available taxrule(s) : %s. Will proceed without taxonomic annotation ...'
90+
9091
}
9192
status_d = {
9293
'0': '[STATUS]\t: Nothing to be done. %s',
@@ -98,15 +99,15 @@ def progress(iteration, steps, max_value, no_limit=False):
9899
'6' : '[STATUS]\t: Computing taxonomy using taxrule(s) %s',
99100
'7' : '[STATUS]\t: Generating BlobDB and writing to file %s',
100101
'8' : '[STATUS]\t: Plotting %s',
101-
'9' : '[STATUS]\t: Reading BlobDb %s',
102+
'9' : '[STATUS]\t: Reading BlobDB %s',
102103
'10': '[STATUS]\t: \tChecking with \'samtools flagstat\'',
103104
'11': '[STATUS]\t: \tMapping reads = %s, total reads = %s (mapping rate = %s)',
104105
'12': '[STATUS]\t: \tChecking with \'clc_mapping_info\'',
105106
'13': '[STATUS]\t: \tWriting %s',
106107
'14': '[STATUS]\t: Preparing view(s) ...',
107-
'15': '[STATUS]\t: \tLoading BlobDb into memory ...',
108-
'16': '[STATUS]\t: \tSerialising BlobDb (using \'%s\' module) (this may take a while) ...',
109-
'17': '[STATUS]\t: \tSerialising BlobDb (using \'%s\' module) (this may take a while, consider installing the \'ujson\' module) ...',
108+
'15': '[STATUS]\t: \tLoading BlobDB into memory ...',
109+
'16': '[STATUS]\t: \tSerialising BlobDB (using \'%s\' module) (this may take a while) ...',
110+
'17': '[STATUS]\t: \tSerialising BlobDB (using \'%s\' module) (this may take a while, consider installing the \'ujson\' module) ...',
110111
'18': '[STATUS]\t: Extracting data for plots ...',
111112
'19': '[STATUS]\t: Writing output ...',
112113
'20': '[STATUS]\t: \tFinished in %ss',

lib/BtPlot.py → bloblib/BtPlot.py

+55 -41
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,15 @@
33

44
"""
55
File : BtPlot.py
6-
Version : 0.1
76
Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
8-
Bugs : ?
9-
To do : ?
107
"""
8+
119
from __future__ import division
1210
from numpy import array, arange, logspace, mean, std
1311
import math
14-
import lib.BtLog as BtLog
15-
import lib.BtIO as BtIO
16-
import lib.BtTax as BtTax
12+
import bloblib.BtLog as BtLog
13+
import bloblib.BtIO as BtIO
14+
import bloblib.BtTax as BtTax
1715
import matplotlib as mat
1816
from matplotlib import cm
1917
from matplotlib.ticker import NullFormatter, MultipleLocator, AutoMinorLocator
@@ -91,17 +89,13 @@ def set_format_scatterplot(axScatter, **kwargs):
9189
min_y, max_y = 0.005, kwargs['max_cov']+1000
9290
axScatter.set_yscale('log')
9391
axScatter.set_xscale('linear')
94-
axScatter.set_xlabel("GC proportion")
95-
axScatter.set_ylabel("Coverage")
9692
axScatter.xaxis.set_major_locator(major_xticks)
9793
axScatter.xaxis.set_minor_locator(minor_xticks)
9894
elif kwargs['plot'] == 'covplot':
9995
min_x, max_x = 0.005, kwargs['max_cov']+1000
10096
min_y, max_y = 0.005, kwargs['max_cov']+1000
10197
axScatter.set_yscale('log')
10298
axScatter.set_xscale('log')
103-
axScatter.set_xlabel(kwargs['xlabel'])
104-
axScatter.set_ylabel(kwargs['ylabel'])
10599
else:
106100
BtLog.error('34' % kwargs['plot'])
107101
axScatter.set_xlim( (min_x, max_x) )
@@ -211,12 +205,14 @@ def __init__(self, data_dict, cov_lib_dict, cov_lib_selection, plot_type):
211205
self.labels = {'all'}
212206
self.plot = plot_type # type of plot
213207
self.group_labels = {}
208+
self.cov_lib_dict = cov_lib_dict
214209
self.cov_libs = self.subselect_cov_libs(cov_lib_dict, cov_lib_selection)
215210
self.cov_libs_total_reads_dict = self.get_cov_libs_total_reads_dict(cov_lib_dict)
216211
self.cov_libs_mapped_reads_dict = self.get_cov_libs_mapped_reads_dict(cov_lib_dict)
217212
self.data_dict = data_dict
218213
self.stats = {}
219214
self.exclude_groups = []
215+
self.version = None
220216
self.colours = {}
221217
self.group_order = []
222218
self.plot_order = []
@@ -230,10 +226,8 @@ def __init__(self, data_dict, cov_lib_dict, cov_lib_selection, plot_type):
230226
self.cumulative_flag = ''
231227

232228
self.cov_y_dict = {}
233-
self.xlabel = ''
234-
self.ylabel = ''
235-
self.xmax = ''
236-
self.ymax = ''
229+
self.xlabel = None
230+
self.ylabel = None
237231

238232
self.refcov_dict = {}
239233

@@ -298,18 +292,33 @@ def write_stats(self, out_f):
298292
for g, labels in self.group_labels.items():
299293
if label in labels:
300294
stats.append(self.get_stats_for_group(g))
301-
295+
output = []
296+
output.append('## %s' % self.version)
297+
for cov_lib, cov_lib_dict in self.cov_lib_dict.items():
298+
if cov_lib in self.cov_libs:
299+
output.append("## %s=%s" % (cov_lib, cov_lib_dict['f']))
300+
fields = ['name', 'colour', 'count_visible', 'count_visible_perc', 'span_visible','span_visible_perc', 'n50', 'gc_mean', 'gc_std']
301+
header = [field for field in fields]
302+
for cov_lib in sorted(self.cov_libs):
303+
header.append('%s_mean' % cov_lib)
304+
header.append('%s_std' % cov_lib)
305+
header.append('%s_read_map' % cov_lib)
306+
header.append('%s_read_map_p' % cov_lib)
307+
output.append('# %s' % "\t".join(header))
308+
for stat in stats:
309+
line = []
310+
for field in fields:
311+
line.append(stat[field])
312+
for cov_lib in sorted(self.cov_libs):
313+
line.append(stat['cov_mean'][cov_lib])
314+
line.append(stat['cov_std'][cov_lib])
315+
line.append(stat['reads_mapped'][cov_lib])
316+
line.append(stat['reads_mapped_perc'][cov_lib])
317+
output.append("%s" % "\t".join(line))
302318
out_f = "%s.stats.txt" % out_f
303319
with open(out_f, 'w') as fh:
304-
for cov_lib in sorted(self.cov_libs):
305-
fh.write("# %s - %s\n" % (self.out_f, cov_lib))
306-
fh.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ('Group', 'colour', 'count', 'visible-%', 'span', 'visible-%', 'n50', 'GC-mean', 'GC-std', 'Cov-mean', 'Cov-std', 'Mapped-reads', 'Mapped-reads-%'))
307-
for stat in stats:
308-
fh.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (\
309-
stat['name'], stat['colour'], stat['count_visible'], stat['count_visible_perc'], stat['span_visible'], \
310-
stat['span_visible_perc'], stat['n50'], stat['gc_mean'], stat['gc_std'], stat['cov_mean'][cov_lib], \
311-
stat['cov_std'][cov_lib], stat['reads_mapped'][cov_lib], stat['reads_mapped_perc'][cov_lib]))
312-
320+
print BtLog.status_d['24'] % ("%s" % out_f)
321+
fh.write("\n".join(output))
313322

314323
def compute_stats(self):
315324
stats = {}
@@ -334,6 +343,7 @@ def compute_stats(self):
334343
'count_hidden' : 0,
335344
'span_hidden' : 0
336345
}
346+
337347
for group, labels in self.group_labels.items():
338348
for label in labels:
339349
stats[label]['name'] = stats[label]['name'] + self.data_dict[group]['name']
@@ -372,10 +382,8 @@ def relabel_and_colour(self, colour_dict, user_labels):
372382
colour_groups = [group if not (group in user_labels) else user_labels[group] for group in groups]
373383
colour_dict = generateColourDict(colour_groups)
374384
for idx, group in enumerate(self.group_order):
375-
if (self.exclude_groups):
376-
if group in self.exclude_groups:
377-
self.group_labels[group].add('other')
378-
self.colours[group] = WHITE
385+
if group in self.exclude_groups:
386+
pass
379387
elif group in user_labels:
380388
label = user_labels[group]
381389
self.group_labels[group].add(label)
@@ -408,7 +416,9 @@ def setupPlot(self, plot):
408416
# Setting up plots and axes
409417
fig = plt.figure(1, figsize=(35,35), dpi=400)
410418
axScatter = plt.axes(rect_scatter, axisbg=BGGREY)
411-
axScatter = set_format_scatterplot(axScatter, max_cov=self.max_cov, plot=plot, xlabel=self.xlabel, ylabel=self.ylabel)
419+
axScatter = set_format_scatterplot(axScatter, max_cov=self.max_cov, plot=plot)
420+
axScatter.set_xlabel(self.xlabel)
421+
axScatter.set_ylabel(self.ylabel)
412422
axHistx = plt.axes(rect_histx, axisbg=BGGREY)
413423
axHistx = set_format_hist_x(axHistx, axScatter)
414424
axHisty = plt.axes(rect_histy, axisbg=BGGREY)
@@ -475,16 +485,20 @@ def plotBar(self, cov_lib, out_f):
475485
ax_main_data['labels'].append('Mapped (assembly)')
476486
ax_main_data['values'].append(reads_mapped/reads_total)
477487
ax_main_data['colours'].append(DGREY)
478-
if cov_lib in self.refcov_dict:
479-
reads_total_ref = refcov_dict[cov_lib]['reads_total']
480-
reads_mapped_ref = refcov_dict[cov_lib]['reads_mapped']
481-
reads_unmapped_ref = reads_total_ref - reads_mapped_ref
482-
ax_main_data['labels'].append('Unmapped (ref)')
483-
ax_main_data['values'].append(reads_unmapped_ref/reads_total_ref)
484-
ax_main_data['colours'].append(DGREY)
485-
ax_main_data['labels'].append('Mapped (ref)')
486-
ax_main_data['values'].append(reads_mapped_ref/reads_total_ref)
487-
ax_main_data['colours'].append(DGREY)
488+
if (self.refcov_dict):
489+
if cov_lib in self.refcov_dict:
490+
reads_total_ref = self.refcov_dict[cov_lib]['reads_total']
491+
reads_mapped_ref = self.refcov_dict[cov_lib]['reads_mapped']
492+
reads_unmapped_ref = reads_total_ref - reads_mapped_ref
493+
ax_main_data['labels'].append('Unmapped (ref)')
494+
ax_main_data['values'].append(reads_unmapped_ref/reads_total_ref)
495+
ax_main_data['colours'].append(DGREY)
496+
ax_main_data['labels'].append('Mapped (ref)')
497+
ax_main_data['values'].append(reads_mapped_ref/reads_total_ref)
498+
ax_main_data['colours'].append(DGREY)
499+
else:
500+
BtLog.error('40', cov_lib)
501+
488502
# mapped plotted groups
489503
for group in self.plot_order:
490504
ax_group_data['labels'].append(group)
@@ -524,7 +538,7 @@ def plotScatter(self, cov_lib, info_flag, out_f):
524538
if group == 'no-hit':
525539
alpha = 0.5
526540
group_length_array = array(self.stats[group]['length'])
527-
if len(group_length_array) > 0:
541+
if len(group_length_array) > 0 and group not in self.exclude_groups:
528542
colour = self.colours[group]
529543
group_x_array = ''
530544
group_y_array = ''
@@ -567,7 +581,7 @@ def plotScatter(self, cov_lib, info_flag, out_f):
567581
axScatter.scatter(group_x_array, group_y_array, color = colour, s = marker_size_array, lw = lw, alpha=alpha, edgecolor=BLACK, label=label)
568582
axLegend.axis('off')
569583
if (self.multiplot):
570-
fig_m, axScatter_m, axHistx_m, axHisty_m, axLegend_m = self.setupPlot()
584+
fig_m, axScatter_m, axHistx_m, axHisty_m, axLegend_m, top_bins, right_bins = self.setupPlot(self.plot)
571585
legend_handles_m = []
572586
legend_labels_m = []
573587
legend_handles_m.append(Line2D([0], [0], linewidth = 0.5, linestyle="none", marker="o", alpha=1, markersize=24, markerfacecolor=colour))

lib/BtTax.py → bloblib/BtTax.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,9 @@
33

44
"""
55
File : BtTax.py
6-
Version : 0.1
76
Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
8-
Bugs : ?
9-
To do : ?
107
"""
8+
119
from __future__ import division
1210
RANKS = ['species', 'genus', 'family', 'order', 'phylum', 'superkingdom']
1311
TAXRULES = ['bestsum', 'bestsumorder'] # this should be re-named colour rules at one point
File renamed without changes.

0 commit comments

Comments
 (0)