diff --git a/examples/BedMatrixExampleTrack.py b/examples/BedMatrixExampleTrack.py index 55d0cd3d..190d45c2 100644 --- a/examples/BedMatrixExampleTrack.py +++ b/examples/BedMatrixExampleTrack.py @@ -1,16 +1,15 @@ # -*- coding: utf-8 -*- import numpy as np -from . BedGraphTrack import BedGraphTrack -from . GenomeTrack import GenomeTrack +from . BedGraphLikeTrack import BedGraphLikeTrack -class BedGraphMatrixTrack(BedGraphTrack): - # this track class extends a BedGraphTrack that is already part of +class BedGraphMatrixTrack(BedGraphLikeTrack): + # this track class extends a BedGraphLikeTrack that is already part of # pyGenomeTracks. The advantage of extending this class is that # we can re-use the code for reading a bedgraph file SUPPORTED_ENDINGS = ['.bm', '.bm.gz'] TRACK_TYPE = 'bedgraph_matrix' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" + OPTIONS_TXT = BedGraphLikeTrack.OPTIONS_TXT + f""" # a bedgraph matrix file is like a bedgraph, except that per bin there # are more than one value (separated by tab). This file type is # produced by the HiCExplorer tool hicFindTads and contains @@ -38,12 +37,6 @@ class BedGraphMatrixTrack(BedGraphTrack): 'height': [0, np.inf]} INTEGER_PROPERTIES = {} - # In BedGraphTrack the method set_properties_defaults - # has been adapted to a coverage track. Here we want to - # go back to the initial method: - def set_properties_defaults(self): - GenomeTrack.set_properties_defaults(self) - def plot(self, ax, chrom_region, start_region, end_region): """ Args: @@ -88,7 +81,3 @@ def plot(self, ax, chrom_region, start_region, end_region): def plot_y_axis(self, ax, plot_axis): """turn off y_axis plot""" pass - - def __del__(self): - if self.tbx is not None: - self.tbx.close() diff --git a/pygenometracks/tests/test_data/master_tracks.ini b/pygenometracks/tests/test_data/master_tracks.ini index bceaf9c8..627a0b0f 100644 --- a/pygenometracks/tests/test_data/master_tracks.ini +++ b/pygenometracks/tests/test_data/master_tracks.ini @@ -132,21 +132,11 @@ height = 2 # Otherwise, each plot use its own scale #overlay_previous = yes -# If the bed file contains the exon -# structure (bed 12) then this is plotted. Otherwise -# a region **with direction** is plotted. -# If the bed file contains a column for color (column 9), then this color can be used by -# setting: -#color = bed_rgb -# if color is a valid colormap name (like RbBlGn), then the score (column 5) is mapped -# to the colormap. -# In this case, the the min_value and max_value for the score can be provided, otherwise -# the maximum score and minimum score found are used. -#color = RdYlBu -#min_value=0 -#max_value=100 -# If the color is simply a color name, then this color is used and the score is not considered. -color = darkblue +# optional: line_width +#line_width = 0.5 +# optional, default is black. To remove the border, simply set 'border_color' to none +#border_color = black + # whether printing the labels labels = false # optional: @@ -155,8 +145,6 @@ labels = false #max_labels = 60 # optional: font size can be given to override the default size fontsize = 10 -# optional: line_width -#line_width = 0.5 # the display parameter defines how the bed file is plotted. # Default is 'stacked' where regions are plotted on different lines so # we can see all regions and all labels. @@ -164,9 +152,6 @@ fontsize = 10 # These options assume that the regions do not overlap. # `collapsed`: The bed regions are plotted one after the other in one line. # `interleaved`: The bed regions are plotted in two lines, first up, then down, then up etc. -# optional, default is black. To remove the border, simply set 'border_color' to none -# Not used in tssarrow style -#border_color = black # style to plot the genes when the display is not triangles #style = UCSC #style = flybase @@ -205,9 +190,22 @@ fontsize = 10 # If you want that the tip of the arrow correspond to # the extremity of the interval use: # arrowhead_included = true + +# If the bed file contains a column for color (column 9), then this color can be used by +# setting: +#color = bed_rgb +# if color is a valid colormap name (like RbBlGn), then the score (column 5) is mapped +# to the colormap. +# In this case, the the min_value and max_value for the score can be provided, otherwise +# the maximum score and minimum score found are used. +#color = RdYlBu +#min_value=0 +#max_value=100 +# If the color is simply a color name, then this color is used and the score is not considered. +color = darkblue # optional. If not given is guessed from the file ending. file_type = bed - + [epilog.qcat] file = pygenometracks/tests/test_data/epilog.qcat.bgz diff --git a/pygenometracks/tracks/BedGeneLikeTrack.py b/pygenometracks/tracks/BedGeneLikeTrack.py new file mode 100644 index 00000000..ce790e11 --- /dev/null +++ b/pygenometracks/tracks/BedGeneLikeTrack.py @@ -0,0 +1,790 @@ +from . BedLikeTrack import BedLikeTrack, AROUND_REGION +from .. utilities import get_length_w, change_chrom_names +from matplotlib import font_manager +from matplotlib.patches import Rectangle, Polygon +from matplotlib.lines import Line2D +import matplotlib.pyplot as plt +import numpy as np + +DEFAULT_DISPLAY_BED = 'stacked' +DISPLAY_BED_VALID = ['collapsed', 'triangles', 'interleaved', 'stacked'] +DISPLAY_BED_SYNONYMOUS = {'interlaced': 'interleaved', 'domain': 'interleaved'} + + +class BedGeneLikeTrack(BedLikeTrack): + SUPPORTED_ENDINGS = [] + TRACK_TYPE = None + OPTIONS_TXT = BedLikeTrack.OPTIONS_TXT + """ +# whether printing the labels +labels = false +# optional: +# by default the labels are not printed if you have more than 60 features. +# to change it, just increase the value: +#max_labels = 60 +# optional: font size can be given to override the default size +fontsize = 10 +# the display parameter defines how the bed file is plotted. +# Default is 'stacked' where regions are plotted on different lines so +# we can see all regions and all labels. +# The other options are ['collapsed', 'interleaved', 'triangles'] +# These options assume that the regions do not overlap. +# `collapsed`: The bed regions are plotted one after the other in one line. +# `interleaved`: The bed regions are plotted in two lines, first up, then down, then up etc. +# style to plot the genes when the display is not triangles +#style = UCSC +#style = flybase +#style = tssarrow +# maximum number of gene rows to be plotted. This +# field is useful to limit large number of close genes +# to be printed over many rows. When several images want +# to be combined this must be set to get equal size +# otherwise, on each image the height of each gene changes +#gene_rows = 10 +# by default the ymax is the number of +# rows occupied by the genes in the region plotted. However, +# by setting this option, the global maximum is used instead. +# This is useful to combine images that are all consistent and +# have the same number of rows. +#global_max_row = true +# If you want to plot all labels inside the plotting region: +#all_labels_inside = true +# If you want to display the name of the gene which goes over the plotted +# region in the right margin put: +#labels_in_margin = true +# if you use UCSC style, you can set the relative distance between 2 arrows on introns +# default is 2 +#arrow_interval = 2 +# if you use tssarrow style, you can choose the length of the arrow in bp +# (default is 4% of the plotted region) +#arrow_length = 5000 +# if you use flybase or tssarrow style, you can choose the color of non-coding intervals: +#color_utr = grey +# as well as the proportion between their height and the one of coding +# (by default they are the same height): +#height_utr = 1 +# By default, for oriented intervals in flybase style, +# or bed files with less than 12 columns, the arrowhead is added +# outside of the interval. +# If you want that the tip of the arrow correspond to +# the extremity of the interval use: +# arrowhead_included = true +""" + + DEFAULTS_PROPERTIES = dict({'fontsize': 12, + 'labels': True, + 'style': 'flybase', + 'display': DEFAULT_DISPLAY_BED, + 'max_labels': 60, + 'global_max_row': False, + 'gene_rows': None, + 'arrow_interval': 2, + 'arrowhead_included': False, + 'color_utr': 'grey', + 'height_utr': 1, + 'arrow_length': None, + 'all_labels_inside': False, + 'labels_in_margin': False}, + **BedLikeTrack.DEFAULTS_PROPERTIES) + NECESSARY_PROPERTIES = BedLikeTrack.NECESSARY_PROPERTIES + SYNONYMOUS_PROPERTIES = dict({'display': DISPLAY_BED_SYNONYMOUS}, + **BedLikeTrack.SYNONYMOUS_PROPERTIES) + POSSIBLE_PROPERTIES = dict({'style': ['flybase', 'UCSC', 'tssarrow'], + 'display': DISPLAY_BED_VALID}, + **BedLikeTrack.POSSIBLE_PROPERTIES) + BOOLEAN_PROPERTIES = ['labels', 'global_max_row', + 'arrowhead_included', 'all_labels_inside', + 'labels_in_margin'] + BedLikeTrack.BOOLEAN_PROPERTIES + STRING_PROPERTIES = ['style', 'color_utr', 'display'] + \ + BedLikeTrack.STRING_PROPERTIES + FLOAT_PROPERTIES = dict({'max_value': [- np.inf, np.inf], + 'min_value': [- np.inf, np.inf], + 'fontsize': [0, np.inf], + 'height_utr': [0, 1]}, + **BedLikeTrack.FLOAT_PROPERTIES) + INTEGER_PROPERTIES = dict({'gene_rows': [0, np.inf], + 'max_labels': [0, np.inf], + 'arrow_interval': [1, np.inf], + 'arrow_length': [0, np.inf]}, + **BedLikeTrack.INTEGER_PROPERTIES) + + def set_properties_defaults(self): + BedLikeTrack.set_properties_defaults(self) + self.fp = font_manager.FontProperties(size=self.properties['fontsize']) + # to set the distance between rows + self.row_scale = 2.3 + + def get_max_num_row(self, len_w, small_relative): + ''' Process the whole bed regions at the given figure length + and font size to + determine the maximum number of rows required. + :return: + ''' + + self.max_num_row = {} + for chrom in self.interval_tree: + row_last_position = [] # each entry in this list contains the end position + self.max_num_row[chrom] = 0 + for region in sorted(self.interval_tree[chrom][0:500000000]): + bed = region.data + if self.properties['labels']: + bed_extended_end = int(bed.end + (len(bed.name) * len_w)) + else: + bed_extended_end = (bed.end + 2 * small_relative) + + # get smallest free row + if len(row_last_position) == 0: + free_row = 0 + row_last_position.append(bed_extended_end) + else: + # get list of rows that are less than bed.start, then take the min + idx_list = [idx for idx, value in enumerate(row_last_position) if value < bed.start] + if len(idx_list): + free_row = min(idx_list) + row_last_position[free_row] = bed_extended_end + else: + free_row = len(row_last_position) + row_last_position.append(bed_extended_end) + + if free_row > self.max_num_row[bed.chromosome]: + self.max_num_row[bed.chromosome] = free_row + + self.log.debug(f"max number of rows set to {self.max_num_row}") + return self.max_num_row + + def get_y_pos(self, free_row): + """ + The y_pos is set such that regions to be plotted + do not overlap (stacked). To override this + the properties['collapsed'] needs to be set. + + The algorithm uses a interval tree (self.region_interval) + to check the overlaps + and a sort of coverage vector 'rows used' + to identify the row in which to plot + :return: int y position + """ + + # if the interleaved directive is given, + # ypos simply oscilates between 0 and 1 + if self.properties['display'] == 'interleaved': + ypos = 1 \ + if self.counter % 2 == 0 \ + else 0 + # if the collapsed directive is given + # ypos is always 0 + elif self.properties['display'] == 'collapsed': + ypos = 0 + # if it is stacked + # it will got the the free_row + else: + ypos = free_row * self.row_scale + return ypos + + def plot(self, ax, chrom_region, start_region, end_region): + if chrom_region not in self.interval_tree.keys(): + chrom_region_before = chrom_region + chrom_region = change_chrom_names(chrom_region) + if chrom_region not in self.interval_tree.keys(): + self.log.warning("*Warning*\nNo interval was found when " + "overlapping with both " + f"{chrom_region_before}:{start_region - AROUND_REGION}-{end_region + AROUND_REGION}" + f" and {chrom_region}:{start_region - AROUND_REGION}-{end_region + AROUND_REGION}" + " inside the bed file. " + "This will generate an empty track!!\n") + return + chrom_region = self.check_chrom_str_bytes(self.interval_tree, + chrom_region) + + genes_overlap = \ + sorted(self.interval_tree[chrom_region][start_region:end_region]) + + if self.properties['display'] == 'triangles': + self.plot_triangles(ax, genes_overlap) + else: + self.counter = 0 + self.small_relative = 0.004 * (end_region - start_region) + if self.properties['labels']: + self.len_w = get_length_w(ax.get_figure().get_figwidth(), + start_region, end_region, + self.properties['fontsize']) + else: + self.len_w = 1 + + if self.properties['global_max_row']: + self.get_max_num_row(self.len_w, self.small_relative) + + # do not print labels when too many intervals are visible. + if self.properties['labels'] and \ + len(genes_overlap) > self.properties['max_labels']: + self.properties['labels'] = False + + linewidth = self.properties['line_width'] + max_num_row_local = 1 + max_ypos = 0 + # check for the number of other intervals that overlap + # with the given interval + # 1 2 + # 012345678901234567890123456 + # 1========= 4========= + # 2========= + # 3============ + # + # for 1 row_last_position = [9] + # for 2 row_last_position = [9, 14] + # for 3 row_last_position = [9, 14, 19] + # for 4 row_last_position = [26, 14, 19] + + row_last_position = [] + # each entry in this list contains the end position + # of genomic interval. The list index is the row + # in which the genomic interval was plotted. + # Any new genomic interval that wants to be plotted, + # knows the row to use by finding the list index that + # is larger than its start + + # check for overlapping genes including + # label size (if plotted) + + if ax.get_xlim()[0] > ax.get_xlim()[1]: + genes_overlap = reversed(genes_overlap) + for region in genes_overlap: + """ + BED12 gene format with exon locations at the end + chrX 20850 23076 CG17636-RA 0 - 20850 23017 0 3 946,765,64, 0,1031,2162, + + BED9 + bed with rgb at end + chr2L 0 70000 ID_5 0.26864549832 . 0 70000 51,160,44 + + BED6 + bed without rgb + chr2L 0 70000 ID_5 0.26864549832 . + """ + self.counter += 1 + bed = region.data + + if ax.get_xlim()[0] < ax.get_xlim()[1]: + bed_left = bed.start + bed_right = bed.end + + def add_to_right(a, b): + return a + b + + def add_to_left(a, b): + return a - b + + def is_left_to(a, b): + return a < b + + def is_right_to(a, b): + return a > b + + else: + bed_left = bed.end + bed_right = bed.start + + def add_to_right(a, b): + return a - b + + def add_to_left(a, b): + return a + b + + def is_left_to(a, b): + return a > b + + def is_right_to(a, b): + return a < b + + if self.properties['labels']: + num_name_characters = len(bed.name) + 2 + # +2 to account for a space before and after the name + bed_extended_right = int(add_to_right(bed_right, (num_name_characters * self.len_w))) + else: + bed_extended_right = add_to_right(bed_right, 2 * self.small_relative) + + bed_extended_left = bed_left + # get smallest free row + if len(row_last_position) == 0: + free_row = 0 + row_last_position.append(bed_extended_right) + else: + # If all_labels_inside = True + # genes which goes over will have their labels inside + if self.properties['all_labels_inside'] and self.properties['labels'] \ + and is_right_to(bed_extended_right, ax.get_xlim()[1]): + bed_extended_left = int(add_to_left(bed_left, (num_name_characters * self.len_w))) + # Check that the start position is not outside: + if is_left_to(bed_extended_left, ax.get_xlim()[0]): + # If it would be outside, we use the default right label + bed_extended_left = bed_left + else: + # If we keep the label to the left, we update the right extended + bed_extended_right = add_to_right(bed_right, 2 * self.small_relative) + + # get list of rows that are left to bed_extended_left, then take the min + idx_list = [idx for idx, value in enumerate(row_last_position) + if is_left_to(value, bed_extended_left)] + if len(idx_list): + free_row = min(idx_list) + row_last_position[free_row] = bed_extended_right + else: + free_row = len(row_last_position) + row_last_position.append(bed_extended_right) + + rgb = self.get_rgb(bed) + edgecolor = self.get_rgb(bed, param='border_color', default=rgb) + + ypos = self.get_y_pos(free_row) + + # do not plot if the maximum interval rows to plot is reached + if self.properties['gene_rows'] is not None and \ + free_row >= self.properties['gene_rows']: + continue + + if free_row > max_num_row_local: + max_num_row_local = free_row + if ypos > max_ypos: + max_ypos = ypos + + if self.properties['style'] == 'tssarrow': + self.draw_gene_tssarrow_style(ax, bed, ypos, rgb, + linewidth) + elif self.bed_type == 'bed12': + if self.properties['style'] == 'flybase': + self.draw_gene_with_introns_flybase_style(ax, bed, ypos, + rgb, edgecolor, + linewidth) + else: + self.draw_gene_with_introns(ax, bed, ypos, rgb, edgecolor, + linewidth) + else: + self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor, linewidth) + + if not self.properties['labels']: + pass + elif bed_extended_left != bed_left: + # The label will be plotted before + ax.text(add_to_left(bed_left, self.small_relative), + ypos + (1 / 2), + bed.name, horizontalalignment='right', + verticalalignment='center', fontproperties=self.fp) + elif bed_right > start_region and bed_right < end_region: + ax.text(add_to_right(bed_right, self.small_relative), + ypos + 0.5, + bed.name, horizontalalignment='left', + verticalalignment='center', fontproperties=self.fp) + elif self.properties['labels_in_margin'] \ + and (bed_right == end_region or is_right_to(bed_right, end_region)): + ax.text(add_to_right(ax.get_xlim()[1], self.small_relative), + ypos + (1 / 2), + bed.name, horizontalalignment='left', + verticalalignment='center', fontproperties=self.fp) + + if self.counter == 0: + self.log.warning("*Warning* No intervals were found for file" + f" {self.properties['file']} in " + f"section '{self.properties['section_name']}'" + " for the interval plotted" + f" ({chrom_region}:{start_region}-{end_region}).\n") + + epsilon = 0.08 + ymax = - epsilon + + # We set ymin and ymax to have genes centered epsilon from the border + + if self.properties['global_max_row']: + max_ypos = self.max_num_row[chrom_region] * self.row_scale + + elif self.properties['gene_rows'] is not None: + max_ypos = self.properties['gene_rows'] * self.row_scale + + ymin = max_ypos + (1 + epsilon) + + self.log.debug(f"ylim {ymin},{ymax}") + # the axis is inverted (thus, ymax < ymin) + ax.set_ylim(ymin, ymax) + + if self.properties['display'] == 'interleaved': + ax.set_ylim(2 + epsilon, ymax) + elif self.properties['display'] == 'collapsed': + ax.set_ylim(1 + epsilon, ymax) + + def plot_y_axis(self, ax, plot_axis): + if self.colormap is not None: + self.colormap.set_array([]) + + cobar = plt.colorbar(self.colormap, ax=ax, fraction=1, + orientation='vertical') + + cobar.solids.set_edgecolor("face") + cobar.ax.tick_params(labelsize='smaller') + cobar.ax.yaxis.set_ticks_position('left') + # adjust the labels of the colorbar + ticks = cobar.ax.get_yticks() + labels = cobar.ax.set_yticklabels(ticks.astype('float32')) + (vmin, vmax) = cobar.mappable.get_clim() + for idx in np.where(ticks == vmin)[0]: + # if the label is at the start of the colobar + # move it above avoid being cut or overlapping with other track + labels[idx].set_verticalalignment('bottom') + for idx in np.where(ticks == vmax)[0]: + # if the label is at the end of the colobar + # move it a bit inside to avoid overlapping + # with other labels + labels[idx].set_verticalalignment('top') + + def draw_gene_simple(self, ax, bed, ypos, rgb, edgecolor, linewidth): + """ + draws an interval with direction (if given) + """ + + if bed.strand not in ['+', '-']: + ax.add_patch(Rectangle((bed.start, ypos), + bed.end - bed.start, 1, + edgecolor=edgecolor, facecolor=rgb, + linewidth=linewidth)) + else: + vertices = self._draw_arrow(bed.start, bed.end, bed.strand, ypos) + ax.add_patch(Polygon(vertices, closed=True, fill=True, + edgecolor=edgecolor, + facecolor=rgb, + linewidth=linewidth)) + + def draw_gene_with_introns_flybase_style(self, ax, bed, ypos, rgb, + edgecolor, linewidth): + """ + draws a gene like in flybase gbrowse. + """ + if bed.block_count == 0 and bed.thick_start == bed.start and \ + bed.thick_end == bed.end: + self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor, linewidth) + return + half_height = 1 / 2 + # draw 'backbone', a line from the start until the end of the gene + ax.plot([bed.start, bed.end], [ypos + half_height, ypos + half_height], + 'black', linewidth=linewidth, zorder=-1) + + # get start, end of all the blocks + positions = self._split_bed_to_blocks(bed) + + if bed.strand != '.': + # plot all blocks as rectangles except the last if the strand is + or + # the first is the strand is -, which are drawn as arrows. + if bed.strand == '-': + positions = positions[::-1] + + first_pos = positions.pop() + if first_pos[2] == 'UTR': + _rgb = self.get_rgb(bed, param='color_utr', default=rgb) + # The arrow will be centered on + # ypos + 1 / 2 + # The total height will be + # self.properties['height_utr'] + y0 = ypos + (1 - self.properties['height_utr']) / 2 + half_height = self.properties['height_utr'] / 2 + else: + _rgb = rgb + y0 = ypos + half_height = 1 / 2 + + vertices = self._draw_arrow(first_pos[0], first_pos[1], bed.strand, + y0, half_height) + + ax.add_patch(Polygon(vertices, closed=True, fill=True, + edgecolor=edgecolor, + facecolor=_rgb, + linewidth=linewidth)) + + for start_pos, end_pos, _type in positions: + if _type == 'UTR': + _rgb = self.get_rgb(bed, param='color_utr', default=rgb) + y0 = ypos + (1 - self.properties['height_utr']) / 2 + height = self.properties['height_utr'] + else: + _rgb = rgb + y0 = ypos + height = 1 + + vertices = [(start_pos, y0), (start_pos, y0 + height), + (end_pos, y0 + height), (end_pos, y0)] + + ax.add_patch(Polygon(vertices, closed=True, fill=True, + edgecolor=edgecolor, + facecolor=_rgb, + linewidth=linewidth)) + + def _draw_arrow(self, start, end, strand, ypos, half_height=None): + """ + Draws a filled arrow. + :param start: + :param end: + :param strand: + :param ypos: + :param half_height: + :return: None + """ + if half_height is None: + half_height = 1 / 2 + # The y values are common to both strands: + y0 = ypos + y1 = ypos + 2 * half_height + if strand == '+': + x0 = start + if self.properties['arrowhead_included']: + x1 = max(start, end - self.small_relative) + x2 = end + else: + x1 = end + x2 = end + self.small_relative + """ + The vertices correspond to 5 points along the path of a form like the following, + starting in the lower left corner and progressing in a clock wise manner. + + =================> + x0 x1 x2 + + """ + + vertices = [(x0, y0), (x0, y1), (x1, y1), + (x2, y0 + half_height), (x1, y0)] + + else: + if self.properties['arrowhead_included']: + x0 = min(end, start + self.small_relative) + xb = start + else: + x0 = start + xb = start - self.small_relative + x1 = end + """ + The vertices correspond to 5 points along the path of a form like the following, + starting in the lower left corner and progressing in a clock wise manner. + + <================= + xb x0 x1 + """ + vertices = [(x0, y0), (xb, y0 + half_height), (x0, y1), + (x1, y1), (x1, y0)] + + return vertices + + def _split_bed_to_blocks(self, bed): + """ + Split a bed entry into blocks to plot + :param bed: a namedtuple with at least 6 fields + :return: a list of tuple (start, end, type) with type in ['UTR', 'coding'] + """ + if self.bed_type != 'bed12': + # No thick_start, thick_end, block_count: + return [(bed.start, bed.end, 'coding')] + + # get start, end of all the blocks + positions = [] + for idx in range(0, bed.block_count): + # x0 and x1 are the start/end of the current block + x0 = bed.start + bed.block_starts[idx] + x1 = x0 + bed.block_sizes[idx] + # We deal with the special case where + # there is no coding independently + if bed.thick_start == bed.thick_end: + positions.append((x0, x1, 'UTR')) + continue + # If the beginning of the coding region + # is withing the current block + if x0 < bed.thick_start < x1: + # What is before is UTR + positions.append((x0, bed.thick_start, 'UTR')) + # The start of the interval is updated + x0 = bed.thick_start + + # If the end of the coding region + # is withing the current block + if x0 < bed.thick_end < x1: + # What is before is coding + positions.append((x0, bed.thick_end, 'coding')) + # The start of the interval is updated + x0 = bed.thick_end + + if x1 < bed.thick_start or x0 >= bed.thick_end: + type = 'UTR' + else: + type = 'coding' + + positions.append((x0, x1, type)) + + return positions + + def draw_gene_with_introns(self, ax, bed, ypos, rgb, edgecolor, linewidth): + """ + draws a gene like in UCSC + Except that for the moment no arrow are plotted + on the coding part + """ + + if bed.block_count == 0 and bed.thick_start == bed.start and bed.thick_end == bed.end: + self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor, linewidth) + return + + # draw 'backbone', a line from the start until the end of the gene + ax.plot([bed.start, bed.end], [ypos + 1 / 2, ypos + 1 / 2], 'black', linewidth=linewidth, zorder=-1) + + for idx in range(0, bed.block_count): + x0 = bed.start + bed.block_starts[idx] + x1 = x0 + bed.block_sizes[idx] + if x1 < bed.thick_start or x0 > bed.thick_end or \ + bed.thick_start == bed.thick_end: + y0 = ypos + 1 / 4 + y1 = ypos + 3 / 4 + else: + y0 = ypos + y1 = ypos + 1 + + if x0 < bed.thick_start < x1 and x0 < bed.thick_end < x1: + vertices = ([(x0, ypos + 1 / 4), + (x0, ypos + 3 / 4), + (bed.thick_start, ypos + 3 / 4), + (bed.thick_start, ypos + 1), + (bed.thick_end, ypos + 1), + (bed.thick_end, ypos + 3 / 4), + (x1, ypos + 3 / 4), + (x1, ypos + 1 / 4), + (bed.thick_end, ypos + 1 / 4), + (bed.thick_end, ypos), + (bed.thick_start, ypos), + (bed.thick_start, ypos + 1 / 4)]) + elif x0 < bed.thick_start < x1: + vertices = ([(x0, ypos + 1 / 4), + (x0, ypos + 3 / 4), + (bed.thick_start, ypos + 3 / 4), + (bed.thick_start, ypos + 1), + (x1, ypos + 1), + (x1, ypos), + (bed.thick_start, ypos), + (bed.thick_start, ypos + 1 / 4)]) + elif x0 < bed.thick_end < x1: + vertices = ([(x0, ypos), + (x0, ypos + 1), + (bed.thick_end, ypos + 1), + (bed.thick_end, ypos + 3 / 4), + (x1, ypos + 3 / 4), + (x1, ypos + 1 / 4), + (bed.thick_end, ypos + 1 / 4), + (bed.thick_end, ypos)]) + else: + vertices = ([(x0, y0), (x0, y1), (x1, y1), (x1, y0)]) + + ax.add_patch(Polygon(vertices, closed=True, fill=True, + linewidth=linewidth, + edgecolor='none', + facecolor=rgb)) + + if idx < bed.block_count - 1: + # plot small arrows over the back bone + intron_length = bed.block_starts[idx + 1] - (bed.block_starts[idx] + bed.block_sizes[idx]) + arrow_interval = self.properties['arrow_interval'] + if intron_length > self.small_relative: + intron_center = x1 + int(intron_length) / 2 + pos = np.arange(x1 + 1 * self.small_relative, + x1 + intron_length + self.small_relative, + int(arrow_interval * self.small_relative)) + # center them + pos = pos + intron_center - pos.mean() + # plot them + for xpos in pos: + self._plot_small_arrow(ax, xpos, ypos, bed.strand) + + def draw_gene_tssarrow_style(self, ax, bed, ypos, rgb, linewidth): + """ + Draw genes like this: + --> + | + ----------- ^ --- + | | | | + ----------- --- + """ + # get start, end of all the blocks + positions = self._split_bed_to_blocks(bed) + + y_bottom = ypos + 1 + y_top_intron = ypos + 1 / 4 + + if bed.strand in ["+", "-"]: + if self.properties['arrow_length'] is None: + arrow_length = 10 * self.small_relative + else: + arrow_length = self.properties['arrow_length'] + y_arrow = ypos + 1 / 8 + head_width = 1 / 4 + head_length = self.small_relative * 3 + # plot the arrow to indicate tss + if bed.strand == "+": + x = bed.start + dx = arrow_length + else: + x = bed.end + dx = - arrow_length + # First plot the vertical line: + ax.add_line(Line2D((x, x), (y_bottom, y_arrow), color=rgb, linewidth=linewidth)) + # Then the arrow + ax.arrow(x, y_arrow, dx, 0, overhang=1, width=0, + head_width=head_width, + head_length=head_length, + length_includes_head=True, + color=rgb, linewidth=linewidth) + + # plot all blocks as rectangles like in the flybase mode but + # with half the height and no border + # as well as the junction between exons: + last_corner = None + for start_pos, end_pos, _type in positions: + if _type == 'UTR': + _rgb = self.get_rgb(bed, param='color_utr', default=rgb) + y0 = y_bottom - 1 / 2 * (1 - self.properties['height_utr']) / 2 + height = 1 / 2 * self.properties['height_utr'] + else: + _rgb = rgb + y0 = y_bottom + height = 1 / 2 + + vertices = [(start_pos, y0), (start_pos, y0 - height), + (end_pos, y0 - height), (end_pos, y0)] + + ax.add_patch(Polygon(vertices, closed=True, fill=True, + edgecolor="none", + facecolor=_rgb, + linewidth=linewidth)) + if last_corner is not None: + if last_corner[0] < start_pos: + xdata = (last_corner[0], (last_corner[0] + start_pos) / 2, + start_pos) + ydata = (last_corner[1], y_top_intron, + y0 - height) + ax.add_line(Line2D(xdata, ydata, color=last_corner[2], + linewidth=linewidth)) + + last_corner = (end_pos, y0 - height, _rgb) + + def _plot_small_arrow(self, ax, xpos, ypos, strand): + """ + Draws a broken line with 2 parts: + For strand = +: > For strand = -: < + :param xpos: + :param ypos: + :param strand: + : + :return: None + """ + if strand == '.': + return + if strand == '+': + xdata = [xpos - self.small_relative / 4, + xpos + self.small_relative / 4, + xpos - self.small_relative / 4] + else: + xdata = [xpos + self.small_relative / 4, + xpos - self.small_relative / 4, + xpos + self.small_relative / 4] + ydata = [ypos + 1 / 4, + ypos + 1 / 2, + ypos + 3 / 4] + ax.add_line(Line2D(xdata, ydata, color='black', linewidth=self.properties['line_width'])) diff --git a/pygenometracks/tracks/BedGraphLikeTrack.py b/pygenometracks/tracks/BedGraphLikeTrack.py new file mode 100644 index 00000000..cced3d3b --- /dev/null +++ b/pygenometracks/tracks/BedGraphLikeTrack.py @@ -0,0 +1,144 @@ +from . GenomeTrack import GenomeTrack +from .. utilities import file_to_intervaltree, change_chrom_names +import numpy as np +import pysam + +DEFAULT_BEDGRAPH_COLOR = '#a6cee3' + + +class BedGraphLikeTrack(GenomeTrack): + SUPPORTED_ENDINGS = [] + TRACK_TYPE = None + OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + DEFAULTS_PROPERTIES = {} + NECESSARY_PROPERTIES = [] + SYNONYMOUS_PROPERTIES = {} + POSSIBLE_PROPERTIES = {} + BOOLEAN_PROPERTIES = [] + STRING_PROPERTIES = [] + FLOAT_PROPERTIES = {} + INTEGER_PROPERTIES = {} + + def __init__(self, properties_dict): + GenomeTrack.__init__(self, properties_dict) + self.load_file() + + def load_file(self): + self.tbx = None + # try to load a tabix file is available + try: + # from the tabix file is not possible to know the + # global min and max + self.tbx = pysam.TabixFile(self.properties['file']) + except IOError: + # load the file as an interval tree + self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'], + self.properties['region']) + + self.num_fields = None + + def _get_row_data(self, row, tbx_var='self.tbx'): + """ + Returns the chrom, start, end and fields from either a tabix or a + interval tree. + Args: + row: if tabix, the row comes from self.tbx.fetch otherwise + comes from sorted(interval_tree[chrom] ... + + Returns: + start, end, fields where values is a list + + """ + tbx = eval(tbx_var) + if tbx is not None: + fields = row.split("\t") + values = fields[3:] + start = int(fields[1]) + end = int(fields[2]) + + else: + values = row.data + start = row.begin + end = row.end + + # set the num_fields value + # it is expected that the number of fields per row + # is equal. This value is used for regions not covered + # in the file and that should be represented as nans + if self.num_fields is None: + self.num_fields = len(values) + return start, end, values + + def get_scores(self, chrom_region, start_region, end_region, + return_nans=True, tbx_var='self.tbx', inttree_var='self.interval_tree'): + """ + Retrieves the score (or scores or whatever fields are in a bedgraph like file) and the positions + for a given region. + In case there is no item in the region. It returns [], [] + Args: + chrom_region: + start_region: + end_region: + Returns: + tuple: + scores_list, post_list + """ + score_list = [] + pos_list = [] + tbx = eval(tbx_var) + if tbx is not None: + if chrom_region not in tbx.contigs: + chrom_region_before = chrom_region + chrom_region = change_chrom_names(chrom_region) + if chrom_region not in tbx.contigs: + self.log.warning("*Warning*\nNeither " + + chrom_region_before + " nor " + + chrom_region + " exists as a " + "chromosome name inside the bedgraph " + "file. This will generate an empty " + "track!!\n") + return score_list, pos_list + + chrom_region = self.check_chrom_str_bytes(tbx.contigs, + chrom_region) + iterator = tbx.fetch(chrom_region, start_region, end_region) + + else: + inttree = eval(inttree_var) + if chrom_region not in list(inttree): + chrom_region_before = chrom_region + chrom_region = change_chrom_names(chrom_region) + if chrom_region not in list(inttree): + self.log.warning("*Warning*\nNo interval was found when " + "overlapping with both " + f"{chrom_region_before}:{start_region}-{end_region}" + f" and {chrom_region}:{start_region}-{end_region}" + " inside the bedgraph file. " + "This will generate an empty " + "track!!\n") + return score_list, pos_list + chrom_region = self.check_chrom_str_bytes(inttree, chrom_region) + iterator = iter(sorted(inttree[chrom_region][start_region - 10000:end_region + 10000])) + + prev_end = start_region + for row in iterator: + start, end, values = self._get_row_data(row, tbx_var) + # if the region is not consecutive with respect to the previous + # nan values are added. + if return_nans and prev_end < start: + score_list.append(np.repeat(np.nan, self.num_fields)) + pos_list.append((prev_end, start)) + prev_end = end + score_list.append(values) + pos_list.append((start, end)) + + # Add a last value if needed: + if prev_end < end_region and return_nans: + score_list.append(np.repeat(np.nan, self.num_fields)) + pos_list.append((prev_end, end_region)) + + return score_list, pos_list + + def __del__(self): + if self.tbx is not None: + self.tbx.close() diff --git a/pygenometracks/tracks/BedGraphMatrixTrack.py b/pygenometracks/tracks/BedGraphMatrixTrack.py index ba214593..86174a35 100644 --- a/pygenometracks/tracks/BedGraphMatrixTrack.py +++ b/pygenometracks/tracks/BedGraphMatrixTrack.py @@ -1,5 +1,4 @@ -from . BedGraphTrack import BedGraphTrack -from . GenomeTrack import GenomeTrack +from . BedGraphLikeTrack import BedGraphLikeTrack import numpy as np import matplotlib.pyplot as plt from matplotlib import cm @@ -7,10 +6,10 @@ DEFAULT_BEDGRAPHMATRIX_COLORMAP = 'viridis' -class BedGraphMatrixTrack(BedGraphTrack): +class BedGraphMatrixTrack(BedGraphLikeTrack): SUPPORTED_ENDINGS = ['.bm', '.bm.gz', '.bedgraphmatrix', '.bm.bgz'] TRACK_TYPE = 'bedgraph_matrix' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" + OPTIONS_TXT = BedGraphLikeTrack.OPTIONS_TXT + f""" # a bedgraph matrix file is like a bedgraph, except that per bin there # are more than one value separated by tab: E.g. # This file type is produced by HiCExplorer tool hicFindTads and contains @@ -72,11 +71,6 @@ class BedGraphMatrixTrack(BedGraphTrack): INTEGER_PROPERTIES = {} # The color cannot be set for the moment - def __init__(self, properties_dict): - GenomeTrack.__init__(self, properties_dict) - - self.load_file() - def set_properties_defaults(self): # To remove in next 1.0 if 'type' not in self.properties: @@ -86,7 +80,7 @@ def set_properties_defaults(self): " the default type is matrix but in the" " next version it will be lines.\n") # End to remove - GenomeTrack.set_properties_defaults(self) + BedGraphLikeTrack.set_properties_defaults(self) if self.properties['type'] == 'matrix': self.process_color('colormap', colormap_possible=True, colormap_only=True, @@ -152,7 +146,7 @@ def plot(self, ax, chrom_region, start_region, end_region): def plot_y_axis(self, ax, plot_axis): if self.properties['type'] == 'lines': - GenomeTrack.plot_y_axis(self, ax, plot_axis) + BedGraphLikeTrack.plot_y_axis(self, ax, plot_axis) else: try: cobar = plt.colorbar(self.img, ax=ax, fraction=0.95) @@ -175,7 +169,3 @@ def plot_y_axis(self, ax, plot_axis): # move it a bit inside to avoid overlapping # with other labels labels[idx].set_verticalalignment('top') - - def __del__(self): - if self.tbx is not None: - self.tbx.close() diff --git a/pygenometracks/tracks/BedGraphTrack.py b/pygenometracks/tracks/BedGraphTrack.py index 46b5e496..bd354fa9 100644 --- a/pygenometracks/tracks/BedGraphTrack.py +++ b/pygenometracks/tracks/BedGraphTrack.py @@ -1,5 +1,5 @@ -from . GenomeTrack import GenomeTrack -from .. utilities import file_to_intervaltree, plot_coverage, InputError, transform, change_chrom_names +from . BedGraphLikeTrack import BedGraphLikeTrack +from .. utilities import file_to_intervaltree, plot_coverage, InputError, transform import numpy as np import pyBigWig import tempfile @@ -9,13 +9,13 @@ DEFAULT_BEDGRAPH_COLOR = '#a6cee3' -class BedGraphTrack(GenomeTrack): +class BedGraphTrack(BedGraphLikeTrack): SUPPORTED_ENDINGS = ['.bg', '.bg.gz', '.bg.bgz', '.bedgraph', '.bedgraph.gz', '.bedgraph.bgz', '.bedGraph', '.bedGraph.gz', '.bedGraph.bgz', '.bdg', '.bdg.gz', '.bdg.bgz'] TRACK_TYPE = 'bedgraph' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" + OPTIONS_TXT = BedGraphLikeTrack.OPTIONS_TXT + f""" color = green # To use a different color for negative values #negative_color = red @@ -128,9 +128,8 @@ class BedGraphTrack(GenomeTrack): # negative_color can only be a color or None def __init__(self, properties_dict): - super(BedGraphTrack, self).__init__(properties_dict) self.tbx2 = None - self.load_file() + BedGraphLikeTrack.__init__(self, properties_dict) self.interval_tree2 = None @@ -150,8 +149,8 @@ def __init__(self, properties_dict): self.interval_tree2, __, __ = file_to_intervaltree(self.properties['second_file']) def set_properties_defaults(self): - super(BedGraphTrack, self).set_properties_defaults() - super(BedGraphTrack, self).process_type_for_coverage_track() + BedGraphLikeTrack.set_properties_defaults(self) + BedGraphLikeTrack.process_type_for_coverage_track(self) self.process_color('color') if self.properties['negative_color'] is None: self.properties['negative_color'] = self.properties['color'] @@ -179,122 +178,6 @@ def set_properties_defaults(self): " It will be set as 'transformed'.\n") self.properties['y_axis_values'] = 'transformed' - def load_file(self): - self.tbx = None - # try to load a tabix file if available - try: - # from the tabix file is not possible to know the - # global min and max - self.tbx = pysam.TabixFile(self.properties['file']) - except IOError: - # load the file as an interval tree - self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'], - self.properties['region']) - - self.num_fields = None - - def _get_row_data(self, row, tbx_var='self.tbx'): - """ - Returns the chrom, start, end and fields from either a tabix or a - interval tree. - Args: - row: if tabix, the row comes from self.tbx.fetch otherwise - comes from sorted(interval_tree[chrom] ... - - Returns: - start, end, fields where values is a list - - """ - tbx = eval(tbx_var) - if tbx is not None: - fields = row.split("\t") - values = fields[3:] - start = int(fields[1]) - end = int(fields[2]) - - else: - values = row.data - start = row.begin - end = row.end - - # set the num_fields value - # it is expected that the number of fields per row - # is equal. This value is used for regions not covered - # in the file and that should be represented as nans - if self.num_fields is None: - self.num_fields = len(values) - return start, end, values - - def get_scores(self, chrom_region, start_region, end_region, - return_nans=True, tbx_var='self.tbx', inttree_var='self.interval_tree'): - """ - Retrieves the score (or scores or whatever fields are in a bedgraph like file) and the positions - for a given region. If return_nans is True the pos_list goes until at least end_region. - In case there is no item in the region. It returns [], [] - Args: - chrom_region: - start_region: - end_region: - Returns: - tuple: - scores_list, pos_list - """ - score_list = [] - pos_list = [] - tbx = eval(tbx_var) - if tbx is not None: - if chrom_region not in tbx.contigs: - chrom_region_before = chrom_region - chrom_region = change_chrom_names(chrom_region) - if chrom_region not in tbx.contigs: - self.log.warning("*Warning*\nNeither " - + chrom_region_before + " nor " - + chrom_region + " exists as a " - "chromosome name inside the bedgraph " - "file. This will generate an empty " - "track!!\n") - return score_list, pos_list - - chrom_region = self.check_chrom_str_bytes(tbx.contigs, - chrom_region) - iterator = tbx.fetch(chrom_region, start_region, end_region) - - else: - inttree = eval(inttree_var) - if chrom_region not in list(inttree): - chrom_region_before = chrom_region - chrom_region = change_chrom_names(chrom_region) - if chrom_region not in list(inttree): - self.log.warning("*Warning*\nNo interval was found when " - "overlapping with both " - f"{chrom_region_before}:{start_region}-{end_region}" - f" and {chrom_region}:{start_region}-{end_region}" - " inside the bedgraph file. " - "This will generate an empty " - "track!!\n") - return score_list, pos_list - chrom_region = self.check_chrom_str_bytes(inttree, chrom_region) - iterator = iter(sorted(inttree[chrom_region][start_region - 10000:end_region + 10000])) - - prev_end = start_region - for row in iterator: - start, end, values = self._get_row_data(row, tbx_var) - # if the region is not consecutive with respect to the previous - # nan values are added. - if return_nans and prev_end < start: - score_list.append(np.repeat(np.nan, self.num_fields)) - pos_list.append((prev_end, start)) - prev_end = end - score_list.append(values) - pos_list.append((start, end)) - - # Add a last value if needed: - if prev_end < end_region and return_nans: - score_list.append(np.repeat(np.nan, self.num_fields)) - pos_list.append((prev_end, end_region)) - - return score_list, pos_list - def plot(self, ax, chrom_region, start_region, end_region): score_list, pos_list = self.get_scores(chrom_region, start_region, end_region) if pos_list == []: @@ -449,11 +332,11 @@ def get_values_as_bdg(self, score_list, pos_list): return score_list, x_values def plot_y_axis(self, ax, plot_axis): - super(BedGraphTrack, self).plot_y_axis(ax, plot_axis, - self.properties['transform'], - self.properties['log_pseudocount'], - self.properties['y_axis_values'], - self.properties['grid']) + BedGraphLikeTrack.plot_y_axis(self, ax, plot_axis, + self.properties['transform'], + self.properties['log_pseudocount'], + self.properties['y_axis_values'], + self.properties['grid']) def __del__(self): if self.tbx is not None: diff --git a/pygenometracks/tracks/BedLikeTrack.py b/pygenometracks/tracks/BedLikeTrack.py new file mode 100644 index 00000000..5e54fcc2 --- /dev/null +++ b/pygenometracks/tracks/BedLikeTrack.py @@ -0,0 +1,259 @@ +from . GenomeTrack import GenomeTrack +from .. readBed import ReadBed +# To remove next 1.0 +from .. readGtf import ReadGtf +# End to remove +from .. utilities import opener, count_lines, temp_file_from_intersect, TextWrapAxis +import matplotlib +from matplotlib.patches import Polygon +from intervaltree import IntervalTree, Interval +import numpy as np +from tqdm import tqdm + +DEFAULT_BED_COLOR = '#1f78b4' +AROUND_REGION = 100000 + + +class BedLikeTrack(GenomeTrack): + SUPPORTED_ENDINGS = [] + TRACK_TYPE = None + OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + """ +# optional: line_width +#line_width = 0.5 +# optional, default is black. To remove the border, simply set 'border_color' to none +#border_color = black +""" + + DEFAULTS_PROPERTIES = {'orientation': None, + 'color': DEFAULT_BED_COLOR, + 'border_color': 'black', + 'line_width': 0.5, + # To remove in next 1.0 + 'prefered_name': 'transcript_name', + 'merge_transcripts': False, + # end to remove + 'region': None # Cannot be set manually but is set by tracksClass + } + NECESSARY_PROPERTIES = ['file'] + SYNONYMOUS_PROPERTIES = {} + POSSIBLE_PROPERTIES = {'orientation': [None, 'inverted']} + # To remove in next 1.0 + BOOLEAN_PROPERTIES = ['merge_transcripts'] + # And replace by: + # BOOLEAN_PROPERTIES = [] + STRING_PROPERTIES = ['file', 'file_type', + 'overlay_previous', 'orientation', + 'title', 'color', 'border_color', + # To remove in next 1.0 + 'prefered_name'] + FLOAT_PROPERTIES = {'line_width': [0, np.inf], + 'height': [0, np.inf]} + INTEGER_PROPERTIES = {} + + def __init__(self, *args, **kwarg): + GenomeTrack.__init__(self, *args, **kwarg) + self.bed_type = None # once the bed file is processed, + # this is bed3, bed4, bed5, bed6, bed8, bed9 or bed12 + self.interval_tree = {} # interval tree of the bed regions + self.interval_tree, min_score, max_score = self.process_bed(self.properties['region']) + if self.colormap is not None: + if self.properties.get('min_value', None) is not None: + min_score = self.properties['min_value'] + if self.properties.get('max_value', None) is not None: + max_score = self.properties['max_value'] + + norm = matplotlib.colors.Normalize(vmin=min_score, + vmax=max_score) + + cmap = matplotlib.cm.get_cmap(self.colormap) + self.colormap = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap) + + def set_properties_defaults(self): + super(BedLikeTrack, self).set_properties_defaults() + self.colormap = None + self.parametersUsingColormap = [] + # check if the color given is a color map + is_colormap = self.process_color('color', colormap_possible=True, + bed_rgb_possible=True, + default_value_is_colormap=False) + if is_colormap: + self.colormap = self.properties['color'] + self.parametersUsingColormap.append('color') + + # check if border_color and color_utr are colors + # if they are part of self.properties + # (for example, TADsTracks do not have color_utr) + for param in [p for p in ['border_color', 'color_utr'] + if p in self.properties]: + is_colormap = self.process_color(param, colormap_possible=True, + bed_rgb_possible=True) + if is_colormap: + if self.colormap is None: + self.colormap = self.properties[param] + self.parametersUsingColormap.append(param) + else: + if self.colormap == self.properties[param]: + self.parametersUsingColormap.append(param) + else: + self.log.warning("*WARNING* section " + f"{self.properties['section_name']}: " + f"{param} was set to " + f"{self.properties[param]}, but " + f"{self.parametersUsingColormap[0]}" + f" was set to {self.colormap}. " + "It is not possible to have multiple" + f" colormap. {param} set to " + f"{self.DEFAULTS_PROPERTIES[param]}.\n") + self.properties[param] = self.DEFAULTS_PROPERTIES[param] + + def get_bed_handler(self, plot_regions=None): + if not self.properties.get('global_max_row', False): + # I do the intersection: + file_to_open = temp_file_from_intersect(self.properties['file'], + plot_regions, AROUND_REGION) + else: + file_to_open = self.properties['file'] + # To remove in next 1.0 + if self.properties['file'].endswith('gtf') or \ + self.properties['file'].endswith('gtf.gz'): + self.log.warning("Deprecation Warning: " + f"In section {self.properties['section_name']}," + f" file_type was set to {self.TRACK_TYPE}" + " whereas it is a gtf file. In the future" + " only bed files will be accepted, please" + " use file_type = gtf.\n") + bed_file_h = ReadGtf(file_to_open, + self.properties['prefered_name'], + self.properties['merge_transcripts']) + total_length = bed_file_h.length + else: + # end of remove + total_length = count_lines(opener(file_to_open), + asBed=True) + bed_file_h = ReadBed(opener(file_to_open)) + + return(bed_file_h, total_length) + + def process_bed(self, plot_regions=None): + + bed_file_h, total_length = self.get_bed_handler(plot_regions) + self.bed_type = bed_file_h.file_type + + if self.properties['color'] == 'bed_rgb' and \ + self.bed_type not in ['bed12', 'bed9']: + self.log.warning("*WARNING* Color set to 'bed_rgb', " + "but bed file does not have the rgb field. " + f"The color has been set to {DEFAULT_BED_COLOR}.\n") + self.properties['color'] = DEFAULT_BED_COLOR + + valid_intervals = 0 + interval_tree = {} + + max_score = float('-inf') + min_score = float('inf') + for bed in tqdm(bed_file_h, total=total_length): + if bed.score < min_score: + min_score = bed.score + if bed.score > max_score: + max_score = bed.score + + if bed.chromosome not in interval_tree: + interval_tree[bed.chromosome] = IntervalTree() + + interval_tree[bed.chromosome].add(Interval(bed.start, + bed.end, bed)) + valid_intervals += 1 + + try: + bed_file_h.file_handle.close() + except AttributeError: + pass + + if valid_intervals == 0: + self.log.warning("No valid intervals were found in file " + f"{self.properties['file']}.\n") + + return interval_tree, min_score, max_score + + def plot_triangles(self, ax, genes_overlap): + """ + Plots the boundaries as triangles in the given ax. + """ + ymax = 0.001 + valid_regions = 0 + for region in genes_overlap: + """ + ______ y2 + "" + " " + " " + " "_____ y1 + _____________________ + x1 x2 x3 + + """ + x1 = region.begin + x2 = x1 + float(region.end - region.begin) / 2 + x3 = region.end + y1 = 0 + y2 = (region.end - region.begin) + + rgb = self.get_rgb(region.data) + edgecolor = self.get_rgb(region.data, param='border_color', default=rgb) + + triangle = Polygon([[x1, y1], [x2, y2], [x3, y1]], closed=True, + facecolor=rgb, edgecolor=edgecolor, linewidth=self.properties['line_width']) + ax.add_artist(triangle) + valid_regions += 1 + + if y2 > ymax: + ymax = y2 + + if valid_regions == 0: + self.log.warning(f"No regions found for section {self.properties['section_name']}.\n") + + if self.properties['orientation'] == 'inverted': + ax.set_ylim(ymax, 0) + else: + ax.set_ylim(0, ymax) + + def plot_label(self, label_ax, h_align='left'): + if h_align == 'left': + x_pos = 0.05 + elif h_align == 'right': + x_pos = 1 + else: + x_pos = 0.5 + label_ax.add_artist( + TextWrapAxis(label_ax, x_pos, 1, + self.properties['title'], + horizontalalignment=h_align, size='large', + verticalalignment='top', + transform=label_ax.transAxes, + wrap=True)) + + def get_rgb(self, bed, param='color', default=DEFAULT_BED_COLOR): + """ + get the rgb value for the bed and the param given: + :param bed: + :param param: + :param default: the default value if it fails + :return: color + """ + rgb = self.properties[param] + + if self.colormap is not None and param in self.parametersUsingColormap: + # translate value field (in the example above is 0 or 0.2686...) + # into a color + rgb = self.colormap.to_rgba(bed.score) + elif self.properties[param] == 'bed_rgb': + # if rgb is set in the bed line, this overrides the previously + # defined colormap + if self.bed_type in ['bed9', 'bed12'] and len(bed.rgb) == 3: + try: + rgb = [float(x) / 255 for x in bed.rgb] + except IndexError: + rgb = default + else: + rgb = default + return rgb diff --git a/pygenometracks/tracks/BedTrack.py b/pygenometracks/tracks/BedTrack.py index f61d990f..4c7ace3f 100644 --- a/pygenometracks/tracks/BedTrack.py +++ b/pygenometracks/tracks/BedTrack.py @@ -1,35 +1,14 @@ -from . GenomeTrack import GenomeTrack -from .. readBed import ReadBed -# To remove next 1.0 -from .. readGtf import ReadGtf -# End to remove -from .. utilities import opener, get_length_w, count_lines, temp_file_from_intersect, change_chrom_names -import matplotlib -from matplotlib import font_manager -from matplotlib.patches import Rectangle, Polygon -from matplotlib.lines import Line2D -import matplotlib.pyplot as plt -from intervaltree import IntervalTree, Interval +from . BedGeneLikeTrack import BedGeneLikeTrack import numpy as np -from tqdm import tqdm -DEFAULT_BED_COLOR = '#1f78b4' -DISPLAY_BED_VALID = ['collapsed', 'triangles', 'interleaved', 'stacked'] -DISPLAY_BED_SYNONYMOUS = {'interlaced': 'interleaved', 'domain': 'interleaved'} -DEFAULT_DISPLAY_BED = 'stacked' -AROUND_REGION = 100000 - -class BedTrack(GenomeTrack): +class BedTrack(BedGeneLikeTrack): SUPPORTED_ENDINGS = ['bed', 'bed3', 'bed4', 'bed5', 'bed6', 'bed8', 'bed9', 'bed12', 'bed.gz', 'bed3.gz', 'bed4.gz', 'bed5.gz', 'bed6.gz', 'bed9.gz', 'bed12.gz'] TRACK_TYPE = 'bed' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" -# If the bed file contains the exon -# structure (bed 12) then this is plotted. Otherwise -# a region **with direction** is plotted. + OPTIONS_TXT = BedGeneLikeTrack.OPTIONS_TXT + f""" # If the bed file contains a column for color (column 9), then this color can be used by # setting: #color = bed_rgb @@ -42,1016 +21,21 @@ class BedTrack(GenomeTrack): #max_value=100 # If the color is simply a color name, then this color is used and the score is not considered. color = darkblue -# whether printing the labels -labels = false -# optional: -# by default the labels are not printed if you have more than 60 features. -# to change it, just increase the value: -#max_labels = 60 -# optional: font size can be given to override the default size -fontsize = 10 -# optional: line_width -#line_width = 0.5 -# the display parameter defines how the bed file is plotted. -# Default is 'stacked' where regions are plotted on different lines so -# we can see all regions and all labels. -# The other options are ['collapsed', 'interleaved', 'triangles'] -# These options assume that the regions do not overlap. -# `collapsed`: The bed regions are plotted one after the other in one line. -# `interleaved`: The bed regions are plotted in two lines, first up, then down, then up etc. -# optional, default is black. To remove the border, simply set 'border_color' to none -# Not used in tssarrow style -#border_color = black -# style to plot the genes when the display is not triangles -#style = UCSC -#style = flybase -#style = tssarrow -# maximum number of gene rows to be plotted. This -# field is useful to limit large number of close genes -# to be printed over many rows. When several images want -# to be combined this must be set to get equal size -# otherwise, on each image the height of each gene changes -#gene_rows = 10 -# by default the ymax is the number of -# rows occupied by the genes in the region plotted. However, -# by setting this option, the global maximum is used instead. -# This is useful to combine images that are all consistent and -# have the same number of rows. -#global_max_row = true -# If you want to plot all labels inside the plotting region: -#all_labels_inside = true -# If you want to display the name of the gene which goes over the plotted -# region in the right margin put: -#labels_in_margin = true -# if you use UCSC style, you can set the relative distance between 2 arrows on introns -# default is 2 -#arrow_interval = 2 -# if you use tssarrow style, you can choose the length of the arrow in bp -# (default is 4% of the plotted region) -#arrow_length = 5000 -# if you use flybase or tssarrow style, you can choose the color of non-coding intervals: -#color_utr = grey -# as well as the proportion between their height and the one of coding -# (by default they are the same height): -#height_utr = 1 -# By default, for oriented intervals in flybase style, -# or bed files with less than 12 columns, the arrowhead is added -# outside of the interval. -# If you want that the tip of the arrow correspond to -# the extremity of the interval use: -# arrowhead_included = true # optional. If not given is guessed from the file ending. file_type = {TRACK_TYPE} - """ - - DEFAULTS_PROPERTIES = {'fontsize': 12, - 'orientation': None, - 'color': DEFAULT_BED_COLOR, - 'border_color': 'black', - 'labels': True, - 'style': 'flybase', - 'display': DEFAULT_DISPLAY_BED, - 'line_width': 0.5, - 'max_labels': 60, - # To remove in next 1.0 - 'prefered_name': 'transcript_name', - 'merge_transcripts': False, - # end to remove - 'global_max_row': False, - 'gene_rows': None, - 'max_value': None, - 'min_value': None, - 'arrow_interval': 2, - 'arrowhead_included': False, - 'color_utr': 'grey', - 'height_utr': 1, - 'region': None, # Cannot be set manually but is set by tracksClass - 'arrow_length': None, - 'all_labels_inside': False, - 'labels_in_margin': False} - NECESSARY_PROPERTIES = ['file'] - SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None}, - 'min_value': {'auto': None}, - 'display': DISPLAY_BED_SYNONYMOUS} - POSSIBLE_PROPERTIES = {'orientation': [None, 'inverted'], - 'style': ['flybase', 'UCSC', 'tssarrow'], - 'display': DISPLAY_BED_VALID} - BOOLEAN_PROPERTIES = ['labels', 'global_max_row', - 'arrowhead_included', 'all_labels_inside', - 'labels_in_margin', - # To remove in next 1.0 - 'merge_transcripts'] - STRING_PROPERTIES = ['file', 'file_type', - 'overlay_previous', 'orientation', - 'title', 'style', 'color', 'border_color', - 'color_utr', 'display', - # To remove in next 1.0 - 'prefered_name'] - FLOAT_PROPERTIES = {'max_value': [- np.inf, np.inf], - 'min_value': [- np.inf, np.inf], - 'fontsize': [0, np.inf], - 'line_width': [0, np.inf], - 'height': [0, np.inf], - 'height_utr': [0, 1]} - INTEGER_PROPERTIES = {'gene_rows': [0, np.inf], - 'max_labels': [0, np.inf], - 'arrow_interval': [1, np.inf], - 'arrow_length': [0, np.inf]} - - def __init__(self, *args, **kwarg): - super(BedTrack, self).__init__(*args, **kwarg) - self.bed_type = None # once the bed file is processed, - # this is bed3, bed4, bed5, bed6, bed8, bed9 or bed12 - self.len_w = None # this is the length of the letter 'w' given the font size - self.interval_tree = {} # interval tree of the bed regions - self.interval_tree, min_score, max_score = self.process_bed(self.properties['region']) - if self.colormap is not None: - if self.properties['min_value'] is not None: - min_score = self.properties['min_value'] - if self.properties['max_value'] is not None: - max_score = self.properties['max_value'] - - norm = matplotlib.colors.Normalize(vmin=min_score, - vmax=max_score) - - cmap = matplotlib.cm.get_cmap(self.colormap) - self.colormap = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap) - - def set_properties_defaults(self): - super(BedTrack, self).set_properties_defaults() - self.fp = font_manager.FontProperties(size=self.properties['fontsize']) - self.colormap = None - self.parametersUsingColormap = [] - # check if the color given is a color map - is_colormap = self.process_color('color', colormap_possible=True, - bed_rgb_possible=True, - default_value_is_colormap=False) - if is_colormap: - self.colormap = self.properties['color'] - self.parametersUsingColormap.append('color') - - # check if border_color and color_utr are colors - # if they are part of self.properties - # (for example, TADsTracks do not have color_utr) - for param in [p for p in ['border_color', 'color_utr'] - if p in self.properties]: - is_colormap = self.process_color(param, colormap_possible=True, - bed_rgb_possible=True) - if is_colormap: - if self.colormap is None: - self.colormap = self.properties[param] - self.parametersUsingColormap.append(param) - else: - if self.colormap == self.properties[param]: - self.parametersUsingColormap.append(param) - else: - self.log.warning("*WARNING* section " - f"{self.properties['section_name']}: " - f"{param} was set to " - f"{self.properties[param]}, but " - f"{self.parametersUsingColormap[0]}" - f" was set to {self.colormap}. " - "It is not possible to have multiple" - f" colormap. {param} set to " - f"{self.DEFAULTS_PROPERTIES[param]}.\n") - self.properties[param] = self.DEFAULTS_PROPERTIES[param] - - # to set the distance between rows - self.row_scale = 2.3 - - def get_bed_handler(self, plot_regions=None): - if not self.properties['global_max_row']: - # I do the intersection: - file_to_open = temp_file_from_intersect(self.properties['file'], - plot_regions, AROUND_REGION) - else: - file_to_open = self.properties['file'] - # To remove in next 1.0 - if self.properties['file'].endswith('gtf') or \ - self.properties['file'].endswith('gtf.gz'): - self.log.warning("Deprecation Warning: " - f"In section {self.properties['section_name']}," - f" file_type was set to {self.TRACK_TYPE}" - " whereas it is a gtf file. In the future" - " only bed files will be accepted, please" - " use file_type = gtf.\n") - bed_file_h = ReadGtf(file_to_open, - self.properties['prefered_name'], - self.properties['merge_transcripts']) - total_length = bed_file_h.length - else: - # end of remove - total_length = count_lines(opener(file_to_open), - asBed=True) - bed_file_h = ReadBed(opener(file_to_open)) - - return(bed_file_h, total_length) - - def process_bed(self, plot_regions=None): - - bed_file_h, total_length = self.get_bed_handler(plot_regions) - self.bed_type = bed_file_h.file_type - - if self.properties['color'] == 'bed_rgb' and \ - self.bed_type not in ['bed12', 'bed9']: - self.log.warning("*WARNING* Color set to 'bed_rgb', " - "but bed file does not have the rgb field. " - f"The color has been set to {DEFAULT_BED_COLOR}.\n") - self.properties['color'] = DEFAULT_BED_COLOR - - valid_intervals = 0 - interval_tree = {} - - max_score = float('-inf') - min_score = float('inf') - for bed in tqdm(bed_file_h, total=total_length): - if bed.score < min_score: - min_score = bed.score - if bed.score > max_score: - max_score = bed.score - - if bed.chromosome not in interval_tree: - interval_tree[bed.chromosome] = IntervalTree() - - interval_tree[bed.chromosome].add(Interval(bed.start, - bed.end, bed)) - valid_intervals += 1 - - try: - bed_file_h.file_handle.close() - except AttributeError: - pass - - if valid_intervals == 0: - self.log.warning("No valid intervals were found in file " - f"{self.properties['file']}.\n") - - return interval_tree, min_score, max_score - - def get_max_num_row(self, len_w, small_relative): - ''' Process the whole bed regions at the given figure length - and font size to - determine the maximum number of rows required. - :return: - ''' - - self.max_num_row = {} - for chrom in self.interval_tree: - row_last_position = [] # each entry in this list contains the end position - self.max_num_row[chrom] = 0 - for region in sorted(self.interval_tree[chrom][0:500000000]): - bed = region.data - if self.properties['labels']: - bed_extended_end = int(bed.end + (len(bed.name) * len_w)) - else: - bed_extended_end = (bed.end + 2 * small_relative) - - # get smallest free row - if len(row_last_position) == 0: - free_row = 0 - row_last_position.append(bed_extended_end) - else: - # get list of rows that are less than bed.start, then take the min - idx_list = [idx for idx, value in enumerate(row_last_position) if value < bed.start] - if len(idx_list): - free_row = min(idx_list) - row_last_position[free_row] = bed_extended_end - else: - free_row = len(row_last_position) - row_last_position.append(bed_extended_end) - - if free_row > self.max_num_row[bed.chromosome]: - self.max_num_row[bed.chromosome] = free_row - - self.log.debug(f"max number of rows set to {self.max_num_row}") - return self.max_num_row - - def get_y_pos(self, free_row): - """ - The y_pos is set such that regions to be plotted - do not overlap (stacked). To override this - the properties['collapsed'] needs to be set. - - The algorithm uses a interval tree (self.region_interval) - to check the overlaps - and a sort of coverage vector 'rows used' - to identify the row in which to plot - :return: int y position - """ - - # if the interleaved directive is given, - # ypos simply oscilates between 0 and 1 - if self.properties['display'] == 'interleaved': - ypos = 1 \ - if self.counter % 2 == 0 \ - else 0 - # if the collapsed directive is given - # ypos is always 0 - elif self.properties['display'] == 'collapsed': - ypos = 0 - # if it is stacked - # it will got the the free_row - else: - ypos = free_row * self.row_scale - return ypos - - def plot(self, ax, chrom_region, start_region, end_region): - if chrom_region not in self.interval_tree.keys(): - chrom_region_before = chrom_region - chrom_region = change_chrom_names(chrom_region) - if chrom_region not in self.interval_tree.keys(): - self.log.warning("*Warning*\nNo interval was found when " - "overlapping with both " - f"{chrom_region_before}:{start_region - AROUND_REGION}-{end_region + AROUND_REGION}" - f" and {chrom_region}:{start_region - AROUND_REGION}-{end_region + AROUND_REGION}" - " inside the bed file. " - "This will generate an empty track!!\n") - return - chrom_region = self.check_chrom_str_bytes(self.interval_tree, - chrom_region) - - genes_overlap = \ - sorted(self.interval_tree[chrom_region][start_region:end_region]) - - if self.properties['display'] == 'triangles': - self.plot_triangles(ax, genes_overlap) - else: - self.counter = 0 - self.small_relative = 0.004 * (end_region - start_region) - if self.properties['labels']: - self.len_w = get_length_w(ax.get_figure().get_figwidth(), - start_region, end_region, - self.properties['fontsize']) - else: - self.len_w = 1 - - if self.properties['global_max_row']: - self.get_max_num_row(self.len_w, self.small_relative) - - # do not print labels when too many intervals are visible. - if self.properties['labels'] and \ - len(genes_overlap) > self.properties['max_labels']: - self.properties['labels'] = False - - linewidth = self.properties['line_width'] - max_num_row_local = 1 - max_ypos = 0 - # check for the number of other intervals that overlap - # with the given interval - # 1 2 - # 012345678901234567890123456 - # 1========= 4========= - # 2========= - # 3============ - # - # for 1 row_last_position = [9] - # for 2 row_last_position = [9, 14] - # for 3 row_last_position = [9, 14, 19] - # for 4 row_last_position = [26, 14, 19] - - row_last_position = [] - # each entry in this list contains the end position - # of genomic interval. The list index is the row - # in which the genomic interval was plotted. - # Any new genomic interval that wants to be plotted, - # knows the row to use by finding the list index that - # is larger than its start - - # check for overlapping genes including - # label size (if plotted) - - if ax.get_xlim()[0] > ax.get_xlim()[1]: - genes_overlap = reversed(genes_overlap) - for region in genes_overlap: - """ - BED12 gene format with exon locations at the end - chrX 20850 23076 CG17636-RA 0 - 20850 23017 0 3 946,765,64, 0,1031,2162, - - BED9 - bed with rgb at end - chr2L 0 70000 ID_5 0.26864549832 . 0 70000 51,160,44 - - BED6 - bed without rgb - chr2L 0 70000 ID_5 0.26864549832 . - """ - self.counter += 1 - bed = region.data - - if ax.get_xlim()[0] < ax.get_xlim()[1]: - bed_left = bed.start - bed_right = bed.end - - def add_to_right(a, b): - return a + b - - def add_to_left(a, b): - return a - b - - def is_left_to(a, b): - return a < b - - def is_right_to(a, b): - return a > b - - else: - bed_left = bed.end - bed_right = bed.start - - def add_to_right(a, b): - return a - b - - def add_to_left(a, b): - return a + b - - def is_left_to(a, b): - return a > b - - def is_right_to(a, b): - return a < b - - if self.properties['labels']: - num_name_characters = len(bed.name) + 2 - # +2 to account for a space before and after the name - bed_extended_right = int(add_to_right(bed_right, (num_name_characters * self.len_w))) - else: - bed_extended_right = add_to_right(bed_right, 2 * self.small_relative) - - bed_extended_left = bed_left - # get smallest free row - if len(row_last_position) == 0: - free_row = 0 - row_last_position.append(bed_extended_right) - else: - # If all_labels_inside = True - # genes which goes over will have their labels inside - if self.properties['all_labels_inside'] and self.properties['labels'] \ - and is_right_to(bed_extended_right, ax.get_xlim()[1]): - bed_extended_left = int(add_to_left(bed_left, (num_name_characters * self.len_w))) - # Check that the start position is not outside: - if is_left_to(bed_extended_left, ax.get_xlim()[0]): - # If it would be outside, we use the default right label - bed_extended_left = bed_left - else: - # If we keep the label to the left, we update the right extended - bed_extended_right = add_to_right(bed_right, 2 * self.small_relative) - - # get list of rows that are left to bed_extended_left, then take the min - idx_list = [idx for idx, value in enumerate(row_last_position) - if is_left_to(value, bed_extended_left)] - if len(idx_list): - free_row = min(idx_list) - row_last_position[free_row] = bed_extended_right - else: - free_row = len(row_last_position) - row_last_position.append(bed_extended_right) - - rgb = self.get_rgb(bed) - edgecolor = self.get_rgb(bed, param='border_color', default=rgb) - - ypos = self.get_y_pos(free_row) - - # do not plot if the maximum interval rows to plot is reached - if self.properties['gene_rows'] is not None and \ - free_row >= self.properties['gene_rows']: - continue - - if free_row > max_num_row_local: - max_num_row_local = free_row - if ypos > max_ypos: - max_ypos = ypos - - if self.properties['style'] == 'tssarrow': - self.draw_gene_tssarrow_style(ax, bed, ypos, rgb, - linewidth) - elif self.bed_type == 'bed12': - if self.properties['style'] == 'flybase': - self.draw_gene_with_introns_flybase_style(ax, bed, ypos, - rgb, edgecolor, - linewidth) - else: - self.draw_gene_with_introns(ax, bed, ypos, rgb, edgecolor, - linewidth) - else: - self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor, linewidth) - - if not self.properties['labels']: - pass - elif bed_extended_left != bed_left: - # The label will be plotted before - ax.text(add_to_left(bed_left, self.small_relative), - ypos + (1 / 2), - bed.name, horizontalalignment='right', - verticalalignment='center', fontproperties=self.fp) - elif bed_right > start_region and bed_right < end_region: - ax.text(add_to_right(bed_right, self.small_relative), - ypos + 0.5, - bed.name, horizontalalignment='left', - verticalalignment='center', fontproperties=self.fp) - elif self.properties['labels_in_margin'] \ - and (bed_right == end_region or is_right_to(bed_right, end_region)): - ax.text(add_to_right(ax.get_xlim()[1], self.small_relative), - ypos + (1 / 2), - bed.name, horizontalalignment='left', - verticalalignment='center', fontproperties=self.fp) - - if self.counter == 0: - self.log.warning("*Warning* No intervals were found for file" - f" {self.properties['file']} in " - f"section '{self.properties['section_name']}'" - " for the interval plotted" - f" ({chrom_region}:{start_region}-{end_region}).\n") - - epsilon = 0.08 - ymax = - epsilon - - # We set ymin and ymax to have genes centered epsilon from the border - - if self.properties['global_max_row']: - max_ypos = self.max_num_row[chrom_region] * self.row_scale - - elif self.properties['gene_rows'] is not None: - max_ypos = self.properties['gene_rows'] * self.row_scale - - ymin = max_ypos + (1 + epsilon) - - self.log.debug(f"ylim {ymin},{ymax}") - # the axis is inverted (thus, ymax < ymin) - ax.set_ylim(ymin, ymax) - - if self.properties['display'] == 'interleaved': - ax.set_ylim(2 + epsilon, ymax) - elif self.properties['display'] == 'collapsed': - ax.set_ylim(1 + epsilon, ymax) - - def plot_label(self, label_ax, width_dpi, h_align='left'): - if h_align == 'left': - label_ax.text(0.05, 1, self.properties['title'], - horizontalalignment='left', size='large', - verticalalignment='top', - transform=label_ax.transAxes, - wrap=True) - elif h_align == 'right': - txt = label_ax.text(1, 1, self.properties['title'], - horizontalalignment='right', size='large', - verticalalignment='top', - transform=label_ax.transAxes, - wrap=True) - # To be able to wrap to the left: - txt._get_wrap_line_width = lambda: width_dpi - else: - txt = label_ax.text(0.5, 1, self.properties['title'], - horizontalalignment='center', size='large', - verticalalignment='top', - transform=label_ax.transAxes, - wrap=True) - # To be able to wrap to the left: - txt._get_wrap_line_width = lambda: width_dpi - - def plot_y_axis(self, ax, plot_axis): - if self.colormap is not None: - self.colormap.set_array([]) - - cobar = plt.colorbar(self.colormap, ax=ax, fraction=1, - orientation='vertical') - - cobar.solids.set_edgecolor("face") - cobar.ax.tick_params(labelsize='smaller') - cobar.ax.yaxis.set_ticks_position('left') - # adjust the labels of the colorbar - ticks = cobar.ax.get_yticks() - labels = cobar.ax.set_yticklabels(ticks.astype('float32')) - (vmin, vmax) = cobar.mappable.get_clim() - for idx in np.where(ticks == vmin)[0]: - # if the label is at the start of the colobar - # move it above avoid being cut or overlapping with other track - labels[idx].set_verticalalignment('bottom') - for idx in np.where(ticks == vmax)[0]: - # if the label is at the end of the colobar - # move it a bit inside to avoid overlapping - # with other labels - labels[idx].set_verticalalignment('top') - - def get_rgb(self, bed, param='color', default=DEFAULT_BED_COLOR): - """ - get the rgb value for the bed and the param given: - :param bed: - :param param: - :param default: the default value if it fails - :return: color - """ - rgb = self.properties[param] - - if self.colormap is not None and param in self.parametersUsingColormap: - # translate value field (in the example above is 0 or 0.2686...) - # into a color - rgb = self.colormap.to_rgba(bed.score) - elif self.properties[param] == 'bed_rgb': - # if rgb is set in the bed line, this overrides the previously - # defined colormap - if self.bed_type in ['bed9', 'bed12'] and len(bed.rgb) == 3: - try: - rgb = [float(x) / 255 for x in bed.rgb] - except IndexError: - rgb = default - else: - rgb = default - return rgb - - def draw_gene_simple(self, ax, bed, ypos, rgb, edgecolor, linewidth): - """ - draws an interval with direction (if given) - """ - - if bed.strand not in ['+', '-']: - ax.add_patch(Rectangle((bed.start, ypos), - bed.end - bed.start, 1, - edgecolor=edgecolor, facecolor=rgb, - linewidth=linewidth)) - else: - vertices = self._draw_arrow(bed.start, bed.end, bed.strand, ypos) - ax.add_patch(Polygon(vertices, closed=True, fill=True, - edgecolor=edgecolor, - facecolor=rgb, - linewidth=linewidth)) - - def draw_gene_with_introns_flybase_style(self, ax, bed, ypos, rgb, - edgecolor, linewidth): - """ - draws a gene like in flybase gbrowse. - """ - if bed.block_count == 0 and bed.thick_start == bed.start and \ - bed.thick_end == bed.end: - self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor, linewidth) - return - half_height = 1 / 2 - # draw 'backbone', a line from the start until the end of the gene - ax.plot([bed.start, bed.end], [ypos + half_height, ypos + half_height], - 'black', linewidth=linewidth, zorder=-1) - - # get start, end of all the blocks - positions = self._split_bed_to_blocks(bed) - - if bed.strand != '.': - # plot all blocks as rectangles except the last if the strand is + or - # the first is the strand is -, which are drawn as arrows. - if bed.strand == '-': - positions = positions[::-1] - - first_pos = positions.pop() - if first_pos[2] == 'UTR': - _rgb = self.get_rgb(bed, param='color_utr', default=rgb) - # The arrow will be centered on - # ypos + 1 / 2 - # The total height will be - # self.properties['height_utr'] - y0 = ypos + (1 - self.properties['height_utr']) / 2 - half_height = self.properties['height_utr'] / 2 - else: - _rgb = rgb - y0 = ypos - half_height = 1 / 2 - - vertices = self._draw_arrow(first_pos[0], first_pos[1], bed.strand, - y0, half_height) - - ax.add_patch(Polygon(vertices, closed=True, fill=True, - edgecolor=edgecolor, - facecolor=_rgb, - linewidth=linewidth)) - - for start_pos, end_pos, _type in positions: - if _type == 'UTR': - _rgb = self.get_rgb(bed, param='color_utr', default=rgb) - y0 = ypos + (1 - self.properties['height_utr']) / 2 - height = self.properties['height_utr'] - else: - _rgb = rgb - y0 = ypos - height = 1 - - vertices = [(start_pos, y0), (start_pos, y0 + height), - (end_pos, y0 + height), (end_pos, y0)] - - ax.add_patch(Polygon(vertices, closed=True, fill=True, - edgecolor=edgecolor, - facecolor=_rgb, - linewidth=linewidth)) - - def _draw_arrow(self, start, end, strand, ypos, half_height=None): - """ - Draws a filled arrow. - :param start: - :param end: - :param strand: - :param ypos: - :param half_height: - :return: None - """ - if half_height is None: - half_height = 1 / 2 - # The y values are common to both strands: - y0 = ypos - y1 = ypos + 2 * half_height - if strand == '+': - x0 = start - if self.properties['arrowhead_included']: - x1 = max(start, end - self.small_relative) - x2 = end - else: - x1 = end - x2 = end + self.small_relative - """ - The vertices correspond to 5 points along the path of a form like the following, - starting in the lower left corner and progressing in a clock wise manner. - - =================> - x0 x1 x2 - - """ - - vertices = [(x0, y0), (x0, y1), (x1, y1), - (x2, y0 + half_height), (x1, y0)] - - else: - if self.properties['arrowhead_included']: - x0 = min(end, start + self.small_relative) - xb = start - else: - x0 = start - xb = start - self.small_relative - x1 = end - """ - The vertices correspond to 5 points along the path of a form like the following, - starting in the lower left corner and progressing in a clock wise manner. - - <================= - xb x0 x1 - """ - vertices = [(x0, y0), (xb, y0 + half_height), (x0, y1), - (x1, y1), (x1, y0)] - - return vertices - - def _split_bed_to_blocks(self, bed): - """ - Split a bed entry into blocks to plot - :param bed: a namedtuple with at least 6 fields - :return: a list of tuple (start, end, type) with type in ['UTR', 'coding'] - """ - if self.bed_type != 'bed12': - # No thick_start, thick_end, block_count: - return [(bed.start, bed.end, 'coding')] - - # get start, end of all the blocks - positions = [] - for idx in range(0, bed.block_count): - # x0 and x1 are the start/end of the current block - x0 = bed.start + bed.block_starts[idx] - x1 = x0 + bed.block_sizes[idx] - # We deal with the special case where - # there is no coding independently - if bed.thick_start == bed.thick_end: - positions.append((x0, x1, 'UTR')) - continue - # If the beginning of the coding region - # is withing the current block - if x0 < bed.thick_start < x1: - # What is before is UTR - positions.append((x0, bed.thick_start, 'UTR')) - # The start of the interval is updated - x0 = bed.thick_start - - # If the end of the coding region - # is withing the current block - if x0 < bed.thick_end < x1: - # What is before is coding - positions.append((x0, bed.thick_end, 'coding')) - # The start of the interval is updated - x0 = bed.thick_end - - if x1 < bed.thick_start or x0 >= bed.thick_end: - type = 'UTR' - else: - type = 'coding' - - positions.append((x0, x1, type)) - - return positions - - def draw_gene_with_introns(self, ax, bed, ypos, rgb, edgecolor, linewidth): - """ - draws a gene like in UCSC - Except that for the moment no arrow are plotted - on the coding part - """ - - if bed.block_count == 0 and bed.thick_start == bed.start and bed.thick_end == bed.end: - self.draw_gene_simple(ax, bed, ypos, rgb, edgecolor, linewidth) - return - - # draw 'backbone', a line from the start until the end of the gene - ax.plot([bed.start, bed.end], [ypos + 1 / 2, ypos + 1 / 2], 'black', linewidth=linewidth, zorder=-1) - - for idx in range(0, bed.block_count): - x0 = bed.start + bed.block_starts[idx] - x1 = x0 + bed.block_sizes[idx] - if x1 < bed.thick_start or x0 > bed.thick_end or \ - bed.thick_start == bed.thick_end: - y0 = ypos + 1 / 4 - y1 = ypos + 3 / 4 - else: - y0 = ypos - y1 = ypos + 1 - - if x0 < bed.thick_start < x1 and x0 < bed.thick_end < x1: - vertices = ([(x0, ypos + 1 / 4), - (x0, ypos + 3 / 4), - (bed.thick_start, ypos + 3 / 4), - (bed.thick_start, ypos + 1), - (bed.thick_end, ypos + 1), - (bed.thick_end, ypos + 3 / 4), - (x1, ypos + 3 / 4), - (x1, ypos + 1 / 4), - (bed.thick_end, ypos + 1 / 4), - (bed.thick_end, ypos), - (bed.thick_start, ypos), - (bed.thick_start, ypos + 1 / 4)]) - elif x0 < bed.thick_start < x1: - vertices = ([(x0, ypos + 1 / 4), - (x0, ypos + 3 / 4), - (bed.thick_start, ypos + 3 / 4), - (bed.thick_start, ypos + 1), - (x1, ypos + 1), - (x1, ypos), - (bed.thick_start, ypos), - (bed.thick_start, ypos + 1 / 4)]) - elif x0 < bed.thick_end < x1: - vertices = ([(x0, ypos), - (x0, ypos + 1), - (bed.thick_end, ypos + 1), - (bed.thick_end, ypos + 3 / 4), - (x1, ypos + 3 / 4), - (x1, ypos + 1 / 4), - (bed.thick_end, ypos + 1 / 4), - (bed.thick_end, ypos)]) - else: - vertices = ([(x0, y0), (x0, y1), (x1, y1), (x1, y0)]) - - ax.add_patch(Polygon(vertices, closed=True, fill=True, - linewidth=linewidth, - edgecolor='none', - facecolor=rgb)) - - if idx < bed.block_count - 1: - # plot small arrows over the back bone - intron_length = bed.block_starts[idx + 1] - (bed.block_starts[idx] + bed.block_sizes[idx]) - arrow_interval = self.properties['arrow_interval'] - if intron_length > self.small_relative: - intron_center = x1 + int(intron_length) / 2 - pos = np.arange(x1 + 1 * self.small_relative, - x1 + intron_length + self.small_relative, - int(arrow_interval * self.small_relative)) - # center them - pos = pos + intron_center - pos.mean() - # plot them - for xpos in pos: - self._plot_small_arrow(ax, xpos, ypos, bed.strand) - - def draw_gene_tssarrow_style(self, ax, bed, ypos, rgb, linewidth): - """ - Draw genes like this: - --> - | - ----------- ^ --- - | | | | - ----------- --- - """ - # get start, end of all the blocks - positions = self._split_bed_to_blocks(bed) - - y_bottom = ypos + 1 - y_top_intron = ypos + 1 / 4 - - if bed.strand in ["+", "-"]: - if self.properties['arrow_length'] is None: - arrow_length = 10 * self.small_relative - else: - arrow_length = self.properties['arrow_length'] - y_arrow = ypos + 1 / 8 - head_width = 1 / 4 - head_length = self.small_relative * 3 - # plot the arrow to indicate tss - if bed.strand == "+": - x = bed.start - dx = arrow_length - else: - x = bed.end - dx = - arrow_length - # First plot the vertical line: - ax.add_line(Line2D((x, x), (y_bottom, y_arrow), color=rgb, linewidth=linewidth)) - # Then the arrow - ax.arrow(x, y_arrow, dx, 0, overhang=1, width=0, - head_width=head_width, - head_length=head_length, - length_includes_head=True, - color=rgb, linewidth=linewidth) - - # plot all blocks as rectangles like in the flybase mode but - # with half the height and no border - # as well as the junction between exons: - last_corner = None - for start_pos, end_pos, _type in positions: - if _type == 'UTR': - _rgb = self.get_rgb(bed, param='color_utr', default=rgb) - y0 = y_bottom - 1 / 2 * (1 - self.properties['height_utr']) / 2 - height = 1 / 2 * self.properties['height_utr'] - else: - _rgb = rgb - y0 = y_bottom - height = 1 / 2 - - vertices = [(start_pos, y0), (start_pos, y0 - height), - (end_pos, y0 - height), (end_pos, y0)] - - ax.add_patch(Polygon(vertices, closed=True, fill=True, - edgecolor="none", - facecolor=_rgb, - linewidth=linewidth)) - if last_corner is not None: - if last_corner[0] < start_pos: - xdata = (last_corner[0], (last_corner[0] + start_pos) / 2, - start_pos) - ydata = (last_corner[1], y_top_intron, - y0 - height) - ax.add_line(Line2D(xdata, ydata, color=last_corner[2], - linewidth=linewidth)) - - last_corner = (end_pos, y0 - height, _rgb) - - def plot_triangles(self, ax, genes_overlap): - """ - Plots the boundaries as triangles in the given ax. - """ - ymax = 0.001 - valid_regions = 0 - for region in genes_overlap: - """ - ______ y2 - "" - " " - " " - " "_____ y1 - _____________________ - x1 x2 x3 - - """ - x1 = region.begin - x2 = x1 + float(region.end - region.begin) / 2 - x3 = region.end - y1 = 0 - y2 = (region.end - region.begin) - - rgb = self.get_rgb(region.data) - edgecolor = self.get_rgb(region.data, param='border_color', default=rgb) - - triangle = Polygon([[x1, y1], [x2, y2], [x3, y1]], closed=True, - facecolor=rgb, edgecolor=edgecolor, linewidth=self.properties['line_width']) - ax.add_artist(triangle) - valid_regions += 1 - - if y2 > ymax: - ymax = y2 - - if valid_regions == 0: - self.log.warning(f"No regions found for section {self.properties['section_name']}.\n") - - if self.properties['orientation'] == 'inverted': - ax.set_ylim(ymax, 0) - else: - ax.set_ylim(0, ymax) - - def _plot_small_arrow(self, ax, xpos, ypos, strand): - """ - Draws a broken line with 2 parts: - For strand = +: > For strand = -: < - :param xpos: - :param ypos: - :param strand: - : - :return: None - """ - if strand == '.': - return - if strand == '+': - xdata = [xpos - self.small_relative / 4, - xpos + self.small_relative / 4, - xpos - self.small_relative / 4] - else: - xdata = [xpos + self.small_relative / 4, - xpos - self.small_relative / 4, - xpos + self.small_relative / 4] - ydata = [ypos + 1 / 4, - ypos + 1 / 2, - ypos + 3 / 4] - ax.add_line(Line2D(xdata, ydata, color='black', linewidth=self.properties['line_width'])) +""" + + DEFAULTS_PROPERTIES = dict({'max_value': None, + 'min_value': None}, + **BedGeneLikeTrack.DEFAULTS_PROPERTIES) + NECESSARY_PROPERTIES = BedGeneLikeTrack.NECESSARY_PROPERTIES + SYNONYMOUS_PROPERTIES = dict({'max_value': {'auto': None}, + 'min_value': {'auto': None}}, + **BedGeneLikeTrack.SYNONYMOUS_PROPERTIES) + POSSIBLE_PROPERTIES = BedGeneLikeTrack.POSSIBLE_PROPERTIES + BOOLEAN_PROPERTIES = BedGeneLikeTrack.BOOLEAN_PROPERTIES + STRING_PROPERTIES = BedGeneLikeTrack.STRING_PROPERTIES + FLOAT_PROPERTIES = dict({'max_value': [- np.inf, np.inf], + 'min_value': [- np.inf, np.inf]}, + **BedGeneLikeTrack.FLOAT_PROPERTIES) + INTEGER_PROPERTIES = BedGeneLikeTrack.INTEGER_PROPERTIES diff --git a/pygenometracks/tracks/EpilogosTrack.py b/pygenometracks/tracks/EpilogosTrack.py index 03984fcf..70272e8e 100644 --- a/pygenometracks/tracks/EpilogosTrack.py +++ b/pygenometracks/tracks/EpilogosTrack.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import division -from . BedGraphTrack import BedGraphTrack -from . GenomeTrack import GenomeTrack +from . BedGraphLikeTrack import BedGraphLikeTrack import json from matplotlib.patches import Rectangle from matplotlib.collections import PatchCollection @@ -9,7 +8,7 @@ import numpy as np -class EpilogosTrack(BedGraphTrack): +class EpilogosTrack(BedGraphLikeTrack): """ The data format for this type of track can be found at http://wiki.wubrowse.org/QuantitativeCategorySeries. @@ -22,7 +21,7 @@ class EpilogosTrack(BedGraphTrack): """ SUPPORTED_ENDINGS = ['.qcat', '.qcat.bgz'] TRACK_TYPE = 'epilogos' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" + OPTIONS_TXT = BedGraphLikeTrack.OPTIONS_TXT + f""" # The categories file should contain the color information for each category id # A categories file should look like: # {{ @@ -51,13 +50,8 @@ class EpilogosTrack(BedGraphTrack): FLOAT_PROPERTIES = {'height': [0, np.inf]} INTEGER_PROPERTIES = {} - def __init__(self, properties_dict): - GenomeTrack.__init__(self, properties_dict) - - self.load_file() - def set_properties_defaults(self): - GenomeTrack.set_properties_defaults(self) + BedGraphLikeTrack.set_properties_defaults(self) # load categories file if self.properties['categories_file'] is not None: with open(self.properties['categories_file']) as f: @@ -136,10 +130,3 @@ def plot(self, ax, chrom_region, start_region, end_region): if self.properties['orientation'] == 'inverted': ymin, ymax = ax.get_ylim() ax.set_ylim(ymax, ymin) - - def plot_y_axis(self, ax, plot_axis): - GenomeTrack.plot_y_axis(self, ax, plot_axis) - - def __del__(self): - if self.tbx is not None: - self.tbx.close() diff --git a/pygenometracks/tracks/GenomeTrack.py b/pygenometracks/tracks/GenomeTrack.py index 13591948..a5706949 100644 --- a/pygenometracks/tracks/GenomeTrack.py +++ b/pygenometracks/tracks/GenomeTrack.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from .. utilities import to_string, to_bytes +from .. utilities import to_string, to_bytes, TextWrapAxis import logging import numpy as np from matplotlib import colors as mc @@ -219,29 +219,20 @@ def untransform(value, transform, log_pseudocount): ax.set_xlim(0, 1) ax.patch.set_visible(False) - def plot_label(self, label_ax, width_dpi, h_align='left'): + def plot_label(self, label_ax, h_align='left'): if h_align == 'left': - label_ax.text(0.05, 0.5, self.properties['title'], - horizontalalignment='left', size='large', - verticalalignment='center', - transform=label_ax.transAxes, - wrap=True) + x_pos = 0.05 elif h_align == 'right': - txt = label_ax.text(1, 0.5, self.properties['title'], - horizontalalignment='right', size='large', - verticalalignment='center', - transform=label_ax.transAxes, - wrap=True) - # To be able to wrap to the left: - txt._get_wrap_line_width = lambda: width_dpi + x_pos = 1 else: - txt = label_ax.text(0.5, 0.5, self.properties['title'], - horizontalalignment='center', size='large', - verticalalignment='center', - transform=label_ax.transAxes, - wrap=True) - # To be able to wrap to the left: - txt._get_wrap_line_width = lambda: width_dpi + x_pos = 0.5 + label_ax.add_artist( + TextWrapAxis(label_ax, x_pos, 0.5, + self.properties['title'], + horizontalalignment=h_align, size='large', + verticalalignment='center', + transform=label_ax.transAxes, + wrap=True)) def process_type_for_coverage_track(self): default_plot_type = 'fill' diff --git a/pygenometracks/tracks/GtfTrack.py b/pygenometracks/tracks/GtfTrack.py index 3a1a5220..73d94f32 100644 --- a/pygenometracks/tracks/GtfTrack.py +++ b/pygenometracks/tracks/GtfTrack.py @@ -1,21 +1,15 @@ +from . BedGeneLikeTrack import BedGeneLikeTrack from . GenomeTrack import GenomeTrack -from . BedTrack import BedTrack +from . BedLikeTrack import AROUND_REGION from .. readGtf import ReadGtf from matplotlib import font_manager from .. utilities import temp_file_from_intersect -import numpy as np -DEFAULT_BED_COLOR = '#1f78b4' -DISPLAY_BED_VALID = ['collapsed', 'triangles', 'interleaved', 'stacked'] -DISPLAY_BED_SYNONYMOUS = {'interlaced': 'interleaved', 'domain': 'interleaved'} -DEFAULT_DISPLAY_BED = 'stacked' -AROUND_REGION = 100000 - -class GtfTrack(BedTrack): +class GtfTrack(BedGeneLikeTrack): SUPPORTED_ENDINGS = ['gtf', 'gtf.gz'] TRACK_TYPE = 'gtf' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" + OPTIONS_TXT = BedGeneLikeTrack.OPTIONS_TXT + f""" # By default the transcript_name is used. # If you want to use the gene_name: # prefered_name = gene_name @@ -24,132 +18,37 @@ class GtfTrack(BedTrack): # merge_transcripts = true # You can change the color of coding sequences by: color = darkblue -# height of track in cm -height = 5 -# whether printing the labels -labels = false -# optional: -# by default the labels are not printed if you have more than 60 features. -# to change it, just increase the value: -#max_labels = 60 -# optional: font size can be given to override the default size -fontsize = 10 -# optional: line_width -#line_width = 0.5 -# the display parameter defines how the gtf file is plotted. -# Default is 'stacked' where regions are plotted on different lines so -# we can see all regions and all labels. -# The other options are ['collapsed', 'interleaved', 'triangles'] -# These options assume that the regions do not overlap. -# `collapsed`: The gtf regions are plotted one after the other in one line. -# `interleaved`: The gtf regions are plotted in two lines, first up, then down, then up etc. -# optional, default is black. To remove the border, simply set 'border_color' to none -# Not used in tssarrow style -#border_color = black -# style to plot the genes when the display is not triangles -#style = UCSC -#style = flybase -#style = tssarrow -# maximum number of gene rows to be plotted. This -# field is useful to limit large number of close genes -# to be printed over many rows. When several images want -# to be combined this must be set to get equal size -# otherwise, on each image the height of each gene changes -#gene_rows = 10 -# by default the ymax is the number of -# rows occupied by the genes in the region plotted. However, -# by setting this option, the global maximum is used instead. -# This is useful to combine images that are all consistent and -# have the same number of rows. -#global_max_row = true -# If you want to plot all labels inside the plotting region: -#all_labels_inside = true -# If you want to display the name of the gene which goes over the plotted -# region in the right margin put: -#labels_in_margin = true -# if you use UCSC style, you can set the relative distance between 2 arrows on introns -# default is 2 -#arrow_interval = 2 -# if you use tssarrow style, you can choose the length of the arrow in bp -# (default is 4% of the plotted region) -#arrow_length = 5000 -# if you use flybase or tssarrow style, you can choose the color of non-coding intervals: -#color_utr = grey -# as well as the proportion between their height and the one of coding -# (by default they are the same height): -#height_utr = 1 -# By default, for oriented intervals in flybase style, -# or bed files with less than 12 columns, the arrowhead is added -# outside of the interval. -# If you want that the tip of the arrow correspond to -# the extremity of the interval use: -# arrowhead_included = true # optional. If not given is guessed from the file ending. file_type = {TRACK_TYPE} """ - DEFAULTS_PROPERTIES = {'fontsize': 12, - 'orientation': None, - 'color': DEFAULT_BED_COLOR, - 'border_color': 'black', - 'labels': True, - 'style': 'flybase', - 'display': DEFAULT_DISPLAY_BED, - 'line_width': 0.5, - 'max_labels': 60, - 'prefered_name': 'transcript_name', - 'merge_transcripts': False, - 'global_max_row': False, - 'gene_rows': None, - 'arrow_interval': 2, - 'arrowhead_included': False, - 'color_utr': 'grey', - 'height_utr': 1, - 'arrow_length': None, - 'region': None, # Cannot be set manually but is set by tracksClass - 'all_labels_inside': False, - 'labels_in_margin': False} - NECESSARY_PROPERTIES = ['file'] - SYNONYMOUS_PROPERTIES = {'display': DISPLAY_BED_SYNONYMOUS} - POSSIBLE_PROPERTIES = {'orientation': [None, 'inverted'], - 'style': ['flybase', 'UCSC', 'tssarrow'], - 'display': DISPLAY_BED_VALID} - BOOLEAN_PROPERTIES = ['labels', 'merge_transcripts', 'global_max_row', - 'arrowhead_included', 'all_labels_inside', - 'labels_in_margin'] - STRING_PROPERTIES = ['prefered_name', 'file', 'file_type', - 'overlay_previous', 'orientation', - 'title', 'style', 'color', 'border_color', - 'color_utr', 'display'] - FLOAT_PROPERTIES = {'fontsize': [0, np.inf], - 'line_width': [0, np.inf], - 'height': [0, np.inf], - 'height_utr': [0, 1]} - INTEGER_PROPERTIES = {'gene_rows': [0, np.inf], - 'max_labels': [0, np.inf], - 'arrow_interval': [1, np.inf], - 'arrow_length': [0, np.inf]} + DEFAULTS_PROPERTIES = dict({'prefered_name': 'transcript_name', + 'merge_transcripts': False}, + **BedGeneLikeTrack.DEFAULTS_PROPERTIES) + NECESSARY_PROPERTIES = BedGeneLikeTrack.NECESSARY_PROPERTIES + SYNONYMOUS_PROPERTIES = BedGeneLikeTrack.SYNONYMOUS_PROPERTIES + POSSIBLE_PROPERTIES = BedGeneLikeTrack.POSSIBLE_PROPERTIES + BOOLEAN_PROPERTIES = ['merge_transcripts'] + BedGeneLikeTrack.BOOLEAN_PROPERTIES + STRING_PROPERTIES = ['prefered_name'] + BedGeneLikeTrack.STRING_PROPERTIES + FLOAT_PROPERTIES = BedGeneLikeTrack.FLOAT_PROPERTIES + INTEGER_PROPERTIES = BedGeneLikeTrack.INTEGER_PROPERTIES def set_properties_defaults(self): - super(BedTrack, self).set_properties_defaults() + GenomeTrack.set_properties_defaults(self) self.fp = font_manager.FontProperties(size=self.properties['fontsize']) + # to set the distance between rows + self.row_scale = 2.3 + # For compatibility with other parent methods self.colormap = None - # check if the color given is a color map - # Contrary to bed it cannot be a colormap + # Contrary to bed it cannot be a colormap nor bed_rgb self.process_color('color', colormap_possible=False, bed_rgb_possible=False, default_value_is_colormap=False) # check if border_color and color_utr are colors - # if they are part of self.properties - # (for example, TADsTracks do not have color_utr) - for param in [p for p in ['border_color', 'color_utr'] - if p in self.properties]: + for param in ['border_color', 'color_utr']: self.process_color(param, bed_rgb_possible=False) - # to set the distance between rows - self.row_scale = 2.3 - def get_bed_handler(self, plot_regions=None): if not self.properties['global_max_row']: # I do the intersection: diff --git a/pygenometracks/tracks/NarrowPeakTrack.py b/pygenometracks/tracks/NarrowPeakTrack.py index fcecb4df..ef73f116 100644 --- a/pygenometracks/tracks/NarrowPeakTrack.py +++ b/pygenometracks/tracks/NarrowPeakTrack.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -from . GenomeTrack import GenomeTrack -from . BedGraphTrack import BedGraphTrack +from . BedGraphLikeTrack import BedGraphLikeTrack from matplotlib.patches import Rectangle from matplotlib.collections import PatchCollection @@ -12,10 +11,10 @@ DEFAULT_NARROWPEAK_COLOR = '#FF000080' # red, alpha=0.55 -class NarrowPeakTrack(BedGraphTrack): +class NarrowPeakTrack(BedGraphLikeTrack): SUPPORTED_ENDINGS = ['.narrowPeak'] TRACK_TYPE = 'narrow_peak' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" + OPTIONS_TXT = BedGraphLikeTrack.OPTIONS_TXT + f""" color = #FF000080 #max_value = 0.70 show_data_range = true @@ -61,12 +60,8 @@ class NarrowPeakTrack(BedGraphTrack): INTEGER_PROPERTIES = {} # color can only be a color - def __init__(self, properties_dict): - GenomeTrack.__init__(self, properties_dict) - self.load_file() - def set_properties_defaults(self): - GenomeTrack.set_properties_defaults(self) + BedGraphLikeTrack.set_properties_defaults(self) self.process_color('color') def peak_plot(self, start, end, height, center=None, width_adjust=1.5): diff --git a/pygenometracks/tracks/TADsTrack.py b/pygenometracks/tracks/TADsTrack.py index 09a88086..cd516c42 100644 --- a/pygenometracks/tracks/TADsTrack.py +++ b/pygenometracks/tracks/TADsTrack.py @@ -1,14 +1,12 @@ -from . BedTrack import BedTrack -from . GenomeTrack import GenomeTrack +from . BedLikeTrack import BedLikeTrack, AROUND_REGION +from .. utilities import change_chrom_names import numpy as np -DEFAULT_BED_COLOR = '#1f78b4' - -class TADsTrack(BedTrack): +class TADsTrack(BedLikeTrack): SUPPORTED_ENDINGS = ['.domain', '.domains', '.tad', '.tads'] TRACK_TYPE = 'domains' - OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f""" + OPTIONS_TXT = BedLikeTrack.OPTIONS_TXT + f""" # If the bed file contains a column for color (column 9), then this color can be used by # setting: #color = bed_rgb @@ -21,50 +19,47 @@ class TADsTrack(BedTrack): #max_value=100 # If the color is simply a color name, then this color is used and the score is not considered. color = darkblue -# optional: line_width -#line_width = 0.5 -# optional, default is black. To remove the border, simply set 'border_color' to none -#border_color = black # optional. If not given it is guessed from the file ending. file_type = {TRACK_TYPE} """ - DEFAULTS_PROPERTIES = {'orientation': None, - 'color': DEFAULT_BED_COLOR, - 'border_color': 'black', - 'line_width': 0.5, - # To remove in next 1.0 - 'prefered_name': 'transcript_name', - 'merge_transcripts': False, - # End to remove - 'max_value': None, - 'min_value': None, - 'region': None} # Cannot be set manually but is set by tracksClass - NECESSARY_PROPERTIES = ['file'] - SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None}, - 'min_value': {'auto': None}} - POSSIBLE_PROPERTIES = {'orientation': [None, 'inverted']} - # In next 1.0: BOOLEAN_PROPERTIES = [] - BOOLEAN_PROPERTIES = ['merge_transcripts'] - STRING_PROPERTIES = ['file', 'file_type', - 'overlay_previous', 'orientation', - 'title', 'color', 'border_color', - # To remove in next 1.0 - 'prefered_name'] - FLOAT_PROPERTIES = {'max_value': [- np.inf, np.inf], - 'min_value': [- np.inf, np.inf], - 'fontsize': [0, np.inf], - 'line_width': [0, np.inf], - 'height': [0, np.inf]} - INTEGER_PROPERTIES = {} + DEFAULTS_PROPERTIES = dict({'max_value': None, + 'min_value': None}, + **BedLikeTrack.DEFAULTS_PROPERTIES) + NECESSARY_PROPERTIES = BedLikeTrack.NECESSARY_PROPERTIES + SYNONYMOUS_PROPERTIES = dict({'max_value': {'auto': None}, + 'min_value': {'auto': None}}, + **BedLikeTrack.SYNONYMOUS_PROPERTIES) + POSSIBLE_PROPERTIES = BedLikeTrack.POSSIBLE_PROPERTIES + BOOLEAN_PROPERTIES = BedLikeTrack.BOOLEAN_PROPERTIES + STRING_PROPERTIES = BedLikeTrack.STRING_PROPERTIES + FLOAT_PROPERTIES = dict({'max_value': [- np.inf, np.inf], + 'min_value': [- np.inf, np.inf]}, + **BedLikeTrack.FLOAT_PROPERTIES) + INTEGER_PROPERTIES = BedLikeTrack.INTEGER_PROPERTIES # The color can be a color or a colormap if bed_type is bed12 or 'bed_rgb' - # border_color can only be a color def __init__(self, *args, **kwarg): super(TADsTrack, self).__init__(*args, **kwarg) self.properties['display'] = 'triangles' - def set_properties_defaults(self): - self.properties['fontsize'] = 12 - super(TADsTrack, self).set_properties_defaults() - self.properties['global_max_row'] = False + def plot(self, ax, chrom_region, start_region, end_region): + if chrom_region not in self.interval_tree.keys(): + chrom_region_before = chrom_region + chrom_region = change_chrom_names(chrom_region) + if chrom_region not in self.interval_tree.keys(): + self.log.warning("*Warning*\nNo interval was found when " + "overlapping with both " + f"{chrom_region_before}:{start_region - AROUND_REGION}-{end_region + AROUND_REGION}" + f" and {chrom_region}:{start_region - AROUND_REGION}-{end_region + AROUND_REGION}" + " inside the bed file. " + "This will generate an empty track!!\n") + return + chrom_region = self.check_chrom_str_bytes(self.interval_tree, + chrom_region) + + genes_overlap = \ + sorted(self.interval_tree[chrom_region][start_region:end_region]) + + if self.properties['display'] == 'triangles': + self.plot_triangles(ax, genes_overlap) diff --git a/pygenometracks/tracksClass.py b/pygenometracks/tracksClass.py index 0c975b88..75c02dc0 100644 --- a/pygenometracks/tracksClass.py +++ b/pygenometracks/tracksClass.py @@ -41,7 +41,7 @@ DEFAULT_FIGURE_WIDTH = 40 # in centimeters # proportion of width dedicated to (figure, legends) DEFAULT_WIDTH_RATIOS = (0.01, 0.90, 0.1) -DEFAULT_MARGINS = {'left': 0.04, 'right': 0.92, 'bottom': 0.03, 'top': 0.97} +DEFAULT_MARGINS = {'left': 0.04, 'right': 0.98, 'bottom': 0.03, 'top': 0.97} class MultiDict(OrderedDict): @@ -86,7 +86,7 @@ def __init__(self, tracks_file, fig_width=DEFAULT_FIGURE_WIDTH, if track_label_width is None: self.width_ratios = DEFAULT_WIDTH_RATIOS else: - self.width_ratios = (0.01, + self.width_ratios = (DEFAULT_WIDTH_RATIOS[0], 1 - track_label_width, track_label_width) @@ -253,10 +253,6 @@ def plot(self, file_name, chrom, start, end, title=None, label_axis = plt.subplot(grids[idx, 2]) label_axis.set_axis_off() - # I get the width of the label_axis to be able to wrap the - # labels when right or center aligned. - width_inch = label_axis.get_window_extent().width - width_dpi = width_inch * self.dpi / fig.dpi if decreasing_x_axis: plot_axis.set_xlim(end, start) @@ -264,8 +260,7 @@ def plot(self, file_name, chrom, start, end, title=None, plot_axis.set_xlim(start, end) track.plot(plot_axis, chrom, start, end) track.plot_y_axis(y_axis, plot_axis) - track.plot_label(label_axis, width_dpi=width_dpi, - h_align=h_align_titles) + track.plot_label(label_axis, h_align=h_align_titles) if track.properties['overlay_previous'] == 'share-y': plot_axis.set_ylim(ylim) diff --git a/pygenometracks/utilities.py b/pygenometracks/utilities.py index abde263c..ff16fc86 100644 --- a/pygenometracks/utilities.py +++ b/pygenometracks/utilities.py @@ -7,6 +7,9 @@ import pybedtools import tempfile import warnings +import matplotlib as mpl +import matplotlib.pyplot as plt +import math import logging @@ -16,6 +19,85 @@ log.setLevel(logging.DEBUG) +# This is a class for text which +# enable to wrap within an axis +# (the default plt.Text only wrap +# within a figure) +class TextWrapAxis(plt.Text): + + def __init__(self, axis, *args, **kwargs): + super().__init__(*args, **kwargs) + self.axis = axis + + def _get_wrap_line_width(self): + """ + Return the maximum line width for wrapping text based on the current + orientation. + """ + x0, y0 = self.get_transform().transform(self.get_position()) + # In the original plt.Text it is: + # figure_box = self.get_figure().get_window_extent() + # In this Class we use the axis box: + axis_box = self.axis.get_window_extent() + # Calculate available width based on text alignment + alignment = self.get_horizontalalignment() + self.set_rotation_mode('anchor') + rotation = self.get_rotation() + + # In the original plt.Text it was distance to figure_box + left = self._get_dist_to_box(rotation, x0, y0, axis_box) + right = self._get_dist_to_box( + (180 + rotation) % 360, x0, y0, axis_box) + if alignment == 'left': + line_width = left + elif alignment == 'right': + line_width = right + else: + line_width = 2 * min(left, right) + + return line_width + + def _get_dist_to_box(self, rotation, x0, y0, figure_box): + """ + Return the distance from the given points to the boundaries of a + rotated box, in pixels. + """ + + # In the original function, it was assumed that + # figure_box.x0 = 0 and figure_box.y0 = 0 + # This is now added. + # There is an issue because + # math.cos(math.radians(90)) gives 6.123233995736766e-17 + # Thus if rotation = 0 and figure_box.y1 = y0 + # _get_dist_to_box returns 0 + # I add round values of rotation: + if rotation == 0: + return(figure_box.x1 - x0) + elif rotation == 90: + return(figure_box.y1 - y0) + elif rotation == 180: + return(x0 - figure_box.x0) + elif rotation == 270: + return(y0 - figure_box.y0) + elif rotation > 270: + quad = rotation - 270 + h1 = (y0 - figure_box.y0) / math.cos(math.radians(quad)) + h2 = (figure_box.x1 - x0) / math.cos(math.radians(90 - quad)) + elif rotation > 180: + quad = rotation - 180 + h1 = (x0 - figure_box.x0) / math.cos(math.radians(quad)) + h2 = (y0 - figure_box.y0) / math.cos(math.radians(90 - quad)) + elif rotation > 90: + quad = rotation - 90 + h1 = (figure_box.y1 - y0) / math.cos(math.radians(quad)) + h2 = (x0 - figure_box.x0) / math.cos(math.radians(90 - quad)) + else: + h1 = (figure_box.x1 - x0) / math.cos(math.radians(rotation)) + h2 = (figure_box.y1 - y0) / math.cos(math.radians(90 - rotation)) + + return min(h1, h2) + + class InputError(Exception): """Exception raised for errors in the input.""" pass