Patch summary (free text ahead of the first file header is skipped by
git apply and patch(1)):

  * Dockerfile (new): ubuntu:20.04 image that installs graph_peak_caller
    from a fresh clone and downloads the vg v1.58.0 binary to /usr/bin/vg.
    The clone and the install are two separate RUN instructions; the clone
    line must not end in a backslash, otherwise the following RUN line is
    passed to git clone as extra arguments and the build fails.
  * analysis/util.py: skip any path whose to_obg() result is falsy.
  * callpeaks.py: score zero-length max paths with a float 0.0.
  * control/linearmap.py: size max_dists from the last entry of
    graph.get_sorted_node_ids() instead of len(node_ids).
  * legacy/pvalues.py, sparsediffs.py, sparsepvalues.py: use the builtin
    float/bool in place of the np.float/np.bool aliases.

NOTE(review): the Dockerfile blob id on its index line is the one recorded
with the original patch and does not reflect the corrected content.
NOTE(review): leading indentation of the Python context lines was restored
from the code's nesting; confirm against the target tree before applying.

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..720fdd6
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM ubuntu:20.04
+ENV PIP_BREAK_SYSTEM_PACKAGES=1
+ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
+RUN apt-get update
+RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata parallel
+RUN apt install --assume-yes git python3 python3-pip pkg-config build-essential wget
+RUN pip3 install matplotlib
+RUN git clone https://github.com/cgroza/graph_peak_caller.git
+RUN cd graph_peak_caller && pip3 install .
+
+RUN wget https://github.com/vgteam/vg/releases/download/v1.58.0/vg -O /usr/bin/vg
+RUN chmod +x /usr/bin/vg
+
+ENTRYPOINT [ "/bin/bash", "-l", "-c" ]
diff --git a/graph_peak_caller/analysis/util.py b/graph_peak_caller/analysis/util.py
index 11379ba..ff0c6e7 100644
--- a/graph_peak_caller/analysis/util.py
+++ b/graph_peak_caller/analysis/util.py
@@ -46,6 +46,9 @@ def get_linear_paths_in_graph(ob_graph, vg_graph, write_to_file_name=None):
     intervals = {}
     for path in vg_graph.paths:
         obg_interval = path.to_obg(ob_graph=ob_graph)
+        if not obg_interval:
+            logging.info("OBG interval for path " + path.name + " is False. Skipping.")
+            continue
         obg_interval.name = path.name
         print("Path name: %s" % path.name)
         intervals[obg_interval.name] = obg_interval
diff --git a/graph_peak_caller/callpeaks.py b/graph_peak_caller/callpeaks.py
index affd1e5..078bf89 100644
--- a/graph_peak_caller/callpeaks.py
+++ b/graph_peak_caller/callpeaks.py
@@ -191,7 +191,7 @@ def __get_max_paths(self):
             assert max_path.length() >= 0, "Max path %s has negative length" % max_path
             if max_path.length() == 0:
                 logging.warning("Max path has 0 length: %s" % max_path)
-                max_path.set_score(0)
+                max_path.set_score(float(0.0))
                 continue
 
             score = np.max(self.q_values.get_interval_values(max_path))
diff --git a/graph_peak_caller/control/linearmap.py b/graph_peak_caller/control/linearmap.py
index 9bae0f6..dc8312b 100644
--- a/graph_peak_caller/control/linearmap.py
+++ b/graph_peak_caller/control/linearmap.py
@@ -120,7 +120,10 @@ def to_file(self, filename):
 def find_starts(graph, node_ids=None):
     if node_ids is None:
         node_ids = list(graph.get_topological_sorted_node_ids())
-    max_dists = np.zeros(len(node_ids))
+    # this does not work for node id spaces that are not compacted
+    # max_dists = np.zeros(len(node_ids))
+    # find size of array from the largest named node instead
+    max_dists = np.zeros(int(graph.get_sorted_node_ids()[-1]))
     n_processed = 0
     for node_id in node_ids:
         if n_processed % 500000 == 0:
@@ -139,7 +142,10 @@ def find_ends(graph, node_ids=None):
     adj_list = graph.reverse_adj_list
     if node_ids is None:
         node_ids = list(graph.get_sorted_node_ids(reverse=True))
-    max_dists = np.zeros(len(node_ids))
+    # this does not work for node id spaces that are not compacted
+    # max_dists = np.zeros(len(node_ids))
+    # find size of array from the largest named node instead
+    max_dists = np.zeros(int(graph.get_sorted_node_ids()[-1]))
     n_processed = 0
     for node_id in node_ids:
         if n_processed % 500000 == 0:
diff --git a/graph_peak_caller/legacy/pvalues.py b/graph_peak_caller/legacy/pvalues.py
index fb0823d..3c6a8ad 100644
--- a/graph_peak_caller/legacy/pvalues.py
+++ b/graph_peak_caller/legacy/pvalues.py
@@ -127,6 +127,6 @@ def translation(x):
                 logging.error("P value not found in mapping dict. Could be due to rounding errors.")
             return self.p_to_q_values[x]
 
-        trans = np.vectorize(translation, otypes=[np.float])
+        trans = np.vectorize(translation, otypes=[float])
         new_values = trans(p_values)
         return new_values
diff --git a/graph_peak_caller/sparsediffs.py b/graph_peak_caller/sparsediffs.py
index 827a738..eccfa24 100644
--- a/graph_peak_caller/sparsediffs.py
+++ b/graph_peak_caller/sparsediffs.py
@@ -62,7 +62,7 @@ def threshold_copy(self, cutoff):
         return new
 
     def to_dense_pileup(self, size):
-        if self.values.dtype == np.bool:
+        if self.values.dtype == bool:
             values = self.values.astype("int")
         else:
             values = self.values
diff --git a/graph_peak_caller/sparsepvalues.py b/graph_peak_caller/sparsepvalues.py
index 3e39969..cf63873 100644
--- a/graph_peak_caller/sparsepvalues.py
+++ b/graph_peak_caller/sparsepvalues.py
@@ -121,5 +121,5 @@ def get_q_values(self):
 
     def get_q_array_from_p_array(self, p_values):
         assert isinstance(p_values, np.ndarray)
-        trans = np.vectorize(self.p_to_q_values.get, otypes=[np.float])
+        trans = np.vectorize(self.p_to_q_values.get, otypes=[float])
         return trans(p_values)