Skip to content

Commit 9fefcc2

Browse files
committed
Remove fabric requirement for data based installs
Complements the previous change (5fa9af7) to allow a full install of tools and data without requiring fabric. We now install only locally and use standard Python tools like subprocess for execution. This makes the code base ready for conversion to also support py3.
1 parent 5fa9af7 commit 9fefcc2

File tree

6 files changed

+416
-323
lines changed

6 files changed

+416
-323
lines changed

cloudbio/biodata/galaxy.py

Lines changed: 55 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
33
http://wiki.galaxyproject.org/Admin/Data%20Integration
44
"""
5+
import os
6+
import shutil
7+
import subprocess
58
from xml.etree import ElementTree
69

7-
from fabric.api import *
8-
from fabric.contrib.files import *
10+
from cloudbio.custom import shared
911

1012
# ## Compatibility definitions
1113

@@ -43,7 +45,7 @@ def __init__(self, config, dbkey, file_path):
4345
self.dbkey1 = config.get('index', dbkey)
4446
self.dbkey2 = config.get('index', dbkey)
4547

46-
def _get_tool_conf(tool_name):
48+
def _get_tool_conf(env, tool_name):
4749
"""
4850
Parse the tool_data_table_conf.xml from installed_files subfolder and extract
4951
values for the 'columns' tag and 'path' parameter for the 'file' tag, returning
@@ -58,12 +60,12 @@ def _get_tool_conf(tool_name):
5860
tool_conf['file'] = t.find('file').attrib.get('path', '')
5961
return tool_conf
6062

61-
def _build_galaxy_loc_line(dbkey, file_path, config, prefix, tool_name):
63+
def _build_galaxy_loc_line(env, dbkey, file_path, config, prefix, tool_name):
6264
"""Prepare genome information to write to a Galaxy *.loc config file.
6365
"""
6466
if tool_name:
6567
str_parts = []
66-
tool_conf = _get_tool_conf(tool_name)
68+
tool_conf = _get_tool_conf(env, tool_name)
6769
loc_cols = LocCols(config, dbkey, file_path)
6870
# Compose the .loc file line as str_parts list by looking for column values
6971
# from the retrieved tool_conf (as defined in tool_data_table_conf.xml).
@@ -77,24 +79,30 @@ def _build_galaxy_loc_line(dbkey, file_path, config, prefix, tool_name):
7779
str_parts.insert(0, prefix)
7880
return str_parts
7981

80-
def update_loc_file(ref_file, line_parts):
82+
def update_loc_file(env, ref_file, line_parts):
8183
"""Add a reference to the given genome to the base index file.
8284
"""
8385
if getattr(env, "galaxy_home", None) is not None:
8486
tools_dir = os.path.join(env.galaxy_home, "tool-data")
85-
if not env.safe_exists(tools_dir):
86-
env.safe_run("mkdir -p %s" % tools_dir)
87+
if not os.path.exists(tools_dir):
88+
subprocess.check_call("mkdir -p %s" % tools_dir, shell=True)
8789
dt_file = os.path.join(env.galaxy_home, "tool_data_table_conf.xml")
88-
if not env.safe_exists(dt_file):
89-
env.safe_put(env.tool_data_table_conf_file, dt_file)
90+
if not os.path.exists(dt_file):
91+
shutil.copy(env.tool_data_table_conf_file, dt_file)
9092
add_str = "\t".join(line_parts)
91-
with cd(tools_dir):
92-
if not env.safe_exists(ref_file):
93-
env.safe_run("touch %s" % ref_file)
94-
if not env.safe_contains(ref_file, add_str):
95-
env.safe_append(ref_file, add_str)
96-
97-
def prep_locs(gid, indexes, config):
93+
with shared.chdir(tools_dir):
94+
if not os.path.exists(ref_file):
95+
subprocess.check_call("touch %s" % ref_file, shell=True)
96+
has_line = False
97+
with open(ref_file) as in_handle:
98+
for line in in_handle:
99+
if line.strip() == add_str.strip():
100+
has_line = True
101+
if not has_line:
102+
with open(ref_file, "a") as out_handle:
103+
out_handle.write(line + "\n")
104+
105+
def prep_locs(env, gid, indexes, config):
98106
"""Prepare Galaxy location files for all available indexes.
99107
"""
100108
for ref_index_file, cur_index, prefix, tool_name in [
@@ -109,27 +117,27 @@ def prep_locs(gid, indexes, config):
109117
("bwa_index.loc", indexes.get("bwa", None), "", 'bwa_indexes'),
110118
("novoalign_indices.loc", indexes.get("novoalign", None), "", "novoalign_indexes")]:
111119
if cur_index:
112-
str_parts = _build_galaxy_loc_line(gid, cur_index, config, prefix, tool_name)
113-
update_loc_file(ref_index_file, str_parts)
120+
str_parts = _build_galaxy_loc_line(env, gid, cur_index, config, prefix, tool_name)
121+
update_loc_file(env, ref_index_file, str_parts)
114122

115123
# ## Finalize downloads
116124

117125
def index_picard(ref_file):
118126
"""Provide a Picard style dict index file for a reference genome.
119127
"""
120128
index_file = "%s.dict" % os.path.splitext(ref_file)[0]
121-
if not env.safe_exists(index_file):
122-
env.safe_run("picard -Xms500m -Xmx3500m CreateSequenceDictionary REFERENCE={ref} OUTPUT={out}"
123-
.format(ref=ref_file, out=index_file))
129+
if not os.path.exists(index_file):
130+
subprocess.check_call("picard -Xms500m -Xmx3500m CreateSequenceDictionary REFERENCE={ref} OUTPUT={out}"
131+
.format(ref=ref_file, out=index_file), shell=True)
124132
return index_file
125133

126134
def _finalize_index_seq(fname):
127135
"""Convert UCSC 2bit file into fasta file.
128136
"""
129137
out_fasta = fname + ".fa"
130-
if not env.safe_exists(out_fasta):
131-
env.safe_run("twoBitToFa {base}.2bit {out}".format(
132-
base=fname, out=out_fasta))
138+
if not os.path.exists(out_fasta):
139+
subprocess.check_call("twoBitToFa {base}.2bit {out}".format(
140+
base=fname, out=out_fasta), shell=True)
133141

134142
finalize_fns = {"ucsc": _finalize_index_seq,
135143
"seq": index_picard}
@@ -159,8 +167,8 @@ def _get_galaxy_genomes(gid, genome_dir, genomes, genome_indexes):
159167
"""
160168
out = {}
161169
org_dir = os.path.join(genome_dir, gid)
162-
if not env.safe_exists(org_dir):
163-
env.safe_run('mkdir -p %s' % org_dir)
170+
if not os.path.exists(org_dir):
171+
subprocess.check_call('mkdir -p %s' % org_dir, shell=True)
164172
for idx in genome_indexes:
165173
galaxy_index_name = index_map.get(idx)
166174
index_file = None
@@ -176,29 +184,32 @@ def _rsync_genome_index(gid, idx, org_dir):
176184
"""Retrieve index for a genome from rsync server, returning path to files.
177185
"""
178186
idx_dir = os.path.join(org_dir, idx)
179-
if not env.safe_exists(idx_dir):
187+
if not os.path.exists(idx_dir):
180188
org_rsync = None
181189
for subdir in galaxy_subdirs:
182190
test_rsync = "{server}/indexes{subdir}/{gid}/{idx}/".format(
183191
server=server, subdir=subdir, gid=gid, idx=idx)
184-
with quiet():
185-
check_dir = env.safe_run("rsync --list-only {server}".format(server=test_rsync))
186-
if check_dir.succeeded:
192+
try:
193+
subprocess.check_output("rsync --list-only {server}".format(server=test_rsync))
187194
org_rsync = test_rsync
188-
break
195+
except subprocess.CalledProcessError:
196+
pass
189197
if org_rsync is None:
190198
raise ValueError("Could not find genome %s on Galaxy rsync" % gid)
191-
with quiet():
192-
check_dir = env.safe_run("rsync --list-only {server}".format(server=org_rsync))
193-
if check_dir.succeeded:
194-
if not env.safe_exists(idx_dir):
195-
env.safe_run('mkdir -p %s' % idx_dir)
199+
try:
200+
subprocess.check_call("rsync --list-only {server}".format(server=org_rsync), shell=True)
201+
if not os.path.exists(idx_dir):
202+
subprocess.check_call('mkdir -p %s' % idx_dir, shell=True)
196203
with cd(idx_dir):
197-
env.safe_run("rsync -avzP {server} {idx_dir}".format(server=org_rsync,
198-
idx_dir=idx_dir))
199-
if env.safe_exists(idx_dir):
200-
with quiet():
201-
has_fa_ext = env.safe_run("ls {idx_dir}/{gid}.fa*".format(idx_dir=idx_dir,
202-
gid=gid))
203-
ext = ".fa" if (has_fa_ext.succeeded and idx not in ["seq"]) else ""
204+
subprocess.check_call("rsync -avzP {server} {idx_dir}".format(server=org_rsync,
205+
idx_dir=idx_dir), shell=True)
206+
except subprocess.CalledProcessError:
207+
pass
208+
if os.path.exists(idx_dir):
209+
try:
210+
subprocess.check_call("ls {idx_dir}/{gid}.fa*".format(idx_dir=idx_dir,
211+
gid=gid), shell=True)
212+
ext = ".fa" if (has_fa_ext.succeeded and idx not in ["seq"]) else ""
213+
except subprocess.CalledProcessError:
214+
pass
204215
return os.path.join(idx_dir, gid + ext)

0 commit comments

Comments
 (0)