2
2
3
3
http://wiki.galaxyproject.org/Admin/Data%20Integration
4
4
"""
5
+ import os
6
+ import shutil
7
+ import subprocess
5
8
from xml .etree import ElementTree
6
9
7
- from fabric .api import *
8
- from fabric .contrib .files import *
10
+ from cloudbio .custom import shared
9
11
10
12
# ## Compatibility definitions
11
13
@@ -43,7 +45,7 @@ def __init__(self, config, dbkey, file_path):
43
45
self .dbkey1 = config .get ('index' , dbkey )
44
46
self .dbkey2 = config .get ('index' , dbkey )
45
47
46
- def _get_tool_conf (tool_name ):
48
+ def _get_tool_conf (env , tool_name ):
47
49
"""
48
50
Parse the tool_data_table_conf.xml from installed_files subfolder and extract
49
51
values for the 'columns' tag and 'path' parameter for the 'file' tag, returning
@@ -58,12 +60,12 @@ def _get_tool_conf(tool_name):
58
60
tool_conf ['file' ] = t .find ('file' ).attrib .get ('path' , '' )
59
61
return tool_conf
60
62
61
- def _build_galaxy_loc_line (dbkey , file_path , config , prefix , tool_name ):
63
+ def _build_galaxy_loc_line (env , dbkey , file_path , config , prefix , tool_name ):
62
64
"""Prepare genome information to write to a Galaxy *.loc config file.
63
65
"""
64
66
if tool_name :
65
67
str_parts = []
66
- tool_conf = _get_tool_conf (tool_name )
68
+ tool_conf = _get_tool_conf (env , tool_name )
67
69
loc_cols = LocCols (config , dbkey , file_path )
68
70
# Compose the .loc file line as str_parts list by looking for column values
69
71
# from the retrieved tool_conf (as defined in tool_data_table_conf.xml).
@@ -77,24 +79,30 @@ def _build_galaxy_loc_line(dbkey, file_path, config, prefix, tool_name):
77
79
str_parts .insert (0 , prefix )
78
80
return str_parts
79
81
80
def update_loc_file(env, ref_file, line_parts):
    """Add a reference to the given genome to the base index file.

    Does nothing when ``env`` has no ``galaxy_home`` attribute (or it is
    None). Otherwise makes sure ``<galaxy_home>/tool-data`` exists, installs
    ``tool_data_table_conf.xml`` into ``galaxy_home`` if it is missing, and
    appends the tab-joined ``line_parts`` to the ``ref_file`` .loc file inside
    the tool-data directory unless an equivalent line is already present.

    Args:
        env: settings object; reads ``galaxy_home`` and, only when the data
            table configuration has to be installed,
            ``tool_data_table_conf_file``.
        ref_file: name of the .loc file, resolved against the tool-data
            directory.
        line_parts: column values (strings) making up the .loc line.

    Returns:
        None.
    """
    galaxy_home = getattr(env, "galaxy_home", None)
    if galaxy_home is None:
        return
    tools_dir = os.path.join(galaxy_home, "tool-data")
    if not os.path.exists(tools_dir):
        os.makedirs(tools_dir)
    dt_file = os.path.join(galaxy_home, "tool_data_table_conf.xml")
    if not os.path.exists(dt_file):
        shutil.copy(env.tool_data_table_conf_file, dt_file)
    add_str = "\t".join(line_parts)
    # Build the full path instead of changing the process working directory;
    # os.path.join still honours an absolute ref_file.
    loc_path = os.path.join(tools_dir, ref_file)
    needs_newline = False
    if os.path.exists(loc_path):
        with open(loc_path) as in_handle:
            for line in in_handle:
                if line.strip() == add_str.strip():
                    # Already registered; nothing to add.
                    return
                needs_newline = not line.endswith("\n")
    # Append mode creates the file when it does not exist yet.
    with open(loc_path, "a") as out_handle:
        if needs_newline:
            # Existing content lacks a final newline; avoid gluing the new
            # entry onto the previous line.
            out_handle.write("\n")
        out_handle.write(add_str + "\n")
104
+
105
+ def prep_locs (env , gid , indexes , config ):
98
106
"""Prepare Galaxy location files for all available indexes.
99
107
"""
100
108
for ref_index_file , cur_index , prefix , tool_name in [
@@ -109,27 +117,27 @@ def prep_locs(gid, indexes, config):
109
117
("bwa_index.loc" , indexes .get ("bwa" , None ), "" , 'bwa_indexes' ),
110
118
("novoalign_indices.loc" , indexes .get ("novoalign" , None ), "" , "novoalign_indexes" )]:
111
119
if cur_index :
112
- str_parts = _build_galaxy_loc_line (gid , cur_index , config , prefix , tool_name )
113
- update_loc_file (ref_index_file , str_parts )
120
+ str_parts = _build_galaxy_loc_line (env , gid , cur_index , config , prefix , tool_name )
121
+ update_loc_file (env , ref_index_file , str_parts )
114
122
115
123
# ## Finalize downloads
116
124
117
125
def index_picard(ref_file):
    """Provide a Picard style dict index file for a reference genome.

    The dictionary is written next to ``ref_file`` with the final extension
    replaced by ``.dict``. Picard is only invoked when that file does not
    exist yet.

    Args:
        ref_file: path to the reference genome file.

    Returns:
        Path of the ``.dict`` index file.

    Raises:
        subprocess.CalledProcessError: if the picard command exits non-zero.
    """
    index_file = "%s.dict" % os.path.splitext(ref_file)[0]
    if not os.path.exists(index_file):
        # Argument list rather than a shell string, so paths containing
        # spaces or shell metacharacters are passed through untouched.
        subprocess.check_call(["picard", "-Xms500m", "-Xmx3500m",
                               "CreateSequenceDictionary",
                               "REFERENCE={ref}".format(ref=ref_file),
                               "OUTPUT={out}".format(out=index_file)])
    return index_file
125
133
126
134
def _finalize_index_seq (fname ):
127
135
"""Convert UCSC 2bit file into fasta file.
128
136
"""
129
137
out_fasta = fname + ".fa"
130
- if not env . safe_exists (out_fasta ):
131
- env . safe_run ("twoBitToFa {base}.2bit {out}" .format (
132
- base = fname , out = out_fasta ))
138
+ if not os . path . exists (out_fasta ):
139
+ subprocess . check_call ("twoBitToFa {base}.2bit {out}" .format (
140
+ base = fname , out = out_fasta ), shell = True )
133
141
134
142
finalize_fns = {"ucsc" : _finalize_index_seq ,
135
143
"seq" : index_picard }
@@ -159,8 +167,8 @@ def _get_galaxy_genomes(gid, genome_dir, genomes, genome_indexes):
159
167
"""
160
168
out = {}
161
169
org_dir = os .path .join (genome_dir , gid )
162
- if not env . safe_exists (org_dir ):
163
- env . safe_run ('mkdir -p %s' % org_dir )
170
+ if not os . path . exists (org_dir ):
171
+ subprocess . check_call ('mkdir -p %s' % org_dir , shell = True )
164
172
for idx in genome_indexes :
165
173
galaxy_index_name = index_map .get (idx )
166
174
index_file = None
@@ -176,29 +184,32 @@ def _rsync_genome_index(gid, idx, org_dir):
176
184
"""Retrieve index for a genome from rsync server, returning path to files.
177
185
"""
178
186
idx_dir = os .path .join (org_dir , idx )
179
- if not env . safe_exists (idx_dir ):
187
+ if not os . path . exists (idx_dir ):
180
188
org_rsync = None
181
189
for subdir in galaxy_subdirs :
182
190
test_rsync = "{server}/indexes{subdir}/{gid}/{idx}/" .format (
183
191
server = server , subdir = subdir , gid = gid , idx = idx )
184
- with quiet ():
185
- check_dir = env .safe_run ("rsync --list-only {server}" .format (server = test_rsync ))
186
- if check_dir .succeeded :
192
+ try :
193
+ subprocess .check_output ("rsync --list-only {server}" .format (server = test_rsync ))
187
194
org_rsync = test_rsync
188
- break
195
+ except subprocess .CalledProcessError :
196
+ pass
189
197
if org_rsync is None :
190
198
raise ValueError ("Could not find genome %s on Galaxy rsync" % gid )
191
- with quiet ():
192
- check_dir = env .safe_run ("rsync --list-only {server}" .format (server = org_rsync ))
193
- if check_dir .succeeded :
194
- if not env .safe_exists (idx_dir ):
195
- env .safe_run ('mkdir -p %s' % idx_dir )
199
+ try :
200
+ subprocess .check_call ("rsync --list-only {server}" .format (server = org_rsync ), shell = True )
201
+ if not os .path .exists (idx_dir ):
202
+ subprocess .check_call ('mkdir -p %s' % idx_dir , shell = True )
196
203
with cd (idx_dir ):
197
- env .safe_run ("rsync -avzP {server} {idx_dir}" .format (server = org_rsync ,
198
- idx_dir = idx_dir ))
199
- if env .safe_exists (idx_dir ):
200
- with quiet ():
201
- has_fa_ext = env .safe_run ("ls {idx_dir}/{gid}.fa*" .format (idx_dir = idx_dir ,
202
- gid = gid ))
203
- ext = ".fa" if (has_fa_ext .succeeded and idx not in ["seq" ]) else ""
204
+ subprocess .check_call ("rsync -avzP {server} {idx_dir}" .format (server = org_rsync ,
205
+ idx_dir = idx_dir ), shell = True )
206
+ except subprocess .CalledProcessError :
207
+ pass
208
+ if os .path .exists (idx_dir ):
209
+ try :
210
+ subprocess .check_call ("ls {idx_dir}/{gid}.fa*" .format (idx_dir = idx_dir ,
211
+ gid = gid ), shell = True )
212
+ ext = ".fa" if (has_fa_ext .succeeded and idx not in ["seq" ]) else ""
213
+ except subprocess .CalledProcessError :
214
+ pass
204
215
return os .path .join (idx_dir , gid + ext )
0 commit comments