Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 60 additions & 2 deletions bulk_webincli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
from joblib import Parallel, delayed
from datetime import datetime
from pathlib import Path
import fnmatch
import os
import multiprocessing
import glob

# Mapping the field names between the submitted user metadata spreadsheet and the manifest file fields
spreadsheet_column_mapping = {
Expand Down Expand Up @@ -129,8 +132,52 @@ def get_args():
if Path(args.webinCliPath).exists() is False:
print("> ERROR: Cannot find the Webin CLI jar file. Please set the path to the Webin CLI jar file (--webinCliPath)")
sys.exit()
if not fnmatch.fnmatch(args.webinCliPath, '*.jar'): # check if Webin-CLI jar file already exist
webinCli_file = webinCli_latest_download(args.webinCliPath)
args.webinCliPath = f'{args.webinCliPath}/{webinCli_file}'
return args

def webinCli_latest_download(webinCli_dir):
    """
    Checking and retrieving the latest Webin Cli jar file

    The download URL of the latest release asset is looked up through the GitHub
    releases API (using curl). If a jar with the same file name is already present
    in webinCli_dir nothing is downloaded, otherwise the asset is fetched into
    that directory with wget. The process exits with an error message if the
    latest release cannot be determined or the download fails.

    :param: webinCli_dir: directory path for webin-cli
    :return: Latest Webin Cli jar file name
    """
    print('checking if webin-cli is the latest release')
    # Ask the GitHub API for the first asset URL of the latest release
    download_command = 'curl -s https://api.github.com/repos/enasequence/webin-cli/releases/latest | grep "browser_download_url" | head -1 | cut -d : -f 2,3 | tr -d \\"'
    sp = subprocess.Popen(download_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = sp.communicate()
    download_url = out.decode().strip()
    # The file name is the last path component of the URL (no fixed index, so an
    # empty or unexpected reply cannot raise an IndexError)
    latest_file_name = download_url.rsplit("/", 1)[-1]
    if not latest_file_name:
        sys.stderr.write(err.decode())
        print("> ERROR: Cannot determine the latest Webin CLI release. Please check the network connection or set the path to a Webin CLI jar file (--webinCliPath)")
        sys.exit(1)

    # Compare against every jar in the directory, not only the first one listed
    existing_jars = {os.path.basename(jar) for jar in glob.glob(f'{webinCli_dir}/*.jar')}
    if latest_file_name in existing_jars:  # if the Webin-Cli jar file is up to date, return the file name
        print("webin-cli software is up to date")
        return latest_file_name

    # The latest jar is missing (directory empty or only older releases): download it
    print('downloading the latest release of webin-cli...................................................................')
    # Argument list instead of a shell string, so directory paths with spaces or
    # shell metacharacters are passed to wget untouched; the URL found above is
    # reused rather than querying the API a second time
    command = ['wget', '--show-progress', '-q', f'--directory-prefix={webinCli_dir}', download_url]
    try:
        sp = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        print("> ERROR: wget is required to download the Webin CLI jar file but could not be found")
        sys.exit(1)
    out, err = sp.communicate()
    sys.stderr.write(out.decode())
    sys.stderr.write(err.decode())
    if sp.returncode != 0:
        # Do not hand back the name of a jar that was never written to disk
        print(f"> ERROR: Failed to download the Webin CLI jar file from {download_url}")
        sys.exit(1)
    return latest_file_name



def spreadsheet_format(spreadsheet_file):
"""
Expand Down Expand Up @@ -271,6 +318,8 @@ def file_prep(self):
:return: log_path_err, log_path_out: Directory and file to store error and output
:return: all_error_runs: File which will contain IDs of failed submissions
"""
now = datetime.now()
now_str = now.strftime("%d%m%y-%H%M") # datetime in minutes format
self.manifest_prefix = Path(self.file).stem
# self.manifest_prefix = os.path.splitext(os.path.basename(self.file))[0]

Expand All @@ -283,6 +332,7 @@ def file_prep(self):
print(self.log_path_err, self.log_path_out)

self.all_error_runs = Path(self.args.directory) / "failed_validation.txt"
self.log_path_total = Path(self.args.directory) / "submissions" / f"log_total_{now_str}.txt"

def construct_command(self):
"""
Expand Down Expand Up @@ -341,14 +391,15 @@ def post_process(self, output, error, timestamp):
:param error: The standard error from the run command (.stderr)
:param timestamp: The timestamp of the run command
"""
with open(self.log_path_err, "w") as err_file, open(self.log_path_out, "w") as out_file, open(self.all_error_runs, "a") as all_errors:
with open(self.log_path_err, "w") as err_file, open(self.log_path_out, "w") as out_file, open(self.all_error_runs, "a") as all_errors, open(self.log_path_total, "a") as logs:
if error:
err_file.write(str(error.decode("UTF-8")))
err_file.write("[{}] VALIDATION FAILED - {}\n".format(timestamp, self.file))
all_errors.write("*" * 100 + "\n")
all_errors.write("[{}] {}\n".format(timestamp, self.manifest_prefix))
all_errors.write(str(error.decode("UTF-8")) + "\n")
all_errors.write("*" * 100 + "\n")
logs.write(str(error.decode("UTF-8")))

if output:
if "The submission has been validated successfully." in str(output):
Expand All @@ -363,6 +414,8 @@ def post_process(self, output, error, timestamp):
all_errors.write("[{}] {}\n".format(timestamp, self.manifest_prefix))
all_errors.write(str(output.decode("UTF-8")))
all_errors.write("*" * 100 + "\n")
logs.write(str(output.decode("UTF-8")))



def submit_validate(file, args):
Expand All @@ -379,9 +432,12 @@ def submit_validate(file, args):
webincli_process.post_process(out, err, now) # Post-process - save output accordingly



if __name__ == "__main__":

args = get_args() # Get arguments provided to the tool
to_process = spreadsheet_format(args.spreadsheet) # Create a dataframe of data to be processed (submitted or validated)
to_process = spreadsheet_format(
args.spreadsheet) # Create a dataframe of data to be processed (submitted or validated)

# Generate the manifest files
create_manifests = GenerateManifests(to_process, args.directory, args.geneticContext)
Expand All @@ -394,3 +450,5 @@ def submit_validate(file, args):
else:
for process in processed:
submit_validate(process[0], args)