Skip to content
Draft
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
282fb27
config file
burnout87 Mar 26, 2024
536833d
import
burnout87 Apr 8, 2024
5cbcaba
conf and conf_example
burnout87 Apr 8, 2024
cb6947e
default config
burnout87 Apr 8, 2024
853a62e
default config not from file
burnout87 Apr 8, 2024
96b3c68
string formatting
burnout87 Apr 8, 2024
0121200
max_download_size nbadapter arg for init
burnout87 Apr 8, 2024
8a10a2c
using flaskdynaconf
burnout87 Apr 9, 2024
b885781
Merge branch 'master' into config-file
burnout87 Apr 9, 2024
28019da
passing config to find notebooks
burnout87 Apr 9, 2024
9fe50e9
passing config to find notebooks
burnout87 Apr 9, 2024
de621d8
logging test_Service
burnout87 Apr 9, 2024
a63cbad
logging test_Service
burnout87 Apr 9, 2024
ea5f0aa
check download_limit before download
burnout87 Apr 9, 2024
6f77263
check download_limit before download
burnout87 Apr 9, 2024
a02325f
using request lib
burnout87 Apr 10, 2024
145be50
multiple attempts request infos file url
burnout87 Apr 10, 2024
ada5c00
code optimized
burnout87 Apr 10, 2024
8e9491d
code optimized
burnout87 Apr 10, 2024
60b7842
code optimized
burnout87 Apr 10, 2024
70f6146
code optimized
burnout87 Apr 10, 2024
a0da3f4
test adapted
burnout87 Apr 10, 2024
420191b
providing config arg to NotebookAdapter init
burnout87 Apr 10, 2024
f91058a
adapted test
burnout87 Apr 10, 2024
769c795
missing import
burnout87 Apr 17, 2024
bae3e9e
passing config to NotebookAdapter
burnout87 Apr 17, 2024
c648563
no need to pass arg to download_file
burnout87 Apr 17, 2024
402ce9a
no need for conf_dir
burnout87 Apr 17, 2024
8932525
init NotebookAdapter
burnout87 Apr 17, 2024
4cf68f2
sentry_url in the config
burnout87 Apr 17, 2024
ea6be31
Merge branch 'master' into config-file
burnout87 Apr 17, 2024
7da5dd2
default config value
burnout87 Apr 17, 2024
9044564
ontology_path config
burnout87 Apr 17, 2024
7adb8d4
better defaults
burnout87 Apr 17, 2024
fe31a80
better defaults for deploy
burnout87 Apr 17, 2024
f2be97f
better way to extract default ontology path
burnout87 Apr 17, 2024
8cf862b
host and port in the config file
burnout87 Apr 17, 2024
068fa70
config_ontology_path
burnout87 Apr 19, 2024
1615489
Merge branch 'master' into config-file
dsavchenko Apr 23, 2024
ecdceba
removed unused import
burnout87 Apr 23, 2024
870f3ef
Merge branch 'config-file' of github.com:oda-hub/nb2workflow into config-file
burnout87 Apr 23, 2024
e454b75
arg settings path
burnout87 Apr 23, 2024
1224d4f
config args
burnout87 Apr 24, 2024
e9f0d72
better way to refer to the property
burnout87 Apr 26, 2024
a97ec5c
settings path for deploy
burnout87 Apr 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nb2workflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pkg_resources
import os

name = "nb2workflow"

Expand Down
33 changes: 23 additions & 10 deletions nb2workflow/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import rdflib
from oda_api.ontology_helper import Ontology
from nb2workflow.nbadapter import NotebookAdapter
from dynaconf import Dynaconf

logger = logging.getLogger(__name__)

Expand All @@ -30,7 +31,9 @@
"filename_pattern": '.*',
}

default_ontology_path = "https://odahub.io/ontology/ontology.ttl"
local_config = Dynaconf(settings_files=['settings.toml'])
config_ontology_path = local_config.get('default.service.ontology_path', 'http://odahub.io/ontology/ontology.ttl')


default_python_version = '3.10'

Expand Down Expand Up @@ -69,8 +72,9 @@ def build_container(git_origin,
engine="docker",
cleanup=False,
nb2wversion=version(),
ontology_path=default_ontology_path,
ontology_path=config_ontology_path,
**kwargs):

if engine == "docker":
return _build_with_docker(git_origin=git_origin,
local=local,
Expand Down Expand Up @@ -200,7 +204,10 @@ def _build_with_kaniko(git_origin,
namespace="oda-staging",
cleanup=True,
nb2wversion=version(),
ontology_path=default_ontology_path):
ontology_path=None):

if ontology_path is None:
ontology_path = local_config.get('default.service.ontology_path', config_ontology_path)

#secret should be created beforehand https://github.com/GoogleContainerTools/kaniko#pushing-to-docker-hub

Expand Down Expand Up @@ -316,7 +323,8 @@ def _build_with_kaniko(git_origin,
return container_metadata


def _extract_resource_requirements(local_repo_path, ontology_path=default_ontology_path):
def _extract_resource_requirements(local_repo_path, ontology_path=config_ontology_path):

ontology = Ontology(ontology_path)
resources = {}

Expand Down Expand Up @@ -345,7 +353,8 @@ def _build_with_docker(git_origin,
source_from='localdir',
cleanup=False,
nb2wversion=version(),
ontology_path=default_ontology_path):
ontology_path=config_ontology_path):

if cleanup:
logger.warning('Post-build cleanup is not implemented for docker builds')

Expand Down Expand Up @@ -575,8 +584,8 @@ def deploy(git_origin,
build_timestamp=False,
cleanup=False,
nb2wversion=version(),
ontology_path=default_ontology_path):
ontology_path=config_ontology_path):

container = build_container(git_origin,
local=local,
run_tests=run_tests,
Expand Down Expand Up @@ -617,10 +626,14 @@ def main():
parser.add_argument('--local', action="store_true", default=False)
parser.add_argument('--build-engine', metavar="build_engine", default="docker")
parser.add_argument('--nb2wversion', metavar="nb2wversion", default=version())
parser.add_argument('--ontology-path', metavar="ontology_path", default=default_ontology_path)
parser.add_argument('--ontology-path', metavar="ontology_path")

args = parser.parse_args()

deploy_ontology_path = args.ontology_path
if deploy_ontology_path is None:
deploy_ontology_path = config_ontology_path

setup_logging()

deploy(args.repository,
Expand All @@ -629,7 +642,7 @@ def main():
local=args.local,
build_engine=args.build_engine,
nb2wversion=args.nb2wversion,
ontology_path=args.ontology_path)
ontology_path=deploy_ontology_path)


if __name__ == "__main__":
Expand Down
9 changes: 5 additions & 4 deletions nb2workflow/galaxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import nbformat
from nbconvert.exporters import ScriptExporter
from dynaconf import Dynaconf

from ensureconda.api import ensureconda
import subprocess as sp
Expand All @@ -29,8 +30,8 @@

logger = logging.getLogger()

default_ontology_path = 'http://odahub.io/ontology/ontology.ttl'
local_config = Dynaconf(settings_files=['settings.toml'])
config_ontology_path = local_config.get('default.service.ontology_path', 'http://odahub.io/ontology/ontology.ttl')

global_req = []

Expand Down Expand Up @@ -539,7 +540,7 @@ def to_galaxy(input_path,
citations_bibfile = None,
help_file = None,
available_channels = ['default', 'conda-forge'],
ontology_path = default_ontology_path,
ontology_path = config_ontology_path,
test_data_baseurl = None
):

Expand Down Expand Up @@ -690,7 +691,7 @@ def main():
tool_version = args.tool_version
ontology_path = args.ontology_path
if ontology_path is None:
ontology_path = default_ontology_path
ontology_path = config_ontology_path
bibfile = args.citations_bibfile
help_file = args.help_file
test_data_baseurl = args.test_data_baseurl
Expand Down
98 changes: 70 additions & 28 deletions nb2workflow/nbadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
import nbformat
from nbconvert import HTMLExporter
from urllib.parse import urlencode, urlparse
from urllib import request
from dynaconf import Dynaconf

from . import logstash

from nb2workflow.sentry import sentry
from nb2workflow.health import current_health
from nb2workflow import workflows
Expand Down Expand Up @@ -249,14 +250,20 @@ class NotebookAdapter:
limit_output_attachment_file = None


def __init__(self, notebook_fn, tempdir_cache=None, n_download_max_tries=10, download_retry_sleep=.5):
def __init__(self, notebook_fn, tempdir_cache=None, config=None):
self.notebook_fn = os.path.abspath(notebook_fn)
self.name = notebook_short_name(notebook_fn)
self.tempdir_cache = tempdir_cache
logger.debug("notebook adapter for %s", self.notebook_fn)
logger.debug(self.extract_parameters())
self.n_download_max_tries = n_download_max_tries
self.download_retry_sleep_s = download_retry_sleep

if config is None:
config = dict()

self.n_download_max_tries = config.get('SERVICE.N_DOWNLOAD_MAX_TRIES', 10)
self.download_retry_sleep_s = config.get('SERVICE.DOWNLOAD_RETRY_SLEEP', .5)
self.max_download_size = config.get('SERVICE.MAX_DOWNLOAD_SIZE', 1e6)
sentry.sentry_url = config.get('SERVICE.SENTRY_URL', None)

@staticmethod
def get_unique_filename_from_url(file_url):
Expand Down Expand Up @@ -567,26 +574,51 @@ def extract_output(self):

def download_file(self, file_url, tmpdir):
n_download_tries_left = self.n_download_max_tries
size_ok = False
file_downloaded = False
file_name = NotebookAdapter.get_unique_filename_from_url(file_url)
while True:
file_path = os.path.join(tmpdir, file_name)
for _ in range(n_download_tries_left):
step = 'getting the file size'
if not size_ok:
response = requests.head(file_url)
if response.status_code == 200:
file_size = int(response.headers.get('Content-Length', 0))
if file_size > self.max_download_size:
msg = ("The file appears to be too large to download, "
f"and the download limit is set to {self.max_download_size} bytes.")
logger.warning(msg)
sentry.capture_message(msg)
raise Exception(msg)
else:
logger.warning(
(f"An issue occurred when attempting to {step} of the file at the url {file_url}. "
f"Sleeping {self.download_retry_sleep_s} seconds until retry")
)
time.sleep(self.download_retry_sleep_s)
continue
size_ok = True
step = 'downloading file'
response = requests.get(file_url)
if response.status_code == 200:
with open(os.path.join(tmpdir, file_name), 'wb') as file:
with open(file_path, 'wb') as file:
file.write(response.content)
file_downloaded = True
break
else:
n_download_tries_left -= 1
if n_download_tries_left > 0:
logger.warning(
f"An issue occurred when attempting to download the file at the url {file_url}, "
f"sleeping {self.download_retry_sleep_s} seconds until retry")
time.sleep(self.download_retry_sleep_s)
else:
msg = (f"An issue occurred when attempting to download the url {file_url}, "
"this might be related to an invalid url, please check the input provided")
logger.warning(msg)
sentry.capture_message(msg)
raise Exception(msg)
logger.warning(
(f"An issue occurred when attempting to {step} the file at the url {file_url}. "
f"Sleeping {self.download_retry_sleep_s} seconds until retry")
)
time.sleep(self.download_retry_sleep_s)
continue

if not (file_downloaded and size_ok):
msg = (f"An issue occurred when attempting to {step} at the url {file_url}. "
"This might be related to an invalid url, please check the input provided")
logger.warning(msg)
sentry.capture_message(msg)
raise Exception(msg)

return file_name

Expand Down Expand Up @@ -712,13 +744,12 @@ def remove_tmpdir(self):
def notebook_short_name(ipynb_fn):
return os.path.basename(ipynb_fn).replace(".ipynb","")

def find_notebooks(source, tests=False, pattern = r'.*') -> Dict[str, NotebookAdapter]:
def find_notebooks(source, tests=False, pattern = r'.*', config=None) -> Dict[str, NotebookAdapter]:

def base_filter(fn):
good = "output" not in fn and "preproc" not in fn
good = good and re.match(pattern, os.path.basename(fn))
return good


if tests:
filt = lambda fn: base_filter(fn) and "/test_" in fn
Expand All @@ -734,23 +765,26 @@ def base_filter(fn):
raise Exception("no notebooks found in the directory:",source)

notebook_adapters=dict([
(notebook_short_name(notebook),NotebookAdapter(notebook)) for notebook in notebooks
(
notebook_short_name(notebook),
NotebookAdapter(notebook, config=config)
) for notebook in notebooks
])
logger.debug("notebook adapters: %s",notebook_adapters)


elif os.path.isfile(source):
if pattern != r'.*':
logger.warning('Filename pattern is set but source %s is a single file. Ignoring pattern.')
notebook_adapters={notebook_short_name(source): NotebookAdapter(source)}
notebook_adapters={notebook_short_name(source): NotebookAdapter(source, config=config)}

else:
raise Exception("requested notebook not found:",source)

return notebook_adapters

def nbinspect(nb_source, out=True, machine_readable=False):
nbas = find_notebooks(nb_source)
def nbinspect(nb_source, out=True, machine_readable=False, config=None):
nbas = find_notebooks(nb_source, config=config)

# class CustomEncoder(json.JSONEncoder):
# def default(self, obj):
Expand Down Expand Up @@ -885,9 +919,9 @@ def json():



def nbrun(nb_source, inp, inplace=False, optional_dispather=True, machine_readable=False):
def nbrun(nb_source, inp, inplace=False, optional_dispather=True, machine_readable=False, config=None):

nbas = find_notebooks(nb_source)
nbas = find_notebooks(nb_source, config=config)

if len(nbas) > 1:
nba = nbas[inp.pop('notebook')]
Expand Down Expand Up @@ -1011,7 +1045,9 @@ def main_inspect():

setup_logging(args.debug)

nbinspect(args.notebook, machine_readable=args.machine_readable)
config = Dynaconf(settings_files=['settings.toml'])

nbinspect(args.notebook, machine_readable=args.machine_readable, config=config)


def main():
Expand All @@ -1034,7 +1070,13 @@ def main():

setup_logging(args.debug)

nbrun(args.notebook, inputs, inplace=args.inplace, optional_dispather=not args.mmoda_validation, machine_readable=args.machine_readable)
config = Dynaconf(settings_files=['settings.toml'])

nbrun(args.notebook, inputs,
inplace=args.inplace,
optional_dispather=not args.mmoda_validation,
machine_readable=args.machine_readable,
config=config)


if __name__ == "__main__":
Expand Down
4 changes: 4 additions & 0 deletions nb2workflow/sentry.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ def sentry_url(self):
self._sentry_url = os.environ.get('SENTRY_URL', "https://[email protected]/4506186624335872")
return self._sentry_url

@sentry_url.setter
def sentry_url(self, url):
self._sentry_url = url

@property
def have_sentry(self):
if self.sentry_url is None or self.sentry_url == '' or sentry_sdk is None:
Expand Down
Loading