From 6207fd8faa14c94da9d390f579aa973f2e537b84 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 12 Dec 2023 10:54:03 -0500 Subject: [PATCH 1/6] For development -- bind mount and deploy local catalog and metalad note: was rebased and I visually copied needed items, hopefully I did not miss any. I did not copy git describe since I do not see need in it now. --- docker-compose.dev.override.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-compose.dev.override.yml b/docker-compose.dev.override.yml index 327ee159..f6778647 100644 --- a/docker-compose.dev.override.yml +++ b/docker-compose.dev.override.yml @@ -11,10 +11,12 @@ services: command: [ "/sbin/my_init", "--", "bash", "-c", - "git config --global --add safe.directory /app && pip3 install -U -e . && flask init-db && exec flask run --host=0.0.0.0 --debug" + "git config --global --add safe.directory /app && pip3 install -U -e . && pip install -e /metalad && pip install -e /catalog && flask init-db && exec flask run --host=0.0.0.0 --debug" ] volumes: - ./:/app + - ../datalad-catalog:/catalog + - ../datalad-metalad:/metalad - ./instance:/app/instance worker: From ecb77a46f34220bcea37b444446ae31cb5098cad Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 12 Dec 2023 10:58:15 -0500 Subject: [PATCH 2/6] Initial steps to interface/show catalog. note: there was conflict during rebase in overview.html - I copied code block in html for adding that "C" link out. --- datalad_registry/overview.py | 35 ++++++++++++++++++++++++ datalad_registry/templates/overview.html | 5 +++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/datalad_registry/overview.py b/datalad_registry/overview.py index f8c5915f..74a56462 100644 --- a/datalad_registry/overview.py +++ b/datalad_registry/overview.py @@ -69,3 +69,38 @@ def overview(): # No type hints due to mypy#7187. 
search_query=query, search_error=search_error, ) + +import json +from flask import send_from_directory +import datalad.api as dl + + +# @bp.route('/catalog/', defaults={'path': ''}) +# TODO: move from placing dataset identifier within path -- place into query +# TODO: do not use ID may be but use URL, or allow for both -- that would make it possible to make those URLs +# pointing to datasets easier to create/digest for humans +@bp.route('/catalog//') +def send_report(id_, path): + # ds_id = request.args.get("id", None, type=int) + if not path: + path = "index.html" + if path == "index.html": + lgr.warning(f"PATH: {path} id: {id_}") + # let's get metadata for the ds_id + repo_url_row = db.session.execute( + db.select(RepoUrl).filter_by(id=id_) + ).one_or_none() + if repo_url_row: + repo_url_row = repo_url_row[0] + metadatas = {} + for mr in repo_url_row.metadata_: + m = mr.extracted_metadata + m['type'] = 'dataset' + m['dataset_id'] = repo_url_row.ds_id + # Didn't want to translate yet + # metadatas[mr.extractor_name] = dl.catalog_translate(m) + # TEMP: get it without translation + metadatas[mr.extractor_name] = m + lgr.warning(f"ROW: {metadatas}") + # TODO: figure out how to pass all the metadata goodness to the catalog + return send_from_directory('/app-catalog', path) diff --git a/datalad_registry/templates/overview.html b/datalad_registry/templates/overview.html index d2287ce8..92163db3 100644 --- a/datalad_registry/templates/overview.html +++ b/datalad_registry/templates/overview.html @@ -150,7 +150,10 @@

Search query syntax

{%- for i in pagination -%} - {{ i.url }} + + {{ i.url }} + C + {% if i.ds_id is not none %} {{ i.ds_id }} From e001a979d5327c038619e03a2ce6a1dc221af0d5 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 12 Dec 2023 11:10:38 -0500 Subject: [PATCH 3/6] Limit catalog metadata to only 3 extractors, do try to translate --- datalad_registry/overview.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datalad_registry/overview.py b/datalad_registry/overview.py index 74a56462..5fc7ec83 100644 --- a/datalad_registry/overview.py +++ b/datalad_registry/overview.py @@ -94,13 +94,14 @@ def send_report(id_, path): repo_url_row = repo_url_row[0] metadatas = {} for mr in repo_url_row.metadata_: + if mr.extractor_name not in {'metalad_core', 'bids_dataset', 'metalad_studyminimeta'}: + continue m = mr.extracted_metadata m['type'] = 'dataset' m['dataset_id'] = repo_url_row.ds_id # Didn't want to translate yet - # metadatas[mr.extractor_name] = dl.catalog_translate(m) - # TEMP: get it without translation - metadatas[mr.extractor_name] = m + metadatas[mr.extractor_name] = dl.catalog_translate(m) + # metadatas[mr.extractor_name] = m lgr.warning(f"ROW: {metadatas}") # TODO: figure out how to pass all the metadata goodness to the catalog return send_from_directory('/app-catalog', path) From 8392b666bddedde64955f2bbb0da95765fbab067 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 12 Dec 2023 11:16:04 -0500 Subject: [PATCH 4/6] more debug / notes --- datalad_registry/overview.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datalad_registry/overview.py b/datalad_registry/overview.py index 5fc7ec83..f8c4d8cf 100644 --- a/datalad_registry/overview.py +++ b/datalad_registry/overview.py @@ -96,10 +96,13 @@ def send_report(id_, path): for mr in repo_url_row.metadata_: if mr.extractor_name not in {'metalad_core', 'bids_dataset', 'metalad_studyminimeta'}: continue + # TODO: here metadta record had only @context and @graph and no other fields + 
# figure out if enough.... m = mr.extracted_metadata m['type'] = 'dataset' m['dataset_id'] = repo_url_row.ds_id # Didn't want to translate yet + lgr.warning(f"Translating record with keys {m.keys()}") metadatas[mr.extractor_name] = dl.catalog_translate(m) # metadatas[mr.extractor_name] = m lgr.warning(f"ROW: {metadatas}") From 0e6a808fb58db2c717c4d33f35c8f10f22129eed Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 17 Jan 2024 16:20:04 -0500 Subject: [PATCH 5/6] add isort config from datalad-container --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 8d3753bb..e3e798d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,12 @@ requires = ["setuptools >= 46.4.0", "versioningit ~= 3.0", "wheel ~= 0.32"] build-backend = "setuptools.build_meta" +[tool.isort] +force_grid_wrap = 2 +include_trailing_comma = true +multi_line_output = 3 +combine_as_imports = true + [tool.versioningit] [tool.pytest.ini_options] From 7dd5b76023cf2fb54e60c827684dcaf2c78fd94b Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 17 Jan 2024 16:21:31 -0500 Subject: [PATCH 6/6] Found uncommitted: Adding catalog config we need to tune etc note: was conflict in docker-compose.dev.local.override.yml -- copied invocation of datalad-catalog create --- datalad_registry/overview.py | 64 ++++++++++----- .../resources/catalog-config.json | 80 +++++++++++++++++++ docker-compose.dev.override.yml | 2 +- 3 files changed, 125 insertions(+), 21 deletions(-) create mode 100644 datalad_registry/resources/catalog-config.json diff --git a/datalad_registry/overview.py b/datalad_registry/overview.py index f8c4d8cf..c8d455b7 100644 --- a/datalad_registry/overview.py +++ b/datalad_registry/overview.py @@ -3,10 +3,24 @@ import logging -from flask import Blueprint, render_template, request -from sqlalchemy import nullslast, select +import datalad.api as dl +from flask import ( + Blueprint, + render_template, + request, + 
send_from_directory, +) +from sqlalchemy import ( + nullslast, + select, +) -from datalad_registry.models import RepoUrl, db +from datalad_registry.blueprints.api.url_metadata import URLMetadataModel +from datalad_registry.models import ( + RepoUrl, + URLMetadata, + db, +) from datalad_registry.search import parse_query lgr = logging.getLogger(__name__) @@ -70,16 +84,12 @@ def overview(): # No type hints due to mypy#7187. search_error=search_error, ) -import json -from flask import send_from_directory -import datalad.api as dl - # @bp.route('/catalog/', defaults={'path': ''}) # TODO: move from placing dataset identifier within path -- place into query -# TODO: do not use ID may be but use URL, or allow for both -- that would make it possible to make those URLs -# pointing to datasets easier to create/digest for humans -@bp.route('/catalog//') +# TODO: do not use ID may be but use URL, or allow for both -- that would make it +# possible to make those URLs pointing to datasets easier to create/digest for humans +@bp.route("/catalog//") def send_report(id_, path): # ds_id = request.args.get("id", None, type=int) if not path: @@ -94,17 +104,31 @@ def send_report(id_, path): repo_url_row = repo_url_row[0] metadatas = {} for mr in repo_url_row.metadata_: - if mr.extractor_name not in {'metalad_core', 'bids_dataset', 'metalad_studyminimeta'}: + if mr.extractor_name not in { + "metalad_core", + "bids_dataset", + "metalad_studyminimeta", + }: continue - # TODO: here metadta record had only @context and @graph and no other fields - # figure out if enough.... - m = mr.extracted_metadata - m['type'] = 'dataset' - m['dataset_id'] = repo_url_row.ds_id + # TODO: here metadta record had only @context and @graph and no other + # fields figure out if enough.... 
+ m = URLMetadataModel.from_orm(mr).dict() + # lgr.warning(f"ROW: {m}") + m["type"] = "dataset" + m["dataset_id"] = repo_url_row.ds_id # Didn't want to translate yet lgr.warning(f"Translating record with keys {m.keys()}") - metadatas[mr.extractor_name] = dl.catalog_translate(m) - # metadatas[mr.extractor_name] = m - lgr.warning(f"ROW: {metadatas}") + m_translated = dl.catalog_translate(m)[0]["translated_metadata"] + metadatas[mr.extractor_name] = m_translated + + if "metalad_studyminimeta" not in metadatas: + metadatas["metalad_core"]["name"] = repo_url_row.url + + for m in metadatas.values(): + m["name"] = repo_url_row.url + lgr.warning(f"URL: {repo_url_row.url!r} {type(repo_url_row.url)}") + dl.catalog_add("/app-catalog", metadata=m) # TODO: figure out how to pass all the metadata goodness to the catalog - return send_from_directory('/app-catalog', path) + # f'/app-catalog/dataset/{repo_url_row.ds_id}/' + # f'{metadatas['metalad_core']['dataset_version']}' + return send_from_directory("/app-catalog", path) diff --git a/datalad_registry/resources/catalog-config.json b/datalad_registry/resources/catalog-config.json new file mode 100644 index 00000000..59892a79 --- /dev/null +++ b/datalad_registry/resources/catalog-config.json @@ -0,0 +1,80 @@ +{ + "catalog_name": "DataCat of Registry", + "logo_path": "", + "link_color": "#fba304", + "link_hover_color": "#af7714", + "social_links": { + "about": null, + "documentation": "https://docs.datalad.org/projects/catalog/en/latest/", + "github": "https://github.com/datalad/datalad-catalog", + "mastodon": "https://fosstodon.org/@datalad", + "x": "https://x.com/datalad" + }, + "dataset_options": { + "include_metadata_export": true + }, + "property_sources": { + "dataset": { + "dataset_id": { + "rule": "single", + "source": "metalad_core" + }, + "dataset_version": { + "rule": "single", + "source": "metalad_core" + }, + "type": { + "rule": "single", + "source": "metalad_core" + }, + "children": { + "rule": "merge", + "source": 
"any" + }, + "short_name": {}, + "description": { + "rule": "priority", + "source": [ + "catalog_readme", + "metalad_studyminimeta", + "datacite_gin", + "bids_dataset" + ] + }, + "doi": {}, + "url": { + "rule": "merge", + "source": "any" + }, + "authors": { + "rule": "merge", + "source": "any" + }, + "keywords": { + "rule": "merge", + "source": "any" + }, + "license": {}, + "funding": { + "rule": "merge", + "source": "any" + }, + "publications": { + "rule": "merge", + "source": "any" + }, + "subdatasets": { + "rule": "merge", + "source": "any" + }, + "additional_display": { + "rule": "merge", + "source": "any" + }, + "top_display": { + "rule": "merge", + "source": "any" + } + } + } +} diff --git a/docker-compose.dev.override.yml b/docker-compose.dev.override.yml index f6778647..0a781a46 100644 --- a/docker-compose.dev.override.yml +++ b/docker-compose.dev.override.yml @@ -11,7 +11,7 @@ services: command: [ "/sbin/my_init", "--", "bash", "-c", - "git config --global --add safe.directory /app && pip3 install -U -e . && pip install -e /metalad && pip install -e /catalog && flask init-db && exec flask run --host=0.0.0.0 --debug" + "git config --global --add safe.directory /app && pip3 install -U -e . && pip install -e /metalad && pip install -e /catalog && flask init-db && datalad catalog-create -c /app-catalog -F /app/datalad_registry/resources/catalog-config.json && exec flask run --host=0.0.0.0 --debug" ] volumes: - ./:/app