diff --git a/datalad_registry/overview.py b/datalad_registry/overview.py
index 7aadcc39..c45f3108 100644
--- a/datalad_registry/overview.py
+++ b/datalad_registry/overview.py
@@ -14,19 +14,39 @@
lgr = logging.getLogger(__name__)
bp = Blueprint("overview", __name__, url_prefix="/overview")
-_SORT_ATTRS = {
- "keys-asc": ("annex_key_count", "asc"),
- "keys-desc": ("annex_key_count", "desc"),
- "update-asc": ("last_update_dt", "asc"),
- "update-desc": ("last_update_dt", "desc"),
- "url-asc": ("url", "asc"),
- "url-desc": ("url", "desc"),
- "annexed_files_in_wt_count-asc": ("annexed_files_in_wt_count", "asc"),
- "annexed_files_in_wt_count-desc": ("annexed_files_in_wt_count", "desc"),
- "annexed_files_in_wt_size-asc": ("annexed_files_in_wt_size", "asc"),
- "annexed_files_in_wt_size-desc": ("annexed_files_in_wt_size", "desc"),
- "git_objects_kb-asc": ("git_objects_kb", "asc"),
- "git_objects_kb-desc": ("git_objects_kb", "desc"),
+# Sorting is now driven by _AVAILABLE_COLUMNS below: each sortable column
+# carries a "db_field" entry naming the RepoUrl attribute to order by
+
+# Available columns with their metadata
+_AVAILABLE_COLUMNS = {
+ "url": {"label": "URL", "sortable": True, "db_field": "url"},
+ "dataset": {"label": "Dataset", "sortable": False},
+ "commit": {"label": "Commit", "sortable": False},
+ "head_dt": {"label": "Last commit date", "sortable": True, "db_field": "head_dt"},
+ "keys": {
+ "label": "Annex keys",
+ "sortable": True,
+ "tooltip": "Number of annex keys",
+ "db_field": "annex_key_count",
+ },
+ "annexed_files_count": {
+ "label": "Nr of Annexed files",
+ "sortable": True,
+ "tooltip": "Number of annexed files in working tree",
+ "db_field": "annexed_files_in_wt_count",
+ },
+ "annexed_files_size": {
+ "label": "Size of Annexed files",
+ "sortable": True,
+ "tooltip": "Size of annexed files in working tree",
+ "db_field": "annexed_files_in_wt_size",
+ },
+ "update": {"label": "Last update", "sortable": True, "db_field": "last_update_dt"},
+ "git_objects": {
+ "label": "Size of .git/objects",
+ "sortable": True,
+ "db_field": "git_objects_kb",
+ },
+ "metadata": {"label": "Metadata", "sortable": False},
}
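+
+# Illustrative request (hypothetical values):
+#   /overview/?columns=url,keys&sort_by=keys&sort=desc
+# renders only the "URL" and "Annex keys" columns and orders rows by the
+# mapped db_field "annex_key_count", descending.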
@@ -36,7 +56,8 @@
@bp.get("/")
def overview(): # No type hints due to mypy#7187.
- default_sort_scheme = "update-desc"
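+    # The old combined sort scheme (e.g. "update-desc") is split into a
+    # column name ("sort_by") and a direction ("sort"), validated separately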
+ default_sort_column = "update"
+ default_sort_direction = "desc"
base_select_stmt = select(RepoUrl)
@@ -54,12 +75,34 @@ def overview(): # No type hints due to mypy#7187.
else:
base_select_stmt = base_select_stmt.filter(criteria)
- # Decipher sorting scheme
- sort_by = request.args.get("sort", default_sort_scheme, type=str)
- if sort_by not in _SORT_ATTRS:
- lgr.debug("Ignoring unknown sort parameter: %s", sort_by)
- sort_by = default_sort_scheme
- col, sort_method = _SORT_ATTRS[sort_by]
+ # Handle configurable columns
+ columns_param = request.args.get("columns", None, type=str)
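+    # ?columns=<name>,<name>,... selects which table columns to render, in order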
+ if columns_param:
+ # Parse comma-separated column names
+ requested_columns = [col.strip() for col in columns_param.split(",")]
+ # Filter out invalid column names
+ visible_columns = [
+ col for col in requested_columns if col in _AVAILABLE_COLUMNS
+ ]
+ if not visible_columns:
+ visible_columns = list(_AVAILABLE_COLUMNS)
+ else:
+ visible_columns = list(_AVAILABLE_COLUMNS)
+
+ # Handle sorting using new system
+ sort_by_param = request.args.get("sort_by", default_sort_column, type=str)
+ sort_direction = request.args.get("sort", default_sort_direction, type=str)
+ # Validate sort_by parameter
+ if sort_by_param in _AVAILABLE_COLUMNS and _AVAILABLE_COLUMNS[sort_by_param].get(
+ "sortable"
+ ):
+ col = _AVAILABLE_COLUMNS[sort_by_param]["db_field"]
+ sort_method = sort_direction if sort_direction in ["asc", "desc"] else "asc"
+ else:
+ lgr.debug("Ignoring unknown sort_by parameter: %s", sort_by_param)
+ sort_by_param = default_sort_column
+ col = _AVAILABLE_COLUMNS[default_sort_column]["db_field"]
+ sort_method = default_sort_direction
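+    # At this point col is a valid db_field name and sort_method is
+    # either "asc" or "desc"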
# Apply sorting
select_stmt = base_select_stmt.order_by(
@@ -76,7 +119,11 @@ def overview(): # No type hints due to mypy#7187.
"overview.html",
pagination=pagination,
stats=stats,
- sort_by=sort_by,
+ sort_by_param=sort_by_param,
+        sort_direction=sort_method,  # the validated direction ("asc"/"desc")
search_query=query,
search_error=search_error,
+ visible_columns=visible_columns,
+ available_columns=_AVAILABLE_COLUMNS,
+ columns_param=columns_param,
)
diff --git a/datalad_registry/tasks/__init__.py b/datalad_registry/tasks/__init__.py
index 56d52dae..c7db27d4 100644
--- a/datalad_registry/tasks/__init__.py
+++ b/datalad_registry/tasks/__init__.py
@@ -23,6 +23,7 @@
from datalad_registry.utils import StrEnum
from datalad_registry.utils.datalad_tls import (
clone,
+ get_head_commit_date,
get_head_describe,
get_origin_annex_key_count,
get_origin_annex_uuid,
@@ -100,6 +101,7 @@ def _update_dataset_url_info(dataset_url: RepoUrl, ds: Dataset) -> None:
dataset_url.head = ds.repo.get_hexsha("origin/HEAD")
dataset_url.head_describe = get_head_describe(ds)
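+    # head_dt backs the new "Last commit date" column of the overview page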
+ dataset_url.head_dt = get_head_commit_date(ds)
dataset_url.branches = get_origin_branches(ds)
diff --git a/datalad_registry/templates/overview.html b/datalad_registry/templates/overview.html
index 84905c80..43a25bf7 100644
--- a/datalad_registry/templates/overview.html
+++ b/datalad_registry/templates/overview.html
@@ -1,10 +1,10 @@
-{%- macro sort_href(default, other) -%}
- {%- if sort_by == default -%}
- {%- set new_sort = other -%}
+{%- macro sort_href(column_name, current_sort_by, current_direction) -%}
+ {%- if current_sort_by == column_name -%}
+ {%- set new_direction = 'asc' if current_direction == 'desc' else 'desc' -%}
{%- else -%}
- {%- set new_sort = default -%}
+ {%- set new_direction = 'desc' -%}
{%- endif -%}
- href="{{ url_for('.overview', query=search_query, sort='{}'.format(new_sort)) }}"
+ href="{{ url_for('.overview', query=search_query, sort_by=column_name, sort=new_direction, columns=columns_param) }}"
{%- endmacro -%}
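+{#- Example: while the table is sorted by "keys" descending, the "keys" header
+    link toggles to ?sort_by=keys&sort=asc; header links for any other column
+    start at "desc". -#}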
{% macro render_pagination_widget(pagination, endpoint) %}
@@ -16,7 +16,7 @@
{% for page in pagination.iter_pages() %}
{% if page %}
{% if page != pagination.page %}
-            <a href="{{ url_for(endpoint, page=page, sort=sort_by, query=search_query) }}">{{ page }}</a>
+            <a href="{{ url_for(endpoint, page=page, sort_by=sort_by_param, sort=sort_direction, query=search_query, columns=columns_param) }}">{{ page }}</a>
{% else %}
{{ page }}
{% endif %}
@@ -60,7 +60,11 @@
{%- endif -%}
placeholder='Search query'
/>
-      <input type="submit" value="Search" />
+      <input type="submit" value="Search" />
+      <input type="hidden" name="sort_by" value="{{ sort_by_param }}" />
+      {% if columns_param %}
+        <input type="hidden" name="columns" value="{{ columns_param }}" />
+      {% endif %}
{% if search_error -%}
ERROR: {{ search_error }}
@@ -69,7 +73,11 @@
{% if search_query -%}
{%- endif %}
@@ -132,58 +140,53 @@ Search query syntax
-        <th scope="col"><a {{ sort_href("url-asc", "url-desc") }}>URL</a></th>
-        <th scope="col">Dataset</th>
-        <th scope="col">Commit</th>
-        <th scope="col"><a {{ sort_href("keys-asc", "keys-desc") }}>Annex keys</a></th>
-        <th scope="col" class="tooltip">
-          <a {{ sort_href("annexed_files_in_wt_count-asc", "annexed_files_in_wt_count-desc") }}>
-            Nr of Annexed files
-          </a>
-          <span class="tooltiptext">Number of annexed files in working tree</span>
-        </th>
-        <th scope="col" class="tooltip">
-          <a {{ sort_href("annexed_files_in_wt_size-asc", "annexed_files_in_wt_size-desc") }}>
-            Size of Annexed files
-          </a>
-          <span class="tooltiptext">Size of annexed files in working tree</span>
-        </th>
-        <th scope="col">
-          <a {{ sort_href("update-asc", "update-desc") }}>Last update</a>
-        </th>
-        <th scope="col">
-          <a {{ sort_href("git_objects_kb-asc", "git_objects_kb-desc") }}>
-            Size of .git/objects
-          </a>
-        </th>
-        <th scope="col">
-          Metadata
-        </th>
+        {%- for column in visible_columns -%}
+          <th scope="col">
+            {%- if available_columns[column]['sortable'] -%}
+              <a {{ sort_href(column, sort_by_param, sort_direction) }}>{{ available_columns[column]['label'] }}</a>
+            {%- else -%}
+              {{ available_columns[column]['label'] }}
+            {%- endif -%}
+            {%- if available_columns[column].get('tooltip') -%}
+              <span class="tooltiptext">{{ available_columns[column]['tooltip'] }}</span>
+            {%- endif -%}
+          </th>
+        {%- endfor -%}
{%- for i in pagination -%}
-        <td><a href="{{ i.url }}">{{ i.url }}</a></td>
-        <td>
-          {% if i.ds_id is not none %}
-            {{ i.ds_id }}
-          {% endif %}
-        </td>
-        <td>
-          {{ i.head_describe if i.head_describe is not none }}
-        </td>
-        <td>{{ i.annex_key_count|intcomma if i.annex_key_count is not none }}</td>
-        <td>{{ i.annexed_files_in_wt_count|intcomma if i.annexed_files_in_wt_count is not none }}</td>
-        <td>{{ i.annexed_files_in_wt_size|filesizeformat if i.annexed_files_in_wt_size is not none }}</td>
-        <td>{{ i.last_update_dt.strftime("%Y-%m-%dT%H:%M:%S%z") if i.last_update_dt is not none }}</td>
-        <td>{{ (i.git_objects_kb * 1024)|filesizeformat if i.git_objects_kb is not none }}</td>
-        <td>
-          {%- for data in i.metadata_ -%}
-            {{ data.extractor_name }}
-            {{ ", " if not loop.last else "" }}
-          {%- endfor -%}
-        </td>
+        {%- for column in visible_columns -%}
+          <td>
+            {%- if column == 'url' -%}
+              <a href="{{ i.url }}">{{ i.url }}</a>
+            {%- elif column == 'dataset' -%}
+              {% if i.ds_id is not none %}
+                {{ i.ds_id }}
+              {% endif %}
+            {%- elif column == 'commit' -%}
+              {{ i.head_describe if i.head_describe is not none }}
+            {%- elif column == 'head_dt' -%}
+              {{ i.head_dt.strftime("%Y-%m-%dT%H:%M:%S%z") if i.head_dt is not none }}
+            {%- elif column == 'keys' -%}
+              {{ i.annex_key_count|intcomma if i.annex_key_count is not none }}
+            {%- elif column == 'annexed_files_count' -%}
+              {{ i.annexed_files_in_wt_count|intcomma if i.annexed_files_in_wt_count is not none }}
+            {%- elif column == 'annexed_files_size' -%}
+              {{ i.annexed_files_in_wt_size|filesizeformat if i.annexed_files_in_wt_size is not none }}
+            {%- elif column == 'update' -%}
+              {{ i.last_update_dt.strftime("%Y-%m-%dT%H:%M:%S%z") if i.last_update_dt is not none }}
+            {%- elif column == 'git_objects' -%}
+              {{ (i.git_objects_kb * 1024)|filesizeformat if i.git_objects_kb is not none }}
+            {%- elif column == 'metadata' -%}
+              {%- for data in i.metadata_ -%}
+                {{ data.extractor_name }}
+                {{ ", " if not loop.last else "" }}
+              {%- endfor -%}
+            {%- endif -%}
+          </td>
+        {%- endfor -%}
{%- endfor -%}
diff --git a/datalad_registry/tests/test_overview.py b/datalad_registry/tests/test_overview.py
index 2763f051..ad9680de 100644
--- a/datalad_registry/tests/test_overview.py
+++ b/datalad_registry/tests/test_overview.py
@@ -127,6 +127,25 @@ class TestOverView:
"https://www.dandiarchive.org",
],
),
+ # Test new head_dt sorting
+        (
+            "head_dt-asc",
+            # head_dt is not populated for these fixture URLs, so both
+            # directions yield the same order
+            [
+                "https://www.example.com",
+                "http://www.datalad.org",
+                "https://handbook.datalad.org",
+                "https://www.dandiarchive.org",
+            ],
+        ),
+        (
+            "head_dt-desc",
+            [
+                "https://www.example.com",
+                "http://www.datalad.org",
+                "https://handbook.datalad.org",
+                "https://www.dandiarchive.org",
+            ],
+        ),
],
)
def test_sorting(
@@ -135,7 +154,35 @@ def test_sorting(
"""
Test for the sorting of dataset URLs in the overview page
"""
- resp = flask_client.get("/overview/", query_string={"sort": sort_by})
+        query_string = {}
+        if sort_by:
+            # Convert the old "<column>-<direction>" scheme into the new
+            # sort_by/sort parameters
+            if sort_by.endswith("-asc"):
+                column, direction = sort_by[: -len("-asc")], "asc"
+            elif sort_by.endswith("-desc"):
+                column, direction = sort_by[: -len("-desc")], "desc"
+            else:
+                column, direction = sort_by, "asc"
+
+            # Map old column names to the new ones where they differ
+            column_mapping = {
+                "annexed_files_in_wt_count": "annexed_files_count",
+                "annexed_files_in_wt_size": "annexed_files_size",
+                "git_objects_kb": "git_objects",
+            }
+            query_string = {
+                "sort_by": column_mapping.get(column, column),
+                "sort": direction,
+            }
+
+        resp = flask_client.get("/overview/", query_string=query_string)
soup = BeautifulSoup(resp.text, "html.parser")
@@ -296,3 +343,151 @@ def test_metadata(self, flask_client):
"https://handbook.datalad.org": {"metalad_core"},
"https://www.dandiarchive.org": set(),
}
+
+ @pytest.mark.usefixtures("populate_with_std_ds_urls")
+ @pytest.mark.parametrize(
+ "columns_param, expected_header_count",
+ [
+ (None, 10), # All available columns
+ ("url,dataset,commit", 3),
+ ("url,head_dt,update", 3),
+ ("url,keys,metadata", 3),
+ ("invalid_column", 10), # Invalid columns fall back to all columns
+ ("url,invalid_column,dataset", 2), # Valid columns only
+ ],
+ )
+ def test_configurable_columns(
+ self, columns_param: Optional[str], expected_header_count: int, flask_client
+ ):
+ """
+ Test the configurable columns functionality
+ """
+ query_params = {}
+ if columns_param:
+ query_params["columns"] = columns_param
+
+ resp = flask_client.get("/overview/", query_string=query_params)
+
+ soup = BeautifulSoup(resp.text, "html.parser")
+
+ # Count header columns
+ header_row = soup.body.table.tr
+ header_count = len(header_row.find_all("th"))
+
+ assert header_count == expected_header_count
+
+ # Verify data rows have matching column count
+ data_rows = soup.body.table.find_all("tr")[1:]
+ if data_rows: # Only check if there are data rows
+ for row in data_rows:
+ assert len(row.find_all("td")) == expected_header_count
+
+ @pytest.mark.usefixtures("populate_with_std_ds_urls")
+ def test_last_commit_date_column_present(self, flask_client):
+ """
+ Test that the last commit date column is displayed when requested
+ """
+ resp = flask_client.get("/overview/", query_string={"columns": "url,head_dt"})
+
+ soup = BeautifulSoup(resp.text, "html.parser")
+
+ # Check that the header contains "Last commit date"
+ headers = [th.text.strip() for th in soup.body.table.tr.find_all("th")]
+ assert "Last commit date" in headers
+
+ # Verify we have exactly 2 columns
+ assert len(headers) == 2
+
+ @pytest.mark.usefixtures("populate_with_std_ds_urls")
+ @pytest.mark.parametrize(
+ "sort_by_param, sort_direction, expected_order",
+ [
+ # Test new sorting system
+ (
+ "keys",
+ "asc",
+ [
+ "https://www.example.com",
+ "http://www.datalad.org",
+ "https://handbook.datalad.org",
+ "https://www.dandiarchive.org",
+ ],
+ ),
+ (
+ "keys",
+ "desc",
+ [
+ "https://handbook.datalad.org",
+ "http://www.datalad.org",
+ "https://www.example.com",
+ "https://www.dandiarchive.org",
+ ],
+ ),
+ (
+ "url",
+ "asc",
+ [
+ "http://www.datalad.org",
+ "https://handbook.datalad.org",
+ "https://www.dandiarchive.org",
+ "https://www.example.com",
+ ],
+ ),
+ (
+ "head_dt",
+ "asc",
+                # head_dt is null for all fixture URLs, so both directions
+                # yield the same order
+                [
+                    "https://www.example.com",
+                    "http://www.datalad.org",
+                    "https://handbook.datalad.org",
+                    "https://www.dandiarchive.org",
+                ],
+ ),
+ ],
+ )
+ def test_new_sorting_system(
+ self,
+ sort_by_param: str,
+ sort_direction: str,
+ expected_order: list[str],
+ flask_client,
+ ):
+ """
+ Test the new sorting system with sort_by and sort parameters
+ """
+ resp = flask_client.get(
+ "/overview/",
+ query_string={"sort_by": sort_by_param, "sort": sort_direction},
+ )
+
+ soup = BeautifulSoup(resp.text, "html.parser")
+
+ url_list = [row.td.a.string for row in soup.body.table.find_all("tr")[1:]]
+
+ assert url_list == expected_order
+
+ @pytest.mark.usefixtures("populate_with_std_ds_urls")
+ def test_new_sorting_with_columns(self, flask_client):
+ """
+ Test that new sorting system works with column configuration
+ """
+ resp = flask_client.get(
+ "/overview/",
+ query_string={"sort_by": "keys", "sort": "desc", "columns": "url,keys"},
+ )
+
+ soup = BeautifulSoup(resp.text, "html.parser")
+
+ # Check we have the right number of columns
+ headers = soup.body.table.tr.find_all("th")
+ assert len(headers) == 2
+
+ # Check ordering is still correct
+ url_list = [row.td.a.string for row in soup.body.table.find_all("tr")[1:]]
+ expected_order = [
+ "https://handbook.datalad.org",
+ "http://www.datalad.org",
+ "https://www.example.com",
+ "https://www.dandiarchive.org",
+ ]
+ assert url_list == expected_order
diff --git a/datalad_registry/tests/test_utils/test_datalad_tls.py b/datalad_registry/tests/test_utils/test_datalad_tls.py
index a917ba63..412d1783 100644
--- a/datalad_registry/tests/test_utils/test_datalad_tls.py
+++ b/datalad_registry/tests/test_utils/test_datalad_tls.py
@@ -7,6 +7,7 @@
from datalad_registry.utils.datalad_tls import (
WtAnnexedFileInfo,
clone,
+ get_head_commit_date,
get_origin_annex_key_count,
get_origin_annex_uuid,
get_origin_branches,
@@ -272,3 +273,36 @@ def test_normal_operation(self, ds_name, branch_name, request, tmp_path):
l2_clone.repo.call_git(["push", "-u", "origin", branch_name])
assert get_origin_upstream_branch(l2_clone) == branch_name
+
+
+@pytest.mark.parametrize(
+ "ds_name",
+ [
+ "empty_ds_annex",
+ "two_files_ds_annex",
+ "empty_ds_non_annex",
+ "two_files_ds_non_annex",
+ ],
+)
+def test_get_head_commit_date(ds_name, request, tmp_path):
+ """
+ Test the get_head_commit_date function
+ """
+ from datetime import datetime
+
+ ds: Dataset = request.getfixturevalue(ds_name)
+ ds_clone = clone(source=ds.path, path=tmp_path)
+
+ # Get the commit date using our function
+ commit_date = get_head_commit_date(ds_clone)
+
+ # Verify it's a datetime object
+ assert isinstance(commit_date, datetime)
+
+    # Verify it matches what git itself reports for origin/HEAD in the clone
+    # (%aI is the strict-ISO author date, matching "authordate:iso8601-strict")
+    expected_date_str = ds_clone.repo.call_git(
+        ["log", "-1", "--format=%aI", "origin/HEAD"]
+    ).strip()
+    expected_date = datetime.fromisoformat(expected_date_str)
+
+    assert commit_date == expected_date
diff --git a/datalad_registry/utils/datalad_tls.py b/datalad_registry/utils/datalad_tls.py
index 80d4ef49..311a9d4e 100644
--- a/datalad_registry/utils/datalad_tls.py
+++ b/datalad_registry/utils/datalad_tls.py
@@ -1,4 +1,5 @@
from dataclasses import dataclass
+from datetime import datetime
import re
from typing import Optional
from uuid import UUID
@@ -104,6 +105,26 @@ def get_head_describe(ds: Dataset) -> str:
return ds.repo.describe(tags=True, always=True)
+def get_head_commit_date(ds: Dataset) -> datetime:
+ """
+ Get the commit date of the HEAD commit of the origin remote of a dataset
+
+ :param ds: The given dataset
+ :return: The commit date of the HEAD commit of the origin remote as a
+ datetime object
+ """
+ # Get the commit date of origin/HEAD in ISO8601 format
+ commit_info = ds.repo.for_each_ref_(
+ pattern="refs/remotes/origin/HEAD", fields=["authordate:iso8601-strict"]
+ )
+
+ if not commit_info:
+ raise RuntimeError("Failed to get commit date for origin/HEAD")
+
+ # Parse the ISO8601 date string to datetime object
+ return datetime.fromisoformat(commit_info[0]["authordate:iso8601-strict"])
+
+
def get_origin_branches(ds: Dataset) -> dict[str, dict[str, str]]:
"""
Get the branches of the origin remote of a given dataset