diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml
new file mode 100644
index 0000000..e31d81c
--- /dev/null
+++ b/.github/workflows/jekyll-gh-pages.yml
@@ -0,0 +1,51 @@
+# Builds the Jekyll site and deploys it to GitHub Pages
+name: Deploy Jekyll with GitHub Pages dependencies preinstalled
+
+on:
+  # Manual runs from the Actions tab
+  workflow_dispatch:
+
+  # Pushes to the main branch
+  push:
+    branches: [main]
+
+# GITHUB_TOKEN permissions used for the Pages deployment
+permissions:
+  id-token: write
+  pages: write
+  contents: read
+
+# Every run joins the same concurrency group. In-progress runs are NOT cancelled,
+# so a production deployment that has started is allowed to complete.
+concurrency:
+  cancel-in-progress: false
+  group: pages
+
+jobs:
+  # Build job: check out, configure Pages, build with Jekyll, upload the result
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Build with Jekyll
+        uses: actions/jekyll-build-pages@v1
+        with:
+          destination: ./_site
+          source: ./
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+
+  # Deployment job: starts only after the build job has finished successfully
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml
new file mode 100644
index 0000000..36306c8
--- /dev/null
+++ b/.github/workflows/poetry-export_dependencies.yml
@@ -0,0 +1,73 @@
+name: Poetry export requirements.txt
+on:
+  push:
+    branches:
+      - '**' # Any branch; unlike '*', the '**' glob also matches names containing '/'
+    paths: # Only run when one of the dependency files changes
+      - 'requirements.txt'
+      - 'pyproject.toml'
+      - 'poetry.lock'
+jobs:
+  poetry-export_dependencies:
+    permissions:
+      contents: write # Needed to push the regenerated files back to the branch
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12' # Quoted so YAML cannot read the version as a float
+      - name: Install poetry
+        uses: abatilo/actions-poetry@v4
+        with:
+          poetry-version: 'latest'
+      - name: Install the poetry-plugin-export
+        run: poetry self add poetry-plugin-export
+      - name: Update poetry lock file
+        run: poetry lock
+      - name: Export the project dependencies to requirements.txt
+        run: |
+          poetry export -f requirements.txt --output requirements.txt
+      - name: Get branch name
+        shell: bash
+        run: echo "BRANCH_NAME=${GITHUB_REF#refs/heads/}" >> "$GITHUB_ENV"
+      - name: Check for changes
+        id: check_changes
+        run: |
+          if [[ -n "$(git status --porcelain requirements.txt poetry.lock)" ]]; then
+            echo "changes=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "changes=false" >> "$GITHUB_OUTPUT"
+          fi
+      - name: Configure Git
+        run: |
+          git config --local user.email "github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "github-actions[bot]"
+      - name: Commit and push if changed
+        if: steps.check_changes.outputs.changes == 'true'
+        run: |
+          # Commit first: `git pull --rebase` aborts when tracked files are modified
+          git add requirements.txt poetry.lock
+          git commit -m "chore: update requirements.txt and poetry.lock [skip ci]"
+
+          # Rebase the new commit onto the latest remote state of the branch
+          git pull --rebase origin "$BRANCH_NAME"
+
+          # Push with retry logic; "$BRANCH_NAME" is read by the shell, not templated
+          max_attempts=3
+          attempt=1
+          while [ "$attempt" -le "$max_attempts" ]; do
+            if git push origin "$BRANCH_NAME"; then
+              break
+            else
+              if [ "$attempt" -eq "$max_attempts" ]; then
+                echo "Failed to push after $max_attempts attempts"
+                exit 1
+              fi
+              echo "Push failed, attempt $attempt of $max_attempts. Pulling and retrying..."
+              git pull --rebase origin "$BRANCH_NAME"
+              attempt=$((attempt + 1))
+            fi
+          done
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CITATION.cff b/CITATION.cff
index 4700932..97e309c 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -4,7 +4,7 @@ authors:
- family-names: "Lui"
given-names: "Lok Hei"
orcid: "https://orcid.org/0000-0001-5077-1530"
-title: "Dataverse metadata Crawler"
+title: "Dataverse Metadata Crawler"
version: 0.1.0
date-released: 2025-01-16
-url: "https://github.com/kenlhlui/dataverse-metadata-crawler-p"
\ No newline at end of file
+url: "https://github.com/scholarsportal/dataverse-metadata-crawler"
diff --git a/README.md b/README.md
index cf4b45d..6189d31 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/
2. Change to the project directory
```sh
- cd ~/dataverse-metadata-export-p
+ cd ./dataverse-metadata-crawler
```
3. Create an environment file (.env)
@@ -65,6 +65,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/
python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALIAS -v VERSION
```
**Required arguments:**
+
| **Option** | **Short** | **Type** | **Description** | **Default** |
|--------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|
| --collection_alias | -c | TEXT | Name of the collection to crawl.
**[required]** | None |
@@ -72,6 +73,7 @@ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALI
**Optional arguments:**
+
| **Option** | **Short** | **Type** | **Description** | **Default** |
|----------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
| --auth | -a | TEXT | Authentication token to access the Dataverse repository.
If | None |
@@ -96,6 +98,7 @@ python3 dvmeta/main.py -c demo -v 1.0 -d -s -p -a xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx
```
## 📂Output Structure
+
| File | Description |
|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------|
| ds_metadata_yyyymmdd-HHMMSS.json | Datasets' their data files' metadata in JSON format. |
@@ -145,21 +148,20 @@ If you use this software in your work, please cite it using the following metada
APA:
```
-Lui, L. H. (2025). Dataverse metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/kenlhlui/dataverse-metadata-crawler-p
+Lui, L. H. (2025). Dataverse Metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/scholarsportal/dataverse-metadata-crawler
```
BibTeX:
```
-@software{Lui_Dataverse_metadata_Crawler_2025,
+@software{Lui_Dataverse_Metadata_Crawler_2025,
author = {Lui, Lok Hei},
month = jan,
-title = {{Dataverse metadata Crawler}},
-url = {https://github.com/kenlhlui/dataverse-metadata-crawler-p},
+title = {{Dataverse Metadata Crawler}},
+url = {https://github.com/scholarsportal/dataverse-metadata-crawler},
version = {0.1.0},
year = {2025}
}
```
## ✍️Authors
-Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - kenlh.lui@utoronto.ca
-
+Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - [kenlh.lui@utoronto.ca](mailto:kenlh.lui@utoronto.ca)
diff --git a/_config.yml b/_config.yml
new file mode 100644
index 0000000..2217569
--- /dev/null
+++ b/_config.yml
@@ -0,0 +1,19 @@
+# Site identity and addressing
+title: "Dataverse Metadata Crawler"
+description: "A Python CLI tool for extracting and exporting metadata from Dataverse repositories to JSON and CSV formats."
+baseurl: /dataverse-metadata-crawler # Path prefix of the site (leave blank for root deployment)
+url: https://scholarsportal.github.io # GitHub Pages host the site is served from
+
+remote_theme: pages-themes/primer
+plugins:
+  - jekyll-remote-theme # Needed for the remote_theme setting
+  - jekyll-seo-tag # Required by primer theme
+
+# Markdown rendering
+markdown: kramdown
+kramdown:
+  input: GFM # Parse content as GitHub Flavored Markdown (GFM)
+
+# Build paths
+destination: ./_site
+source: ./