From 3ef6dab2fe9583c710775fd0fac2a44b280c941a Mon Sep 17 00:00:00 2001
From: Ken Lui <116421546+kenlhlui@users.noreply.github.com>
Date: Mon, 20 Jan 2025 18:28:16 -0500
Subject: [PATCH] 1. Added GitHub Actions workflows for Jekyll deployment and Poetry dependency export 2. Updated CITATION.cff & README

---
 .github/workflows/jekyll-gh-pages.yml         | 51 +++++++++++++
 .../workflows/poetry-export_dependencies.yml  | 73 +++++++++++++++++++
 CITATION.cff                                  |  4 +-
 README.md                                     | 16 ++--
 _config.yml                                   | 19 +++++
 5 files changed, 154 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/jekyll-gh-pages.yml
 create mode 100644 .github/workflows/poetry-export_dependencies.yml
 create mode 100644 _config.yml

diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml
new file mode 100644
index 0000000..e31d81c
--- /dev/null
+++ b/.github/workflows/jekyll-gh-pages.yml
@@ -0,0 +1,51 @@
+# Builds this repository's Jekyll site and publishes it to GitHub Pages
+name: Deploy Jekyll with GitHub Pages dependencies preinstalled
+
+on:
+  # Trigger: every push that lands on main
+  push:
+    branches: [main]
+
+  # Trigger: manual run from the Actions tab
+  workflow_dispatch:
+
+# GITHUB_TOKEN scopes required for a GitHub Pages deployment
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Only one deployment runs at a time; runs queued between the in-progress run and the newest queued run are skipped.
+# In-progress runs are deliberately NOT cancelled so that a production deployment can always finish.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Build job
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Build with Jekyll
+        uses: actions/jekyll-build-pages@v1
+        with:
+          source: ./
+          destination: ./_site
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+
+  # Deployment job
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml
new file mode 100644
index 0000000..36306c8
--- /dev/null
+++ b/.github/workflows/poetry-export_dependencies.yml
@@ -0,0 +1,73 @@
+name: Poetry export requirements.txt
+on: # Re-export requirements.txt whenever one of the dependency files is pushed
+  push:
+    branches:
+      - '**' # Any branch; unlike '*', '**' also matches names containing '/'
+    paths:
+      - 'requirements.txt'
+      - 'pyproject.toml'
+      - 'poetry.lock'
+permissions:
+  contents: write # The job commits and pushes the regenerated files
+jobs:
+  poetry-export_dependencies: # Re-lock, export, then commit back to the pushed branch
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12' # Quoted so YAML keeps it a string, not a float
+      - name: Install poetry
+        uses: abatilo/actions-poetry@v4
+        with:
+          poetry-version: 'latest'
+      - name: Install the poetry-plugin-export
+        run: poetry self add poetry-plugin-export
+      - name: Update poetry lock file
+        run: poetry lock
+      - name: Export the project dependencies to requirements.txt
+        run: |
+          poetry export -f requirements.txt --output requirements.txt
+      - name: Get branch name
+        shell: bash
+        run: echo "BRANCH_NAME=${GITHUB_REF#refs/heads/}" >> "$GITHUB_ENV"
+      - name: Check for changes
+        id: check_changes
+        run: |
+          if [[ -n "$(git status --porcelain requirements.txt poetry.lock)" ]]; then
+            echo "changes=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "changes=false" >> "$GITHUB_OUTPUT"
+          fi
+      - name: Configure Git
+        run: |
+          git config --local user.email "github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "github-actions[bot]"
+      - name: Commit and push if changed
+        if: steps.check_changes.outputs.changes == 'true'
+        run: |
+          # Commit first: `git pull --rebase` refuses to run on a dirty work tree
+          git add requirements.txt poetry.lock
+          git commit -m "chore: update requirements.txt and poetry.lock [skip ci]"
+
+          # Replay the new commit on top of anything pushed in the meantime.
+          git pull --rebase origin "$BRANCH_NAME"
+
+          # Push with retry logic; the branch name is read from the environment, never templated into the script
+          max_attempts=3
+          attempt=1
+          while [ "$attempt" -le "$max_attempts" ]; do
+            if git push origin "$BRANCH_NAME"; then
+              break
+            else
+              if [ "$attempt" -eq "$max_attempts" ]; then
+                echo "Failed to push after $max_attempts attempts"
+                exit 1
+              fi
+              echo "Push failed, attempt $attempt of $max_attempts. Pulling and retrying..."
+              git pull --rebase origin "$BRANCH_NAME"
+              attempt=$((attempt + 1))
+            fi
+          done
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CITATION.cff b/CITATION.cff
index 4700932..97e309c 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -4,7 +4,7 @@ authors:
   - family-names: "Lui"
     given-names: "Lok Hei"
     orcid: "https://orcid.org/0000-0001-5077-1530"
-title: "Dataverse metadata Crawler"
+title: "Dataverse Metadata Crawler"
 version: 0.1.0
 date-released: 2025-01-16
-url: "https://github.com/kenlhlui/dataverse-metadata-crawler-p"
\ No newline at end of file
+url: "https://github.com/scholarsportal/dataverse-metadata-crawler"
diff --git a/README.md b/README.md
index cf4b45d..6189d31 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/
 
 2. Change to the project directory
    ```sh
-   cd ~/dataverse-metadata-export-p
+   cd ./dataverse-metadata-crawler
    ```
 
 3. 
Create an environment file (.env) @@ -65,6 +65,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALIAS -v VERSION ``` **Required arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |--------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| | --collection_alias | -c | TEXT | Name of the collection to crawl.
**[required]** | None | @@ -72,6 +73,7 @@ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALI **Optional arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |----------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------| | --auth | -a | TEXT | Authentication token to access the Dataverse repository.
If | None | @@ -96,6 +98,7 @@ python3 dvmeta/main.py -c demo -v 1.0 -d -s -p -a xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx ``` ## 📂Output Structure + | File | Description | |-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------| | ds_metadata_yyyymmdd-HHMMSS.json | Datasets' their data files' metadata in JSON format. | @@ -145,21 +148,20 @@ If you use this software in your work, please cite it using the following metada APA: ``` -Lui, L. H. (2025). Dataverse metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/kenlhlui/dataverse-metadata-crawler-p +Lui, L. H. (2025). Dataverse Metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/scholarsportal/dataverse-metadata-crawler ``` BibTeX: ``` -@software{Lui_Dataverse_metadata_Crawler_2025, +@software{Lui_Dataverse_Metadata_Crawler_2025, author = {Lui, Lok Hei}, month = jan, -title = {{Dataverse metadata Crawler}}, -url = {https://github.com/kenlhlui/dataverse-metadata-crawler-p}, +title = {{Dataverse Metadata Crawler}}, +url = {https://github.com/scholarsportal/dataverse-metadata-crawler}, version = {0.1.0}, year = {2025} } ``` ## ✍️Authors -Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - kenlh.lui@utoronto.ca - +Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - [kenlh.lui@utoronto.ca](mailto:kenlh.lui@utoronto.ca) diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..2217569 --- /dev/null +++ b/_config.yml @@ -0,0 +1,19 @@ +# Site settings +title: Dataverse Metadata Crawler +description: A Python CLI tool for extracting and exporting metadata from Dataverse repositories to JSON and CSV formats. 
+baseurl: '/dataverse-metadata-crawler' # Path prefix the site is served under (empty for a root deployment)
+url: 'https://scholarsportal.github.io' # GitHub Pages host for this site
+
+remote_theme: pages-themes/primer
+plugins:
+  - jekyll-remote-theme # Append to the existing plugins list if there already is one
+  - jekyll-seo-tag # Needed by the primer theme
+
+# Markdown rendering
+markdown: kramdown
+kramdown:
+  input: GFM # Parse input as GitHub Flavored Markdown (GFM)
+
+# Build paths
+source: ./
+destination: ./_site