From 833f8baee91625837608b64e0e4f0e147868f679 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Fri, 17 Jan 2025 17:25:46 -0500 Subject: [PATCH 01/28] Create poetry-export_dependencies.yml Added CI/CD workflow for exporting requirements.txt when poetry.lock/requirements.txt/pyproject.toml is changed. --- .../workflows/poetry-export_dependencies.yml | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 .github/workflows/poetry-export_dependencies.yml diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml new file mode 100644 index 0000000..36306c8 --- /dev/null +++ b/.github/workflows/poetry-export_dependencies.yml @@ -0,0 +1,73 @@ +name: Poetry export requirements.txt +on: + push: + branches: + - '*' # Trigger on any push to any branch + paths: + - 'requirements.txt' + - 'pyproject.toml' + - 'poetry.lock' +jobs: + poetry-export_dependencies: + strategy: + fail-fast: false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.12 + - name: Install poetry + uses: abatilo/actions-poetry@v4 + with: + poetry-version: 'latest' + - name: Install the poetry-plugin-export + run: poetry self add poetry-plugin-export + - name: Update poetry lock file + run: poetry lock + - name: Export the project dependencies to requirements.txt + run: | + poetry export -f requirements.txt --output requirements.txt + - name: Get branch name + shell: bash + run: echo "BRANCH_NAME=${GITHUB_REF#refs/heads/}" >> $GITHUB_ENV + - name: Check for changes + id: check_changes + run: | + if [[ -n "$(git status --porcelain requirements.txt poetry.lock)" ]]; then + echo "changes=true" >> $GITHUB_OUTPUT + else + echo "changes=false" >> $GITHUB_OUTPUT + fi + - name: Configure Git + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + - name: Commit and push if changed + if: steps.check_changes.outputs.changes == 'true' + run: | + # Pull with rebase to get latest changes + git pull --rebase origin ${{ env.BRANCH_NAME }} + + # Stage and commit changes + git add requirements.txt poetry.lock + git commit -m "chore: update requirements.txt and poetry.lock [skip ci]" + + # Push with retry logic + max_attempts=3 + attempt=1 + while [ $attempt -le $max_attempts ]; do + if git push origin ${{ env.BRANCH_NAME }}; then + break + else + if [ $attempt -eq $max_attempts ]; then + echo "Failed to push after $max_attempts attempts" + exit 1 + fi + echo "Push failed, attempt $attempt of $max_attempts. Pulling and retrying..." + git pull --rebase origin ${{ env.BRANCH_NAME }} + attempt=$((attempt + 1)) + fi + done + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From f23640f9badf95caabe07bcd2145a85fafc5ffcf Mon Sep 17 00:00:00 2001 From: Ken Lui Date: Mon, 20 Jan 2025 18:28:16 -0500 Subject: [PATCH 02/28] 1. Added GitHub Actions workflows for Jekyll deployment and Poetry dependency export 2. 
Updated CITATION.cff & README --- .github/workflows/jekyll-gh-pages.yml | 51 +++++++++++++ .../workflows/poetry-export_dependencies.yml | 73 +++++++++++++++++++ CITATION.cff | 4 +- README.md | 16 ++-- _config.yml | 19 +++++ 5 files changed, 154 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/jekyll-gh-pages.yml create mode 100644 .github/workflows/poetry-export_dependencies.yml create mode 100644 _config.yml diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml new file mode 100644 index 0000000..e31d81c --- /dev/null +++ b/.github/workflows/jekyll-gh-pages.yml @@ -0,0 +1,51 @@ +# Sample workflow for building and deploying a Jekyll site to GitHub Pages +name: Deploy Jekyll with GitHub Pages dependencies preinstalled + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Build job + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Build with Jekyll + uses: actions/jekyll-build-pages@v1 + with: + source: ./ + destination: ./_site + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + + # Deployment job + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml new file mode 100644 index 0000000..36306c8 --- /dev/null +++ b/.github/workflows/poetry-export_dependencies.yml @@ -0,0 +1,73 @@ +name: Poetry export requirements.txt +on: + push: + branches: + - '*' # Trigger on any push to any branch + paths: + - 'requirements.txt' + - 'pyproject.toml' + - 'poetry.lock' +jobs: + poetry-export_dependencies: + strategy: + fail-fast: false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.12 + - name: Install poetry + uses: abatilo/actions-poetry@v4 + with: + poetry-version: 'latest' + - name: Install the poetry-plugin-export + run: poetry self add poetry-plugin-export + - name: Update poetry lock file + run: poetry lock + - name: Export the project dependencies to requirements.txt + run: | + poetry export -f requirements.txt --output requirements.txt + - name: Get branch name + shell: bash + run: echo "BRANCH_NAME=${GITHUB_REF#refs/heads/}" >> $GITHUB_ENV + - name: Check for changes + id: check_changes + run: | + if [[ -n "$(git status --porcelain requirements.txt poetry.lock)" ]]; then + echo "changes=true" >> $GITHUB_OUTPUT + else + echo "changes=false" >> $GITHUB_OUTPUT + fi + - name: Configure Git + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + - name: Commit and push if changed + if: steps.check_changes.outputs.changes == 'true' + run: | + # 
Pull with rebase to get latest changes + git pull --rebase origin ${{ env.BRANCH_NAME }} + + # Stage and commit changes + git add requirements.txt poetry.lock + git commit -m "chore: update requirements.txt and poetry.lock [skip ci]" + + # Push with retry logic + max_attempts=3 + attempt=1 + while [ $attempt -le $max_attempts ]; do + if git push origin ${{ env.BRANCH_NAME }}; then + break + else + if [ $attempt -eq $max_attempts ]; then + echo "Failed to push after $max_attempts attempts" + exit 1 + fi + echo "Push failed, attempt $attempt of $max_attempts. Pulling and retrying..." + git pull --rebase origin ${{ env.BRANCH_NAME }} + attempt=$((attempt + 1)) + fi + done + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/CITATION.cff b/CITATION.cff index 4700932..97e309c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ authors: - family-names: "Lui" given-names: "Lok Hei" orcid: "https://orcid.org/0000-0001-5077-1530" -title: "Dataverse metadata Crawler" +title: "Dataverse Metadata Crawler" version: 0.1.0 date-released: 2025-01-16 -url: "https://github.com/kenlhlui/dataverse-metadata-crawler-p" \ No newline at end of file +url: "https://github.com/scholarsportal/dataverse-metadata-crawler" diff --git a/README.md b/README.md index cf4b45d..6189d31 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ 2. Change to the project directory ```sh - cd ~/dataverse-metadata-export-p + cd ./dataverse-metadata-crawler ``` 3. Create an environment file (.env) @@ -65,6 +65,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALIAS -v VERSION ``` **Required arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |--------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| | --collection_alias | -c | TEXT | Name of the collection to crawl.
**[required]** | None | @@ -72,6 +73,7 @@ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALI **Optional arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |----------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------| | --auth | -a | TEXT | Authentication token to access the Dataverse repository.
If | None | @@ -96,6 +98,7 @@ python3 dvmeta/main.py -c demo -v 1.0 -d -s -p -a xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx ``` ## 📂Output Structure + | File | Description | |-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------| | ds_metadata_yyyymmdd-HHMMSS.json | Datasets' their data files' metadata in JSON format. | @@ -145,21 +148,20 @@ If you use this software in your work, please cite it using the following metada APA: ``` -Lui, L. H. (2025). Dataverse metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/kenlhlui/dataverse-metadata-crawler-p +Lui, L. H. (2025). Dataverse Metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/scholarsportal/dataverse-metadata-crawler ``` BibTeX: ``` -@software{Lui_Dataverse_metadata_Crawler_2025, +@software{Lui_Dataverse_Metadata_Crawler_2025, author = {Lui, Lok Hei}, month = jan, -title = {{Dataverse metadata Crawler}}, -url = {https://github.com/kenlhlui/dataverse-metadata-crawler-p}, +title = {{Dataverse Metadata Crawler}}, +url = {https://github.com/scholarsportal/dataverse-metadata-crawler}, version = {0.1.0}, year = {2025} } ``` ## ✍️Authors -Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - kenlh.lui@utoronto.ca - +Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - [kenlh.lui@utoronto.ca](mailto:kenlh.lui@utoronto.ca) diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..2217569 --- /dev/null +++ b/_config.yml @@ -0,0 +1,19 @@ +# Site settings +title: Dataverse Metadata Crawler +description: A Python CLI tool for extracting and exporting metadata from Dataverse repositories to JSON and CSV formats. +baseurl: "/dataverse-metadata-crawler" # Base URL (leave blank for root deployment) +url: "https://scholarsportal.github.io" # Your GitHub Pages URL + +remote_theme: pages-themes/primer +plugins: +- jekyll-remote-theme # add this line to the plugins list if you already have one +- jekyll-seo-tag # Required by primer theme + +# Markdown settings +markdown: kramdown +kramdown: + input: GFM # Enables GitHub Flavored Markdown (GFM) + +# Build settings +source: ./ +destination: ./_site From 4380e3cedd9cc5832379e3c1af79b59588f72595 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 12:48:03 -0500 Subject: [PATCH 03/28] 1. Updated gitignore. --- .gitignore | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..29e00d5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,177 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# exported_files folder +exported_files/ \ No newline at end of file From d16781c22c45f0f110c40a24302249eaaedddc06 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 13:13:01 -0500 Subject: [PATCH 04/28] Update README.md --- README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 6189d31..965fe8d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) -[![Licnese: MIT](https://img.shields.io/badge/Licnese-MIT-blue)](https://opensource.org/license/mit) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue)](https://opensource.org/license/mit) [![Dataverse](https://img.shields.io/badge/Dataverse-FFA500?)](https://dataverse.org/) [![Code Style: Black](https://img.shields.io/badge/code_style-black-black?)](https://github.com/psf/black) @@ -7,10 +7,10 @@ ![Screencapture of the CLI tool](res/screenshot.png) ## 📜Description -A Python CLI tool for extracting and exporting metadata from [Dataverse](https://dataverse.org/) repositories. It supports bulk extraction of dataverses, datasets, and data file metadata from any chosen level of dataverse collection (whole Dataverse repository/sub-Dataverse), with flexible export options to JSON and CSV formats. +A Python CLI tool for extracting and exporting metadata from [Dataverse](https://dataverse.org/) repositories. It supports bulk extraction of dataverses, datasets, and data file metadata from any chosen level of dataverse collection (an entire Dataverse repository/sub-Dataverse), with flexible export options to JSON and CSV formats. ## ✨Features -1. Bulk metadata extraction from Dataverse repositories from any chosen level of collection (top level or selected collection) +1. Bulk metadata extraction from Dataverse repositories at any chosen level of collection (top level or selected collection) 2. JSON & CSV file export options ## 📦Prerequisites @@ -38,11 +38,11 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ notepad .env ``` -4. Configure environment file using your text editor at your choice +4. Configure the environment (.env) file using the text editor of your choice. ```sh # .env file BASE_URL = "TARGET_REPO_URL" # e.g., "https://demo.borealisdata.ca/" - API_KEY = "YOUR_API_KEY" # Find in your Dataverse account settings. You may also specify it in the CLI interface (with -a flag) + API_KEY = "YOUR_API_KEY" # Found in your Dataverse account settings. Can also be specified in the CLI interface using the -a flag. ``` 5. Set up virtual environment (recommended) @@ -68,7 +68,7 @@ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALI | **Option** | **Short** | **Type** | **Description** | **Default** | |--------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| -| --collection_alias | -c | TEXT | Name of the collection to crawl.
**[required]** | None | +| --collection_alias | -c | TEXT | The alias of the collection to crawl.
**[required]** | None | | --version | -v | TEXT | The Dataset version to crawl. Options include:
• `draft` - The draft version, if any
• `latest` - Either a draft (if exists) or the latest published version
• `latest-published` - The latest published version
• `x.y` - A specific version
**[required]** | None (required) | @@ -76,7 +76,7 @@ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALI | **Option** | **Short** | **Type** | **Description** | **Default** | |----------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------| -| --auth | -a | TEXT | Authentication token to access the Dataverse repository.
If | None | +| --auth | -a | TEXT | Authentication token to access the Dataverse repository.
| None | | --log
--no-log | -l | | Output a log file.
Use `--no-log` to disable logging. | `log` (unless `--no-log`) | | --dvdfds_metadata | -d | | Output a JSON file containing metadata of Dataverses, Datasets, and Data Files. | | | --permission | -p | | Output a JSON file that stores permission metadata for all Datasets in the repository. | | @@ -101,13 +101,13 @@ python3 dvmeta/main.py -c demo -v 1.0 -d -s -p -a xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx | File | Description | |-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------| -| ds_metadata_yyyymmdd-HHMMSS.json | Datasets' their data files' metadata in JSON format. | +| ds_metadata_yyyymmdd-HHMMSS.json | Datasets representation & data files metadata in JSON format. | | empty_dv_yyyymmdd-HHMMSS.json | The id of empty dataverse(s) in list format. | | failed_metadata_uris_yyyymmdd-HHMMSS.json | The URIs (URL) of datasets failed to be downloaded. | | permission_dict_yyyymmdd-HHMMSS.json | The perission metadata of datasets with their dataset id. | | pid_dict_yyyymmdd-HHMMSS.json | Datasets' basic info with hierarchical information dictionary.Only exported if -p (permission) flag is used without -d (metadata) flag. | | pid_dict_dd_yyyymmdd-HHMMSS.json | The Hierarchical information of deaccessioned/draft datasets. | -| ds_metadata_yyyymmdd-HHMMSS.csv | Datasets' their data files' metadata in CSV format. | +| ds_metadata_yyyymmdd-HHMMSS.csv | Datasets and their data files' metadata in CSV format. | | log_yyyymmdd-HHMMSS.txt | Summary of the crawling work. | ```sh @@ -129,8 +129,8 @@ exported_files/ No tests have been written yet. Contributions welcome! ## 💻Development -1. Dependencies managment: [poetry](https://python-poetry.org/) - Update the pyproject.toml dependencies changes -2. Linter: [ruff](https://docs.astral.sh/ruff/) - Linting rules are outlined in the pyproject.toml +1. Dependencies managment: [poetry](https://python-poetry.org/) - Use `poetry` to manage dependencies and reflect changes in the `pyproject.toml` file. +2. Linter: [ruff](https://docs.astral.sh/ruff/) - Follow the linting rules outlined in the `pyproject.toml` file. ## 🙌Contributing 1. Fork the repository @@ -148,18 +148,18 @@ If you use this software in your work, please cite it using the following metada APA: ``` -Lui, L. H. (2025). Dataverse Metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/scholarsportal/dataverse-metadata-crawler +Lui, L. H. (2025). Dataverse Metadata Crawler (Version 0.1.1) [Computer software]. 
https://github.com/scholarsportal/dataverse-metadata-crawler ``` BibTeX: ``` @software{Lui_Dataverse_Metadata_Crawler_2025, -author = {Lui, Lok Hei}, -month = jan, -title = {{Dataverse Metadata Crawler}}, -url = {https://github.com/scholarsportal/dataverse-metadata-crawler}, -version = {0.1.0}, -year = {2025} + author = {Lui, Lok Hei}, + month = {jan}, + title = {Dataverse Metadata Crawler}, + url = {https://github.com/scholarsportal/dataverse-metadata-crawler}, + version = {0.1.1}, + year = {2025} } ``` From d91a1dea5f33392bd381d05a52de4f02b35afc42 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 14:33:50 -0500 Subject: [PATCH 05/28] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 965fe8d..7ddda7b 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALI | **Option** | **Short** | **Type** | **Description** | **Default** | |--------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| -| --collection_alias | -c | TEXT | The alias of the collection to crawl.
**[required]** | None | +| --collection_alias | -c | TEXT | The alias of the collection to crawl.
See the guide [here](https://github.com/scholarsportal/dataverse-metadata-crawler/wiki/Guide:-How-to-find-the-COLLECTION_ALIAS-of-a-Dataverse-collection) to learn how to find the collection alias.
**[required]** | None | | --version | -v | TEXT | The Dataset version to crawl. Options include:
• `draft` - The draft version, if any
• `latest` - Either a draft (if exists) or the latest published version
• `latest-published` - The latest published version
• `x.y` - A specific version
**[required]** | None (required) | From d2915b781be9e0fd135abebb0cf705d55abc2ee4 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 14:35:16 -0500 Subject: [PATCH 06/28] Update CITATION.cff --- CITATION.cff | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 97e309c..1386c30 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,10 +1,10 @@ -cff-version: 0.1.0 +cff-version: 0.1.1 message: "If you use this software, please cite it as below." authors: - family-names: "Lui" given-names: "Lok Hei" orcid: "https://orcid.org/0000-0001-5077-1530" title: "Dataverse Metadata Crawler" -version: 0.1.0 -date-released: 2025-01-16 +version: 0.1.1 +date-released: 2025-01-23 url: "https://github.com/scholarsportal/dataverse-metadata-crawler" From df17fc0451b7dc46bf48360c6fa7828716ec4667 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:35:42 -0500 Subject: [PATCH 07/28] 1. Updated spreadsheet.py for inclusion of DF_Hierarchy, DF_Tags & DF_Description. 2. Updated res/spreadsheet_order.csv --- dvmeta/spreadsheet.py | 36 ++++++++++++++++++++++++++++++++++-- res/spreadsheet_order.csv | 7 +------ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/dvmeta/spreadsheet.py b/dvmeta/spreadsheet.py index 826a8e1..822ff0e 100644 --- a/dvmeta/spreadsheet.py +++ b/dvmeta/spreadsheet.py @@ -177,7 +177,7 @@ def _get_dataset_subjects(dictionary: dict) -> dict: return result_dict @staticmethod - def _get_metadata_blocks(dictionary: dict) -> dict: + def _get_metadata_blocks_usage(dictionary: dict) -> dict: metadata_block_dict = { 'Meta_Geo': 'geospatial', 'Meta_SSHM': 'socialscience', @@ -194,6 +194,33 @@ def _get_metadata_blocks(dictionary: dict) -> dict: return result_dict + @staticmethod + def _get_datafile_meta_usage(dictionary: dict) -> dict: + # Get the use of data file directoryLabel (DF_Hierarchy), + # tags (categories; DF_Tags) & description (DF_Description). 
+ if dictionary.get('data', {}).get('files'): + file_nested_list = jmespath.search('data.files[*]', dictionary) + + # Get the count of directoryLabel if it is not None + directorylabel_count = len([file for file in file_nested_list if file.get('directoryLabel') is not None]) + + # Get the count of categories if it is not None + categories_count = len([ + file for file in file_nested_list + if file.get('dataFile', {}).get('categories') is not None + ]) + + # Get the count of description if it is not None + description_count = len([ + file for file in file_nested_list + if file.get('dataFile', {}).get('description') is not None + ]) + + return {'DF_Hierarchy': directorylabel_count, + 'DF_Tags': categories_count, + 'DF_Description': description_count} + return {'DF_Hierarchy': 0, 'DF_Tags': 0, 'DF_Description': 0} + def _get_spreadsheet_order(self) -> list[str]: with Path(self.spreadsheet_order_file_path).open(encoding='utf-8') as file: return file.read().splitlines() @@ -224,6 +251,11 @@ def make_csv(self, meta_dict: dict) -> tuple[str, str]: holding_list = [] for key, _value in meta_dict.items(): jmespath_dict: dict = jmespath.search(f'{self.search_string}', meta_dict[key]) + + # Get the use of data file hierarchy (folders, DF_Hierarchy), + # file tags (categories; DF_Tags) & description (DF_Description) + jmespath_dict.update(self._get_datafile_meta_usage(meta_dict[key])) + # Get the file size and count jmespath_dict['FileSize'] = self._get_data_files_size(meta_dict[key]) jmespath_dict['FileSize_normalized'] = convert_size(jmespath_dict['FileSize']) @@ -245,7 +277,7 @@ def make_csv(self, meta_dict: dict) -> tuple[str, str]: jmespath_dict.update(self._get_dataset_subjects(jmespath_dict)) # Get the metadata blocks and add them to the result dictionary - jmespath_dict.update(self._get_metadata_blocks(jmespath_dict)) + jmespath_dict.update(self._get_metadata_blocks_usage(jmespath_dict)) # Drop the versionNumber and versionMinorNumber keys from the dictionary jmespath_dict.pop('versionNumber', None) diff --git a/res/spreadsheet_order.csv b/res/spreadsheet_order.csv index a547b43..b5cff61 100644 --- a/res/spreadsheet_order.csv +++ b/res/spreadsheet_order.csv @@ -11,14 +11,11 @@ Version FileCount FileSize FileSize_normalized -FileFormat DataverseSubCollection License RestrictedFiles RequestAcces TermsAccess -TermsUse -Citationrequirements DF_Hierarchy DF_Tags DF_Description @@ -113,6 +110,4 @@ DS_Contrib DS_ContribPlus DS_Curator DS_FileDown -DS_Member -DS_UOFT_Admin -DS_Groups +DS_Member \ No newline at end of file From dbebd7cd4ebac4d0a06a03d27e1213631b37cc46 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:54:01 -0500 Subject: [PATCH 08/28] 1. Added CM_AltURL, CM_Agency, CM_ID, CM_CollectionEnd 2. 
Fixed CM_AuthorAff, CM_TimeEnd --- dvmeta/spreadsheet.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dvmeta/spreadsheet.py b/dvmeta/spreadsheet.py index 822ff0e..80fb291 100644 --- a/dvmeta/spreadsheet.py +++ b/dvmeta/spreadsheet.py @@ -33,8 +33,11 @@ def __init__(self, config: dict) -> None: versionMinorNumber: data.versionMinorNumber, CM_Subtitle: data.metadataBlocks.citation.fields[?typeName==`subtitle`].value|[] CM_AltTitle: data.metadataBlocks.citation.fields[?typeName==`alternativeTitle`].value|[] + CM_AltURL: data.metadataBlocks.citation.fields[?typeName==`alternativeURL`].value|[] + CM_Agency: data.metadataBlocks.citation.fields[?typeName==`otherId`].value|[*]|[].otherIdAgency.value + CM_ID: data.metadataBlocks.citation.fields[?typeName==`otherId`].value|[*]|[].otherIdValue.value CM_Author: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorName.value - CM_ContactAff: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorAffiliation.value + CM_AuthorAff: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorAffiliation.value CM_AuthorID: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorIdentifier.value CM_AuthorIDType: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorIdentifierScheme.value CM_ContactName: data.metadataBlocks.citation.fields[?typeName==`datasetContact`].value|[*]|[].datasetContactName.value @@ -74,8 +77,9 @@ def __init__(self, config: dict) -> None: CM_Depositor: data.metadataBlocks.citation.fields[?typeName==`depositor`].value|[] CM_DepositDate: data.metadataBlocks.citation.fields[?typeName==`dateOfDeposit`].value|[] CM_TimeStart: data.metadataBlocks.citation.fields[?typeName==`timePeriodCovered`].value|[].timePeriodCoveredStart.value - CM_TimeEnd: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionStart.value - CM_CollectionStart: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionEnd.value + CM_TimeEnd: data.metadataBlocks.citation.fields[?typeName==`timePeriodCovered`].value|[].timePeriodCoveredEnd.value + CM_CollectionStart: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionStart.value + CM_CollectionEnd: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionEnd.value CM_DataType: data.metadataBlocks.citation.fields[?typeName==`kindOfData`].value|[] CM_SeriesName: data.metadataBlocks.citation.fields[?typeName==`series`].value|[].seriesName.value CM_SeriesInfo: data.metadataBlocks.citation.fields[?typeName==`series`].value|[].seriesInformation.value From fbbe2b1ec8ce8db6418db304864869b9f428a7c5 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:54:45 -0500 Subject: [PATCH 09/28] Update .gitignore to include test.ipynb --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 29e00d5..56bdf5f 100644 --- a/.gitignore +++ b/.gitignore @@ -174,4 +174,7 @@ cython_debug/ .pypirc # exported_files folder -exported_files/ \ No newline at end of file +exported_files/ + +# test.ipynb +test.ipynb \ No newline at end of file From cb2254c8ad697c88e724433c6f1dd98b10fd8f11 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Thu, 23 Jan 2025 17:26:06 -0500 Subject: [PATCH 10/28] 1. 
Modularize the cm_metadata_holding_list creation & make_csv 2. Moved spreadsheet to the last section (to preprare for integrating permission_dict writing). --- dvmeta/main.py | 23 ++++++++++++++--------- dvmeta/spreadsheet.py | 27 +++++++++++++++++++++------ 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/dvmeta/main.py b/dvmeta/main.py index b587516..a0955bd 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -101,7 +101,12 @@ def main( sys.exit(1) # Crawl the collection tree metadata - collections_tree = metadata_crawler.get_collections_tree(collection_alias).json() + response = metadata_crawler.get_collections_tree(collection_alias) + if response is None: + print('Error: Failed to retrieve collections tree. The API request returned None.') + sys.exit(1) + + collections_tree = response.json() # Add collection id and alias to config if collections_tree['status'] == 'OK': @@ -148,7 +153,7 @@ async def main_crawler(): failed_metadata_uris = [] if dvdfds_matadata: # Export dataverse_contents - print('\nCrawling Representation and File metadata of datasets...\n') + print('Crawling Representation and File metadata of datasets...\n') pid_list = list(pid_dict) meta_dict, failed_metadata_uris = await metadata_crawler.get_datasets_meta(pid_list) @@ -190,13 +195,6 @@ async def main_crawler(): } ) - if spreadsheet: - # Export the metadata to a CSV file - csv_file_path, csv_file_checksum = Spreadsheet(config).make_csv(meta_dict) - json_file_checksum_dict.append( - {'type': 'Dataset Metadata CSV', 'path': csv_file_path, 'checksum': csv_file_checksum} - ) - if permission: print('\nCrawling Permission metadata of datasets...\n') ds_id_list = [item['ds_id'] for item in pid_dict.values()] @@ -232,6 +230,13 @@ async def main_crawler(): {'type': 'Empty Dataverses', 'path': empty_dv_json, 'checksum': empty_dv_checksum} ) + if spreadsheet: + # Export the metadata to a CSV file + csv_file_path, csv_file_checksum = Spreadsheet(config).make_csv_file(meta_dict) + json_file_checksum_dict.append( + {'type': 'Dataset Metadata CSV', 'path': csv_file_path, 'checksum': csv_file_checksum} + ) + return meta_dict, json_file_checksum_dict, failed_metadata_uris, collections_tree_flatten meta_dict, json_file_checksum_dict, failed_metadata_uris, collections_tree_flatten = asyncio.run(main_crawler()) diff --git a/dvmeta/spreadsheet.py b/dvmeta/spreadsheet.py index 80fb291..f8fc0c1 100644 --- a/dvmeta/spreadsheet.py +++ b/dvmeta/spreadsheet.py @@ -243,14 +243,14 @@ def _reoder_df_columns(self, df: pd.DataFrame) -> pd.DataFrame: return df[final_column_order] - def make_csv(self, meta_dict: dict) -> tuple[str, str]: - """Create a CSV file from the metadata dictionary. + def _make_cm_meta_holding_list(self, meta_dict: dict) -> list[dict]: + """Create a nested list of metadata dictionaries. Args: - meta_dict (dict): Metadata dictionary + meta_dict (dict): Dataset metadata dictionary. Returns: - tuple[str, str]: Path to the CSV file, Checksum of the CSV file + list[dict]: List of metadata dictionaries (nested) """ holding_list = [] for key, _value in meta_dict.items(): @@ -292,9 +292,24 @@ def make_csv(self, meta_dict: dict) -> tuple[str, str]: holding_list.append(jmespath_dict) - df = pd.DataFrame(holding_list) + return holding_list + + def make_csv_file(self, meta_dict: dict) -> tuple[str, str]: + """Create a CSV file from the nested metadata list. 
+ + Args: + meta_dict (dict): Dataset metadata dictionary + + Returns: + tuple[str, str]: Path to the CSV file, Checksum of the CSV file + """ + # Create a DataFrame from the nested list + + cm_meta_holding_list = self._make_cm_meta_holding_list(meta_dict) + + df = pd.DataFrame(cm_meta_holding_list) - # Reoder the columns in the DataFrame + # Reoder the columns in the DataFrame according to to the preset order (/res/spreadsheet_order.csv) df = self._reoder_df_columns(df) # Create the CSV file From 956664d11887fc6f198eb93b8be657c58829fa0c Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Fri, 24 Jan 2025 18:22:18 -0500 Subject: [PATCH 11/28] 1. Added GitHub Actions workflows for Jekyll deployment and Poetry dependency export 2. Updated CITATION.cff & README 3. Updated merging representation and permission metadata into one dictionary (& JSON file). 4. Updated valid spreadsheet order fileds. --- .github/workflows/jekyll-gh-pages.yml | 51 ++++++++++ .gitignore | 5 +- CITATION.cff | 10 +- README.md | 46 ++++----- _config.yml | 19 ++++ dvmeta/func.py | 72 +++++++++++--- dvmeta/main.py | 134 ++++++++++++++++---------- dvmeta/spreadsheet.py | 73 +++++++++++--- res/spreadsheet_order.csv | 7 +- 9 files changed, 308 insertions(+), 109 deletions(-) create mode 100644 .github/workflows/jekyll-gh-pages.yml create mode 100644 _config.yml diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml new file mode 100644 index 0000000..e31d81c --- /dev/null +++ b/.github/workflows/jekyll-gh-pages.yml @@ -0,0 +1,51 @@ +# Sample workflow for building and deploying a Jekyll site to GitHub Pages +name: Deploy Jekyll with GitHub Pages dependencies preinstalled + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Build job + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Build with Jekyll + uses: actions/jekyll-build-pages@v1 + with: + source: ./ + destination: ./_site + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + + # Deployment job + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 29e00d5..56bdf5f 100644 --- a/.gitignore +++ b/.gitignore @@ -174,4 +174,7 @@ cython_debug/ .pypirc # exported_files folder -exported_files/ \ No newline at end of file +exported_files/ + +# test.ipynb +test.ipynb \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff index 4700932..1386c30 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,10 +1,10 @@ -cff-version: 0.1.0 +cff-version: 0.1.1 message: "If you use this software, please cite it as below." 
authors: - family-names: "Lui" given-names: "Lok Hei" orcid: "https://orcid.org/0000-0001-5077-1530" -title: "Dataverse metadata Crawler" -version: 0.1.0 -date-released: 2025-01-16 -url: "https://github.com/kenlhlui/dataverse-metadata-crawler-p" \ No newline at end of file +title: "Dataverse Metadata Crawler" +version: 0.1.1 +date-released: 2025-01-23 +url: "https://github.com/scholarsportal/dataverse-metadata-crawler" diff --git a/README.md b/README.md index cf4b45d..7ddda7b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) -[![Licnese: MIT](https://img.shields.io/badge/Licnese-MIT-blue)](https://opensource.org/license/mit) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue)](https://opensource.org/license/mit) [![Dataverse](https://img.shields.io/badge/Dataverse-FFA500?)](https://dataverse.org/) [![Code Style: Black](https://img.shields.io/badge/code_style-black-black?)](https://github.com/psf/black) @@ -7,10 +7,10 @@ ![Screencapture of the CLI tool](res/screenshot.png) ## 📜Description -A Python CLI tool for extracting and exporting metadata from [Dataverse](https://dataverse.org/) repositories. It supports bulk extraction of dataverses, datasets, and data file metadata from any chosen level of dataverse collection (whole Dataverse repository/sub-Dataverse), with flexible export options to JSON and CSV formats. +A Python CLI tool for extracting and exporting metadata from [Dataverse](https://dataverse.org/) repositories. It supports bulk extraction of dataverses, datasets, and data file metadata from any chosen level of dataverse collection (an entire Dataverse repository/sub-Dataverse), with flexible export options to JSON and CSV formats. ## ✨Features -1. Bulk metadata extraction from Dataverse repositories from any chosen level of collection (top level or selected collection) +1. Bulk metadata extraction from Dataverse repositories at any chosen level of collection (top level or selected collection) 2. JSON & CSV file export options ## 📦Prerequisites @@ -26,7 +26,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ 2. Change to the project directory ```sh - cd ~/dataverse-metadata-export-p + cd ./dataverse-metadata-crawler ``` 3. Create an environment file (.env) @@ -38,11 +38,11 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ notepad .env ``` -4. Configure environment file using your text editor at your choice +4. Configure the environment (.env) file using the text editor of your choice. ```sh # .env file BASE_URL = "TARGET_REPO_URL" # e.g., "https://demo.borealisdata.ca/" - API_KEY = "YOUR_API_KEY" # Find in your Dataverse account settings. You may also specify it in the CLI interface (with -a flag) + API_KEY = "YOUR_API_KEY" # Found in your Dataverse account settings. Can also be specified in the CLI interface using the -a flag. ``` 5. 
Set up virtual environment (recommended) @@ -65,16 +65,18 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALIAS -v VERSION ``` **Required arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |--------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| -| --collection_alias | -c | TEXT | Name of the collection to crawl.
**[required]** | None | +| --collection_alias | -c | TEXT | The alias of the collection to crawl.
See the guide [here](https://github.com/scholarsportal/dataverse-metadata-crawler/wiki/Guide:-How-to-find-the-COLLECTION_ALIAS-of-a-Dataverse-collection) to learn how to find the collection alias.
**[required]** | None | | --version | -v | TEXT | The Dataset version to crawl. Options include:
• `draft` - The draft version, if any
• `latest` - Either a draft (if exists) or the latest published version
• `latest-published` - The latest published version
• `x.y` - A specific version
**[required]** | None (required) | **Optional arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |----------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------| -| --auth | -a | TEXT | Authentication token to access the Dataverse repository.
If | None | +| --auth | -a | TEXT | Authentication token to access the Dataverse repository.
| None | | --log
--no-log | -l | | Output a log file.
Use `--no-log` to disable logging. | `log` (unless `--no-log`) | | --dvdfds_metadata | -d | | Output a JSON file containing metadata of Dataverses, Datasets, and Data Files. | | | --permission | -p | | Output a JSON file that stores permission metadata for all Datasets in the repository. | | @@ -96,15 +98,16 @@ python3 dvmeta/main.py -c demo -v 1.0 -d -s -p -a xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx ``` ## 📂Output Structure + | File | Description | |-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------| -| ds_metadata_yyyymmdd-HHMMSS.json | Datasets' their data files' metadata in JSON format. | +| ds_metadata_yyyymmdd-HHMMSS.json | Datasets representation & data files metadata in JSON format. | | empty_dv_yyyymmdd-HHMMSS.json | The id of empty dataverse(s) in list format. | | failed_metadata_uris_yyyymmdd-HHMMSS.json | The URIs (URL) of datasets failed to be downloaded. | | permission_dict_yyyymmdd-HHMMSS.json | The perission metadata of datasets with their dataset id. | | pid_dict_yyyymmdd-HHMMSS.json | Datasets' basic info with hierarchical information dictionary.Only exported if -p (permission) flag is used without -d (metadata) flag. | | pid_dict_dd_yyyymmdd-HHMMSS.json | The Hierarchical information of deaccessioned/draft datasets. | -| ds_metadata_yyyymmdd-HHMMSS.csv | Datasets' their data files' metadata in CSV format. | +| ds_metadata_yyyymmdd-HHMMSS.csv | Datasets and their data files' metadata in CSV format. | | log_yyyymmdd-HHMMSS.txt | Summary of the crawling work. | ```sh @@ -126,8 +129,8 @@ exported_files/ No tests have been written yet. Contributions welcome! ## 💻Development -1. Dependencies managment: [poetry](https://python-poetry.org/) - Update the pyproject.toml dependencies changes -2. Linter: [ruff](https://docs.astral.sh/ruff/) - Linting rules are outlined in the pyproject.toml +1. Dependencies managment: [poetry](https://python-poetry.org/) - Use `poetry` to manage dependencies and reflect changes in the `pyproject.toml` file. +2. Linter: [ruff](https://docs.astral.sh/ruff/) - Follow the linting rules outlined in the `pyproject.toml` file. ## 🙌Contributing 1. Fork the repository @@ -145,21 +148,20 @@ If you use this software in your work, please cite it using the following metada APA: ``` -Lui, L. H. (2025). Dataverse metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/kenlhlui/dataverse-metadata-crawler-p +Lui, L. H. (2025). Dataverse Metadata Crawler (Version 0.1.1) [Computer software]. 
https://github.com/scholarsportal/dataverse-metadata-crawler ``` BibTeX: ``` -@software{Lui_Dataverse_metadata_Crawler_2025, -author = {Lui, Lok Hei}, -month = jan, -title = {{Dataverse metadata Crawler}}, -url = {https://github.com/kenlhlui/dataverse-metadata-crawler-p}, -version = {0.1.0}, -year = {2025} +@software{Lui_Dataverse_Metadata_Crawler_2025, + author = {Lui, Lok Hei}, + month = {jan}, + title = {Dataverse Metadata Crawler}, + url = {https://github.com/scholarsportal/dataverse-metadata-crawler}, + version = {0.1.1}, + year = {2025} } ``` ## ✍️Authors -Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - kenlh.lui@utoronto.ca - +Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - [kenlh.lui@utoronto.ca](mailto:kenlh.lui@utoronto.ca) diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..2217569 --- /dev/null +++ b/_config.yml @@ -0,0 +1,19 @@ +# Site settings +title: Dataverse Metadata Crawler +description: A Python CLI tool for extracting and exporting metadata from Dataverse repositories to JSON and CSV formats. +baseurl: "/dataverse-metadata-crawler" # Base URL (leave blank for root deployment) +url: "https://scholarsportal.github.io" # Your GitHub Pages URL + +remote_theme: pages-themes/primer +plugins: +- jekyll-remote-theme # add this line to the plugins list if you already have one +- jekyll-seo-tag # Required by primer theme + +# Markdown settings +markdown: kramdown +kramdown: + input: GFM # Enables GitHub Flavored Markdown (GFM) + +# Build settings +source: ./ +destination: ./_site diff --git a/dvmeta/func.py b/dvmeta/func.py index 8784d83..759a437 100644 --- a/dvmeta/func.py +++ b/dvmeta/func.py @@ -1,6 +1,7 @@ """This module contains functions used in the dvmeta package.""" import os import re +from typing import Optional import httpx import jmespath @@ -26,21 +27,21 @@ def get_pids(read_dict: dict, config: dict) -> tuple: write_dict = {} for key, _item in read_dict.items(): result = jmespath.search( - "data[?type=='dataset'].{ds_id: id, protocol: protocol, authority: authority, identifier: identifier, path: path, path_ids: path_ids}", # noqa: E501 + "data[?type=='dataset'].{id: id, protocol: protocol, authority: authority, identifier: identifier, path: path, path_ids: path_ids}", # noqa: E501 read_dict[key], # noqa: PLR1733 ) if result: for item in result: pid = f"{item['protocol']}:{item['authority']}/{item['identifier']}" - ds_id = item['ds_id'] + id = item['id'] path = '/' + item['path'] if item['path'] else None path_ids = item['path_ids'] dict_to_append = { - str(pid): { # pid needs to be converted to string if it's not already + str(id): { # pid needs to be converted to string if it's not already 'collection_alias': config['COLLECTION_ALIAS'], 'collection_id': config['COLLECTION_ID'], 'pid': pid, - 'ds_id': ds_id, + 'id': id, 'path': path, 'path_ids': path_ids, } @@ -160,15 +161,41 @@ def add_path_to_dataverse_contents(des_dict: dict, ref_dict: dict) -> dict: return des_dict -def add_path_info(meta_dict: dict, pid_dict: dict) -> tuple: - """Add path_info to the metadata dictionary.""" - pid_dict_copy = pid_dict.copy() - for key in list(pid_dict_copy.keys()): - if key in meta_dict: - meta_dict[key]['path_info'] = pid_dict_copy[key] - pid_dict_copy.pop(key) +def add_path_info(meta_dict: dict, ds_dict: dict) -> tuple: + """Add path_info to the metadata dictionary, handling nested structures.""" + ds_dict_copy = ds_dict.copy() + for pid_key, pid_value in list(ds_dict_copy.items()): + 
pid_key_str = str(pid_key) + # Traverse the meta_dict to find matching datasetId + for _meta_key, meta_value in meta_dict.items(): + if isinstance(meta_value, dict) and meta_value.get('data', {}).get('datasetId') == int(pid_key_str): + # Add path_info to the appropriate nested dictionary + meta_value['path_info'] = pid_value + # Remove from ds_dict_copy + ds_dict_copy.pop(pid_key) + break - return meta_dict, pid_dict_copy + return meta_dict, ds_dict_copy + + +def add_perrmission_info(meta_dict: dict, permission_dict: Optional[dict] = None) -> tuple: + """Add permission_info to the metadata dictionary, handling nested structures.""" + if isinstance(permission_dict, dict): + permission_dict_copy = permission_dict.copy() + for pid_key, pid_value in list(permission_dict_copy.items()): + pid_key_str = str(pid_key) + # Traverse the meta_dict to find matching datasetId + for _meta_key, meta_value in meta_dict.items(): + if isinstance(meta_value, dict) and meta_value.get('data', {}).get('datasetId') == int(pid_key_str): + # Add path_info to the appropriate nested dictionary + meta_value['permission_info'] = pid_value + # Remove from permission_dict_copy + permission_dict_copy.pop(pid_key) + break + + return meta_dict, permission_dict_copy + + return meta_dict, None def load_env() -> dict: @@ -190,3 +217,24 @@ def load_env() -> dict: else: config['HEADERS'] = {'Accept': 'application/json'} return config + + +def replace_key_with_dataset_id(dictionary: dict) -> dict: + """Replace the top-level key in the dictionary with the value of 'datasetId' in the nested 'data'. + + Args: + dictionary (dict): The original dictionary. + + Returns: + dict: A new dictionary with keys replaced by the value of 'datasetId'. + """ + new_dict = {} + for old_key, value in dictionary.items(): + # Check if the 'data' key exists and has 'id' + if isinstance(value, dict) and value.get('data', {}).get('datasetId'): + new_key = value.get('data', {}).get('datasetId') # Get the value of 'datasetId' + new_dict[new_key] = value # Use it as the new key + else: + # Keep the original key if 'id' is missing + new_dict[old_key] = value + return new_dict diff --git a/dvmeta/main.py b/dvmeta/main.py index b587516..4f307a5 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -101,7 +101,12 @@ def main( sys.exit(1) # Crawl the collection tree metadata - collections_tree = metadata_crawler.get_collections_tree(collection_alias).json() + response = metadata_crawler.get_collections_tree(collection_alias) + if response is None: + print('Error: Failed to retrieve collections tree. 
The API request returned None.') + sys.exit(1) + + collections_tree = response.json() # Add collection id and alias to config if collections_tree['status'] == 'OK': @@ -120,9 +125,10 @@ def main( async def main_crawler(): # Initialize empty dict and list to store metadata - pid_dict = {'pid': []} + ds_dict = {'pid': []} failed_metadata_ids = [] json_file_checksum_dict = [] + permission_dict = {} # Flatten the collections tree collections_tree_flatten = utils.flatten_collection(collections_tree) @@ -140,43 +146,48 @@ async def main_crawler(): # Add path_ids and path to dataverse_contents from collections_tree_flatten dataverse_contents = func.add_path_to_dataverse_contents(dataverse_contents, collections_tree_flatten) - # Get URIs in collections_tree_flatten and append them to pid_dict, and return empty dataverse to empty_dv - empty_dv_dict, pid_dict = func.get_pids(dataverse_contents, config) + # Get URIs in collections_tree_flatten and append them to ds_dict, and return empty dataverse to empty_dv + empty_dv_dict, ds_dict = func.get_pids(dataverse_contents, config) # Optional arguments meta_dict = {} failed_metadata_uris = [] if dvdfds_matadata: # Export dataverse_contents - print('\nCrawling Representation and File metadata of datasets...\n') - pid_list = list(pid_dict) + print('Crawling Representation and File metadata of datasets...\n') + pid_list = [item['pid'] for item in ds_dict.values()] meta_dict, failed_metadata_uris = await metadata_crawler.get_datasets_meta(pid_list) + # Replace the key with the Data #TEMPORARY FIX + meta_dict = func.replace_key_with_dataset_id(meta_dict) + # Add the path_info to the metadata - meta_dict, pid_dict_dd = func.add_path_info(meta_dict, pid_dict) + meta_dict, pid_dict_dd = func.add_path_info(meta_dict, ds_dict) - # Export the metadata to a JSON file - meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict') - json_file_checksum_dict.append( - { - 'type': 'Dataset Metadata (Representation & File)', - 'path': meta_json_file_path, - 'checksum': meta_json_checksum, - } - ) - print( - f'Successfully crawled {utils.count_key(meta_dict)} metadata of dataset representation and file in total.\n' - ) + if not permission: # Delay the merging of permission metadata until the permission metadata is crawled - # Export the updated pid_dict_dd (Which contains deaccessioned/draft datasets) to a JSON file - pid_dict_json, pid_dict_checksum = utils.orjson_export(pid_dict_dd, 'pid_dict_dd') - json_file_checksum_dict.append( - { - 'type': 'Hierarchical Information of Datasets(deaccessioned/draft)', - 'path': pid_dict_json, - 'checksum': pid_dict_checksum, - } - ) + # Export the metadata to a JSON file + meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict') + json_file_checksum_dict.append( + { + 'type': 'Dataset Metadata (Representation & File)', + 'path': meta_json_file_path, + 'checksum': meta_json_checksum, + } + ) + print( + f'Successfully crawled {utils.count_key(meta_dict)} metadata of dataset representation and file in total.\n' + ) + + # Export the updated pid_dict_dd (Which contains deaccessioned/draft datasets) to a JSON file + pid_dict_json, pid_dict_checksum = utils.orjson_export(pid_dict_dd, 'pid_dict_dd') + json_file_checksum_dict.append( + { + 'type': 'Hierarchical Information of Datasets(deaccessioned/draft)', + 'path': pid_dict_json, + 'checksum': pid_dict_checksum, + } + ) if failed: failed_metadata_uris_json, failed_metadata_uris_checksum = utils.orjson_export( @@ -190,34 +201,29 @@ async 
def main_crawler(): } ) - if spreadsheet: - # Export the metadata to a CSV file - csv_file_path, csv_file_checksum = Spreadsheet(config).make_csv(meta_dict) - json_file_checksum_dict.append( - {'type': 'Dataset Metadata CSV', 'path': csv_file_path, 'checksum': csv_file_checksum} - ) - if permission: print('\nCrawling Permission metadata of datasets...\n') - ds_id_list = [item['ds_id'] for item in pid_dict.values()] + ds_id_list = [item['id'] for item in ds_dict.values()] permission_dict, failed_permission_uris = await (metadata_crawler.get_datasets_permissions(ds_id_list)) - permission_json_file_path, permission_json_checksum = utils.orjson_export( - permission_dict, 'permission_dict' - ) - json_file_checksum_dict.append( - { - 'type': 'Dataset Metadata (Permission)', - 'path': permission_json_file_path, - 'checksum': permission_json_checksum, - } - ) - print( - f'Successfully crawled permission metadata for {utils.count_key(permission_dict)} datasets in total.\n' - ) - # Export the pid_dict to a JSON file, if dfdfds_metadata is not provided - if not dvdfds_matadata: - pid_dict_json, pid_dict_checksum = utils.orjson_export(pid_dict, 'pid_dict') + if not dvdfds_matadata: # Delay the merging of permission metadata until the representation/file metadata is crawled + # Export the permission metadata to a JSON file + permission_json_file_path, permission_json_checksum = utils.orjson_export( + permission_dict, 'permission_dict' + ) + json_file_checksum_dict.append( + { + 'type': 'Dataset Metadata (Permission)', + 'path': permission_json_file_path, + 'checksum': permission_json_checksum, + } + ) + print( + f'Successfully crawled permission metadata for {utils.count_key(permission_dict)} datasets in total.\n' + ) + + # Export the pid_dict to a JSON file, if dfdfds_metadata is not provided + pid_dict_json, pid_dict_checksum = utils.orjson_export(ds_dict, 'pid_dict') json_file_checksum_dict.append( { 'type': 'Hierarchical Information of Datasets', @@ -226,12 +232,35 @@ async def main_crawler(): } ) + # Combine the metadata and permission metadata + if dvdfds_matadata and permission: + if isinstance(permission_dict, dict): + meta_dict = func.add_perrmission_info(meta_dict, permission_dict)[0] + + # Export the metadata to a JSON file + + meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict_with_permission') + json_file_checksum_dict.append( + { + 'type': 'Dataset Metadata (Representation, File & Permission)', + 'path': meta_json_file_path, + 'checksum': meta_json_checksum, + } + ) + if empty_dv: empty_dv_json, empty_dv_checksum = utils.orjson_export(empty_dv_dict, 'empty_dv') json_file_checksum_dict.append( {'type': 'Empty Dataverses', 'path': empty_dv_json, 'checksum': empty_dv_checksum} ) + if spreadsheet: + # Export the metadata to a CSV file + csv_file_path, csv_file_checksum = Spreadsheet(config).make_csv_file(meta_dict) + json_file_checksum_dict.append( + {'type': 'Dataset Metadata CSV', 'path': csv_file_path, 'checksum': csv_file_checksum} + ) + return meta_dict, json_file_checksum_dict, failed_metadata_uris, collections_tree_flatten meta_dict, json_file_checksum_dict, failed_metadata_uris, collections_tree_flatten = asyncio.run(main_crawler()) @@ -255,5 +284,6 @@ async def main_crawler(): failed_metadata_uris, json_file_checksum_dict) + if __name__ == '__main__': app() diff --git a/dvmeta/spreadsheet.py b/dvmeta/spreadsheet.py index 826a8e1..f8fc0c1 100644 --- a/dvmeta/spreadsheet.py +++ b/dvmeta/spreadsheet.py @@ -33,8 +33,11 @@ def __init__(self, config: dict) 
-> None: versionMinorNumber: data.versionMinorNumber, CM_Subtitle: data.metadataBlocks.citation.fields[?typeName==`subtitle`].value|[] CM_AltTitle: data.metadataBlocks.citation.fields[?typeName==`alternativeTitle`].value|[] + CM_AltURL: data.metadataBlocks.citation.fields[?typeName==`alternativeURL`].value|[] + CM_Agency: data.metadataBlocks.citation.fields[?typeName==`otherId`].value|[*]|[].otherIdAgency.value + CM_ID: data.metadataBlocks.citation.fields[?typeName==`otherId`].value|[*]|[].otherIdValue.value CM_Author: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorName.value - CM_ContactAff: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorAffiliation.value + CM_AuthorAff: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorAffiliation.value CM_AuthorID: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorIdentifier.value CM_AuthorIDType: data.metadataBlocks.citation.fields[?typeName==`author`].value|[*]|[].authorIdentifierScheme.value CM_ContactName: data.metadataBlocks.citation.fields[?typeName==`datasetContact`].value|[*]|[].datasetContactName.value @@ -74,8 +77,9 @@ def __init__(self, config: dict) -> None: CM_Depositor: data.metadataBlocks.citation.fields[?typeName==`depositor`].value|[] CM_DepositDate: data.metadataBlocks.citation.fields[?typeName==`dateOfDeposit`].value|[] CM_TimeStart: data.metadataBlocks.citation.fields[?typeName==`timePeriodCovered`].value|[].timePeriodCoveredStart.value - CM_TimeEnd: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionStart.value - CM_CollectionStart: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionEnd.value + CM_TimeEnd: data.metadataBlocks.citation.fields[?typeName==`timePeriodCovered`].value|[].timePeriodCoveredEnd.value + CM_CollectionStart: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionStart.value + CM_CollectionEnd: data.metadataBlocks.citation.fields[?typeName==`dateOfCollection`].value|[].dateOfCollectionEnd.value CM_DataType: data.metadataBlocks.citation.fields[?typeName==`kindOfData`].value|[] CM_SeriesName: data.metadataBlocks.citation.fields[?typeName==`series`].value|[].seriesName.value CM_SeriesInfo: data.metadataBlocks.citation.fields[?typeName==`series`].value|[].seriesInformation.value @@ -177,7 +181,7 @@ def _get_dataset_subjects(dictionary: dict) -> dict: return result_dict @staticmethod - def _get_metadata_blocks(dictionary: dict) -> dict: + def _get_metadata_blocks_usage(dictionary: dict) -> dict: metadata_block_dict = { 'Meta_Geo': 'geospatial', 'Meta_SSHM': 'socialscience', @@ -194,6 +198,33 @@ def _get_metadata_blocks(dictionary: dict) -> dict: return result_dict + @staticmethod + def _get_datafile_meta_usage(dictionary: dict) -> dict: + # Get the use of data file directoryLabel (DF_Hierarchy), + # tags (categories; DF_Tags) & description (DF_Description). 
+ if dictionary.get('data', {}).get('files'): + file_nested_list = jmespath.search('data.files[*]', dictionary) + + # Get the count of directoryLabel if it is not None + directorylabel_count = len([file for file in file_nested_list if file.get('directoryLabel') is not None]) + + # Get the count of categories if it is not None + categories_count = len([ + file for file in file_nested_list + if file.get('dataFile', {}).get('categories') is not None + ]) + + # Get the count of description if it is not None + description_count = len([ + file for file in file_nested_list + if file.get('dataFile', {}).get('description') is not None + ]) + + return {'DF_Hierarchy': directorylabel_count, + 'DF_Tags': categories_count, + 'DF_Description': description_count} + return {'DF_Hierarchy': 0, 'DF_Tags': 0, 'DF_Description': 0} + def _get_spreadsheet_order(self) -> list[str]: with Path(self.spreadsheet_order_file_path).open(encoding='utf-8') as file: return file.read().splitlines() @@ -212,18 +243,23 @@ def _reoder_df_columns(self, df: pd.DataFrame) -> pd.DataFrame: return df[final_column_order] - def make_csv(self, meta_dict: dict) -> tuple[str, str]: - """Create a CSV file from the metadata dictionary. + def _make_cm_meta_holding_list(self, meta_dict: dict) -> list[dict]: + """Create a nested list of metadata dictionaries. Args: - meta_dict (dict): Metadata dictionary + meta_dict (dict): Dataset metadata dictionary. Returns: - tuple[str, str]: Path to the CSV file, Checksum of the CSV file + list[dict]: List of metadata dictionaries (nested) """ holding_list = [] for key, _value in meta_dict.items(): jmespath_dict: dict = jmespath.search(f'{self.search_string}', meta_dict[key]) + + # Get the use of data file hierarchy (folders, DF_Hierarchy), + # file tags (categories; DF_Tags) & description (DF_Description) + jmespath_dict.update(self._get_datafile_meta_usage(meta_dict[key])) + # Get the file size and count jmespath_dict['FileSize'] = self._get_data_files_size(meta_dict[key]) jmespath_dict['FileSize_normalized'] = convert_size(jmespath_dict['FileSize']) @@ -245,7 +281,7 @@ def make_csv(self, meta_dict: dict) -> tuple[str, str]: jmespath_dict.update(self._get_dataset_subjects(jmespath_dict)) # Get the metadata blocks and add them to the result dictionary - jmespath_dict.update(self._get_metadata_blocks(jmespath_dict)) + jmespath_dict.update(self._get_metadata_blocks_usage(jmespath_dict)) # Drop the versionNumber and versionMinorNumber keys from the dictionary jmespath_dict.pop('versionNumber', None) @@ -256,9 +292,24 @@ def make_csv(self, meta_dict: dict) -> tuple[str, str]: holding_list.append(jmespath_dict) - df = pd.DataFrame(holding_list) + return holding_list + + def make_csv_file(self, meta_dict: dict) -> tuple[str, str]: + """Create a CSV file from the nested metadata list. 
+ + Args: + meta_dict (dict): Dataset metadata dictionary + + Returns: + tuple[str, str]: Path to the CSV file, Checksum of the CSV file + """ + # Create a DataFrame from the nested list + + cm_meta_holding_list = self._make_cm_meta_holding_list(meta_dict) + + df = pd.DataFrame(cm_meta_holding_list) - # Reoder the columns in the DataFrame + # Reoder the columns in the DataFrame according to to the preset order (/res/spreadsheet_order.csv) df = self._reoder_df_columns(df) # Create the CSV file diff --git a/res/spreadsheet_order.csv b/res/spreadsheet_order.csv index a547b43..b5cff61 100644 --- a/res/spreadsheet_order.csv +++ b/res/spreadsheet_order.csv @@ -11,14 +11,11 @@ Version FileCount FileSize FileSize_normalized -FileFormat DataverseSubCollection License RestrictedFiles RequestAcces TermsAccess -TermsUse -Citationrequirements DF_Hierarchy DF_Tags DF_Description @@ -113,6 +110,4 @@ DS_Contrib DS_ContribPlus DS_Curator DS_FileDown -DS_Member -DS_UOFT_Admin -DS_Groups +DS_Member \ No newline at end of file From e09856538fb042604e8a8f8703765ca50d1acca5 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Sat, 25 Jan 2025 03:13:28 -0500 Subject: [PATCH 12/28] 1. Integrated permission metadata into spreadsheet --- dvmeta/func.py | 4 ++++ dvmeta/spreadsheet.py | 33 ++++++++++++++++++++++++++++++++- res/spreadsheet_order.csv | 5 +++-- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/dvmeta/func.py b/dvmeta/func.py index 759a437..2db69cc 100644 --- a/dvmeta/func.py +++ b/dvmeta/func.py @@ -192,6 +192,10 @@ def add_perrmission_info(meta_dict: dict, permission_dict: Optional[dict] = None # Remove from permission_dict_copy permission_dict_copy.pop(pid_key) break + for _meta_key, meta_value in meta_dict.items(): + if isinstance(meta_value, dict) and meta_value.get('data', {}).get('datasetId'): + if 'permission_info' not in meta_value: + meta_value['permission_info'] = {'status': 'NA', 'data': []} return meta_dict, permission_dict_copy diff --git a/dvmeta/spreadsheet.py b/dvmeta/spreadsheet.py index f8fc0c1..1467136 100644 --- a/dvmeta/spreadsheet.py +++ b/dvmeta/spreadsheet.py @@ -19,6 +19,7 @@ def __init__(self, config: dict) -> None: self.config = config self.search_string = """{ DatasetTitle: data.metadataBlocks.citation.fields[?typeName==`title`].value|[] + DS_Path: path_info.path DatasetPersistentId: data.datasetPersistentId, ID: data.id, DatasetId: data.datasetId, @@ -92,7 +93,14 @@ def __init__(self, config: dict) -> None: CM_OriginSources: data.metadataBlocks.citation.fields[?typeName==`originOfSources`].value|[] CM_CharSources: data.metadataBlocks.citation.fields[?typeName==`characteristicOfSources`].value|[] CM_DocSources: data.metadataBlocks.citation.fields[?typeName==`accessToSources`].value|[] - DataverseSubCollection: path_info.path + DS_Permission: permission_info.data + DS_Collab: length(permission_info.data) + DS_Admin: length(permission_info.data[?_roleAlias=='admin']) + DS_Contrib: length(permission_info.data[?_roleAlias=='contributor']) + DS_ContribPlus: length(permission_info.data[?_roleAlias=='fullContributor']) + DS_Curator: length(permission_info.data[?_roleAlias=='curator']) + DS_FileDown: length(permission_info.data[?_roleAlias=='fileDownloader']) + DS_Member: length(permission_info.data[?_roleAlias=='member']) }""" # noqa: E501 self.csv_file_dir = DirManager().csv_files_dir() self.spreadsheet_order_file_path = Path(DirManager().res_dir) / 'spreadsheet_order.csv' @@ -225,6 +233,24 @@ def 
_get_datafile_meta_usage(dictionary: dict) -> dict: 'DF_Description': description_count} return {'DF_Hierarchy': 0, 'DF_Tags': 0, 'DF_Description': 0} + @staticmethod + def _parse_permission_values(dictionary: dict) -> dict | None: + """Parse the NA value to permission_info.data, if the value is not available.""" + if dictionary.get('permission_info', {}).get('status', {}) == 'NA': + # If the status is NA, set the DS_Permission, DS_Collab, DS_Admin, DS_Contrib + # DS_ContribPlus, DS_Curator, DS_FileDown, DS_Member to NA + return { + 'DS_Permission': False, + 'DS_Collab': 'NA', + 'DS_Admin': 'NA', + 'DS_Contrib': 'NA', + 'DS_ContribPlus': 'NA', + 'DS_Curator': 'NA', + 'DS_FileDown': 'NA', + 'DS_Member': 'NA' + } + return {'DS_Permission': True} + def _get_spreadsheet_order(self) -> list[str]: with Path(self.spreadsheet_order_file_path).open(encoding='utf-8') as file: return file.read().splitlines() @@ -287,6 +313,9 @@ def _make_cm_meta_holding_list(self, meta_dict: dict) -> list[dict]: jmespath_dict.pop('versionNumber', None) jmespath_dict.pop('versionMinorNumber', None) + # Update the permission info if the status is NA + jmespath_dict.update(self._parse_permission_values(meta_dict[key]) or {}) + # Last step: Turn the lists in the dictionary into strings jmespath_dict = {key: list_to_string(value) if isinstance(value, list) else value for key, value in jmespath_dict.items()} @@ -307,6 +336,8 @@ def make_csv_file(self, meta_dict: dict) -> tuple[str, str]: cm_meta_holding_list = self._make_cm_meta_holding_list(meta_dict) + + df = pd.DataFrame(cm_meta_holding_list) # Reoder the columns in the DataFrame according to to the preset order (/res/spreadsheet_order.csv) diff --git a/res/spreadsheet_order.csv b/res/spreadsheet_order.csv index b5cff61..b784c9a 100644 --- a/res/spreadsheet_order.csv +++ b/res/spreadsheet_order.csv @@ -1,7 +1,8 @@ DatasetTitle DatasetURL -DatasetPersistentId +DS_Path ID +DatasetPersistentId DatasetId VersionState LastUpdateTime @@ -11,7 +12,6 @@ Version FileCount FileSize FileSize_normalized -DataverseSubCollection License RestrictedFiles RequestAcces @@ -101,6 +101,7 @@ Meta_Astro Meta_LS Meta_Journal Meta_CWF +DS_Permission DS_Collab DS_Collab_In DS_Collab_Ex From 00d74b310626428a45e006b6d5ce6768c4506e5a Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Sat, 25 Jan 2025 03:18:13 -0500 Subject: [PATCH 13/28] 1. Added TermsOfUse field. 
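For context, the new column is filled by the same jmespath multiselect hash that drives the rest of the spreadsheet. Below is a minimal sketch of that lookup against an invented payload; the sample values are illustrative only and are not taken from any repository.

```python
# Toy payload: shows how `TermsOfUse: data.termsOfUse` style entries in the
# search string resolve against a dataset-version JSON document.
import jmespath

sample = {
    'data': {
        'termsOfUse': 'CC0 waiver applies.',
        'termsOfAccess': 'Contact the depositor for restricted files.',
    }
}

expression = '{TermsOfUse: data.termsOfUse, TermsAccess: data.termsOfAccess}'
print(jmespath.search(expression, sample))
# {'TermsOfUse': 'CC0 waiver applies.',
#  'TermsAccess': 'Contact the depositor for restricted files.'}
```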
--- dvmeta/spreadsheet.py | 3 ++- res/spreadsheet_order.csv | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dvmeta/spreadsheet.py b/dvmeta/spreadsheet.py index 1467136..39ee74d 100644 --- a/dvmeta/spreadsheet.py +++ b/dvmeta/spreadsheet.py @@ -28,8 +28,9 @@ def __init__(self, config: dict) -> None: ReleaseTime: data.releaseTime, CreateTime: data.createTime, License: data.license.name - TermsAccess: data.termsOfAccess + TermsOfUse: data.termsOfUse RequestAcces: data.fileAccessRequest + TermsAccess: data.termsOfAccess versionNumber: data.versionNumber, versionMinorNumber: data.versionMinorNumber, CM_Subtitle: data.metadataBlocks.citation.fields[?typeName==`subtitle`].value|[] diff --git a/res/spreadsheet_order.csv b/res/spreadsheet_order.csv index b784c9a..4e90cbf 100644 --- a/res/spreadsheet_order.csv +++ b/res/spreadsheet_order.csv @@ -14,6 +14,7 @@ FileSize FileSize_normalized License RestrictedFiles +TermsOfUse RequestAcces TermsAccess DF_Hierarchy From cf968f578e82d9c62c70bc82d1c4bc86e3d2d7ab Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Mon, 27 Jan 2025 10:56:06 -0500 Subject: [PATCH 14/28] 1. Foramting changes --- dvmeta/spreadsheet.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dvmeta/spreadsheet.py b/dvmeta/spreadsheet.py index 39ee74d..d954e44 100644 --- a/dvmeta/spreadsheet.py +++ b/dvmeta/spreadsheet.py @@ -337,8 +337,6 @@ def make_csv_file(self, meta_dict: dict) -> tuple[str, str]: cm_meta_holding_list = self._make_cm_meta_holding_list(meta_dict) - - df = pd.DataFrame(cm_meta_holding_list) # Reoder the columns in the DataFrame according to to the preset order (/res/spreadsheet_order.csv) From 1d4591db1371dfceaa79d5a101d142ec01f7a514 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Mon, 27 Jan 2025 11:13:20 -0500 Subject: [PATCH 15/28] 1. 
Unify the use of `datasetId` acorss reading native API & search API --- dvmeta/func.py | 8 ++++---- dvmeta/main.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dvmeta/func.py b/dvmeta/func.py index 2db69cc..ba489e5 100644 --- a/dvmeta/func.py +++ b/dvmeta/func.py @@ -27,13 +27,13 @@ def get_pids(read_dict: dict, config: dict) -> tuple: write_dict = {} for key, _item in read_dict.items(): result = jmespath.search( - "data[?type=='dataset'].{id: id, protocol: protocol, authority: authority, identifier: identifier, path: path, path_ids: path_ids}", # noqa: E501 + "data[?type=='dataset'].{datasetId: id, protocol: protocol, authority: authority, identifier: identifier, path: path, path_ids: path_ids}", # noqa: E501 read_dict[key], # noqa: PLR1733 ) if result: for item in result: pid = f"{item['protocol']}:{item['authority']}/{item['identifier']}" - id = item['id'] + id = item['datasetId'] path = '/' + item['path'] if item['path'] else None path_ids = item['path_ids'] dict_to_append = { @@ -41,7 +41,7 @@ def get_pids(read_dict: dict, config: dict) -> tuple: 'collection_alias': config['COLLECTION_ALIAS'], 'collection_id': config['COLLECTION_ID'], 'pid': pid, - 'id': id, + 'datasetId': id, 'path': path, 'path_ids': path_ids, } @@ -234,7 +234,7 @@ def replace_key_with_dataset_id(dictionary: dict) -> dict: """ new_dict = {} for old_key, value in dictionary.items(): - # Check if the 'data' key exists and has 'id' + # Check if the 'data' key exists and has 'datasetId' if isinstance(value, dict) and value.get('data', {}).get('datasetId'): new_key = value.get('data', {}).get('datasetId') # Get the value of 'datasetId' new_dict[new_key] = value # Use it as the new key diff --git a/dvmeta/main.py b/dvmeta/main.py index 4f307a5..a24d4e0 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -203,7 +203,7 @@ async def main_crawler(): if permission: print('\nCrawling Permission metadata of datasets...\n') - ds_id_list = [item['id'] for item in ds_dict.values()] + ds_id_list = [item['datasetId'] for item in ds_dict.values()] permission_dict, failed_permission_uris = await (metadata_crawler.get_datasets_permissions(ds_id_list)) if not dvdfds_matadata: # Delay the merging of permission metadata until the representation/file metadata is crawled From 6abf1a1933d1156f61a8448b37c33f1638663c78 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Mon, 27 Jan 2025 11:25:19 -0500 Subject: [PATCH 16/28] 1. Changed path_ids to pathIds. 2. Changed collection_alias to CollectionAlias 3. 
Changed pid to datasetPersistentId --- dvmeta/func.py | 18 +++++++++--------- dvmeta/main.py | 6 +++--- dvmeta/utils.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dvmeta/func.py b/dvmeta/func.py index ba489e5..52347bd 100644 --- a/dvmeta/func.py +++ b/dvmeta/func.py @@ -27,7 +27,7 @@ def get_pids(read_dict: dict, config: dict) -> tuple: write_dict = {} for key, _item in read_dict.items(): result = jmespath.search( - "data[?type=='dataset'].{datasetId: id, protocol: protocol, authority: authority, identifier: identifier, path: path, path_ids: path_ids}", # noqa: E501 + "data[?type=='dataset'].{datasetId: id, protocol: protocol, authority: authority, identifier: identifier, path: path, pathIds: pathIds}", # noqa: E501 read_dict[key], # noqa: PLR1733 ) if result: @@ -35,15 +35,15 @@ def get_pids(read_dict: dict, config: dict) -> tuple: pid = f"{item['protocol']}:{item['authority']}/{item['identifier']}" id = item['datasetId'] path = '/' + item['path'] if item['path'] else None - path_ids = item['path_ids'] + path_ids = item['pathIds'] dict_to_append = { str(id): { # pid needs to be converted to string if it's not already - 'collection_alias': config['COLLECTION_ALIAS'], - 'collection_id': config['COLLECTION_ID'], - 'pid': pid, + 'CollectionAlias': config['COLLECTION_ALIAS'], + 'CollectionID': config['COLLECTION_ID'], + 'datasetPersistentId': pid, 'datasetId': id, 'path': path, - 'path_ids': path_ids, + 'pathIds': path_ids, } } write_dict.update(dict_to_append) @@ -140,7 +140,7 @@ def count_files_size(read_dict: dict) -> tuple: def add_path_to_dataverse_contents(des_dict: dict, ref_dict: dict) -> dict: - """Add path_ids and path to dataverse_contents from collections_tree_flatten. + """Add pathIds and path to dataverse_contents from collections_tree_flatten. Args: des_dict (dict): Dictionary containing the metadata of datasets @@ -154,10 +154,10 @@ def add_path_to_dataverse_contents(des_dict: dict, ref_dict: dict) -> dict: if value['data']: for item in value['data']: item.update({'path': ref_dict[key]['path']}) - item.update({'path_ids': ref_dict[key]['path_ids']}) + item.update({'pathIds': ref_dict[key]['pathIds']}) else: value['data'].append({'path': ref_dict[key]['path']}) - value['data'].append({'path_ids': ref_dict[key]['path_ids']}) + value['data'].append({'pathIds': ref_dict[key]['pathIds']}) return des_dict diff --git a/dvmeta/main.py b/dvmeta/main.py index a24d4e0..20499c0 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -125,7 +125,7 @@ def main( async def main_crawler(): # Initialize empty dict and list to store metadata - ds_dict = {'pid': []} + ds_dict = {'datasetPersistentId': []} failed_metadata_ids = [] json_file_checksum_dict = [] permission_dict = {} @@ -143,7 +143,7 @@ async def main_crawler(): print('Getting basic metadata of datasets in across dataverses (incl. 
all children)...\n') dataverse_contents, failed_dataverse_contents = await metadata_crawler.get_dataverse_contents(collection_id_list) - # Add path_ids and path to dataverse_contents from collections_tree_flatten + # Add pathIds and path to dataverse_contents from collections_tree_flatten dataverse_contents = func.add_path_to_dataverse_contents(dataverse_contents, collections_tree_flatten) # Get URIs in collections_tree_flatten and append them to ds_dict, and return empty dataverse to empty_dv @@ -155,7 +155,7 @@ async def main_crawler(): if dvdfds_matadata: # Export dataverse_contents print('Crawling Representation and File metadata of datasets...\n') - pid_list = [item['pid'] for item in ds_dict.values()] + pid_list = [item['datasetPersistentId'] for item in ds_dict.values()] meta_dict, failed_metadata_uris = await metadata_crawler.get_datasets_meta(pid_list) # Replace the key with the Data #TEMPORARY FIX diff --git a/dvmeta/utils.py b/dvmeta/utils.py index e2c1ab1..732cef3 100644 --- a/dvmeta/utils.py +++ b/dvmeta/utils.py @@ -169,7 +169,7 @@ def loop_item(dictionary_data, path_name='', path_ids=[]): current_path_ids = path_ids + [item['id']] - new_item['path_ids'] = current_path_ids + new_item['pathIds'] = current_path_ids new_item['path'] = f"{path_name}/{item['name']}" if path_name else item['name'] new_item.pop('children', None) write_dict[item['id']] = new_item From bde6f3e4ccac4b07b8024165509c695ad4b957bc Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Mon, 27 Jan 2025 23:01:54 -0500 Subject: [PATCH 17/28] 1. Updated CLI description 2. Updated minor syntax --- dvmeta/main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dvmeta/main.py b/dvmeta/main.py index 20499c0..89389f1 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -64,15 +64,13 @@ def main( False, '--spreadsheet', '-s', help='Output a CSV file of the metadata of datasets' ), ): - """A command line utility that crawls a dataverse repository, extracting metadata for dataverses, datasets, and permissions, and then stores it in JSON format.""" - # Load the environment variables #! This need to be modified as it nullifies the auth token provided by the user + """A Python CLI tool for extracting and exporting metadata from Dataverse repositories to JSON and CSV formats.""" + # Load the environment variables config: dict = func.load_env() config['COLLECTION_ALIAS'] = collection_alias config['VERSION'] = version - config['API_KEY'] = ( - auth if auth else config['API_KEY'] - ) # Reassign the API_KEY and replace it specified in the .env file + config['API_KEY'] = (auth if auth else config['API_KEY']) # Reassign the API_KEY and replace it specified in the .env file, if provided in the CLI interface # Check if -s flag is provided without -d flag func.validate_spreadsheet(spreadsheet, dvdfds_matadata) From 4d0ade56b2d052034aaacb9f968bcf3df4a58603 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Mon, 27 Jan 2025 23:49:58 -0500 Subject: [PATCH 18/28] 1. Hotfix for failed_metadata_dict parsing, handling error request. 
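In rough terms, the crawler now distinguishes three outcomes per request instead of two. A simplified sketch of the classification follows; it is not the module itself, and the `[url, 'Error']` sentinel shape is the one introduced in the diff below.

```python
# Simplified: a 200 response is stored under its persistent ID, any other
# HTTP response is recorded by URL with its status code, and a transport
# error surfaces as the [url, 'Error'] sentinel returned by the client.
import httpx

HTTP_SUCCESS = 200

def classify(item, dataset_meta: dict, failed: dict) -> None:
    if isinstance(item, httpx.Response) and item.status_code == HTTP_SUCCESS and item.json():
        pid = item.json().get('data', {}).get('datasetPersistentId')
        dataset_meta[pid] = item.json()
    elif isinstance(item, httpx.Response):
        failed[str(item.url)] = item.status_code
    elif isinstance(item, list):  # the [url, 'Error'] sentinel
        failed[item[0]] = item[1]
```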
--- dvmeta/httpxclient.py | 6 +++--- dvmeta/metadatacrawler.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dvmeta/httpxclient.py b/dvmeta/httpxclient.py index c353684..8812173 100644 --- a/dvmeta/httpxclient.py +++ b/dvmeta/httpxclient.py @@ -63,7 +63,7 @@ async def __aexit__(self, await self.async_client.aclose() self.sync_client.close() - async def _async_semaphore_client(self, url: str) -> httpx.Response | None: + async def _async_semaphore_client(self, url: str) -> httpx.Response | list[str]: """Asynchronous HTTP client with semaphore. Args: @@ -79,9 +79,9 @@ async def _async_semaphore_client(self, url: str) -> httpx.Response | None: # print(f'HTTP request Error for {url}: {response.status_code}') return response return response - except (httpx.HTTPStatusError, httpx.RequestError) as exc: + except (httpx.HTTPStatusError, httpx.RequestError): # print(f'HTTP request Error for {url}: {exc}') - return None + return [url, 'Error'] def sync_get(self, url: str) -> httpx.Response | None: """Synchronous GET request. diff --git a/dvmeta/metadatacrawler.py b/dvmeta/metadatacrawler.py index 481ca14..2d7dca5 100644 --- a/dvmeta/metadatacrawler.py +++ b/dvmeta/metadatacrawler.py @@ -100,8 +100,10 @@ async def get_datasets_meta(self, id_list: list) -> tuple[dict, dict]: if item and item.status_code == self.http_success_status and item.json(): dataset_persistent_idd = item.json().get('data').get('datasetPersistentId') dataset_meta[dataset_persistent_idd] = item.json() - else: + elif item and item.status_code != self.http_success_status: failed_dataset_meta[str(item.url)] = item.status_code + elif isinstance(item, list): + failed_dataset_meta[item[0]] = item[1] return dataset_meta, failed_dataset_meta From dc6629da0b5bd839f93e2f71f42ae293b438e0f6 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 00:28:34 -0500 Subject: [PATCH 19/28] 1. Added dummy value to the meta_dict even if permission flag is not enabled. This is to prevent error when making csv, without specify -p flag (jmespath length error) 2. Revamped the above logic. 
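The underlying issue: the spreadsheet's `length(permission_info.data)` columns fail when `permission_info` was never attached. A minimal standalone illustration (not project code) of the failure and of how the `{'status': 'NA', 'data': []}` placeholder avoids it:

```python
# length() in jmespath only accepts strings, arrays, and objects, so a missing
# permission_info key (null) raises JMESPathTypeError; an empty placeholder
# list simply yields 0.
import jmespath
from jmespath.exceptions import JMESPathTypeError

with_placeholder = {'permission_info': {'status': 'NA', 'data': []}}
print(jmespath.search('length(permission_info.data)', with_placeholder))  # 0

try:
    jmespath.search('length(permission_info.data)', {})  # no permission_info at all
except JMESPathTypeError as err:
    print(f'Without the placeholder: {err}')
```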
--- dvmeta/func.py | 20 +++++++---------- dvmeta/main.py | 61 +++++++++++++++++++------------------------------- 2 files changed, 31 insertions(+), 50 deletions(-) diff --git a/dvmeta/func.py b/dvmeta/func.py index 52347bd..7cc72c5 100644 --- a/dvmeta/func.py +++ b/dvmeta/func.py @@ -178,28 +178,24 @@ def add_path_info(meta_dict: dict, ds_dict: dict) -> tuple: return meta_dict, ds_dict_copy -def add_perrmission_info(meta_dict: dict, permission_dict: Optional[dict] = None) -> tuple: +def add_permission_info(meta_dict: dict, permission_dict: Optional[dict] = None) -> dict: """Add permission_info to the metadata dictionary, handling nested structures.""" if isinstance(permission_dict, dict): - permission_dict_copy = permission_dict.copy() - for pid_key, pid_value in list(permission_dict_copy.items()): + for pid_key, pid_value in list(permission_dict.items()): pid_key_str = str(pid_key) # Traverse the meta_dict to find matching datasetId for _meta_key, meta_value in meta_dict.items(): if isinstance(meta_value, dict) and meta_value.get('data', {}).get('datasetId') == int(pid_key_str): # Add path_info to the appropriate nested dictionary meta_value['permission_info'] = pid_value - # Remove from permission_dict_copy - permission_dict_copy.pop(pid_key) + # Remove from permission_dict + permission_dict.pop(pid_key) break - for _meta_key, meta_value in meta_dict.items(): - if isinstance(meta_value, dict) and meta_value.get('data', {}).get('datasetId'): - if 'permission_info' not in meta_value: - meta_value['permission_info'] = {'status': 'NA', 'data': []} - - return meta_dict, permission_dict_copy + for _meta_key, meta_value in meta_dict.items(): + if 'permission_info' not in meta_value: + meta_value['permission_info'] = {'status': 'NA', 'data': []} - return meta_dict, None + return meta_dict def load_env() -> dict: diff --git a/dvmeta/main.py b/dvmeta/main.py index 89389f1..d1bc468 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -162,30 +162,15 @@ async def main_crawler(): # Add the path_info to the metadata meta_dict, pid_dict_dd = func.add_path_info(meta_dict, ds_dict) - if not permission: # Delay the merging of permission metadata until the permission metadata is crawled - - # Export the metadata to a JSON file - meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict') - json_file_checksum_dict.append( - { - 'type': 'Dataset Metadata (Representation & File)', - 'path': meta_json_file_path, - 'checksum': meta_json_checksum, - } - ) - print( - f'Successfully crawled {utils.count_key(meta_dict)} metadata of dataset representation and file in total.\n' - ) - - # Export the updated pid_dict_dd (Which contains deaccessioned/draft datasets) to a JSON file - pid_dict_json, pid_dict_checksum = utils.orjson_export(pid_dict_dd, 'pid_dict_dd') - json_file_checksum_dict.append( - { - 'type': 'Hierarchical Information of Datasets(deaccessioned/draft)', - 'path': pid_dict_json, - 'checksum': pid_dict_checksum, - } - ) + # Export the updated pid_dict_dd (Which contains deaccessioned/draft datasets) to a JSON file + pid_dict_json, pid_dict_checksum = utils.orjson_export(pid_dict_dd, 'pid_dict_dd') + json_file_checksum_dict.append( + { + 'type': 'Hierarchical Information of Datasets(deaccessioned/draft)', + 'path': pid_dict_json, + 'checksum': pid_dict_checksum, + } + ) if failed: failed_metadata_uris_json, failed_metadata_uris_checksum = utils.orjson_export( @@ -230,21 +215,21 @@ async def main_crawler(): } ) - # Combine the metadata and permission metadata - if dvdfds_matadata 
and permission: - if isinstance(permission_dict, dict): - meta_dict = func.add_perrmission_info(meta_dict, permission_dict)[0] - - # Export the metadata to a JSON file + # Combine the metadata and permission metadata, if both are provided + # Else write dummy permission metadata to the metadata + meta_dict = func.add_permission_info(meta_dict, permission_dict if isinstance(permission_dict, dict) and permission_dict else None) + + # Export the metadata to a JSON file + meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict_with_permission') + json_file_checksum_dict.append( + { + 'type': 'Dataset Metadata (Representation, File & Permission)', + 'path': meta_json_file_path, + 'checksum': meta_json_checksum, + } + ) - meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict_with_permission') - json_file_checksum_dict.append( - { - 'type': 'Dataset Metadata (Representation, File & Permission)', - 'path': meta_json_file_path, - 'checksum': meta_json_checksum, - } - ) + print(f'Successfully crawled {utils.count_key(meta_dict)} metadata of dataset representation and file in total.\n') if empty_dv: empty_dv_json, empty_dv_checksum = utils.orjson_export(empty_dv_dict, 'empty_dv') From 94470462d20c50a9a15f0bc8603588bb0a5dcd61 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 00:33:05 -0500 Subject: [PATCH 20/28] 1. Fixed prompt output for ds_meta --- dvmeta/main.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dvmeta/main.py b/dvmeta/main.py index d1bc468..f12cbf3 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -219,17 +219,17 @@ async def main_crawler(): # Else write dummy permission metadata to the metadata meta_dict = func.add_permission_info(meta_dict, permission_dict if isinstance(permission_dict, dict) and permission_dict else None) - # Export the metadata to a JSON file - meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict_with_permission') - json_file_checksum_dict.append( - { - 'type': 'Dataset Metadata (Representation, File & Permission)', - 'path': meta_json_file_path, - 'checksum': meta_json_checksum, - } - ) - - print(f'Successfully crawled {utils.count_key(meta_dict)} metadata of dataset representation and file in total.\n') + if meta_dict: + # Export the metadata to a JSON file + meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict_with_permission') + json_file_checksum_dict.append( + { + 'type': 'Dataset Metadata (Representation, File & Permission)', + 'path': meta_json_file_path, + 'checksum': meta_json_checksum, + } + ) + print(f'Successfully crawled {utils.count_key(meta_dict)} metadata of dataset representation and file in total.\n') if empty_dv: empty_dv_json, empty_dv_checksum = utils.orjson_export(empty_dv_dict, 'empty_dv') From ce65c09f33ebe5801248227c518277b120d75318 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 00:35:00 -0500 Subject: [PATCH 21/28] 1. Updated name of ds_meta. 
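`utils.orjson_export()` itself does not appear in this patch set, so the sketch below is only a guess at its shape, inferred from call sites that expect a `(path, checksum)` tuple and from the timestamped filenames listed in the README; the directory layout and hashing choice are assumptions.

```python
# Hypothetical export helper: serialize the dict with orjson, write it to a
# timestamped file, and return the path plus a checksum, roughly matching how
# main.py consumes the return value.
import hashlib
from datetime import datetime
from pathlib import Path

import orjson

def orjson_export_sketch(data: dict, prefix: str,
                         out_dir: str = 'exported_files/json_files') -> tuple[str, str]:
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    file_path = Path(out_dir) / f'{prefix}_{stamp}.json'
    payload = orjson.dumps(data, option=orjson.OPT_INDENT_2)
    file_path.write_bytes(payload)
    return str(file_path), hashlib.sha256(payload).hexdigest()
```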
--- dvmeta/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvmeta/main.py b/dvmeta/main.py index f12cbf3..2df929d 100644 --- a/dvmeta/main.py +++ b/dvmeta/main.py @@ -221,7 +221,7 @@ async def main_crawler(): if meta_dict: # Export the metadata to a JSON file - meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'meta_dict_with_permission') + meta_json_file_path, meta_json_checksum = utils.orjson_export(meta_dict, 'ds_metadata') json_file_checksum_dict.append( { 'type': 'Dataset Metadata (Representation, File & Permission)', From c30bcced7b4d3ac25708a3a64cd81daee9875869 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 00:36:13 -0500 Subject: [PATCH 22/28] 1. Updated README for exported_files section & clearer instructions --- README.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7ddda7b..988b39d 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ cd ./dataverse-metadata-crawler ``` -3. Create an environment file (.env) +3. Create an environment file (`.env`) ```sh touch .env # For Unix/MacOS nano .env # or vim .env, or your preferred editor @@ -38,12 +38,17 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ notepad .env ``` -4. Configure the environment (.env) file using the text editor of your choice. +4. Configure the environment (`.env`) file using the text editor of your choice. ```sh # .env file - BASE_URL = "TARGET_REPO_URL" # e.g., "https://demo.borealisdata.ca/" + BASE_URL = "TARGET_REPO_URL" # Base URL of the repository; e.g., "https://demo.borealisdata.ca/" API_KEY = "YOUR_API_KEY" # Found in your Dataverse account settings. Can also be specified in the CLI interface using the -a flag. ``` + Your `.env` file should look like this: + ```sh + BASE_URL = "https://demo.borealisdata.ca/" + API_KEY = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXX" + ``` 5. Set up virtual environment (recommended) ```sh @@ -115,10 +120,10 @@ exported_files/ ├── json_files/ │ └── ds_metadata_yyyymmdd-HHMMSS.json # With -d flag enabled │ └── empty_dv_yyyymmdd-HHMMSS.json # With -e flag enabled -│ └── failed_metadata_uris_yyyymmdd-HHMMSS.json -│ └── permission_dict_yyyymmdd-HHMMSS.json # With -p flag enabled -│ └── pid_dict_yyyymmdd-HHMMSS.json # Only exported if -p flag is used without -d flag -│ └── pid_dict_dd_yyyymmdd-HHMMSS.json # Hierarchical information of deaccessioned/draft datasets +│ └── failed_metadata_uris_yyyymmdd-HHMMSS.json # With -f flag enabled +│ └── permission_dict_yyyymmdd-HHMMSS.json # With only -p flag enabled +│ └── pid_dict_yyyymmdd-HHMMSS.json # With only -p flag enabled +│ └── pid_dict_dd_yyyymmdd-HHMMSS.json # Hierarchical information of deaccessioned/draft datasets. ├── csv_files/ │ └── ds_metadata_yyyymmdd-HHMMSS.csv # with -s flag enabled └── logs_files/ From 9421e33bc3415263626413daa1e91f5c2fee3efb Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:07:31 -0500 Subject: [PATCH 23/28] 1. Updated README: adding Disclaimer section. 
--- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 988b39d..f93d90d 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,10 @@ exported_files/ └── log_yyyymmdd-HHMMSS.txt # Exported by default, without specifying --no-log ``` +## ⚠️Disclaimer +> [!WARNING] +> To retrieve data about unpublished datasets or information that is not available publicly (e.g. collaborators/permissions), you will need to have necessary access rights. **Please note that any publication or use of non-publicly available data may require review by a Research Ethics Board**. + ## ✅Tests No tests have been written yet. Contributions welcome! From f2078352ce4f592144743025b7638420e0304e41 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Mon, 20 Jan 2025 18:28:16 -0500 Subject: [PATCH 24/28] 1. Added GitHub Actions workflows for Jekyll deployment and Poetry dependency export 2. Updated CITATION.cff & README --- .github/workflows/jekyll-gh-pages.yml | 51 +++++++++++++ .../workflows/poetry-export_dependencies.yml | 73 +++++++++++++++++++ CITATION.cff | 4 +- README.md | 16 ++-- _config.yml | 19 +++++ 5 files changed, 154 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/jekyll-gh-pages.yml create mode 100644 .github/workflows/poetry-export_dependencies.yml create mode 100644 _config.yml diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml new file mode 100644 index 0000000..e31d81c --- /dev/null +++ b/.github/workflows/jekyll-gh-pages.yml @@ -0,0 +1,51 @@ +# Sample workflow for building and deploying a Jekyll site to GitHub Pages +name: Deploy Jekyll with GitHub Pages dependencies preinstalled + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Build job + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Build with Jekyll + uses: actions/jekyll-build-pages@v1 + with: + source: ./ + destination: ./_site + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + + # Deployment job + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml new file mode 100644 index 0000000..36306c8 --- /dev/null +++ b/.github/workflows/poetry-export_dependencies.yml @@ -0,0 +1,73 @@ +name: Poetry export requirements.txt +on: + push: + branches: + - '*' # Trigger on any push to any branch + paths: + - 'requirements.txt' + - 'pyproject.toml' + - 'poetry.lock' +jobs: + poetry-export_dependencies: + strategy: + fail-fast: false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.12 + - name: Install poetry + uses: abatilo/actions-poetry@v4 + with: + poetry-version: 'latest' + - name: Install the poetry-plugin-export + run: poetry self add poetry-plugin-export + - name: Update poetry lock file + run: poetry lock + - name: Export the project dependencies to requirements.txt + run: | + poetry export -f requirements.txt --output requirements.txt + - name: Get branch name + shell: bash + run: echo "BRANCH_NAME=${GITHUB_REF#refs/heads/}" >> $GITHUB_ENV + - name: Check for changes + id: check_changes + run: | + if [[ -n "$(git status --porcelain requirements.txt poetry.lock)" ]]; then + echo "changes=true" >> $GITHUB_OUTPUT + else + echo "changes=false" >> $GITHUB_OUTPUT + fi + - name: Configure Git + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + - name: Commit and push if changed + if: steps.check_changes.outputs.changes == 'true' + run: | + # Pull with rebase to get latest changes + git pull --rebase origin ${{ env.BRANCH_NAME }} + + # Stage and commit changes + git add requirements.txt poetry.lock + git commit -m "chore: update requirements.txt and poetry.lock [skip ci]" + + # Push with retry logic + max_attempts=3 + attempt=1 + while [ $attempt -le $max_attempts ]; do + if git push origin ${{ env.BRANCH_NAME }}; then + break + else + if [ $attempt -eq $max_attempts ]; then + echo "Failed to push after $max_attempts attempts" + exit 1 + fi + echo "Push failed, attempt $attempt of $max_attempts. Pulling and retrying..." 
+ git pull --rebase origin ${{ env.BRANCH_NAME }} + attempt=$((attempt + 1)) + fi + done + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/CITATION.cff b/CITATION.cff index 4700932..97e309c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ authors: - family-names: "Lui" given-names: "Lok Hei" orcid: "https://orcid.org/0000-0001-5077-1530" -title: "Dataverse metadata Crawler" +title: "Dataverse Metadata Crawler" version: 0.1.0 date-released: 2025-01-16 -url: "https://github.com/kenlhlui/dataverse-metadata-crawler-p" \ No newline at end of file +url: "https://github.com/scholarsportal/dataverse-metadata-crawler" diff --git a/README.md b/README.md index cf4b45d..6189d31 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ 2. Change to the project directory ```sh - cd ~/dataverse-metadata-export-p + cd ./dataverse-metadata-crawler ``` 3. Create an environment file (.env) @@ -65,6 +65,7 @@ A Python CLI tool for extracting and exporting metadata from [Dataverse](https:/ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALIAS -v VERSION ``` **Required arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |--------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| | --collection_alias | -c | TEXT | Name of the collection to crawl.
**[required]** | None | @@ -72,6 +73,7 @@ python3 dvmeta/main.py [-a AUTH] [-l] [-d] [-p] [-f] [-e] [-s] -c COLLECTION_ALI **Optional arguments:** + | **Option** | **Short** | **Type** | **Description** | **Default** | |----------------------|-----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------| | --auth | -a | TEXT | Authentication token to access the Dataverse repository.
If | None | @@ -96,6 +98,7 @@ python3 dvmeta/main.py -c demo -v 1.0 -d -s -p -a xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx ``` ## 📂Output Structure + | File | Description | |-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------| | ds_metadata_yyyymmdd-HHMMSS.json | Datasets' their data files' metadata in JSON format. | @@ -145,21 +148,20 @@ If you use this software in your work, please cite it using the following metada APA: ``` -Lui, L. H. (2025). Dataverse metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/kenlhlui/dataverse-metadata-crawler-p +Lui, L. H. (2025). Dataverse Metadata Crawler (Version 0.1.0) [Computer software]. https://github.com/scholarsportal/dataverse-metadata-crawler ``` BibTeX: ``` -@software{Lui_Dataverse_metadata_Crawler_2025, +@software{Lui_Dataverse_Metadata_Crawler_2025, author = {Lui, Lok Hei}, month = jan, -title = {{Dataverse metadata Crawler}}, -url = {https://github.com/kenlhlui/dataverse-metadata-crawler-p}, +title = {{Dataverse Metadata Crawler}}, +url = {https://github.com/scholarsportal/dataverse-metadata-crawler}, version = {0.1.0}, year = {2025} } ``` ## ✍️Authors -Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - kenlh.lui@utoronto.ca - +Ken Lui - Data Curation Specialist, Map and Data Library, University of Toronto - [kenlh.lui@utoronto.ca](mailto:kenlh.lui@utoronto.ca) diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..2217569 --- /dev/null +++ b/_config.yml @@ -0,0 +1,19 @@ +# Site settings +title: Dataverse Metadata Crawler +description: A Python CLI tool for extracting and exporting metadata from Dataverse repositories to JSON and CSV formats. 
+baseurl: "/dataverse-metadata-crawler" # Base URL (leave blank for root deployment) +url: "https://scholarsportal.github.io" # Your GitHub Pages URL + +remote_theme: pages-themes/primer +plugins: +- jekyll-remote-theme # add this line to the plugins list if you already have one +- jekyll-seo-tag # Required by primer theme + +# Markdown settings +markdown: kramdown +kramdown: + input: GFM # Enables GitHub Flavored Markdown (GFM) + +# Build settings +source: ./ +destination: ./_site From cbd2fab18402c6e18dad2029bb8ee092752214ae Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:38:47 -0500 Subject: [PATCH 25/28] Update poetry-export_dependencies.yml --- .github/workflows/poetry-export_dependencies.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml index 36306c8..d84f5b6 100644 --- a/.github/workflows/poetry-export_dependencies.yml +++ b/.github/workflows/poetry-export_dependencies.yml @@ -1,12 +1,15 @@ name: Poetry export requirements.txt on: - push: + pull_request: branches: - - '*' # Trigger on any push to any branch + - 'main' # Trigger only on pull requests made to the main branch paths: - 'requirements.txt' - 'pyproject.toml' - 'poetry.lock' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + jobs: poetry-export_dependencies: strategy: From 45b55c0d468f18033c8650e7e460217cf2d53abe Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:43:40 -0500 Subject: [PATCH 26/28] Update poetry-export_dependencies.yml --- .github/workflows/poetry-export_dependencies.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml index 28a6383..da392ed 100644 --- a/.github/workflows/poetry-export_dependencies.yml +++ b/.github/workflows/poetry-export_dependencies.yml @@ -1,7 +1,6 @@ name: Poetry export requirements.txt on: - - pull_request: + pull_request_target: branches: - 'main' # Trigger only on pull requests made to the main branch From d95ac7bb875987c18e312e8becd17e2c9a4e8b3e Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:53:08 -0500 Subject: [PATCH 27/28] Update poetry-export_dependencies.yml --- .github/workflows/poetry-export_dependencies.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/poetry-export_dependencies.yml b/.github/workflows/poetry-export_dependencies.yml index da392ed..11bdb7e 100644 --- a/.github/workflows/poetry-export_dependencies.yml +++ b/.github/workflows/poetry-export_dependencies.yml @@ -1,6 +1,6 @@ name: Poetry export requirements.txt on: - pull_request_target: + push: branches: - 'main' # Trigger only on pull requests made to the main branch From 5a0bf38338444409026371ad03902c3f06345a48 Mon Sep 17 00:00:00 2001 From: Ken Lui <116421546+kenlhlui@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:55:33 -0500 Subject: [PATCH 28/28] Update pyproject.toml --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3b2d991..5f3d38a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "dataverse-metadata-crawler" -version = "0.1.0" -description = "A Python CLI tool for extracting and exporting 
metadata from Dataverse repositories to JSON and CSV formats." +version = "0.1.1" +description = "A Python CLI tool for bulk extracting and exporting metadata from Dataverse repositories' collections to JSON and CSV formats." authors = ["Ken Lui "] license = "MIT" readme = "README.md"