# Add GH workflow for running analysis (#4)
# NOTE: GitHub flagged this file as containing bidirectional Unicode text that may be interpreted or compiled differently than it appears. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow: installs the Python and Node toolchains, starts readability-server
# in the background, runs generate_task_sheets.py, and uploads the `out`
# directory as a build artifact.
name: Analyze

env:
  # Value passed to --pattern when the `pattern` input is empty (which includes
  # every pull_request run, where no inputs exist).
  TEST_PATTERN: '*justice.gov/*'

on:
  pull_request: {}
  workflow_dispatch:
    # All inputs are optional; an empty value falls back to the default chosen
    # in the "Analyze!" step below.
    inputs:
      threshold:
        description: 'Threshold'
        required: false
        type: string
        default: ''
      pattern:
        description: 'Pattern'
        required: false
        type: string
        default: ''
      from:
        description: 'From Time'
        required: false
        type: string
        default: ''
      to:
        description: 'To Time'
        required: false
        type: string
        default: ''

jobs:
  analyze:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: pip

      # Compiler toolchain plus libxml2/libxslt/zlib headers.
      # NOTE(review): presumably needed to build Python packages from
      # requirements.txt -- confirm which ones still require this.
      - name: Install System Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y --no-install-recommends \
            gcc g++ pkg-config libxml2-dev libxslt-dev libz-dev

      - name: Install Python Dependencies
        run: pip install -r requirements.txt

      - name: Download NLTK Corpora
        run: |
          python -m nltk.downloader stopwords

      - uses: actions/setup-node@v4
        with:
          node-version: '22'
          cache: 'npm'
          cache-dependency-path: readability-server/package-lock.json

      - name: Install readability-server dependencies
        run: |
          cd readability-server
          npm ci

      # Started with `&` so the step returns while the server keeps running.
      # NOTE(review): nothing waits for the server to be ready before the
      # analysis step starts -- consider polling it here.
      - name: Run readability-server
        run: |
          cd readability-server
          npm start &

      # FIXME: set up readability running in a background process.
      # We probably need to bring the code over from
      # web-monitoring-changed-terms-analysis
      # NOTE(review): the "Run readability-server" step above appears to
      # address this FIXME -- confirm and remove it.
      - name: Analyze!
        env:
          # Expression results are handed to the script through the
          # environment instead of being spliced into the script text, so an
          # input containing quotes or shell metacharacters stays plain data.
          AFTER: "${{ inputs.from || '240' }}"
          BEFORE: "${{ inputs.to || '0' }}"
          THRESHOLD: "${{ inputs.threshold || '0.25' }}"
          PATTERN: "${{ inputs.pattern || env.TEST_PATTERN }}"
        run: |
          python generate_task_sheets.py \
            --output out \
            --after "$AFTER" \
            --before "$BEFORE" \
            --threshold "$THRESHOLD" \
            --pattern "$PATTERN"

      - name: Upload Results
        uses: actions/upload-artifact@v4
        with:
          name: output
          path: out
          # Fail the job if the analysis produced no output files.
          if-no-files-found: error
          # TODO: what's appropriate retention here?
          # retention-days: 1