# Skip to content

# Add GH workflow for running analysis #4

# Add GH workflow for running analysis

# Add GH workflow for running analysis #4

# Workflow file for this run

# Workflow that runs the analysis on every pull request and on manual
# dispatch.
name: Analyze

env:
  # Pattern the Analyze step falls back to when no `pattern` input is given.
  TEST_PATTERN: '*justice.gov/*'

on:
  pull_request: {}
  workflow_dispatch:
    # All inputs are optional and default to an empty string. The Analyze
    # step substitutes the effective fallback for any empty value:
    #   threshold -> '0.25', pattern -> env.TEST_PATTERN,
    #   from -> '240', to -> '0'.
    # NOTE(review): the units of `from`/`to` are not visible in this file;
    # they are passed to generate_task_sheets.py as --after/--before.
    inputs:
      threshold:
        description: 'Threshold'
        required: false
        type: string
        default: ''
      pattern:
        description: 'Pattern'
        required: false
        type: string
        default: ''
      from:
        description: 'From Time'
        required: false
        type: string
        default: ''
      to:
        description: 'To Time'
        required: false
        type: string
        default: ''
jobs:
  # Sets up Python and Node, starts readability-server in the background,
  # runs generate_task_sheets.py, and uploads the `out` directory as an
  # artifact.
  analyze:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: pip

      - name: Install System Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y --no-install-recommends \
            gcc g++ pkg-config libxml2-dev libxslt-dev libz-dev

      - name: Install Python Dependencies
        run: pip install -r requirements.txt

      - name: Download NLTK Corpora
        run: |
          python -m nltk.downloader stopwords

      - uses: actions/setup-node@v4
        with:
          node-version: '22'
          cache: 'npm'
          cache-dependency-path: readability-server/package-lock.json

      - name: Install readability-server dependencies
        working-directory: readability-server
        run: npm ci

      # Started in the background so the following steps can run while the
      # server stays up.
      # NOTE(review): nothing waits for the server to be ready before the
      # Analyze step starts; confirm whether a readiness check is needed.
      - name: Run readability-server
        working-directory: readability-server
        run: |
          npm start &

      - name: Analyze!
        # Inputs are passed through environment variables instead of being
        # spliced into the script text, so a value containing quotes or
        # shell metacharacters cannot break or alter the command. Empty or
        # missing inputs (e.g. on pull_request runs) use the fallbacks.
        env:
          AFTER: "${{ inputs.from || '240' }}"
          BEFORE: "${{ inputs.to || '0' }}"
          THRESHOLD: "${{ inputs.threshold || '0.25' }}"
          PATTERN: "${{ inputs.pattern || env.TEST_PATTERN }}"
        run: |
          # FIXME: set up readability running in a background process.
          # We probably need to bring the code over from
          # web-monitoring-changed-terms-analysis
          # NOTE(review): the "Run readability-server" step already starts
          # the server in the background; this FIXME may be stale - confirm.
          python generate_task_sheets.py \
            --output out \
            --after "$AFTER" \
            --before "$BEFORE" \
            --threshold "$THRESHOLD" \
            --pattern "$PATTERN"

      - name: Upload Results
        uses: actions/upload-artifact@v4
        with:
          name: output
          path: out
          # Fail the job if the analysis produced no output.
          if-no-files-found: error
          # TODO: what's appropriate retention here?
          # retention-days: 1