Skip to content

Commit 4f95bc1

Browse files
authored
Add GH workflow for running analysis (#18)
Proof-of-concept: can we schedule weekly analysis on GitHub Actions? This adds a manually runnable workflow that performs the analysis and saves the results as an artifact. It doesn’t (yet?) upload the results to Google Drive.
1 parent b77a67d commit 4f95bc1

File tree

2 files changed

+91
-0
lines changed

2 files changed

+91
-0
lines changed

.editorconfig

+3
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ trim_trailing_whitespace = true
66

77
[*.js]
88
indent_size = 2
9+
10+
[*.yml]
11+
indent_size = 2

.github/workflows/analyze.yml

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
# Workflow that runs generate_task_sheets.py and uploads its output directory
# as a build artifact.
name: Analyze

env:
  # Used as the `--pattern` argument of the analyze job whenever the
  # `pattern` input is empty.
  TEST_PATTERN: '*justice.gov/*'
5+
# Triggers: pull requests, plus manual dispatch with optional overrides.
on:
  pull_request: {}
  workflow_dispatch:
    # Every input is an optional string. The analyze job substitutes its own
    # fallback value when an input is empty.
    inputs:
      # Passed to generate_task_sheets.py as `--threshold`.
      threshold:
        type: string
        required: false
        description: Threshold
      # Passed to generate_task_sheets.py as `--pattern`.
      pattern:
        type: string
        required: false
        default: ''
        description: Pattern
      # Passed to generate_task_sheets.py as `--after`.
      from:
        type: string
        required: false
        default: ''
        description: From Time
      # Passed to generate_task_sheets.py as `--before`.
      to:
        type: string
        required: false
        default: ''
        description: To Time
29+
jobs:
  # Sets up Python and Node, starts the readability-server in the background,
  # runs generate_task_sheets.py, and uploads the `out` directory.
  analyze:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: pip

      - name: Install System Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y --no-install-recommends \
            gcc g++ pkg-config libxml2-dev libxslt-dev libz-dev

      - name: Install Python Dependencies
        run: pip install -r requirements.txt

      - name: Download NLTK Corpora
        run: |
          python -m nltk.downloader stopwords

      - uses: actions/setup-node@v4
        with:
          node-version: '22'
          cache: 'npm'
          cache-dependency-path: readability-server/package-lock.json

      - name: Install readability-server dependencies
        run: |
          cd readability-server
          npm ci

      # NOTE(review): the server is backgrounded and nothing here waits for it
      # to become ready before the next step starts -- confirm that the
      # analysis script tolerates a server that is still starting up.
      - name: Run readability-server
        run: |
          cd readability-server
          npm start &

      - name: Analyze!
        env:
          WEB_MONITORING_DB_URL: '${{ secrets.WEB_MONITORING_DB_URL }}'
          # Inputs are handed to the script through environment variables
          # instead of being interpolated into the shell text. A value that
          # contains a quote can then neither break the command line nor
          # inject extra shell commands. Each expression falls back to a
          # default when the input is empty (e.g. on pull_request runs).
          ANALYSIS_AFTER: "${{ inputs.from || '240' }}"
          ANALYSIS_BEFORE: "${{ inputs.to || '0' }}"
          ANALYSIS_THRESHOLD: "${{ inputs.threshold || '0.25' }}"
          ANALYSIS_PATTERN: '${{ inputs.pattern || env.TEST_PATTERN }}'
        run: |
          python generate_task_sheets.py \
            --output out \
            --after "${ANALYSIS_AFTER}" \
            --before "${ANALYSIS_BEFORE}" \
            --threshold "${ANALYSIS_THRESHOLD}" \
            --pattern "${ANALYSIS_PATTERN}"

      - name: Upload Results
        uses: actions/upload-artifact@v4
        with:
          name: output
          path: out
          # Fail the step rather than silently uploading nothing.
          if-no-files-found: error
          # TODO: what's appropriate retention here?
          # retention-days: 1

0 commit comments

Comments
 (0)