|
| 1 | +""" |
| 2 | +Build constitutional insights data from hca.db for the Insights dashboard tab. |
| 3 | +""" |
| 4 | +import json |
| 5 | +import sqlite3 |
| 6 | +from pathlib import Path |
| 7 | + |
# SQLite database that get_insights_data() opens and queries.
DB_FILE = "data/processed/hca.db"
# Destination of the JSON payload written by main().
OUTPUT = "website/public/insights.json"
| 10 | + |
| 11 | + |
def strip_prefix(issue):
    """Return *issue* without the 'Public Law—Federal constitutional law—' prefix.

    Surrounding whitespace is removed before the prefix check and again
    after the prefix is dropped, so variants with a space after the final
    dash (or stray leading whitespace) are handled by the single prefix.

    Args:
        issue: The raw issue label, or None/empty when no label is present.

    Returns:
        The trimmed label without the prefix; an empty string when *issue*
        is None or empty.
    """
    if not issue:
        # A missing label (e.g. NULL from the database) has nothing to strip.
        return ""

    prefix = "Public Law—Federal constitutional law—"
    text = issue.strip()
    if text.startswith(prefix):
        text = text[len(prefix):].strip()
    return text
| 21 | + |
| 22 | + |
def _safe_pct(part, whole):
    """Return part/whole as a percentage rounded to 1 dp, or None when whole is 0."""
    if not whole:
        return None
    return round(100.0 * part / whole, 1)


def _split_concat(value):
    """Split a ' | '-joined GROUP_CONCAT result into a list ([] for NULL)."""
    if value is None:
        return []
    return value.split(' | ')


def get_insights_data():
    """Query the cases table in DB_FILE and assemble the insights payload.

    Returns:
        A dict with the keys:
          * 'summary'            - headline numbers for the callout cards
          * 'vote_splits'        - count of constitutional cases per vote split
          * 'topic_breakdown'    - per-topic percentages (topics with >= 3 cases)
          * 'direction_by_era'   - decision direction grouped by chief_argument
          * 'implied_freedom'    - political-communication cases grouped by year
          * 'corporations_cases' - one entry per corporations-power case

        Percentages in 'summary' are None when there are no matching cases.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()

        # Unnamed cases (no hca_citation) are unmatched sub-matters of multi-matter
        # decisions that are already represented by the named lead case. Exclude
        # them from all stats so we count distinct decisions rather than inflating
        # counts with duplicate sub-matters.
        CITED = "hca_citation IS NOT NULL AND hca_citation != ''"

        # 1. Vote split distribution on constitutional cases
        c.execute(f"""
            SELECT
                maj_votes || '-' || min_votes AS split,
                COUNT(*) AS count
            FROM cases
            WHERE primary_issue_sub_area = 'Constitutional law'
              AND {CITED}
            GROUP BY maj_votes, min_votes
            ORDER BY (maj_votes + min_votes) DESC, maj_votes DESC
        """)
        vote_splits = [{'split': r['split'], 'count': r['count']} for r in c.fetchall()]

        # 2. Topic breakdown table
        c.execute(f"""
            SELECT
                primary_issue AS topic,
                COUNT(*) AS cases,
                ROUND(100.0 * SUM(CASE WHEN min_votes = 0 THEN 1 ELSE 0 END)/COUNT(*), 1) AS unanimous_pct,
                ROUND(100.0 * SUM(CASE WHEN min_votes >= 2 THEN 1 ELSE 0 END)/COUNT(*), 1) AS contested_pct,
                ROUND(100.0 * SUM(CASE WHEN decision_direction='liberal' THEN 1 ELSE 0 END)/COUNT(*), 1) AS liberal_pct,
                ROUND(100.0 * SUM(CASE WHEN decision_direction='conservative' THEN 1 ELSE 0 END)/COUNT(*), 1) AS conservative_pct,
                ROUND(100.0 * SUM(CASE WHEN party_winning LIKE 'appealing%' THEN 1 ELSE 0 END)/COUNT(*), 1) AS appellant_win_pct,
                ROUND(AVG(CAST(maj_votes AS REAL) / (maj_votes + min_votes)), 2) AS avg_majority_share
            FROM cases
            WHERE primary_issue_sub_area = 'Constitutional law'
              AND {CITED}
            GROUP BY primary_issue
            HAVING cases >= 3
            ORDER BY cases DESC
        """)
        topic_breakdown = [
            {
                # A NULL primary_issue forms its own group; pass '' so the
                # label helper never receives None.
                'topic': strip_prefix(r['topic'] or ''),
                'cases': r['cases'],
                'unanimous_pct': r['unanimous_pct'],
                'contested_pct': r['contested_pct'],
                'liberal_pct': r['liberal_pct'],
                'conservative_pct': r['conservative_pct'],
                'appellant_win_pct': r['appellant_win_pct'],
                'avg_majority_share': r['avg_majority_share'],
            }
            for r in c.fetchall()
        ]

        # 3. Decision direction by Chief Justice era (constitutional cases only)
        c.execute(f"""
            SELECT
                chief_argument AS chief,
                COUNT(*) AS cases,
                ROUND(100.0 * SUM(CASE WHEN decision_direction='liberal' THEN 1 ELSE 0 END)/COUNT(*), 1) AS liberal_pct,
                ROUND(100.0 * SUM(CASE WHEN decision_direction='conservative' THEN 1 ELSE 0 END)/COUNT(*), 1) AS conservative_pct,
                ROUND(100.0 * SUM(CASE WHEN decision_direction='unspecifiable' THEN 1 ELSE 0 END)/COUNT(*), 1) AS unspecifiable_pct,
                ROUND(100.0 * SUM(CASE WHEN party_winning LIKE 'appealing%' THEN 1 ELSE 0 END)/COUNT(*), 1) AS appellant_win_pct,
                MIN(year_decision) AS from_year,
                MAX(year_decision) AS to_year
            FROM cases
            WHERE primary_issue_sub_area = 'Constitutional law'
              AND chief_argument IS NOT NULL
              AND {CITED}
            GROUP BY chief_argument
            ORDER BY MIN(year_decision)
        """)
        direction_by_era = [dict(r) for r in c.fetchall()]

        # 4. Implied freedom of political communication — year-by-year
        # GROUP_CONCAT drops NULL values, which would leave vote_splits and
        # directions shorter than case_names/citations and shift every later
        # entry. COALESCE keeps one (possibly empty) entry per case so the four
        # lists stay index-aligned.
        c.execute(f"""
            SELECT
                year_decision AS year,
                COUNT(*) AS cases,
                SUM(CASE WHEN decision_direction='liberal' THEN 1 ELSE 0 END) AS liberal,
                SUM(CASE WHEN decision_direction='conservative' THEN 1 ELSE 0 END) AS conservative,
                SUM(CASE WHEN decision_direction='unspecifiable' THEN 1 ELSE 0 END) AS unspecifiable,
                GROUP_CONCAT(
                    CASE WHEN case_name IS NOT NULL AND case_name != ''
                         THEN case_name
                         ELSE hca_citation
                    END,
                    ' | '
                ) AS case_names,
                GROUP_CONCAT(hca_citation, ' | ') AS citations,
                GROUP_CONCAT(COALESCE(maj_votes || '-' || min_votes, ''), ' | ') AS vote_splits,
                GROUP_CONCAT(COALESCE(decision_direction, ''), ' | ') AS directions
            FROM cases
            WHERE primary_issue LIKE '%political communication%'
              AND {CITED}
            GROUP BY year_decision
            ORDER BY year_decision
        """)
        implied_freedom = [
            {
                'year': r['year'],
                'cases': r['cases'],
                'liberal': r['liberal'],
                'conservative': r['conservative'],
                'unspecifiable': r['unspecifiable'],
                'case_names': _split_concat(r['case_names']),
                'citations': _split_concat(r['citations']),
                'vote_splits': _split_concat(r['vote_splits']),
                'directions': _split_concat(r['directions']),
            }
            for r in c.fetchall()
        ]

        # 5. Corporations power case list
        c.execute(f"""
            SELECT
                year_decision AS year,
                CASE WHEN case_name IS NOT NULL AND case_name != '' THEN case_name ELSE hca_citation END AS name,
                hca_citation AS citation,
                maj_votes,
                min_votes,
                decision_direction AS direction,
                party_winning
            FROM cases
            WHERE primary_issue LIKE '%Corporations power%'
              AND {CITED}
            ORDER BY year_decision
        """)
        corporations_cases = [dict(r) for r in c.fetchall()]

        # Summary stats for callout cards
        c.execute(f"SELECT COUNT(*) AS n FROM cases WHERE primary_issue_sub_area='Constitutional law' AND {CITED}")
        total_const = c.fetchone()['n']

        c.execute(f"SELECT COUNT(*) AS n FROM cases WHERE primary_issue_sub_area='Constitutional law' AND min_votes=0 AND {CITED}")
        unanimous_const = c.fetchone()['n']

        c.execute(f"SELECT COUNT(*) AS n FROM cases WHERE primary_issue LIKE '%political communication%' AND decision_direction='conservative' AND {CITED}")
        freedom_conservative = c.fetchone()['n']

        c.execute(f"SELECT COUNT(*) AS n FROM cases WHERE primary_issue LIKE '%political communication%' AND {CITED}")
        freedom_total = c.fetchone()['n']

        c.execute(f"""
            SELECT ROUND(100.0*SUM(CASE WHEN min_votes>=2 THEN 1 ELSE 0 END)/COUNT(*),1) AS pct
            FROM cases WHERE primary_issue LIKE '%Corporations power%' AND {CITED}
        """)
        corps_contested = c.fetchone()['pct']
    finally:
        # Release the database handle even when a query fails.
        conn.close()

    return {
        'summary': {
            'total_constitutional_cases': total_const,
            # _safe_pct yields None instead of raising when a category is empty,
            # matching the NULL that SQLite returns for corporations_contested_pct.
            'unanimous_pct': _safe_pct(unanimous_const, total_const),
            'freedom_total': freedom_total,
            'freedom_conservative_pct': _safe_pct(freedom_conservative, freedom_total),
            'corporations_contested_pct': corps_contested,
        },
        'vote_splits': vote_splits,
        'topic_breakdown': topic_breakdown,
        'direction_by_era': direction_by_era,
        'implied_freedom': implied_freedom,
        'corporations_cases': corporations_cases,
    }
| 187 | + |
| 188 | + |
def main():
    """Build the insights payload, write it to OUTPUT as JSON, and print a short report."""
    print("Building insights data...")
    insights = get_insights_data()

    # Make sure the destination directory exists before writing.
    destination = Path(OUTPUT)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, 'w') as handle:
        handle.write(json.dumps(insights, indent=2))

    summary = insights['summary']
    report_lines = (
        f"✅ Wrote {OUTPUT}",
        f" {summary['total_constitutional_cases']} constitutional cases",
        f" {summary['unanimous_pct']}% unanimous",
        f" {len(insights['topic_breakdown'])} topics",
        f" {len(insights['implied_freedom'])} implied freedom data points",
    )
    for line in report_lines:
        print(line)
| 203 | + |
| 204 | + |
# Run the build only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments