-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathrun.py
200 lines (173 loc) · 6.24 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
from __future__ import annotations
import argparse
import dotenv
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING
from jinja2 import Environment, FileSystemLoader
from mastodon import Mastodon
from api import fetch_posts_and_boosts
from scorers import get_scorers
from thresholds import get_threshold_from_name, get_thresholds
if TYPE_CHECKING:
from scorers import Scorer
from thresholds import Threshold
def render_digest(context: dict, output_dir: Path, theme: str = "default") -> None:
    """Render the digest page for ``theme`` and write it to ``output_dir``.

    Templates are looked up first in ``templates/themes/<theme>`` and then in
    ``templates/common``; both paths are relative to the current working
    directory. The ``index.html.jinja`` template is rendered with ``context``
    and the result is written to ``<output_dir>/index.html``.

    Args:
        context: Variables made available to the template.
        output_dir: Directory that receives ``index.html``.
        theme: Name of a directory under ``templates/themes``.
    """
    # NOTE(review): the Environment is created without autoescape, so the
    # templates themselves appear to be responsible for escaping any
    # untrusted values they interpolate - confirm against the templates.
    environment = Environment(
        loader=FileSystemLoader([f"templates/themes/{theme}", "templates/common"])
    )
    template = environment.get_template("index.html.jinja")
    output_html = template.render(context)
    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent non-ASCII characters (emoji, accented text) in the rendered page.
    (output_dir / "index.html").write_text(output_html, encoding="utf-8")
def list_themes() -> list[str]:
    """Return the names of the available themes, sorted alphabetically.

    A theme is an entry of ``templates/themes`` (relative to the current
    working directory) whose name does not start with ``.`` and which
    contains an ``index.html.jinja`` file.

    Raises:
        FileNotFoundError: If ``templates/themes`` does not exist.
    """
    themes_root = "templates/themes"
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # result (and anything built from it, such as CLI choices) deterministic.
    return sorted(
        dir_name
        for dir_name in os.listdir(themes_root)
        if not dir_name.startswith(".")
        and os.path.isfile(os.path.join(themes_root, dir_name, "index.html.jinja"))
    )
def format_base_url(mastodon_base_url: str) -> str:
    """Return the base URL without surrounding whitespace or trailing slashes."""
    trimmed = mastodon_base_url.strip()
    return trimmed.rstrip("/")
def run(
    hours: int,
    scorer: Scorer,
    threshold: Threshold,
    mastodon_token: str,
    mastodon_base_url: str,
    timeline: str,
    output_dir: Path,
    theme: str,
) -> None:
    """Build a digest from the given timeline and render it to ``output_dir``.

    Posts and boosts are fetched, filtered by ``threshold`` using ``scorer``,
    sorted by descending score and handed to ``render_digest``.

    Args:
        hours: Look-back window passed to ``fetch_posts_and_boosts``.
        scorer: Scorer used both for thresholding and for ordering.
        threshold: Threshold that selects which posts are kept.
        mastodon_token: Access token for the Mastodon client.
        mastodon_base_url: API base URL for the Mastodon client; also exposed
            to the template.
        timeline: Timeline identifier passed to ``fetch_posts_and_boosts``.
        output_dir: Directory that receives the rendered digest.
        theme: Template theme name passed to ``render_digest``.

    Raises:
        SystemExit: If neither a post nor a boost meets the threshold.
    """
    # Local import keeps this function self-contained; the module only
    # imports ``datetime`` itself at the top of the file.
    from datetime import timezone

    print(f"Building digest from the past {hours} hours...")

    mst = Mastodon(
        user_agent="mastodon_digest",
        access_token=mastodon_token,
        api_base_url=mastodon_base_url,
    )

    # 1. Fetch all the posts and boosts from our home timeline that we haven't interacted with
    posts, boosts = fetch_posts_and_boosts(hours, mst, timeline)

    # 2. Score them, and return those that meet our threshold
    threshold_posts = threshold.posts_meeting_criteria(posts, scorer)
    threshold_boosts = threshold.posts_meeting_criteria(boosts, scorer)

    # 3. Sort posts and boosts by score, descending
    threshold_posts = sorted(
        threshold_posts, key=lambda post: post.get_score(scorer), reverse=True
    )
    threshold_boosts = sorted(
        threshold_boosts, key=lambda post: post.get_score(scorer), reverse=True
    )

    # 4. Build the digest
    if not threshold_posts and not threshold_boosts:
        sys.exit(
            "No posts or boosts were found for the provided digest arguments. Exiting."
        )

    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    rendered_at = datetime.now(timezone.utc).strftime("%B %d, %Y at %H:%M:%S UTC")
    render_digest(
        context={
            "hours": hours,
            "posts": threshold_posts,
            "boosts": threshold_boosts,
            "mastodon_base_url": mastodon_base_url,
            "rendered_at": rendered_at,
            "timeline_name": timeline,
            "threshold": threshold.get_name(),
            "scorer": scorer.get_name(),
        },
        output_dir=output_dir,
        theme=theme,
    )
if __name__ == "__main__":
    # Command-line entry point: parse arguments, validate the output directory,
    # timeline and environment, then build the digest.
    scorers = get_scorers()
    thresholds = get_thresholds()

    arg_parser = argparse.ArgumentParser(
        prog="mastodon_digest",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument(
        "-f",  # for "feed" since t-for-timeline is taken
        default="home",
        dest="timeline",
        help="The timeline to summarize: Expects 'home', 'local' or 'federated', or 'list:id', 'hashtag:tag'",
        required=False,
    )
    arg_parser.add_argument(
        "-n",
        choices=range(1, 25),
        default=12,
        dest="hours",
        help="The number of hours to include in the Mastodon Digest",
        type=int,
    )
    arg_parser.add_argument(
        "-s",
        choices=list(scorers.keys()),
        default="SimpleWeighted",
        dest="scorer",
        help="""Which post scoring criteria to use.
            Simple scorers take a geometric mean of boosts and favs.
            Extended scorers include reply counts in the geometric mean.
            Weighted scorers multiply the score by an inverse square root
            of the author's followers, to reduce the influence of large accounts.
        """,
    )
    arg_parser.add_argument(
        "-t",
        choices=list(thresholds.keys()),
        default="normal",
        dest="threshold",
        help="""Which post threshold criteria to use.
            lax = 90th percentile,
            normal = 95th percentile,
            strict = 98th percentile
        """,
    )
    arg_parser.add_argument(
        "-o",
        default="./render/",
        dest="output_dir",
        help="Output directory for the rendered digest",
        required=False,
    )
    arg_parser.add_argument(
        "--theme",
        choices=list_themes(),
        default="default",
        dest="theme",
        help="Named template theme with which to render the digest",
        required=False,
    )
    args = arg_parser.parse_args()

    # Attempt to validate the output directory. is_dir() is already False for
    # a missing path, so a separate exists() check is not needed.
    output_dir = Path(args.output_dir)
    if not output_dir.is_dir():
        sys.exit(f"Output directory not found: {args.output_dir}")

    # Loosely validate the timeline argument, so that if a completely unexpected string is entered,
    # we explicitly reset to 'Home', which makes the rendered output cleaner.
    # Only the part before the first ':' (the timeline type) is checked.
    timeline = args.timeline.strip().lower()
    valid_timeline_types = ("home", "local", "federated", "hashtag", "list")
    timeline_type, _, _ = timeline.partition(":")
    if timeline_type not in valid_timeline_types:
        timeline = "home"

    # Load and validate env. override=False keeps variables that are already
    # set in the process environment.
    dotenv.load_dotenv(override=False)

    mastodon_token = os.getenv("MASTODON_TOKEN")
    mastodon_base_url = os.getenv("MASTODON_BASE_URL")

    if not mastodon_token:
        sys.exit("Missing environment variable: MASTODON_TOKEN")
    if not mastodon_base_url:
        sys.exit("Missing environment variable: MASTODON_BASE_URL")

    run(
        args.hours,
        scorers[args.scorer](),
        get_threshold_from_name(args.threshold),
        mastodon_token,
        format_base_url(mastodon_base_url),
        timeline,
        output_dir,
        args.theme,
    )