forked from woluxwolu/twint
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmost_liked.py
132 lines (102 loc) · 4.31 KB
/
most_liked.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import sys, os, time
from datetime import datetime, date, timedelta
import twint.run
from twint import Config
from dotenv import load_dotenv
import pandas as pd
import csv
def get_usernames(source_filename: str):
    """Read the account names to scan from a text file.

    Args:
        source_filename: Path to a text file holding one username per line.

    Returns:
        A list of usernames in file order. Surrounding whitespace is
        stripped and blank lines are skipped, so a stray empty line does not
        turn into a search for an empty username.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle deterministically; the bare
    # open(...).read() form left that to the garbage collector.
    with open(source_filename) as source_file:
        lines = source_file.read().splitlines()
    return [line.strip() for line in lines if line.strip()]
def get_filtered_and_sorted_filename(filename: str):
    """Derive the name of the filtered companion file for ``filename``.

    ``_filtered`` is inserted immediately before the extension of the last
    path component, e.g. ``tweets.csv`` -> ``tweets_filtered.csv``.

    Args:
        filename: Path of the primary output file.

    Returns:
        The path with ``_filtered`` placed before the extension. When the
        name has no extension the suffix is simply appended.
    """
    # os.path.splitext only considers a dot in the final path component, so
    # "./output/tweets" is not split on the leading "./", and a name without
    # any dot no longer raises IndexError as the rsplit-based version did.
    root, extension = os.path.splitext(filename)
    return f"{root}_filtered{extension}"
def get_twint_config(username, start_datetime, end_datetime):
    """Build the twint ``Config`` used to search one account's tweets.

    Args:
        username: Account identifier, stored in ``Config.User_id``.
        start_datetime: Lower bound of the search window (``Config.Since``).
        end_datetime: Upper bound of the search window (``Config.Until``).

    Returns:
        A populated ``Config`` instance.
    """
    config = Config()
    # NOTE(review): callers pass names read from a "one username per line"
    # file, yet the value is stored in User_id rather than a username field.
    # Confirm this is what this twint fork expects.
    config.User_id = username
    config.Since = start_datetime
    config.Until = end_datetime
    # Option flags as chosen by the original author; their precise effect is
    # defined by twint's Config -- TODO confirm against this fork.
    config.Utc = False
    config.Replies = False
    config.Impressions = True
    config.Pandas = True
    # The original looked up an empty variable name (os.environ.get("")),
    # which can never match; use the same key that main() reads.
    config.X_csrf_token = os.environ.get("X_CSRF_TOKEN")
    return config
def get_tweets(source_names, start_time, end_time, x_csrf_token, cookie):
    """Search tweets for each account and combine them into one DataFrame.

    Args:
        source_names: Iterable of account identifiers to search.
        start_time: Start of the search window, forwarded to the config.
        end_time: End of the search window, forwarded to the config.
        x_csrf_token: Value assigned to ``Config.X_csrf_token``.
        cookie: Value assigned to ``Config.Cookie``.

    Returns:
        The frame of the single successful search, the row-wise
        concatenation (with a fresh index) of all successful searches, or
        ``None`` when no search produced a frame.
    """
    frames = []
    for username in source_names:
        print(f"[{username}] Getting tweets...")
        config = get_twint_config(username, start_time, end_time)
        # Explicit credentials from the caller take precedence over whatever
        # the config builder filled in.
        config.X_csrf_token = x_csrf_token
        config.Cookie = cookie
        try:
            twint.run.Search(config)
            # NOTE(review): Tweets_df is module-level state inside twint;
            # confirm it is reset per search, otherwise a search that yields
            # nothing could re-add the previous account's rows.
            search_data = twint.storage.panda.Tweets_df
            if search_data is not None:
                frames.append(search_data)
        except Exception as e:
            # Best effort: one failing account must not abort the whole run.
            print(f"Error: {e}")
        # Pause between accounts so consecutive searches are throttled.
        time.sleep(3)

    if not frames:
        return None
    if len(frames) == 1:
        return frames[0]
    # Concatenate once at the end instead of re-copying the accumulated
    # frame on every iteration.
    return pd.concat(frames, ignore_index=True)
def write_out_tweets(tweets_df, filename):
    """Write all tweets to ``filename`` and the popular ones to a companion file.

    The full frame is written as CSV first. A second CSV, whose name comes
    from ``get_filtered_and_sorted_filename(filename)``, holds a fixed set
    of columns for the rows with more than 9 likes, sorted by like count in
    descending order.

    Args:
        tweets_df: DataFrame of tweets, or ``None`` when nothing was fetched.
        filename: Path of the primary CSV output file.

    Returns:
        None. When there is nothing to write, a notice is printed and no
        file is created.
    """
    # get_tweets() yields None when every search failed or no accounts were
    # given, and an empty frame has none of the columns selected below;
    # bail out instead of crashing on either.
    if tweets_df is None or tweets_df.empty:
        print("No tweets to write; skipping output files.")
        return

    tweets_df.to_csv(filename, index=False, quoting=csv.QUOTE_NONNUMERIC)

    filtered_filename = get_filtered_and_sorted_filename(filename)
    report_columns = [
        'user_id', 'username', 'name', 'tweet', 'date', 'nlikes',
        'nimpressions', 'nquotes', 'nreplies', 'nretweets', 'reply_to',
    ]
    popular = tweets_df[report_columns].query("nlikes > 9")
    popular.sort_values(by=['nlikes'], ascending=False).to_csv(
        filtered_filename, index=False, quoting=csv.QUOTE_NONNUMERIC
    )
def get_last_week(date: date):
    """Return the week label for the day exactly seven days before ``date``.

    Args:
        date: Reference day.

    Returns:
        Whatever ``get_week`` returns for the day one week earlier.
    """
    return get_week(date - timedelta(weeks=1))
def get_week(date: date):
    """Format the ISO calendar week of ``date`` as ``"<year>-W<week>"``.

    Both parts come from ``date.isocalendar()``, so the year is the ISO
    year (which can differ from the calendar year around New Year) and the
    week number is not zero-padded.

    Args:
        date: Day whose week label is wanted.

    Returns:
        A string such as ``"2020-W2"``.
    """
    # Only the first two fields are needed; the weekday is ignored.
    iso_year, iso_week = date.isocalendar()[:2]
    return '{}-W{}'.format(iso_year, iso_week)
def get_datetime_iso(date: date, method):
    """Return Monday 00:00 of a week as an ISO 8601 timestamp string.

    Args:
        date: Reference day handed to ``method``.
        method: Callable mapping a date to a ``"<ISO year>-W<ISO week>"``
            label (``get_week`` and ``get_last_week`` in this file).

    Returns:
        The Monday midnight of the labelled week, interpreted as local time
        and rendered with its UTC offset, e.g. ``2020-01-06T00:00:00+01:00``.

    Raises:
        ValueError: If the label does not name a valid ISO week.
    """
    week_label = method(date)
    # The labels are built from date.isocalendar(), so they must be parsed
    # with the ISO directives (%G year, %V week, %u weekday). The previous
    # %Y/%W/%w combination uses a different week numbering and landed a week
    # late in years whose ISO week 1 starts before the first Monday.
    monday = datetime.strptime(week_label + '-1', "%G-W%V-%u")
    # astimezone() on a naive value attaches the local zone; isoformat()
    # yields the same text as str() with the space replaced by "T".
    return monday.astimezone().isoformat()
def get_tweets_once(api_client, source_ids):
    """Fetch tweets once and write them to the file named by ``sys.argv[2]``.

    NOTE(review): this function appears to be stale and cannot run as
    written. It calls ``get_tweets`` with two positional arguments, while
    the ``get_tweets`` defined in this file requires five, so the call
    raises ``TypeError``. No caller is visible in this file. Confirm whether
    it can be removed or needs to be ported to the current signature.

    Args:
        api_client: Forwarded as the first argument to ``get_tweets``.
        source_ids: Forwarded as the second argument to ``get_tweets``.
    """
    if (len(sys.argv) <= 3):
        # No date given on the command line: fall back to today.
        week_date = date.today()
    else:
        # get_datetime_iso() always yields Monday 00:00 of a week, so shift
        # the given day forward by 7 days to make the start of "last week"
        # fall on the Monday of the given day's own week.
        week_date = datetime.strptime(sys.argv[3],"%Y-%m-%d") + timedelta(days=7)
    # NOTE(review): week_date is computed above but never used below.
    tweets = get_tweets(api_client, source_ids)
    write_out_tweets(tweets, sys.argv[2])
def get_tweets_for_week(source_names, output_filename, end_date, week_num, x_csrf_token, cookie):
    """Fetch and write out the tweets posted during one ISO calendar week.

    The search window runs from Monday 00:00 of week ``week_num`` to Monday
    00:00 of the following week. The year is the ISO year of ``end_date``.

    Args:
        source_names: Account identifiers to search.
        output_filename: Path of the primary CSV output file.
        end_date: Date whose ISO year selects the year of the week.
        week_num: ISO week number within that year.
        x_csrf_token: Credential forwarded to ``get_tweets``.
        cookie: Credential forwarded to ``get_tweets``.

    Raises:
        ValueError: If ``week_num`` is not a valid ISO week of that year.
    """
    # makedirs also creates a missing "./output" parent and tolerates an
    # existing directory, unlike the exists()/mkdir() pair.
    os.makedirs("./output/most_liked", exist_ok=True)

    # The year comes from isocalendar(), so the week must be parsed with the
    # matching ISO directives rather than %Y/%W/%w.
    iso_year = end_date.isocalendar()[0]
    start = datetime.strptime(f"{iso_year}-W{week_num}-1", "%G-W%V-%u")
    # Adding seven days instead of parsing "week_num + 1" keeps the last
    # week of a year working, where the next week number does not exist.
    end = start + timedelta(days=7)
    start_str = str(start)
    end_str = str(end)

    print(f"------ GETTING RESULTS FOR WEEK {week_num} -------")
    print(f"------ FROM {start_str} TO {end_str}")

    tweets_df = get_tweets(source_names, start_str, end_str, x_csrf_token, cookie)
    write_out_tweets(tweets_df, output_filename)
def main():
    """Command-line entry point: ``most_liked.py SOURCE_FILE WEEK_NUM OUTPUT_FILE``.

    Validates the arguments, loads ``X_CSRF_TOKEN`` and ``COOKIE`` from the
    environment (after reading a ``.env`` file), then fetches and writes the
    tweets for the requested week of the current year. Every validation
    failure prints a message and returns without raising.
    """
    print("\n\n")
    # Three user arguments are read below (argv[1]..argv[3]), so argv needs
    # at least four entries; the previous "< 3" check let a missing
    # OUTPUT_FILE through and crashed with IndexError.
    if len(sys.argv) < 4:
        print("NOT ENOUGH ARGUMENTS. PLEASE RUN THIS AS FOLLOWS:")
        print("python most_liked.py SOURCE_FILE WEEK_NUM OUTPUT_FILE")
        print("\n")
        print("SOURCEFILE - text file with one username per line")
        print("WEEK_NUM - number of week in current year")
        print("OUTPUT_FILE - output filename")
        return

    load_dotenv()
    x_csrf_token = os.environ.get("X_CSRF_TOKEN")
    cookie = os.environ.get("COOKIE")
    if x_csrf_token is None or cookie is None:
        print("Please specify correct X_CSRF_TOKEN and COOKIE values in .env file.")
        return

    source_filename = sys.argv[1]
    try:
        week_num = int(sys.argv[2])
    except ValueError:
        # Report a bad week number the same way as the other input errors
        # instead of surfacing a traceback.
        print(f"WEEK_NUM must be an integer, got {sys.argv[2]!r}.")
        return
    output_filename = sys.argv[3]

    source_names = get_usernames(source_filename)
    get_tweets_for_week(source_names, output_filename, date.today(), week_num, x_csrf_token, cookie)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()