forked from woluxwolu/twint
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdzlcz.py
94 lines (77 loc) · 3.21 KB
/
dzlcz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import twint.run
import os
import requests
from dotenv import load_dotenv
from twint import Config
import pandas as pd
from dataclasses import dataclass, asdict
from datetime import datetime
import json
@dataclass
class TweetReport:
    """One report of a tweet, as produced by ``create_tweet_report``.

    The ``tweet_*`` fields identify the tweet being reported (the tweet that
    was replied to); the ``reported_by_*`` fields identify the reply that
    carries the report and its author.
    """

    # Tweet being reported and its author.
    tweet_id: str
    tweet_user_id: str
    tweet_username: str
    # Author and status id of the reply that carries the report.
    reported_by_user_id: str
    reported_by_username: str
    reported_by_status_id: str
    # Creation time of the reporting tweet (a float timestamp).
    reported_at: float

    def toJSON(self):
        """Return this report as a pretty-printed JSON object string.

        Keys are sorted alphabetically and nested levels are indented by
        four spaces.
        """

        def _as_attribute_dict(obj):
            # Fallback for anything json cannot encode natively (including
            # this dataclass instance itself): serialise its attributes.
            return obj.__dict__

        return json.dumps(
            self,
            default=_as_attribute_dict,
            sort_keys=True,
            indent=4,
        )
def create_tweet_report(row):
    """Build a ``TweetReport`` from a single search-result row.

    The row describes the reporting tweet; the tweet being reported is the
    one it replies to, so the ``in_reply_to_*`` values become the report's
    ``tweet_*`` fields.

    Args:
        row: Key-indexable record (for example a pandas ``Series`` passed by
            ``DataFrame.apply(..., axis=1)``) providing the keys
            ``in_reply_to_status_id``, ``in_reply_to_user_id``,
            ``in_reply_to_username``, ``user_id``, ``username``, ``id`` and
            ``created_at``.

    Returns:
        A ``TweetReport`` populated from ``row``.

    Raises:
        KeyError: If ``row`` lacks one of the keys above (the behaviour of
            dict- and Series-style lookups).
    """
    return TweetReport(
        tweet_id=row['in_reply_to_status_id'],
        tweet_user_id=row['in_reply_to_user_id'],
        tweet_username=row['in_reply_to_username'],
        reported_by_user_id=row['user_id'],
        reported_by_username=row['username'],
        reported_by_status_id=row['id'],
        # Forwarded unchanged. NOTE(review): the API is meant to receive a
        # timestamp in milliseconds; this assumes ``created_at`` already is
        # one -- confirm against the data source.
        reported_at=row['created_at'],
    )
def strip_whitespace(x):
    """Return ``x`` with leading and trailing whitespace removed."""
    trimmed = x.strip()
    return trimmed
# Usernames to exclude, one per line of the file. Built as a list rather than
# a lazy ``map`` so it can be consulted more than once (the search pipeline
# below filters on it twice).
with open("dzlcz_forbidden_ids.txt", "r") as file:
    forbidden_usernames = [strip_whitespace(line) for line in file]

# Pull settings from a local .env file (if any) into the environment.
load_dotenv()
x_csrf_token = os.environ.get("X_CSRF_TOKEN")
cookie = os.environ.get("COOKIE")

# Twint configuration for the "#dzlcz" hashtag search.
config = Config()
config.Search = "#dzlcz"
config.Impressions = True
config.Store_csv = True
config.Replies = True
config.Pandas = True
config.Limit = 100
config.X_csrf_token = x_csrf_token
config.Cookie = cookie

# NOTE: the live search pipeline is currently disabled; the canned sample
# payload assigned to ``json_data`` below is used in its place.
# twint.run.Search(config)
#
# search_data = twint.storage.panda.Tweets_df
#
# reports = search_data\
# .query("username not in @forbidden_usernames")\
# .query("in_reply_to_username not in @forbidden_usernames")\
# .apply(create_tweet_report, axis=1)
#
# # json_data = str(TweetReport.schema().dump(reports, many=True))
# reports_as_dict = [asdict(obj) for obj in reports]
# json_data = json.dumps(reports_as_dict)
json_data = "[{\"tweet_id\": \"1669368116046905346\", \"tweet_user_id\": \"1645463778085969923\", \"tweet_username\": \"RebelLibis\", \"reported_by_user_id\": 1366896190797840391, \"reported_by_username\": \"chuavexilu\", \"reported_by_status_id\": \"1670413185931771904\", \"reported_at\": 1687092512000.0}, {\"tweet_id\": \"1670399373606256641\", \"tweet_user_id\": \"1603344906440908803\", \"tweet_username\": \"Jiri_Cejle\", \"reported_by_user_id\": 1355186513210912773, \"reported_by_username\": \"JirkaSafra\", \"reported_by_status_id\": \"1670404775907663873\", \"reported_at\": 1687090507000.0}, {\"tweet_id\": \"1670363541503717378\", \"tweet_user_id\": \"1588521164103614465\", \"tweet_username\": \"radekmokry64\", \"reported_by_user_id\": 3329574471, \"reported_by_username\": \"kopapaka\", \"reported_by_status_id\": \"1670378219898388483\", \"reported_at\": 1687084176000.0}]"
# Wrap the list of reports in the envelope object: {"reports": [...]}.
json_data = '{"reports": ' + json_data + '}'
print(json_data)

# Keep a local copy of the payload; create the directory on first run.
os.makedirs("output", exist_ok=True)
with open("output/dzlcz.json", "w") as filewrite:
    filewrite.write(json_data)

host = os.environ.get("DZLCZ_HOST")
auth_token = os.environ.get("DZLCZ_AUTH_TOKEN")
headers = {
    'Authorization': f'Token {auth_token}',
    # The body is sent as pre-serialised JSON text, so declare it explicitly.
    'Content-Type': 'application/json',
}

# Send the report payload to the API. ``json_data`` is already a JSON string,
# so it goes out via ``data=``; passing it through ``json=`` would encode it a
# second time and deliver a JSON string literal instead of an object. The
# timeout keeps the script from hanging on an unresponsive host.
response = requests.post(
    url=host,
    data=json_data.encode("utf-8"),
    headers=headers,
    timeout=30,
)
print(response.status_code)
print(response.text)