generated from 10xac/Twitter-Data-Analysis-Template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_tweet_data_into_database.py
114 lines (93 loc) · 3.27 KB
/
add_tweet_data_into_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import pandas as pd
import mysql.connector as mysql
from mysql.connector import Error
def DBConnect(dbName=None):
conn = mysql.connect(host='localhost', user='root', password='12345678',
database=dbName, buffered=True)
cur = conn.cursor()
return conn, cur
def emojiDB(dbName: str) -> None:
conn, cur = DBConnect(dbName)
dbQuery = f"ALTER DATABASE {dbName} CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci;"
cur.execute(dbQuery)
conn.commit()
def createDB(dbName: str) -> None:
conn, cur = DBConnect()
cur.execute(f"CREATE DATABASE IF NOT EXISTS {dbName};") # creating database
conn.commit()
cur.close()
def createTables(dbName: str) -> None:
conn, cur = DBConnect(dbName)
sqlFile = 'tweetsinformation_database_schema.sql'
fd = open(sqlFile, 'r')
readSqlFile = fd.read()
fd.close()
sqlCommands = readSqlFile.split(';')
for command in sqlCommands:
try:
res = cur.execute(command)
except Exception as ex:
print("Command skipped: ", command)
print(ex)
conn.commit()
cur.close()
return
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
cols_2_drop = ['original_text']
try:
df = df.drop(columns=cols_2_drop, axis=1)
df = df.fillna(0)
except KeyError as e:
print("Error:", e)
return df
def insert_to_tweet_table(dbName: str, df: pd.DataFrame, table_name: str) -> None:
conn, cur = DBConnect(dbName)
df = preprocess_df(df)
for _, row in df.iterrows():
sqlQuery = f"""INSERT INTO TweetInformation (created_at, source, subjectivity, polarity, language,
favorite_count, retweet_count, followers_count, friends_count,
hashtags, user_mentions, place, clean_text)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"""
data = (row[0], row[1], row[2], row[3], (row[4]), (row[5]), row[6], row[7], row[8], row[9], row[10], row[11],
row[12])
try:
# Execute the SQL command
cur.execute(sqlQuery, data)
# Commit your changes in the database
conn.commit()
#print("Data Inserted Successfully")
except Exception as e:
conn.rollback()
print("Error: ", e)
return
def db_execute_fetch(*args, many=False, tablename='', rdf=True, **kwargs) -> pd.DataFrame:
connection, cursor1 = DBConnect(**kwargs)
if many:
cursor1.executemany(*args)
else:
cursor1.execute(*args)
# get column names
field_names = [i[0] for i in cursor1.description]
# get column values
res = cursor1.fetchall()
# get row count and show info
nrow = cursor1.rowcount
if tablename:
print(f"{nrow} recrods fetched from {tablename} table")
cursor1.close()
connection.close()
# return result
if rdf:
return pd.DataFrame(res, columns=field_names)
else:
return res
if __name__ == "__main__":
print("sucessfully")
dbName='tweets'
createDB(dbName)
emojiDB(dbName='tweets')
createTables(dbName)
df = pd.read_csv('data/cleaned_data_final.csv')
insert_to_tweet_table(dbName='tweets', df=df, table_name='TweetInformation')
print("sucessfully")