-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovie_db.py
337 lines (318 loc) · 13.6 KB
/
movie_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
import sqlite3 as lite
import csv
import re
import pandas as pd
import argparse
import collections
import json
import glob
import math
import os
import requests
import string
import sqlite3
import sys
import time
import xml
#* Actors (aid, fname, lname, gender)
#* Movies (mid, title, year, rank)
#* Directors (did, fname, lname)
#* Cast (aid, mid, role)
#* Movie_Director (did, mid)
class Movie_db(object):
def __init__(self, db_name):
db_name: "cs1656-public.db"
self.con = lite.connect(db_name)
self.cur = self.con.cursor()
#q0 is an example
def q0(self):
query = '''SELECT COUNT(*) FROM Actors'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q1(self):
# DONE
# List all the actors (first and last name) who acted in at least one film in the 80s (1980-1990, both ends inclusive) and in at least one film in the 21st century (>=2000). Sort alphabetically, by the actor's last and first name.
query = '''
SELECT DISTINCT fname, lname
FROM Actors
WHERE aid IN (SELECT aid
FROM Cast
WHERE mid IN (SELECT mid
FROM Movies
WHERE year BETWEEN 1980 AND 1990))
AND aid IN (SELECT aid
FROM Cast
WHERE mid IN (SELECT mid
FROM Movies
WHERE year >= 2000))
ORDER BY lname, fname
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q2(self):
# DONE
# List all the movies (title, year) that were released in the same year as the movie entitled "Rogue One: A Star Wars Story", but had a better rank (Note: the higher the value in the rank attribute, the better the rank of the movie). Sort alphabetically, by movie title.
query = '''
SELECT title, year
FROM Movies
WHERE year = (SELECT year
FROM Movies
WHERE title = "Rogue One: A Star Wars Story")
AND rank > (SELECT rank
FROM Movies
WHERE title = "Rogue One: A Star Wars Story")
ORDER BY title
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q3(self):
# DONE
# List all the actors (first and last name) who played in a Star Wars movie (i.e., title like '%Star Wars%') in decreasing order of how many Star Wars movies they appeared in. If an actor plays multiple roles in the same movie, count that still as one movie. If there is a tie, use the actor's last and first name to generate a full sorted order. Sort alphabetically, by the number of movies (descending), the actor's last name and first name.
query = '''
SELECT a.fname, a.lname, COUNT(distinct c.mid) AS num_movies
FROM Actors a
JOIN Cast c ON c.aid = a.aid
JOIN Movies m ON m.mid = c.mid
WHERE m.title LIKE '%Star Wars%'
GROUP BY a.aid
ORDER BY num_movies DESC, a.lname, a.fname
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q4(self):
# DONE
# Find the actor(s) (first and last name) who only acted in films released before 1990. Sort alphabetically, by the actor's last and first name.
query = '''
SELECT a.fname, a.lname
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
WHERE m.year < 1990 AND a.aid NOT IN (
SELECT a2.aid
FROM Actors a2
JOIN Cast c2 ON a2.aid = c2.aid
JOIN Movies m2 ON c2.mid = m2.mid
WHERE m2.year >= 1990
)
GROUP BY a.lname, a.fname;
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q5(self):
# DONE
# List the top 10 directors in descending order of the number of films they directed (first name, last name, number of films directed). For simplicity, feel free to ignore ties at the number 10 spot (i.e., always show up to 10 only). Sort alphabetically, by the number of films (descending), the actor's last name and first name.
query = '''
SELECT fname, lname, COUNT(distinct mid) AS num_movies
FROM Directors
JOIN Movie_Director ON Movie_Director.did = Directors.did
GROUP BY Directors.did
ORDER BY num_movies DESC, lname, fname
LIMIT 10
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q6(self):
# DONE
# Find the top 10 movies with the largest cast (title, number of cast members) in decreasing order. Note: show all movies in case of a tie.
query = '''
SELECT m.title, COUNT(*) as num_cast
FROM Movies m
JOIN Cast c ON m.mid = c.mid
GROUP BY m.mid
HAVING num_cast >= (
SELECT MIN(num_cast)
FROM(
SELECT COUNT(*) as num_cast
FROM Movies m
JOIN Cast c ON m.mid = c.mid
GROUP BY m.mid
ORDER BY num_cast DESC, m.title
LIMIT 10
)
)
ORDER BY num_cast DESC, m.title
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q7(self):
# DONE
# Find the movie(s) whose cast has more actors than actresses (i.e., gender=Male vs gender=Female). Show the title, the number of actors, and the number of actresses in the results. Sort alphabetically, by movie title. Hint: Make sure you account for the case of 0 actors or actresses in a movie.
query = '''
SELECT m.title,
COUNT(CASE WHEN a.gender = 'Male' THEN 1 END) AS num_actors,
COUNT(CASE WHEN a.gender = 'Female' THEN 1 END) AS num_actresses
FROM Movies m
JOIN Cast c ON c.mid = m.mid
JOIN Actors a ON a.aid = c.aid
GROUP BY m.mid
HAVING COUNT(CASE WHEN a.gender = 'Male' THEN 1 END) > COUNT(CASE WHEN a.gender = 'Female' THEN 1 END)
ORDER BY m.title ASC;
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q8(self):
# DONE
# Find all the actors who have worked with at least 7 different directors. Do not consider cases of self-directing (i.e., when the director is also an actor in a movie), but count all directors in a movie towards the threshold of 7 directors. Show the actor's first, last name, and the number of directors he/she has worked with. Sort in decreasing order of number of directors.
query = '''
SELECT a.fname, a.lname, COUNT(distinct md.did) AS num_directors
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
JOIN Movie_Director md ON m.mid = md.mid
JOIN Directors d ON md.did = d.did AND d.fname <> a.fname AND d.lname <> a.lname
GROUP BY a.aid, a.fname, a.lname
HAVING COUNT(distinct md.did) >= 7
ORDER BY num_directors DESC;
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q9(self):
# DONE
# For all actors whose first name starts with a B, count the movies that he/she appeared in his/her debut year (i.e., year of their first movie). Show the actor's first and last name, plus the count. Sort by decreasing order of the count, then the first and last name.
query = '''
SELECT a.fname, a.lname, COUNT(distinct c.mid) AS num_movies
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
WHERE a.fname LIKE 'B%' AND m.year = (
SELECT MIN(m2.year)
FROM Movies m2
JOIN Cast c2 ON c2.mid = m2.mid
WHERE c2.aid = a.aid
)
GROUP BY a.aid
ORDER BY num_movies DESC, a.fname, a.lname;
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q10(self):
# DONE
# Find instances of nepotism between actors and directors, i.e., an actor in a movie and the director having the same last name, but a different first name. Show the last name and the title of the movie, sorted alphabetically by last name and the movie title.
query = '''
SELECT a.lname, m.title
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
JOIN Movie_Director md ON m.mid = md.mid
JOIN Directors d ON md.did = d.did
WHERE a.lname = d.lname AND a.fname != d.fname
ORDER BY a.lname, m.title
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q11(self):
# DONE
# The Bacon number of an actor is the length of the shortest path between the actor and Kevin Bacon in the "co-acting" graph. That is, Kevin Bacon has Bacon number 0; all actors who acted in the same movie as him have Bacon number 1; all actors who acted in the same film as some actor with Bacon number 1 have Bacon number 2, etc. List all actors whose Bacon number is 2 (first name, last name). Sort the results by the last and first name. You can familiarize yourself with the concept, by visiting The Oracle of Bacon.
# VIEW FOR ALL KEVIN BACON MOVIES
self.cur.execute("DROP VIEW IF EXISTS bacon_movies")
query = '''
CREATE VIEW bacon_movies AS
SELECT m.mid
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
WHERE a.lname = "Bacon"
'''
self.cur.execute(query)
# VIEW FOR ACTORS WITH BACON NUMBER OF 1 (excluding kevin himself)
self.cur.execute("DROP VIEW IF EXISTS bacon_num_1")
query = '''
CREATE VIEW bacon_num_1 AS
SELECT a.aid
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
WHERE m.mid IN (SELECT mid FROM bacon_movies) AND NOT (a.lname = "Bacon")
'''
self.cur.execute(query)
# VIEW THAT SELECTS ALL MOVIES THAT ACTORS FROM bacon_num_1 HAVE ACTED IN THAT KEVIN IS NOT IN(NOT bacon_movies)
self.cur.execute("DROP VIEW IF EXISTS mov_without_kb")
query = '''
CREATE VIEW mov_without_kb AS
SELECT m.mid
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
WHERE a.aid IN (SELECT * FROM bacon_num_1) AND NOT a.lname = "Bacon" AND NOT m.mid IN (
SELECT mid FROM bacon_movies
)
'''
self.cur.execute(query)
query = '''
SELECT DISTINCT a.fname, a.lname
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
WHERE m.mid IN (SELECT * FROM mov_without_kb) AND NOT a.lname = "Bacon" AND NOT m.mid IN (SELECT mid FROM bacon_movies) AND NOT a.aid in (SELECT * FROM bacon_num_1)
ORDER BY a.lname, a.fname
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
def q12(self):
# DONE
# Assume that the popularity of an actor is reflected by the average rank of all the movies he/she has acted in. Find the top 20 most popular actors (in descreasing order of popularity) -- list the actor's first/last name, the total number of movies he/she has acted, and his/her popularity score. For simplicity, feel free to ignore ties at the number 20 spot (i.e., always show up to 20 only).
query = '''
SELECT a.fname, a.lname, COUNT(c.mid) AS num_movies, AVG(m.rank) AS popularity
FROM Actors a
JOIN Cast c ON a.aid = c.aid
JOIN Movies m ON c.mid = m.mid
GROUP BY a.aid, a.fname, a.lname
ORDER BY popularity DESC
LIMIT 20
'''
self.cur.execute(query)
all_rows = self.cur.fetchall()
return all_rows
if __name__ == "__main__":
task = Movie_db("cs1656-public.db")
rows = task.q0()
print(rows)
print()
rows = task.q1()
print(rows)
print()
rows = task.q2()
print(rows)
print()
rows = task.q3()
print(rows)
print()
rows = task.q4()
print(rows)
print()
rows = task.q5()
print(rows)
print()
rows = task.q6()
print(rows)
print()
rows = task.q7()
print(rows)
print()
rows = task.q8()
print(rows)
print()
rows = task.q9()
print(rows)
print()
rows = task.q10()
print(rows)
print()
rows = task.q11()
print(rows)
print()
rows = task.q12()
print(rows)
print()