-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathclass12-queries.sql
More file actions
386 lines (320 loc) · 13.7 KB
/
class12-queries.sql
File metadata and controls
386 lines (320 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
-- This is a single-line comment
-- The comment symbol doesn't have to come at the start of a line like in some config files
-- like httpd htaccess files, samba config, ssh config, etc.
/* this is a potentially
multi-line comment */
/* The MariaDB parser allows for a "comment" that is *only* parsed as SQL in MariaDB, like this: */
/* You can have a comment in the middle of a SQL statement, like this: */
select foo /* , bar */ * from baz; -- do
/* How can you find the maximum length of a film? */
select max(length) from film;
/* How can you find the maximum length of a film, and name the result "l"? */
select max(length) as l from film;
/* How can you find the minimum rental_duration of a film? */
select min(rental_duration) from film;
/* And name the result as r? */
select min(rental_duration) as r from film;
/* How would you find the film_id of the films that are of "minimum length"? */
/* You could use a nested subquery, like this: */
select film_id from film where length = (select min(length) from film);
/* Alternatively, you could use a CTE, i.e., a WITH statement, which is a bit
more in keeping with the phrasing of the assignment */
with minlen as (select min(length) as l from film)
select film_id from film, minlen where film.length = minlen.l;
/* Do these two queries have the same query plan? Let's check them out using "explain" */
-- first, let's do the subquery method:
explain select film_id from film where length = (select min(length) from film);
-- now, let's do the CTE method:
explain with minlen as (select min(length) as l from film)
select film_id from film, minlen where film.length = minlen.l;
-- are they the same?
-- is the distinction meaningful in terms of performance?
-- advantages of a CTE:
-- 1. readability
-- 2. can refer to the CTE more than once in the query
-- What if you needed the film_id values of the shortest films, in descending order?
with minlen as (select min(length) as l from film)
select film_id from film, minlen where film.length = minlen.l
order by film_id desc;
-- How would you find the film_id of the films that have maximum rental
-- duration, using a nested subquery?
select film_id from film where rental_duration = (select max(rental_duration) from film);
-- How would you find the film_id of the films that have maximum rental
-- duration, using a CTE?
with maxdur as (select max(rental_duration) as r from film)
select film_id from film, maxdur where film.rental_duration = maxdur.r;
-- What if we wanted all films that have minimum length or maximum rental duration?
-- Well, we could certainly do a union, which by default will return distinct
-- film IDs, right?
select film_id from film where length = (select min(length) from film)
union
select film_id from film where rental_duration = (select max(rental_duration) from film);
-- What if we didn't want to use a union? We can use an "OR" in the with clause, of course:
select film_id from film
where length = (select min(length) from film)
or rental_duration = (select max(rental_duration) from film);
-- Can we do it with a union of two CTEs? Certainly
(with minlen as (select min(length) as l from film) select film_id from film, minlen where film.length = minlen.l)
union
(with maxdur as (select max(rental_duration) as r from film) select film_id from film, maxdur where film.rental_duration = maxdur.r);
-- Can we do it with a single CTE? Yes, absolutely:
with minlen as (select min(length) as l from film),
maxdur as (select max(rental_duration) as r from film)
select film_id from film, minlen, maxdur
where film.length = minlen.l or film.rental_duration = maxdur.r
-- the single-CTE approach seems to be pretty easy to read and is more in line with how
-- we might envision the worfkow being done (though again, SQL DQL is a declarative language)
-- I'll leave it to you to order the resulting film IDs in descending order as requested
-- What if we needed a list of pairs of actors and films? We'd have to join the film, actor, and film_actor tables, right?
-- Can we join film and flim_actor using a natural join? Let's check using describe:
describe film;
describe film_actor;
-- Hmm, they both have a column "last_update" as well as "film_id", and natural join will by default
-- use *both* columns for the implied inner equijoin, so that won't work. We can use "using";
-- this is going to be a big table, so let's put a limit clause on it so we can peek at the results:
select title, first_name, last_name
from film
join film_actor using (film_id)
join actor using (actor_id)
limit 20;
-- OK so what if we want to also include the category name of the film as a third column in the results?
-- we have to also join to "category" and "film_category", right?
select title, category.name, first_name, last_name
from film
join film_actor using (film_id)
join actor using (actor_id)
join film_category using (film_id)
join category using (category_id) limit 20;
-- OK, this is looking good. But what if we want to restrict to just movies in which UMA WOOD starred?
select title, category.name, first_name, last_name
from film
join film_actor using (film_id)
join actor using (actor_id)
join film_category using (film_id)
join category using (category_id) where first_name = 'UMA' and last_name = 'WOOD';
-- Now what if we want to order by category and then by title:
select title, category.name, first_name, last_name
from film
join film_actor using (film_id)
join actor using (actor_id)
join film_category using (film_id)
join category using (category_id) where first_name = 'UMA' and last_name = 'WOOD'
order by category.name, title;
-- What if we need to count up the number of distinct titles by category?
select category.name, count(title)
from film
join film_actor using (film_id)
join actor using (actor_id)
join film_category using (film_id)
join category using (category_id) where first_name = 'UMA' and last_name = 'WOOD'
group by category.name
order by category.name;
-- can we order by category.name descending?
select category.name, count(title)
from film
join film_actor using (film_id)
join actor using (actor_id)
join film_category using (film_id)
join category using (category_id) where first_name = 'UMA' and last_name = 'WOOD'
group by category.name
order by category.name desc;
-- what if we wanted a table of last name, first name, and category name
-- for categories of movies that each actor has acted in at least once?
select distinct last_name, first_name, category.name
from film
join film_actor using (film_id)
join actor using (actor_id)
join film_category using (film_id)
join category using (category_id)
order by last_name, first_name, category.name
limit 30;
-- what if we wanted the count of distinct categories each actor has acted in,
-- ordered by increasing count?
select last_name, first_name, count(*) as c from
(select distinct last_name, first_name, category.name
from film
join film_actor using (film_id)
join actor using (actor_id)
join film_category using (film_id)
join category using (category_id)) as foo
group by last_name, first_name
order by c
limit 30;
-- so, Julia Fawcett has only acted in eight categories
-- What if we needed to know the count of times Julia Fawcett
-- has acted in each category of movie, with zero times shown
-- for any film category in which Fawcett has not acted, ordered by the count in
-- descending order?
-- First, we need a query showing the category_ids for all titles
-- that Julia Fawcett has acted in
select last_name, first_name, title, category_id
from film_category
join film using (film_id)
join film_actor using (film_id)
join actor using (actor_id) where last_name = 'FAWCETT' and first_name = 'JULIA'
-- Now we want a count for each category ID
select category_id, count(*) as c
from film_category
join film using (film_id)
join film_actor using (film_id)
join actor using (actor_id) where last_name = 'FAWCETT' and first_name = 'JULIA'
group by category_id;
-- Next, we want to join that table with the "category" table
-- to get category names, but we want *all* category names shown
-- even if there are no acting credits for Julia Fawcett for that
-- film category; for that purpose, we use an outer join:
select name, ifnull(c, 0) as count
from category
left join
(select category_id, count(*) as c
from film_category
join film using (film_id)
join film_actor using (film_id)
join actor using (actor_id) where last_name = 'FAWCETT' and first_name = 'JULIA'
group by category_id) as foo using (category_id);
-- now order by category name in descending order
select name, ifnull(c, 0) as count
from category
left join
(select category_id, count(*) as c
from film_category
join film using (film_id)
join film_actor using (film_id)
join actor using (actor_id) where last_name = 'FAWCETT' and first_name = 'JULIA'
group by category_id) as foo using (category_id)
order by count desc;
-- what if we need to show only the categories for which the count is less than 3?
-- we can post-filter using "having"
select name, ifnull(c, 0) as count
from category
left join
(select category_id, count(*) as c
from film_category
join film using (film_id)
join film_actor using (film_id)
join actor using (actor_id) where last_name = 'FAWCETT' and first_name = 'JULIA'
group by category_id) as foo using (category_id)
having count < 3
order by count desc;
-- What if we needed to know the actor_id, first_name, and last_name for all
-- actors who have *never* acted in a Children's film?
-- We'll start by getting a table of film_ids for children's films
select film_id
from film_category
join category using (category_id)
where category.name = 'Children'
limit 30;
-- we have to join this to actor to get the actor_ids for any actors
-- who have acted in a children's film
select distinct actor_id from
film_actor join film_category using (film_id)
join category using (category_id)
where category.name = 'Children'
order by actor_id
limit 30;
-- Let's join actor to film_actor and restrict to actor_ids not in the
-- list of actor IDs that we just got from the previous query:
select actor_id, first_name, last_name
from actor
join film_actor using (actor_id)
where actor_id not in (-- who have acted in a children's film
select distinct actor_id from
film_actor join film_category using (film_id)
join category using (category_id)
where category.name = 'Children')
limit 30;
-- Need to get rid of the redundancy, using distinct
-- and order by actor_id:
select distinct actor_id, first_name, last_name
from actor
join film_actor using (actor_id)
where actor_id not in (-- who have acted in a children's film
select distinct actor_id from
film_actor join film_category using (film_id)
join category using (category_id)
where category.name = 'Children')
order by actor_id
limit 30;
-- Can we do it with a CTE? Sure
with
aids as (select distinct actor_id
from film_actor
join film_category using (film_id)
join category using (category_id)
where category.name = 'Children')
select distinct actor_id, first_name, last_name
from actor
join film_actor using (actor_id)
where actor_id not in (select actor_id from aids)
order by actor_id
limit 30;
-- What if we need the actor_id, first_name, last_name, and maximum film
-- length of Childrens films for every actor?
-- Start by getting a table of Children's films
select film_id
from film
join film_category using (film_id)
join category using (category_id)
where category.name = 'Children' limit 30;
-- Join to film_actor and actor and include actor name, actor ID, and film length
-- drop the film_id since we won't need it
select actor_id, last_name, first_name, film.length
from film
join film_category using (film_id)
join category using (category_id)
join film_actor using (film_id)
join actor using (actor_id)
where category.name = 'Children' order by actor_id limit 30;
-- Now group by last_name, first_name and aggregate using max:
select last_name, first_name, max(film.length) as maxlen
from film
join film_category using (film_id)
join category using (category_id)
join film_actor using (film_id)
join actor using (actor_id)
where category.name = 'Children'
group by last_name, first_name;
-- Now let's order by actor_id in ascending order:
select actor_id, last_name, first_name, max(film.length) as maxlen
from film
join film_category using (film_id)
join category using (category_id)
join film_actor using (film_id)
join actor using (actor_id)
where category.name = 'Children'
group by actor_id
order by actor_id limit 30;
-- Find movies that have both Penelope Guiness and Jennifer Davis
select actor_id from actor where first_name = 'PENELOPE' and last_name = 'GUINESS'; -- returns 1
select actor_id from actor where first_name = 'JENNIFER' and last_name = 'DAVIS'; -- returns 4
-- get film IDs for flims that have both actor_id 1 and actor_id 4
select title
from film
join film_actor as fa1 using (film_id)
join film_actor as fa2 using (film_id)
where fa1.actor_id = 1 and fa2.actor_id = 4;
-- express 80 and 168 as subqueries
select title
from film
join film_actor as fa1 using (film_id)
join film_actor as fa2 using (film_id)
where fa1.actor_id = (select actor_id from actor where first_name = 'PENELOPE' and last_name = 'GUINESS')
and fa2.actor_id = (select actor_id from actor where first_name = 'JENNIFER' and last_name = 'DAVIS');
-- using CTE
with
pgtable as (select actor_id from actor where first_name = 'PENELOPE' and last_name = 'GUINESS'),
jdtable as (select actor_id from actor where first_name = 'JENNIFER' and last_name = 'DAVIS')
select title
from film
join film_actor as fa1 using (film_id)
join film_actor as fa2 using (film_id)
join pgtable
join jdtable
where fa1.actor_id = pgtable.actor_id
and fa2.actor_id = jdtable.actor_id;
select film_id, name
from film_actor
/* join film using (film_id) */
join film_category using (film_id)
join category using (category_id)
where actor_id=1;