-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathInstagram Clone SQL - Exploratory Data Analysis.sql
352 lines (296 loc) · 9.73 KB
/
Instagram Clone SQL - Exploratory Data Analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/*INSTAGRAM CLONE EXPLORATORY DATA ANALYSIS USING SQL*/
/*SQL SKILLS: joins, date manipulation, regular expressions, views, stored procedures, aggregate functions, string manipulation*/
-- --------------------------------------------------------------------------------------------------------------
/*Ques.1 The first 10 users on the platform*/
SELECT
*
FROM
ig_clone.users
ORDER BY created_at asc
LIMIT 10;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.2 Total number of registrations*/
SELECT
COUNT(*) AS 'Total Registration'
FROM
ig_clone.users;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.3 The day of the week most users register on*/
CREATE VIEW vwtotalregistrations AS
SELECT
DATE_FORMAT(created_at, '%W') AS 'day of the week',
COUNT(*) AS 'total number of registration'
FROM
ig_clone.users
GROUP BY 1
ORDER BY 2 DESC;
SELECT
*
FROM
vwtotalregistrations;
/*Version 2*/
SELECT
DAYNAME(created_at) AS 'Day of the Week',
COUNT(*) AS 'Total Registration'
FROM
ig_clone.users
GROUP BY 1
ORDER BY 2 DESC;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.4 The users who have never posted a photo*/
SELECT
u.username
FROM
ig_clone.users u
LEFT JOIN
ig_clone.photos p ON p.user_id = u.id
WHERE
p.id IS NULL;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.5 The most likes on a single photo*/
SELECT
u.username, p.image_url, COUNT(*) AS total
FROM
ig_clone.photos p
INNER JOIN
ig_clone.likes l ON l.photo_id = p.id
INNER JOIN
ig_clone.users u ON p.user_id = u.id
GROUP BY p.id
ORDER BY total DESC
LIMIT 1;
/*Version 2*/
SELECT
ROUND((SELECT
COUNT(*)
FROM
ig_clone.photos) / (SELECT
COUNT(*)
FROM
ig_clone.users),
2) AS 'Average Posts by Users';
-- --------------------------------------------------------------------------------------------------------------
/*Ques.6 The number of photos posted by most active users*/
SELECT
u.username AS 'Username',
COUNT(p.image_url) AS 'Number of Posts'
FROM
ig_clone.users u
JOIN
ig_clone.photos p ON u.id = p.user_id
GROUP BY u.id
ORDER BY 2 DESC
LIMIT 5;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.7 The total number of posts*/
SELECT
SUM(user_posts.total_posts_per_user) AS 'Total Posts by Users'
FROM
(SELECT
u.username, COUNT(p.image_url) AS total_posts_per_user
FROM
ig_clone.users u
JOIN ig_clone.photos p ON u.id = p.user_id
GROUP BY u.id) AS user_posts;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.8 The total number of users with posts*/
SELECT
COUNT(DISTINCT (u.id)) AS total_number_of_users_with_posts
FROM
ig_clone.users u
JOIN
ig_clone.photos p ON u.id = p.user_id;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.9 The usernames with numbers as ending*/
SELECT
id, username
FROM
ig_clone.users
WHERE
username REGEXP '[$0-9]';
-- --------------------------------------------------------------------------------------------------------------
/*Ques.10 The usernames with charachter as ending*/
SELECT
id, username
FROM
ig_clone.users
WHERE
username NOT REGEXP '[$0-9]';
-- --------------------------------------------------------------------------------------------------------------
/*Ques.11 The number of usernames that start with A*/
SELECT
count(id)
FROM
ig_clone.users
WHERE
username REGEXP '^[A]';
-- --------------------------------------------------------------------------------------------------------------
/*Ques.12 The most popular tag names by usage*/
SELECT
t.tag_name, COUNT(tag_name) AS seen_used
FROM
ig_clone.tags t
JOIN
ig_clone.photo_tags pt ON t.id = pt.tag_id
GROUP BY t.id
ORDER BY seen_used DESC
LIMIT 10;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.13 The most popular tag names by likes*/
SELECT
t.tag_name AS 'Tag Name',
COUNT(l.photo_id) AS 'Number of Likes'
FROM
ig_clone.photo_tags pt
JOIN
ig_clone.likes l ON l.photo_id = pt.photo_id
JOIN
ig_clone.tags t ON pt.tag_id = t.id
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.14 The users who have liked every single photo on the site*/
SELECT
u.id, u.username, COUNT(l.user_id) AS total_likes_by_user
FROM
ig_clone.users u
JOIN
ig_clone.likes l ON u.id = l.user_id
GROUP BY u.id
HAVING total_likes_by_user = (SELECT
COUNT(*)
FROM
ig_clone.photos);
-- --------------------------------------------------------------------------------------------------------------
/*Ques.15 Total number of users without comments*/
SELECT
COUNT(*) AS total_number_of_users_without_comments
FROM
(SELECT
u.username, c.comment_text
FROM
ig_clone.users u
LEFT JOIN ig_clone.comments c ON u.id = c.user_id
GROUP BY u.id , c.comment_text
HAVING comment_text IS NULL) AS users;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.16 The percentage of users who have either never commented on a photo or likes every photo*/
SELECT
tableA.total_A AS 'Number Of Users who never commented',
(tableA.total_A / (SELECT
COUNT(*)
FROM
ig_clone.users u)) * 100 AS '%',
tableB.total_B AS 'Number of Users who likes every photos',
(tableB.total_B / (SELECT
COUNT(*)
FROM
ig_clone.users u)) * 100 AS '%'
FROM
(SELECT
COUNT(*) AS total_A
FROM
(SELECT
u.username, c.comment_text
FROM
ig_clone.users u
LEFT JOIN ig_clone.comments c ON u.id = c.user_id
GROUP BY u.id , c.comment_text
HAVING comment_text IS NULL) AS total_number_of_users_without_comments) AS tableA
JOIN
(SELECT
COUNT(*) AS total_B
FROM
(SELECT
u.id, u.username, COUNT(u.id) AS total_likes_by_user
FROM
ig_clone.users u
JOIN ig_clone.likes l ON u.id = l.user_id
GROUP BY u.id , u.username
HAVING total_likes_by_user = (SELECT
COUNT(*)
FROM
ig_clone.photos p)) AS total_number_users_likes_every_photos) AS tableB;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.17 Clean URLs of photos posted on the platform*/
SELECT
SUBSTRING(image_url,
LOCATE('/', image_url) + 2,
LENGTH(image_url) - LOCATE('/', image_url)) AS IMAGE_URL
FROM
ig_clone.photos;
-- --------------------------------------------------------------------------------------------------------------
/*Ques.18 The average time on the platform */
SELECT
ROUND(AVG(DATEDIFF(CURRENT_TIMESTAMP, created_at)/360), 2) as Total_Years_on_Platform
FROM
ig_clone.users;
-- --------------------------------------------------------------------------------------------------------------
/*CREATING STORED PROCEDURES */
/*Ques.1 Popular hashtags list*/
CREATE PROCEDURE `spPopularTags`()
BEGIN
SELECT
t.tag_name, COUNT(tag_name) AS 'HashtagCounts'
FROM
ig_clone.tags t
JOIN
ig_clone.photo_tags pt ON t.id = pt.tag_id
GROUP BY t.id , 1
ORDER BY 2 DESC;
END //
CALL `ig_clone`.`spPopularTags`();
-- --------------------------------------------------------------------------------------------------------------
/*Ques.2 Users who have engaged atleast one time on the platform*/
CREATE PROCEDURE `spEngagedUser`()
BEGIN
SELECT DISTINCT
username
FROM
ig_clone.users u
INNER JOIN
ig_clone.photos p ON p.user_id = u.id
INNER JOIN
ig_clone.likes l ON l.user_id = p.user_id
WHERE
p.id IS NOT NULL
OR l.user_id IS NOT NULL;
END //
CALL `ig_clone`.`spEngagedUser`();
-- --------------------------------------------------------------------------------------------------------------
/*Ques.3 Total number of comments by the users on the platform */
CREATE PROCEDURE `spUserComments`()
BEGIN
SELECT
COUNT(*) as 'Total Number of Comments'
FROM (
SELECT
c.user_id, u.username
FROM ig_clone.users u
JOIN ig_clone.comments c ON u.id = c.user_idusers
WHERE
c.comment_text IS NOT NULL
GROUP BY u.username , c.user_id) as Table1;
END //
CALL `ig_clone`.`spUserComments`();
-- --------------------------------------------------------------------------------------------------------------
/*Ques.4 The username, image posted, tags used and comments made by a specific user*/
CREATE PROCEDURE `spUserInfo`(IN userid INT(11))
BEGIN
SELECT
u.id, u.username, p.image_url, c.comment_text, t.tag_name
FROM
ig_clone.users u
INNER JOIN
ig_clone.photos p ON p.user_id = u.id
INNER JOIN
ig_clone.comments c ON c.user_id = u.id
INNER JOIN
ig_clone.photo_tags pt ON pt.photo_id = p.id
INNER JOIN
ig_clone.tags t ON t.id = pt.tag_id
where u.id = userid;
END //
CALL `ig_clone`.`spUserInfo`(2);
-- --------------------------------------------------------------------------------------------------------------