-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpotify Analytics
75 lines (64 loc) · 2 KB
/
Spotify Analytics
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# For this project, I downloaded Spotify data from Kaggle.
# https://www.kaggle.com/datasets/equinxx/spotify-top-50-songs-in-2021
# Then I created a table to insert Spotify data into.
# Finally, I performed analytics on the data using SQL.
# Creating the table:
CREATE TABLE BIT_DB.Spotifydata (
id integer PRIMARY KEY,
artist_name varchar NOT NULL,
track_name varchar NOT NULL,
track_id varchar NOT NULL,
popularity integer NOT NULL,
danceability decimal(4,3) NOT NULL,
energy decimal(4,3) NOT NULL,
key integer NOT NULL,
loudness decimal(5,3) NOT NULL,
mode integer NOT NULL,
speechiness decimal(5,4) NOT NULL,
acousticness decimal(6,5) NOT NULL,
instrumentalness text NOT NULL,
liveness decimal(5,4) NOT NULL,
valence decimal(4,3) NOT NULL,
tempo decimal(6,3) NOT NULL,
duration_ms integer NOT NULL,
time_signature integer NOT NULL
)
# Then I inserted the Spotify Data .csv into the table.
# Next, I explored the data using the following SQL.
# First I determined the avg popularity, danceability, and energy by artist and track.
SELECT artist_name,
track_name,
avg(popularity),
avg(danceability),
avg(energy)
FROM BIT_DB.spotifydata
GROUP BY artist_name,
track_name;
# Then I determined who the Top 10 artists are by popularity
SELECT track_name,
artist_name,
popularity
FROM BIT_DB.spotifydata
ORDER BY popularity DESC
LIMIT 10;
# List tracks with high energy (greater than 0.8) and low acousticness (less than 0.2)
SELECT artist_name,
track_name,
energy,
acousticness
FROM BIT_DB.spotifydata
WHERE energy > 0.8 AND
acousticness < 0.2;
# Count the number of tracks for each artist
SELECT artist_name,
COUNT( * ) AS tracks_count
FROM BIT_DB.spotifydata
GROUP BY artist_name;
# Calculate the average tempo of the tracks:
SELECT AVG(tempo) AS avg_tempo
FROM BIT_DB.spotifydata;
# Find the track with the highest loudness
SELECT artist_name,
track_name,
MAX(loudness) AS max_loudness
FROM BIT_DB.spotifydata;