R Notebook by Miguel Miranda Enriquez

Data source: Kaggle

suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(tidyverse)))
# Load the Spotify track-features CSV and list its column names.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of SpotifyFeatures.csv before running.
spotify_csv <- "C:\\Users\\mmira\\Downloads\\SpotifyFeatures.csv"
df <- read.csv(spotify_csv)
names(df)
##  [1] "genre"            "artist_name"      "track_name"       "track_id"        
##  [5] "popularity"       "acousticness"     "danceability"     "duration_ms"     
##  [9] "energy"           "instrumentalness" "key"              "liveness"        
## [13] "loudness"         "mode"             "speechiness"      "tempo"           
## [17] "time_signature"   "valence"

Data Transformation

# Merge the two spellings of the children's-music genre (straight vs. curly
# apostrophe) into a single label
df$genre[df$genre %in% "Children's Music"] <- "Children’s Music"

# Remove music genres: A Capella, Anime, Children’s Music,
# Classical, Comedy, Movie, Opera, Soundtrack
excluded_genres <- c(
  "A Capella", "Anime", "Children’s Music", "Classical",
  "Comedy", "Movie", "Opera", "Soundtrack"
)

# %in% never yields NA, so a missing genre cannot turn into an all-NA
# phantom row the way a chained `!=` row mask would.
df <- df[!(df$genre %in% excluded_genres), ]

head(df)
ABCDEFGHIJ0123456789
 
 
genre
<chr>
artist_name
<chr>
track_name
<chr>
136R&BMary J. BligeBe Without You - Kendu Mix
137R&BRihannaDesperado
138R&BYung BleuIce On My Baby (feat. Kevin Gates) - Remix
139R&BSurfacesHeaven Falls / Fall on Me
140R&BOlivia O'BrienLove Myself
141R&BELHAENeeds
# Collapse the table to one row per track_id and gather the distinct genres
# of each track into a single list cell.
# Columns are selected by name rather than by position, and simplify = FALSE
# keeps `genre` a list column regardless of how many genres each track has.
new_df <- aggregate(df["genre"],
                    by = df["track_id"],
                    FUN = unique,
                    simplify = FALSE)

head(new_df)
ABCDEFGHIJ0123456789
 
 
track_id
<chr>
genre
<list>
1000DfZJww8KiixTKuk9usJ<chr [1]>
2000EWWBkYaREzsBplYjUag<chr [1]>
3000xQL6tZNLJzIrtIgxqSl<chr [2]>
4001bQcMKt86oNbnWne4EAB<chr [1]>
5001CyR8xqmmpVZFiTZJ5BC<chr [1]>
6001gDjxhKGDSx4sMMAgS9R<chr [1]>
# Sort the genres of each track with str_sort() (tidyverse / stringr).
# lapply() always returns a list; sapply() would silently return a matrix if
# every track happened to have the same number of genres.
new_df$genre <- lapply(new_df$genre, str_sort)
# Collapse each sorted genre vector into one comma-separated string;
# vapply() guarantees exactly one string per track.
new_df$genre <- vapply(new_df$genre, paste, character(1), collapse = ",")
head(new_df)
ABCDEFGHIJ0123456789
 
 
track_id
<chr>
genre
<chr>
1000DfZJww8KiixTKuk9usJReggae
2000EWWBkYaREzsBplYjUagJazz
3000xQL6tZNLJzIrtIgxqSlDance,Pop
4001bQcMKt86oNbnWne4EABReggaeton
5001CyR8xqmmpVZFiTZJ5BCBlues
6001gDjxhKGDSx4sMMAgS9RRock
# Attach the combined genre string to the track features with an
# inner join on track_id (dplyr)

# Track features without the original single-genre column
dropped_df <- df[setdiff(names(df), "genre")]

# Join the combined genre onto every matching feature row, then remove the
# exact duplicate rows that the join produces
new_df <- new_df %>%
  inner_join(dropped_df, by = "track_id") %>%
  distinct()

head(new_df)
ABCDEFGHIJ0123456789
 
 
track_id
<chr>
genre
<chr>
artist_name
<chr>
track_name
<chr>
popularity
<int>
1000DfZJww8KiixTKuk9usJReggaeMike LoveEarthlings30
2000EWWBkYaREzsBplYjUagJazzDon PhilippeFewerdolr39
3000xQL6tZNLJzIrtIgxqSlDance,PopZAYNStill Got Time70
4001bQcMKt86oNbnWne4EABReggaetonJustin QuilesNo Quieren Que Gane41
5001CyR8xqmmpVZFiTZJ5BCBluesTaj MahalShe Knows How To Rock Me31
6001gDjxhKGDSx4sMMAgS9RRockYoung TribeKingdom58
# Print the merged table: combined genre string plus the track features
new_df
ABCDEFGHIJ0123456789
track_id
<chr>
genre
<chr>
000DfZJww8KiixTKuk9usJReggae
000EWWBkYaREzsBplYjUagJazz
000xQL6tZNLJzIrtIgxqSlDance,Pop
001bQcMKt86oNbnWne4EABReggaeton
001CyR8xqmmpVZFiTZJ5BCBlues
001gDjxhKGDSx4sMMAgS9RRock
001ifh9Zkyc5DhK7AGQRtKFolk,Indie
001KkOBeRiQ1J7IEJYHODWWorld
001YQlnDSduXd5LgBd66gTSka
002opcRBgYV5jqoh72QcqADance,Pop
# Use mutate() to create one 0/1 dummy variable per genre, set to 1 when that
# genre is one of the comma-separated entries in the genre column.

# To avoid using special characters in column names,
# the column for R&B is RnB and the column for Hip-Hop is Hip_Hop.

# Whole-entry match: both the genre string and the searched name are wrapped
# in commas, so "Reggae" no longer matches inside "Reggaeton" (a plain
# substring search flagged every Reggaeton track as Reggae as well).
has_genre <- function(genre, name) {
  as.numeric(grepl(paste0(",", name, ","),
                   paste0(",", genre, ","),
                   fixed = TRUE))
}

spotify_data <- new_df %>% mutate(
  Pop = has_genre(genre, "Pop"),
  Rap = has_genre(genre, "Rap"),
  Rock = has_genre(genre, "Rock"),
  Hip_Hop = has_genre(genre, "Hip-Hop"),
  Dance = has_genre(genre, "Dance"),
  Indie = has_genre(genre, "Indie"),
  RnB = has_genre(genre, "R&B"),
  Alternative = has_genre(genre, "Alternative"),
  Folk = has_genre(genre, "Folk"),
  Soul = has_genre(genre, "Soul"),
  Country = has_genre(genre, "Country"),
  Jazz = has_genre(genre, "Jazz"),
  Electronic = has_genre(genre, "Electronic"),
  Reggaeton = has_genre(genre, "Reggaeton"),
  Reggae = has_genre(genre, "Reggae"),
  World = has_genre(genre, "World"),
  Blues = has_genre(genre, "Blues"),
  Ska = has_genre(genre, "Ska")
)

spotify_data
ABCDEFGHIJ0123456789
track_id
<chr>
genre
<chr>
000DfZJww8KiixTKuk9usJReggae
000EWWBkYaREzsBplYjUagJazz
000xQL6tZNLJzIrtIgxqSlDance,Pop
001bQcMKt86oNbnWne4EABReggaeton
001CyR8xqmmpVZFiTZJ5BCBlues
001gDjxhKGDSx4sMMAgS9RRock
001ifh9Zkyc5DhK7AGQRtKFolk,Indie
001KkOBeRiQ1J7IEJYHODWWorld
001YQlnDSduXd5LgBd66gTSka
002opcRBgYV5jqoh72QcqADance,Pop
# List the columns, including the new genre dummy variables
colnames(spotify_data)
##  [1] "track_id"         "genre"            "artist_name"      "track_name"      
##  [5] "popularity"       "acousticness"     "danceability"     "duration_ms"     
##  [9] "energy"           "instrumentalness" "key"              "liveness"        
## [13] "loudness"         "mode"             "speechiness"      "tempo"           
## [17] "time_signature"   "valence"          "Pop"              "Rap"             
## [21] "Rock"             "Hip_Hop"          "Dance"            "Indie"           
## [25] "RnB"              "Alternative"      "Folk"             "Soul"            
## [29] "Country"          "Jazz"             "Electronic"       "Reggaeton"       
## [33] "Reggae"           "World"            "Blues"            "Ska"
# Number of rows remaining after filtering, joining and de-duplication
nrow(spotify_data)
## [1] 128794
# Copy of the prepared table used by all summaries below
spotify_data_final <- spotify_data
colnames(spotify_data_final)
##  [1] "track_id"         "genre"            "artist_name"      "track_name"      
##  [5] "popularity"       "acousticness"     "danceability"     "duration_ms"     
##  [9] "energy"           "instrumentalness" "key"              "liveness"        
## [13] "loudness"         "mode"             "speechiness"      "tempo"           
## [17] "time_signature"   "valence"          "Pop"              "Rap"             
## [21] "Rock"             "Hip_Hop"          "Dance"            "Indie"           
## [25] "RnB"              "Alternative"      "Folk"             "Soul"            
## [29] "Country"          "Jazz"             "Electronic"       "Reggaeton"       
## [33] "Reggae"           "World"            "Blues"            "Ska"
# Number of tracks in each genre combination
genres_df <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(tracks_count = n())
genres_df
ABCDEFGHIJ0123456789
genre
<chr>
tracks_count
<int>
Alternative3829
Alternative,Blues98
Alternative,Blues,Country,Folk2
Alternative,Blues,Country,Folk,Indie,Rock4
Alternative,Blues,Dance4
Alternative,Blues,Dance,Folk,Indie,Rock4
Alternative,Blues,Dance,Folk,Rock2
Alternative,Blues,Dance,Indie,Rock31
Alternative,Blues,Dance,Rock11
Alternative,Blues,Folk,Indie,Rock2

Calculate mean, standard deviation, and coefficient of variation for each audio feature corresponding to each music genre combination (489 total)

# Popularity per genre combination: mean, standard deviation and coefficient
# of variation (stdev / mean), highest mean first.
# sd() is NA for a combination with a single track, so pop_cv is NA there too.
genre_popularity_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    pop_mean = mean(popularity),
    pop_stdev = sd(popularity)
  ) %>%
  mutate(pop_cv = pop_stdev / pop_mean) %>%
  arrange(desc(pop_mean))

genre_popularity_summary
ABCDEFGHIJ0123456789
genre
<chr>
pop_mean
<dbl>
pop_stdev
<dbl>
pop_cv
<dbl>
Folk,Pop,R&B,Soul77.00000NANA
Pop,Reggae77.00000NANA
Dance,Electronic,Pop,Rap74.500000.70710680.009491366
Alternative,Dance,Electronic,Indie,Pop,Rock74.000002.82842710.038221988
Pop,Reggaeton73.271437.01534840.095744666
Hip-Hop,Pop,Reggaeton72.854178.41980840.115570719
Dance,Hip-Hop,Pop,Reggaeton72.615384.11376680.056651449
Alternative,Pop,Rock,Ska72.500002.12132030.029259591
Electronic,Rap72.500000.70710680.009753197
Pop,Rock,Soul72.500005.00000000.068965517
# Acousticness per genre combination: mean, standard deviation and
# coefficient of variation (stdev / mean), highest mean first.
genre_acousticness_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    acousticness_mean = mean(acousticness),
    acousticness_stdev = sd(acousticness)
  ) %>%
  mutate(acousticness_cv = acousticness_stdev / acousticness_mean) %>%
  arrange(desc(acousticness_mean))

genre_acousticness_summary
ABCDEFGHIJ0123456789
genre
<chr>
acousticness_mean
<dbl>
acousticness_stdev
<dbl>
Pop,World0.988500009.192388e-03
Folk,Jazz,World0.942000002.262742e-02
Blues,Folk,Jazz,Rock0.931666671.028915e-02
Blues,Folk,Pop,Rock,Soul0.908000000.000000e+00
Dance,Folk,Indie,Pop0.908000000.000000e+00
R&B,Rock0.90400000NA
Folk,Jazz,Pop0.899250003.133023e-02
Dance,Electronic,Folk,Indie,Pop0.87900000NA
Indie,World0.825500004.737615e-02
Alternative,Folk,Pop0.813000006.391870e-02
# Danceability per genre combination: mean, standard deviation and
# coefficient of variation (stdev / mean), highest mean first.
genre_danceability_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    danceability_mean = mean(danceability),
    danceability_stdev = sd(danceability)
  ) %>%
  mutate(danceability_cv = danceability_stdev / danceability_mean) %>%
  arrange(desc(danceability_mean))

genre_danceability_summary
ABCDEFGHIJ0123456789
genre
<chr>
danceability_mean
<dbl>
danceability_stdev
<dbl>
Indie,Pop,Rap,Reggae,Rock0.8700000NA
Electronic,Rap0.86700000.000000000
Pop,Rap,Reggae0.8660000NA
Indie,Rap,Reggae0.8590000NA
Alternative,Hip-Hop,Indie0.83900000.000000000
Dance,Pop,Rap,Reggae0.83533330.028867513
Pop,Rap,Reggaeton0.83350000.026162951
Alternative,Dance,Soul0.82925000.103057196
Alternative,Hip-Hop,Reggaeton0.82900000.000000000
Blues,Folk,Jazz,Soul0.82900000.000000000
# duration_ms per genre combination: mean, standard deviation and
# coefficient of variation (stdev / mean), highest mean first.
genre_duration_ms_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    duration_ms_mean = mean(duration_ms),
    duration_ms_stdev = sd(duration_ms)
  ) %>%
  mutate(duration_ms_cv = duration_ms_stdev / duration_ms_mean) %>%
  arrange(desc(duration_ms_mean))

genre_duration_ms_summary
ABCDEFGHIJ0123456789
genre
<chr>
duration_ms_mean
<dbl>
duration_ms_stdev
<dbl>
Alternative,Blues,Folk,Rock638427.0NA
Soul,World465318.8285649.9230
Folk,Reggae420731.512027.1792
Jazz,Reggae,Soul420720.0NA
Folk,Jazz,World415583.5150325.9519
Indie,Jazz,World411513.5122518.2707
Dance,Electronic,Folk,Indie,Rock377387.00.0000
Jazz,Rock371773.4123517.3543
Alternative,Dance,Electronic,Folk,Indie,Rock370058.549010.5612
Jazz,Reggae343686.3108186.7175
# Energy per genre combination: mean, standard deviation and coefficient of
# variation (stdev / mean), highest mean first.
genre_energy_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    energy_mean = mean(energy),
    energy_stdev = sd(energy)
  ) %>%
  mutate(energy_cv = energy_stdev / energy_mean) %>%
  arrange(desc(energy_mean))

genre_energy_summary
ABCDEFGHIJ0123456789
genre
<chr>
energy_mean
<dbl>
energy_stdev
<dbl>
energy_cv
<dbl>
Alternative,Electronic,Indie,Pop,Rock0.95800000.0000000000.000000000
Dance,Electronic,Jazz,Pop0.94900000.0000000000.000000000
Alternative,Country,Indie,Rock0.93600000.0000000000.000000000
Alternative,Rap,Reggae,Rock0.92800000.0000000000.000000000
Pop,Rock,Ska0.92700000.0147309200.015890960
Blues,Dance,Indie,Rock0.92420000.0531290880.057486570
Dance,Indie,Rock,Ska0.91700000.0000000000.000000000
Country,Pop,Rap,Rock0.91100000.0138564060.015210106
Dance,Electronic,Folk,Indie,Rock0.91000000.0000000000.000000000
Alternative,Rap0.90092520.1108562450.123047117
# Liveness per genre combination: mean, standard deviation and coefficient of
# variation (stdev / mean), highest mean first.
genre_liveness_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    liveness_mean = mean(liveness),
    liveness_stdev = sd(liveness)
  ) %>%
  mutate(liveness_cv = liveness_stdev / liveness_mean) %>%
  arrange(desc(liveness_mean))

genre_liveness_summary
ABCDEFGHIJ0123456789
genre
<chr>
liveness_mean
<dbl>
liveness_stdev
<dbl>
liveness_cv
<dbl>
Alternative,Hip-Hop,Reggaeton0.776000000.0000000000.000000000
Dance,Electronic,Folk,Indie,Pop0.72200000NANA
Alternative,Dance,Electronic,Folk,Indie0.568500000.2349729770.413320980
Dance,Indie,Pop,Rap0.540000000.1562721980.289392960
Reggae,Soul0.466600000.5450379071.168105244
Country,Pop,Rap0.45900000NANA
Alternative,Hip-Hop,Reggaeton,Rock0.404833330.4234265781.045928146
Jazz,Reggae,Soul0.39300000NANA
Alternative,Hip-Hop,Pop0.379000000.1840934550.485734709
Folk,Indie,Pop,R&B0.376450000.4420124491.174159779
# Loudness per genre combination: mean, standard deviation and coefficient of
# variation, highest (least negative) mean first.
# Loudness means are negative in this data, so dividing by the raw mean gave
# a negative coefficient of variation; the magnitude of the mean is used so
# loudness_cv stays a non-negative measure of relative spread.
genre_loudness_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    loudness_mean = mean(loudness),
    loudness_stdev = sd(loudness)
  ) %>%
  mutate(loudness_cv = loudness_stdev / abs(loudness_mean)) %>%
  arrange(desc(loudness_mean))

genre_loudness_summary
ABCDEFGHIJ0123456789
genre
<chr>
loudness_mean
<dbl>
loudness_stdev
<dbl>
loudness_cv
<dbl>
Hip-Hop,Indie,R&B,Rap-2.5680000.85100000-0.33138629
Dance,Hip-Hop,Pop,Reggaeton-3.1393851.19665294-0.38117437
Alternative,Dance,Pop-3.3555000.69615537-0.20746696
Country,Pop,Rap,Rock-3.4706670.39548493-0.11395071
Pop,R&B,Rap,Rock-3.472000NANA
Folk,Indie,Ska-3.5300000.07495332-0.02123323
Dance,Reggaeton-3.5770001.33217917-0.37242918
Dance,Folk,Pop,R&B,Rock,Soul-3.6210000.000000000.00000000
Dance,Rap,Reggae-3.903000NANA
Dance,Pop,Rap,Reggae-4.0453330.90701727-0.22421323
# Speechiness per genre combination: mean, standard deviation and
# coefficient of variation (stdev / mean), highest mean first.
genre_speechiness_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    speechiness_mean = mean(speechiness),
    speechiness_stdev = sd(speechiness)
  ) %>%
  mutate(speechiness_cv = speechiness_stdev / speechiness_mean) %>%
  arrange(desc(speechiness_mean))

genre_speechiness_summary
ABCDEFGHIJ0123456789
genre
<chr>
speechiness_mean
<dbl>
speechiness_stdev
<dbl>
Alternative,Hip-Hop,Indie,Pop,R&B,Rap0.557000000.0080829038
Alternative,Hip-Hop,Indie,R&B,Rap0.360888890.1104441904
Pop,Rap,Reggae0.36000000NA
Country,Hip-Hop,Pop,Rap0.317500000.1137380030
Hip-Hop,Indie,Pop,Rap,Soul0.28500000NA
Country,Pop,Rap0.28300000NA
Country,Hip-Hop0.280783330.2306888330
Hip-Hop,Indie,R&B0.280000000.0367559519
Alternative,Hip-Hop,Jazz,Rap0.276943480.0832439031
Alternative,Folk,R&B0.273900000.2772435993
# Tempo per genre combination: mean, standard deviation and coefficient of
# variation (stdev / mean), highest mean first.
genre_tempo_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    tempo_mean = mean(tempo),
    tempo_stdev = sd(tempo)
  ) %>%
  mutate(tempo_cv = tempo_stdev / tempo_mean) %>%
  arrange(desc(tempo_mean))

genre_tempo_summary
ABCDEFGHIJ0123456789
genre
<chr>
tempo_mean
<dbl>
tempo_stdev
<dbl>
tempo_cv
<dbl>
Jazz,Pop202.08200NANA
Alternative,Reggaeton199.952000.000000000.000000000
Electronic,Folk,World180.09000NANA
Alternative,Dance,Electronic,Folk,Indie,Rock,World180.074000.000000000.000000000
Rap,Reggae172.9025029.553527920.170925972
Dance,World172.6170012.822372130.074282209
Dance,Folk,R&B,Soul169.470000.000000000.000000000
Alternative,Blues,Folk,Indie,Rock166.488000.000000000.000000000
Alternative,Pop,Reggae,Rock166.163000.000000000.000000000
Blues,Dance164.4425019.175534490.116609359
# Valence per genre combination: mean, standard deviation and coefficient of
# variation (stdev / mean), highest mean first.
genre_valence_summary <- spotify_data_final %>%
  group_by(genre) %>%
  summarise(
    valence_mean = mean(valence),
    valence_stdev = sd(valence)
  ) %>%
  mutate(valence_cv = valence_stdev / valence_mean) %>%
  arrange(desc(valence_mean))

genre_valence_summary
ABCDEFGHIJ0123456789
genre
<chr>
valence_mean
<dbl>
valence_stdev
<dbl>
valence_cv
<dbl>
Blues,Pop0.9730000NANA
Dance,Folk,Pop,R&B,Rock,Soul0.96500000.00000000000.000000000
Dance,Folk,Pop0.9580000NANA
Blues,Folk,Pop,Rock0.95700000.00000000000.000000000
Blues,Pop,Soul0.93900000.03348631560.035661678
Blues,Pop,Rock,Soul0.93800000.00000000000.000000000
Pop,Reggae0.9310000NANA
Dance,Electronic,Folk,Indie,Rock0.92800000.00000000000.000000000
Alternative,Dance,Electronic,Indie,Pop,Rock0.91800000.00000000000.000000000
Alternative,Hip-Hop,Reggaeton0.88300000.00000000000.000000000

Merge audio features of genre into one dataframe

# Track counts first, then every per-feature summary; all tables share the
# `genre` column that the merge below joins on.
genres_data_list <- list(
  genres_df,
  genre_popularity_summary,
  genre_acousticness_summary,
  genre_danceability_summary,
  genre_duration_ms_summary,
  genre_energy_summary,
  genre_liveness_summary,
  genre_loudness_summary,
  genre_speechiness_summary,
  genre_tempo_summary,
  genre_valence_summary
)

# Pairwise step for merging multiple data frames: joins two tables on their
# shared `genre` column, keeping only genres present in both.
my_merge <- function(df1, df2) {
  merge(x = df1, y = df2, by = "genre")
}

# Fold the list left to right with my_merge, producing one wide table with a
# row per genre combination
genres_df <-  Reduce(my_merge, genres_data_list)

genres_df
ABCDEFGHIJ0123456789
genre
<chr>
tracks_count
<int>
pop_mean
<dbl>
pop_stdev
<dbl>
Alternative382947.071047.2091946
Alternative,Blues9848.816336.8252819
Alternative,Blues,Country,Folk249.500002.1213203
Alternative,Blues,Country,Folk,Indie,Rock455.000002.5819889
Alternative,Blues,Dance446.500006.0277138
Alternative,Blues,Dance,Folk,Indie,Rock459.000002.4494897
Alternative,Blues,Dance,Folk,Rock253.000002.8284271
Alternative,Blues,Dance,Indie,Rock3157.064525.9717975
Alternative,Blues,Dance,Rock1153.545454.1319157
Alternative,Blues,Folk,Indie,Rock254.500002.1213203
# List the columns of the merged per-genre summary table
colnames(genres_df)
##  [1] "genre"              "tracks_count"       "pop_mean"          
##  [4] "pop_stdev"          "pop_cv"             "acousticness_mean" 
##  [7] "acousticness_stdev" "acousticness_cv"    "danceability_mean" 
## [10] "danceability_stdev" "danceability_cv"    "duration_ms_mean"  
## [13] "duration_ms_stdev"  "duration_ms_cv"     "energy_mean"       
## [16] "energy_stdev"       "energy_cv"          "liveness_mean"     
## [19] "liveness_stdev"     "liveness_cv"        "loudness_mean"     
## [22] "loudness_stdev"     "loudness_cv"        "speechiness_mean"  
## [25] "speechiness_stdev"  "speechiness_cv"     "tempo_mean"        
## [28] "tempo_stdev"        "tempo_cv"           "valence_mean"      
## [31] "valence_stdev"      "valence_cv"
# The merge lost the popularity ranking, so restore it: highest mean
# popularity first, ties broken by ascending track count
ranking <- order(-genres_df$pop_mean, genres_df$tracks_count)
genres_df <- genres_df[ranking, ]

# Reset the row names so they run 1..n in the new order
rownames(genres_df) <- NULL

genres_df
ABCDEFGHIJ0123456789
genre
<chr>
tracks_count
<int>
pop_mean
<dbl>
pop_stdev
<dbl>
Folk,Pop,R&B,Soul177.00000NA
Pop,Reggae177.00000NA
Dance,Electronic,Pop,Rap274.500000.7071068
Alternative,Dance,Electronic,Indie,Pop,Rock274.000002.8284271
Pop,Reggaeton7073.271437.0153484
Hip-Hop,Pop,Reggaeton4872.854178.4198084
Dance,Hip-Hop,Pop,Reggaeton1372.615384.1137668
Alternative,Pop,Rock,Ska272.500002.1213203
Electronic,Rap272.500000.7071068
Pop,Rock,Soul472.500005.0000000

Detect how many music genre combinations have enough tracks to be statistically meaningful (minimum of 100 tracks)

# Keep only genre combinations with at least 100 tracks
enough_tracks <- genres_df$tracks_count >= 100
genres_df[enough_tracks, ]
ABCDEFGHIJ0123456789
 
 
genre
<chr>
tracks_count
<int>
pop_mean
<dbl>
pop_stdev
<dbl>
pop_cv
<dbl>
30Dance,Pop,Rock12068.641676.4310120.09368962
41Pop134067.389557.8867380.11703206
42Dance,Pop,R&B36067.375008.2019050.12173513
45Hip-Hop,Pop,Rap159067.310696.5156310.09679934
47Pop,Rap35967.220068.6019380.12796684
50Folk,Pop,Rock10767.084115.3361030.07954346
55Pop,Rock48666.783957.0328120.10530692
57Hip-Hop,Pop,R&B,Rap12166.537195.9777940.08984140
60Dance,Pop185966.4728311.4925770.17289133
72Dance,Indie,Pop21165.919437.6965140.11675638

Visualizations of Music Genres

Irrelevant single genres removed: Movie, A Capella, Anime, Children's Music, Soundtrack, Comedy, Opera, and Classical.

Total count of single genre tracks after filtering irrelevant genres: 121,411.
Total count of combined genres tracks after filtering irrelevant genres: 73,489

Most combined genres tend to outperform tracks associated with only a single genre. In other words, tracks associated with more than one music genre tend to be more popular.

The chart below uses the same dataset as the previous graph. A limited number of single genres is displayed to preserve chart interpretability.

Let's examine how popular some audio features are and which single genres tend to be associated more with a particular audio feature (e.g., energy, danceability, duration, etc.).

Spotify uses the word “valence” to measure whether a song is likely to make someone feel happy (higher valence) or sad (lower valence).

Conclusion

Songs with elements of Dance, Pop, and Rock genres are the most likely to be profitable.

High correlation between danceability and popularity except with electronic music.

Beats-Per-Minute (BPM) range of 120-125 is recommended. This range is easy to dance to.

Song duration of ~3.5 minutes is recommended for most genres but for rap music ~4.5 minutes is advised.

Whether a song makes you feel happy or sad doesn't play a significant role except for reggaeton. Reggaeton songs that are too happy are not as popular.