From 41eca1c891f282416312b762811bb6d41502e463 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Fri, 18 May 2018 21:20:13 -0400 Subject: [PATCH 1/7] Get soundtrack musical features Implemented get_features() to retrieve musical features for soundtracks --- spotifyvis/views.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index 5a4ee80..b1154f9 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -116,4 +116,33 @@ def user_data(request): 'user_name': user_data_response['display_name'], 'id': user_data_response['id'], } - return render(request, 'spotifyvis/user_data.html', context) \ No newline at end of file + return render(request, 'spotifyvis/user_data.html', context) + + + +def get_features(track_id, token): + """Returns the features of a soundtrack + + Args: + track_id: the id of the soundtrack, needed to query the Spotify API + token: an access token for the Spotify API + + Returns: + A dictionary with the features as its keys + """ + + headers = { + 'Authorization': token, + } + response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json() + features_dict = {} + + # Data that we don't need + useless_keys = [ + "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature", + ] + for key, val in response.items(): + if key not in useless_keys: + features_dict[key] = val + + return features_dict \ No newline at end of file From 7b153649bfb1825e62a23033a818b86470c4a5b8 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 20 May 2018 15:19:11 -0400 Subject: [PATCH 2/7] Implement online standard deviation algorithm Implemented Welford's method for calculating standard deviation as data points arrive. 
--- spotifyvis/views.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index b1154f9..8dd57e9 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -145,4 +145,22 @@ def get_features(track_id, token): if key not in useless_keys: features_dict[key] = val - return features_dict \ No newline at end of file + return features_dict + + +def update_std_dev(cur_mean, new_data_point, sample_size): + """Calculates the standard deviation for a sample without storing all data points + + Args: + cur_mean: the current mean for N = (sample_size - 1) + new_data_point: a new data point + sample_size: sample size including the new data point + + Returns: + (updated_mean, std_dev) + """ + # This is an implementationof Welford's method + # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/ + new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size + std_dev = (new_data_point - new_mean) * (new_data_point - cur_mean) + return new_mean, std_dev \ No newline at end of file From bbc713e72953ea290bfbf8547d8389d6aedd6005 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 20 May 2018 15:55:59 -0400 Subject: [PATCH 3/7] Implement audio feature helper functions Implemented helper functions for updating audio feature statistics. 
--- spotifyvis/views.py | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index 8dd57e9..895e040 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -120,20 +120,17 @@ def user_data(request): -def get_features(track_id, token): - """Returns the features of a soundtrack +def get_audio_features(track_id, headers): + """Returns the audio features of a soundtrack Args: track_id: the id of the soundtrack, needed to query the Spotify API - token: an access token for the Spotify API + headers: headers containing the API token Returns: A dictionary with the features as its keys """ - - headers = { - 'Authorization': token, - } + response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json() features_dict = {} @@ -163,4 +160,31 @@ def update_std_dev(cur_mean, new_data_point, sample_size): # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/ new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size std_dev = (new_data_point - new_mean) * (new_data_point - cur_mean) - return new_mean, std_dev \ No newline at end of file + return new_mean, std_dev + + +def update_audio_feature_stats(feature, new_data_point, sample_size): + """Updates the audio feature statistics in library_stats + + Args: + feature: the audio feature to be updated (string) + new_data_point: new data to update the stats with + sample_size: sample size including the new data point + + Returns: + None + """ + # first time the feature is considered + if sample_size < 2: + library_stats['audio_features'][feature] = { + "average": new_data_point, + "std_dev": 0, + } + + else: + current_mean = library_stats['audio_features'][feature]['average'] + updated_mean, std_dev = update_std_dev(current_mean, new_data_point, sample_size) + + library_stats['audio_features'][feature]['average'] = updated_mean + 
library_stats['audio_features'][feature]['std_dev'] = std_dev + From d7002f75711ec2ef7992d26d2d53a7ef7fd5f4b4 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 20 May 2018 16:28:46 -0400 Subject: [PATCH 4/7] Implement audio feature statistics calculation Implemented the calculation of the average and standard deviation of audio features. --- spotifyvis/views.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index e9ca559..5aa780c 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -168,11 +168,12 @@ def get_audio_features(track_id, headers): return features_dict -def update_std_dev(cur_mean, new_data_point, sample_size): +def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size): """Calculates the standard deviation for a sample without storing all data points Args: cur_mean: the current mean for N = (sample_size - 1) + cur_std_dev: the current standard deviation for N = (sample_size - 1) new_data_point: a new data point sample_size: sample size including the new data point @@ -182,8 +183,12 @@ def update_std_dev(cur_mean, new_data_point, sample_size): # This is an implementationof Welford's method # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/ new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size - std_dev = (new_data_point - new_mean) * (new_data_point - cur_mean) - return new_mean, std_dev + delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean) + new_std_dev = math.sqrt( + (math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / ( + sample_size - 1 + )) + return new_mean, new_std_dev def update_audio_feature_stats(feature, new_data_point, sample_size): @@ -203,13 +208,13 @@ def update_audio_feature_stats(feature, new_data_point, sample_size): "average": new_data_point, "std_dev": 0, } - else: current_mean = library_stats['audio_features'][feature]['average'] - updated_mean, 
std_dev = update_std_dev(current_mean, new_data_point, sample_size) + cur_std_dev = library_stats['audio_features'][feature]['std_dev'] + updated_mean, new_std_dev = update_std_dev(current_mean, cur_std_dev, new_data_point, sample_size) library_stats['audio_features'][feature]['average'] = updated_mean - library_stats['audio_features'][feature]['std_dev'] = std_dev + library_stats['audio_features'][feature]['std_dev'] = new_std_dev # parse_library {{{ # @@ -228,12 +233,19 @@ def parse_library(headers, tracks): # keeps track of point to get songs from offset = 0 payload = {'limit': str(limit)} - for i in range(0, tracks, limit): + for _ in range(0, tracks, limit): payload['offset'] = str(offset) saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json() + num_samples = offset for track_dict in saved_tracks_response['items']: + # Track the number of samples for calculating + # audio feature averages and standard deviations on the fly + num_samples += 1 get_track_info(track_dict['track']) # get_genre(headers, track_dict['track']['album']['id']) + audio_features_dict = get_audio_features(track_dict['id'], headers) + for feature, feature_data in audio_features_dict.items(): + update_audio_feature_stats(feature, feature_data, num_samples) for artist_dict in track_dict['track']['artists']: increase_artist_count(headers, artist_dict['name'], artist_dict['id']) # calculates num_songs with offset + songs retrieved From a5780387e108a66e7aa9fd7e60c5598f3c7a887b Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 20 May 2018 16:31:51 -0400 Subject: [PATCH 5/7] Made minor changes to variable names --- spotifyvis/views.py | 86 ++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index 5aa780c..01edd75 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -143,6 +143,45 @@ def user_data(request): # }}} user_data # +# 
parse_library {{{ # + +def parse_library(headers, tracks): + """Scans user's library for certain number of tracks to update library_stats with. + + :headers: For API call. + :tracks: Number of tracks to get from user's library. + :returns: None + + """ + # TODO: implement importing entire library with 0 as tracks param + # number of tracks to get with each call + limit = 5 + # keeps track of point to get songs from + offset = 0 + payload = {'limit': str(limit)} + for _ in range(0, tracks, limit): + payload['offset'] = str(offset) + saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json() + num_samples = offset + for track_dict in saved_tracks_response['items']: + # Track the number of samples for calculating + # audio feature averages and standard deviations on the fly + num_samples += 1 + get_track_info(track_dict['track']) + # get_genre(headers, track_dict['track']['album']['id']) + audio_features_dict = get_audio_features(track_dict['id'], headers) + for feature, feature_data in audio_features_dict.items(): + update_audio_feature_stats(feature, feature_data, num_samples) + for artist_dict in track_dict['track']['artists']: + increase_artist_count(headers, artist_dict['name'], artist_dict['id']) + # calculates num_songs with offset + songs retrieved + library_stats['num_songs'] = offset + len(saved_tracks_response['items']) + offset += limit + calculate_genres_from_artists(headers) + pprint.pprint(library_stats) + +# }}} parse_library # + def get_audio_features(track_id, headers): """Returns the audio features of a soundtrack @@ -178,7 +217,7 @@ def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size): sample_size: sample size including the new data point Returns: - (updated_mean, std_dev) + (new_mean, new_std_dev) """ # This is an implementationof Welford's method # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/ @@ -209,53 +248,14 @@ def 
update_audio_feature_stats(feature, new_data_point, sample_size): "std_dev": 0, } else: - current_mean = library_stats['audio_features'][feature]['average'] + cur_mean = library_stats['audio_features'][feature]['average'] cur_std_dev = library_stats['audio_features'][feature]['std_dev'] - updated_mean, new_std_dev = update_std_dev(current_mean, cur_std_dev, new_data_point, sample_size) + new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size) - library_stats['audio_features'][feature]['average'] = updated_mean + library_stats['audio_features'][feature]['average'] = new_mean library_stats['audio_features'][feature]['std_dev'] = new_std_dev -# parse_library {{{ # - -def parse_library(headers, tracks): - """Scans user's library for certain number of tracks to update library_stats with. - - :headers: For API call. - :tracks: Number of tracks to get from user's library. - :returns: None - - """ - # TODO: implement importing entire library with 0 as tracks param - # number of tracks to get with each call - limit = 5 - # keeps track of point to get songs from - offset = 0 - payload = {'limit': str(limit)} - for _ in range(0, tracks, limit): - payload['offset'] = str(offset) - saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json() - num_samples = offset - for track_dict in saved_tracks_response['items']: - # Track the number of samples for calculating - # audio feature averages and standard deviations on the fly - num_samples += 1 - get_track_info(track_dict['track']) - # get_genre(headers, track_dict['track']['album']['id']) - audio_features_dict = get_audio_features(track_dict['id'], headers) - for feature, feature_data in audio_features_dict.items(): - update_audio_feature_stats(feature, feature_data, num_samples) - for artist_dict in track_dict['track']['artists']: - increase_artist_count(headers, artist_dict['name'], artist_dict['id']) - # calculates num_songs with offset 
+ songs retrieved - library_stats['num_songs'] = offset + len(saved_tracks_response['items']) - offset += limit - calculate_genres_from_artists(headers) - pprint.pprint(library_stats) - -# }}} parse_library # - # increase_nested_key {{{ # def increase_nested_key(top_key, nested_key, amount=1): From 4066c96c94d3f12d029ac787ea9839679d224cbb Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 20 May 2018 16:35:19 -0400 Subject: [PATCH 6/7] Changed signature of get_audio_features() for consistency Switched around the ordering of the parameters so that they are consistent with other data acquisition helper functions. --- spotifyvis/views.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index 01edd75..53280e0 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -182,13 +182,13 @@ def parse_library(headers, tracks): # }}} parse_library # -def get_audio_features(track_id, headers): +def get_audio_features(headers, track_id): """Returns the audio features of a soundtrack Args: - track_id: the id of the soundtrack, needed to query the Spotify API headers: headers containing the API token - + track_id: the id of the soundtrack, needed to query the Spotify API + Returns: A dictionary with the features as its keys """ From ec3fb3f959e1060023962b02c5e2f54dd8f07d6c Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 20 May 2018 16:37:02 -0400 Subject: [PATCH 7/7] Fix get_audio_features() bug Fixed bug where the parameters were passed in the wrong order for get_audio_features(). 
--- spotifyvis/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index 53280e0..c55cee2 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -169,7 +169,7 @@ def parse_library(headers, tracks): num_samples += 1 get_track_info(track_dict['track']) # get_genre(headers, track_dict['track']['album']['id']) - audio_features_dict = get_audio_features(track_dict['id'], headers) + audio_features_dict = get_audio_features(headers, track_dict['id']) for feature, feature_data in audio_features_dict.items(): update_audio_feature_stats(feature, feature_data, num_samples) for artist_dict in track_dict['track']['artists']: