From 41eca1c891f282416312b762811bb6d41502e463 Mon Sep 17 00:00:00 2001
From: Chris Shyi <chrisshyi13@gmail.com>
Date: Fri, 18 May 2018 21:20:13 -0400
Subject: [PATCH 1/7] Get soundtrack musical features

Implemented get_features() to retrieve musical features for soundtracks
---
 spotifyvis/views.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/spotifyvis/views.py b/spotifyvis/views.py
index 5a4ee80..b1154f9 100644
--- a/spotifyvis/views.py
+++ b/spotifyvis/views.py
@@ -116,4 +116,33 @@ def user_data(request):
         'user_name': user_data_response['display_name'],
         'id': user_data_response['id'],
     }
-    return render(request, 'spotifyvis/user_data.html', context)
\ No newline at end of file
+    return render(request, 'spotifyvis/user_data.html', context)
+
+
+
+def get_features(track_id, token):
+    """Returns the features of a soundtrack
+
+    Args:
+        track_id: the id of the soundtrack, needed to query the Spotify API
+        token: an access token for the Spotify API
+ 
+    Returns:
+        A dictionary with the features as its keys
+    """
+
+    headers = {
+        'Authorization': token,
+    }
+    response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
+    features_dict = {}
+
+    # Data that we don't need
+    useless_keys = [ 
+        "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
+    ]
+    for key, val in response.items():
+        if key not in useless_keys:
+            features_dict[key] = val
+
+    return features_dict
\ No newline at end of file

From 7b153649bfb1825e62a23033a818b86470c4a5b8 Mon Sep 17 00:00:00 2001
From: Chris Shyi <chris.shyi@mail.utoronto.ca>
Date: Sun, 20 May 2018 15:19:11 -0400
Subject: [PATCH 2/7] Implement online standard deviation algorithm

Implemented Welford's method for calculating standard deviation as data
points arrive.
---
 spotifyvis/views.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/spotifyvis/views.py b/spotifyvis/views.py
index b1154f9..8dd57e9 100644
--- a/spotifyvis/views.py
+++ b/spotifyvis/views.py
@@ -145,4 +145,22 @@ def get_features(track_id, token):
         if key not in useless_keys:
             features_dict[key] = val
 
-    return features_dict
\ No newline at end of file
+    return features_dict
+
+
+def update_std_dev(cur_mean, new_data_point, sample_size):
+    """Calculates the standard deviation for a sample without storing all data points
+
+    Args:
+        cur_mean: the current mean for N = (sample_size - 1)
+        new_data_point: a new data point
+        sample_size: sample size including the new data point
+    
+    Returns:
+        (updated_mean, std_dev)
+    """
+    # This is an implementationof Welford's method
+    # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
+    new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
+    std_dev = (new_data_point - new_mean) * (new_data_point - cur_mean)
+    return new_mean, std_dev
\ No newline at end of file

From bbc713e72953ea290bfbf8547d8389d6aedd6005 Mon Sep 17 00:00:00 2001
From: Chris Shyi <chris.shyi@mail.utoronto.ca>
Date: Sun, 20 May 2018 15:55:59 -0400
Subject: [PATCH 3/7] Implement audio feature helper functions

Implemented helper functions for updating audio feature statistics.
---
 spotifyvis/views.py | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/spotifyvis/views.py b/spotifyvis/views.py
index 8dd57e9..895e040 100644
--- a/spotifyvis/views.py
+++ b/spotifyvis/views.py
@@ -120,20 +120,17 @@ def user_data(request):
 
 
 
-def get_features(track_id, token):
-    """Returns the features of a soundtrack
+def get_audio_features(track_id, headers):
+    """Returns the audio features of a soundtrack
 
     Args:
         track_id: the id of the soundtrack, needed to query the Spotify API
-        token: an access token for the Spotify API
+        headers: headers containing the API token
  
     Returns:
         A dictionary with the features as its keys
     """
-
-    headers = {
-        'Authorization': token,
-    }
+    
     response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
     features_dict = {}
 
@@ -163,4 +160,31 @@ def update_std_dev(cur_mean, new_data_point, sample_size):
     # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
     new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
     std_dev = (new_data_point - new_mean) * (new_data_point - cur_mean)
-    return new_mean, std_dev
\ No newline at end of file
+    return new_mean, std_dev
+
+
+def update_audio_feature_stats(feature, new_data_point, sample_size):
+    """Updates the audio feature statistics in library_stats
+
+    Args:
+        feature: the audio feature to be updated (string)
+        new_data_point: new data to update the stats with
+        sample_size: sample size including the new data point
+    
+    Returns:
+        None
+    """
+    # first time the feature is considered
+    if sample_size < 2:
+        library_stats['audio_features'][feature] = {
+            "average": new_data_point,
+            "std_dev": 0,
+        }
+
+    else:
+        current_mean = library_stats['audio_features'][feature]['average']
+        updated_mean, std_dev = update_std_dev(current_mean, new_data_point, sample_size)
+
+        library_stats['audio_features'][feature]['average'] = updated_mean
+        library_stats['audio_features'][feature]['std_dev'] = std_dev
+

From d7002f75711ec2ef7992d26d2d53a7ef7fd5f4b4 Mon Sep 17 00:00:00 2001
From: Chris Shyi <chris.shyi@mail.utoronto.ca>
Date: Sun, 20 May 2018 16:28:46 -0400
Subject: [PATCH 4/7] Implement audio feature statistics calculation

Implemented the calculation of the average and standard deviation of audio features.
---
 spotifyvis/views.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/spotifyvis/views.py b/spotifyvis/views.py
index e9ca559..5aa780c 100644
--- a/spotifyvis/views.py
+++ b/spotifyvis/views.py
@@ -168,11 +168,12 @@ def get_audio_features(track_id, headers):
     return features_dict
 
 
-def update_std_dev(cur_mean, new_data_point, sample_size):
+def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
     """Calculates the standard deviation for a sample without storing all data points
 
     Args:
         cur_mean: the current mean for N = (sample_size - 1)
+        cur_std_dev: the current standard deviation for N = (sample_size - 1)
         new_data_point: a new data point
         sample_size: sample size including the new data point
     
@@ -182,8 +183,12 @@ def update_std_dev(cur_mean, new_data_point, sample_size):
     # This is an implementationof Welford's method
     # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
     new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
-    std_dev = (new_data_point - new_mean) * (new_data_point - cur_mean)
-    return new_mean, std_dev
+    delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean)
+    new_std_dev = math.sqrt(
+        (math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / (
+        sample_size - 1
+    ))
+    return new_mean, new_std_dev
 
 
 def update_audio_feature_stats(feature, new_data_point, sample_size):
@@ -203,13 +208,13 @@ def update_audio_feature_stats(feature, new_data_point, sample_size):
             "average": new_data_point,
             "std_dev": 0,
         }
-
     else:
         current_mean = library_stats['audio_features'][feature]['average']
-        updated_mean, std_dev = update_std_dev(current_mean, new_data_point, sample_size)
+        cur_std_dev = library_stats['audio_features'][feature]['std_dev']
+        updated_mean, new_std_dev = update_std_dev(current_mean, cur_std_dev, new_data_point, sample_size)
 
         library_stats['audio_features'][feature]['average'] = updated_mean
-        library_stats['audio_features'][feature]['std_dev'] = std_dev
+        library_stats['audio_features'][feature]['std_dev'] = new_std_dev
 
 
 #  parse_library {{{ # 
@@ -228,12 +233,19 @@ def parse_library(headers, tracks):
     # keeps track of point to get songs from
     offset = 0
     payload = {'limit': str(limit)}
-    for i in range(0, tracks, limit):
+    for _ in range(0, tracks, limit):
         payload['offset'] = str(offset)
         saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
+        num_samples = offset
         for track_dict in saved_tracks_response['items']:
+            # Track the number of samples for calculating
+            # audio feature averages and standard deviations on the fly
+            num_samples += 1 
             get_track_info(track_dict['track'])
             #  get_genre(headers, track_dict['track']['album']['id'])
+            audio_features_dict = get_audio_features(track_dict['id'], headers)
+            for feature, feature_data in audio_features_dict.items():
+                update_audio_feature_stats(feature, feature_data, num_samples)
             for artist_dict in track_dict['track']['artists']:
                 increase_artist_count(headers, artist_dict['name'], artist_dict['id'])
         # calculates num_songs with offset + songs retrieved

From a5780387e108a66e7aa9fd7e60c5598f3c7a887b Mon Sep 17 00:00:00 2001
From: Chris Shyi <chris.shyi@mail.utoronto.ca>
Date: Sun, 20 May 2018 16:31:51 -0400
Subject: [PATCH 5/7] Made minor changes to variable names

---
 spotifyvis/views.py | 86 ++++++++++++++++++++++-----------------------
 1 file changed, 43 insertions(+), 43 deletions(-)

diff --git a/spotifyvis/views.py b/spotifyvis/views.py
index 5aa780c..01edd75 100644
--- a/spotifyvis/views.py
+++ b/spotifyvis/views.py
@@ -143,6 +143,45 @@ def user_data(request):
 
 #  }}} user_data  # 
 
+#  parse_library {{{ # 
+
+def parse_library(headers, tracks):
+    """Scans user's library for certain number of tracks to update library_stats with.
+
+    :headers: For API call.
+    :tracks: Number of tracks to get from user's library.
+    :returns: None
+
+    """
+    #  TODO: implement importing entire library with 0 as tracks param
+    # number of tracks to get with each call
+    limit = 5
+    # keeps track of point to get songs from
+    offset = 0
+    payload = {'limit': str(limit)}
+    for _ in range(0, tracks, limit):
+        payload['offset'] = str(offset)
+        saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
+        num_samples = offset
+        for track_dict in saved_tracks_response['items']:
+            # Track the number of samples for calculating
+            # audio feature averages and standard deviations on the fly
+            num_samples += 1 
+            get_track_info(track_dict['track'])
+            #  get_genre(headers, track_dict['track']['album']['id'])
+            audio_features_dict = get_audio_features(track_dict['id'], headers)
+            for feature, feature_data in audio_features_dict.items():
+                update_audio_feature_stats(feature, feature_data, num_samples)
+            for artist_dict in track_dict['track']['artists']:
+                increase_artist_count(headers, artist_dict['name'], artist_dict['id'])
+        # calculates num_songs with offset + songs retrieved
+        library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
+        offset += limit
+    calculate_genres_from_artists(headers)
+    pprint.pprint(library_stats)
+
+#  }}} parse_library # 
+
 def get_audio_features(track_id, headers):
     """Returns the audio features of a soundtrack
 
@@ -178,7 +217,7 @@ def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
         sample_size: sample size including the new data point
     
     Returns:
-        (updated_mean, std_dev)
+        (new_mean, new_std_dev)
     """
     # This is an implementationof Welford's method
     # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
@@ -209,53 +248,14 @@ def update_audio_feature_stats(feature, new_data_point, sample_size):
             "std_dev": 0,
         }
     else:
-        current_mean = library_stats['audio_features'][feature]['average']
+        cur_mean = library_stats['audio_features'][feature]['average']
         cur_std_dev = library_stats['audio_features'][feature]['std_dev']
-        updated_mean, new_std_dev = update_std_dev(current_mean, cur_std_dev, new_data_point, sample_size)
+        new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size)
 
-        library_stats['audio_features'][feature]['average'] = updated_mean
+        library_stats['audio_features'][feature]['average'] = new_mean
         library_stats['audio_features'][feature]['std_dev'] = new_std_dev
 
 
-#  parse_library {{{ # 
-
-def parse_library(headers, tracks):
-    """Scans user's library for certain number of tracks to update library_stats with.
-
-    :headers: For API call.
-    :tracks: Number of tracks to get from user's library.
-    :returns: None
-
-    """
-    #  TODO: implement importing entire library with 0 as tracks param
-    # number of tracks to get with each call
-    limit = 5
-    # keeps track of point to get songs from
-    offset = 0
-    payload = {'limit': str(limit)}
-    for _ in range(0, tracks, limit):
-        payload['offset'] = str(offset)
-        saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
-        num_samples = offset
-        for track_dict in saved_tracks_response['items']:
-            # Track the number of samples for calculating
-            # audio feature averages and standard deviations on the fly
-            num_samples += 1 
-            get_track_info(track_dict['track'])
-            #  get_genre(headers, track_dict['track']['album']['id'])
-            audio_features_dict = get_audio_features(track_dict['id'], headers)
-            for feature, feature_data in audio_features_dict.items():
-                update_audio_feature_stats(feature, feature_data, num_samples)
-            for artist_dict in track_dict['track']['artists']:
-                increase_artist_count(headers, artist_dict['name'], artist_dict['id'])
-        # calculates num_songs with offset + songs retrieved
-        library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
-        offset += limit
-    calculate_genres_from_artists(headers)
-    pprint.pprint(library_stats)
-
-#  }}} parse_library # 
-
 #  increase_nested_key {{{ # 
 
 def increase_nested_key(top_key, nested_key, amount=1):

From 4066c96c94d3f12d029ac787ea9839679d224cbb Mon Sep 17 00:00:00 2001
From: Chris Shyi <chris.shyi@mail.utoronto.ca>
Date: Sun, 20 May 2018 16:35:19 -0400
Subject: [PATCH 6/7] Changed signature of get_audio_features() for consistency

Switched around the ordering of the parameters so that they are
consistent with other data acquisition helper functions.
---
 spotifyvis/views.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spotifyvis/views.py b/spotifyvis/views.py
index 01edd75..53280e0 100644
--- a/spotifyvis/views.py
+++ b/spotifyvis/views.py
@@ -182,13 +182,13 @@ def parse_library(headers, tracks):
 
 #  }}} parse_library # 
 
-def get_audio_features(track_id, headers):
+def get_audio_features(headers, track_id):
     """Returns the audio features of a soundtrack
 
     Args:
-        track_id: the id of the soundtrack, needed to query the Spotify API
         headers: headers containing the API token
- 
+        track_id: the id of the soundtrack, needed to query the Spotify API
+        
     Returns:
         A dictionary with the features as its keys
     """

From ec3fb3f959e1060023962b02c5e2f54dd8f07d6c Mon Sep 17 00:00:00 2001
From: Chris Shyi <chris.shyi@mail.utoronto.ca>
Date: Sun, 20 May 2018 16:37:02 -0400
Subject: [PATCH 7/7] Fix get_audio_features() bug

Fixed bug where the parameters were passed in the wrong order for
get_audio_features().
---
 spotifyvis/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spotifyvis/views.py b/spotifyvis/views.py
index 53280e0..c55cee2 100644
--- a/spotifyvis/views.py
+++ b/spotifyvis/views.py
@@ -169,7 +169,7 @@ def parse_library(headers, tracks):
             num_samples += 1 
             get_track_info(track_dict['track'])
             #  get_genre(headers, track_dict['track']['album']['id'])
-            audio_features_dict = get_audio_features(track_dict['id'], headers)
+            audio_features_dict = get_audio_features(headers, track_dict['id'])
             for feature, feature_data in audio_features_dict.items():
                 update_audio_feature_stats(feature, feature_data, num_samples)
             for artist_dict in track_dict['track']['artists']: