From 7b153649bfb1825e62a23033a818b86470c4a5b8 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 20 May 2018 15:19:11 -0400 Subject: [PATCH] Implement online standard deviation algorithm Implemented Welford's method for calculating standard deviation as data points arrive. --- spotifyvis/views.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index b1154f9..8dd57e9 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -145,4 +145,22 @@ def get_features(track_id, token): if key not in useless_keys: features_dict[key] = val - return features_dict \ No newline at end of file + return features_dict + + +def update_std_dev(cur_mean, new_data_point, sample_size): + """Calculates the standard deviation for a sample without storing all data points + + Args: + cur_mean: the current mean for N = (sample_size - 1) + new_data_point: a new data point + sample_size: sample size including the new data point + + Returns: + (updated_mean, std_dev) + """ + # This is an implementation of Welford's method + # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/ + new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size + std_dev = (new_data_point - new_mean) * (new_data_point - cur_mean) + return new_mean, std_dev \ No newline at end of file