Graphs and tables for your Spotify account.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

293 lines
11 KiB

  1. import requests
  2. import math
  3. import pprint
  4. # parse_library {{{ #
  5. def parse_library(headers, tracks, library_stats):
  6. """Scans user's library for certain number of tracks to update library_stats with.
  7. :headers: For API call.
  8. :tracks: Number of tracks to get from user's library.
  9. :library_stats: Dictionary containing the data mined from user's library
  10. :returns: None
  11. """
  12. # TODO: implement importing entire library with 0 as tracks param
  13. # number of tracks to get with each call
  14. limit = 5
  15. # keeps track of point to get songs from
  16. offset = 0
  17. payload = {'limit': str(limit)}
  18. for _ in range(0, tracks, limit):
  19. payload['offset'] = str(offset)
  20. saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
  21. num_samples = offset
  22. for track_dict in saved_tracks_response['items']:
  23. # Track the number of samples for calculating
  24. # audio feature averages and standard deviations on the fly
  25. get_track_info(track_dict['track'], library_stats, num_samples)
  26. # get_genre(headers, track_dict['track']['album']['id'])
  27. audio_features_dict = get_audio_features(headers, track_dict['track']['id'])
  28. if len(audio_features_dict) != 0:
  29. num_samples += 1
  30. for feature, feature_data in audio_features_dict.items():
  31. update_audio_feature_stats(feature, feature_data, num_samples, library_stats)
  32. for artist_dict in track_dict['track']['artists']:
  33. increase_artist_count(headers, artist_dict['name'], artist_dict['id'], library_stats)
  34. # calculates num_songs with offset + songs retrieved
  35. library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
  36. offset += limit
  37. calculate_genres_from_artists(headers, library_stats)
  38. pprint.pprint(library_stats)
  39. # }}} parse_library #
  40. def get_audio_features(headers, track_id):
  41. """Returns the audio features of a soundtrack
  42. Args:
  43. headers: headers containing the API token
  44. track_id: the id of the soundtrack, needed to query the Spotify API
  45. Returns:
  46. A dictionary with the features as its keys, if audio feature data is missing for the track,
  47. an empty dictionary is returned.
  48. """
  49. response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
  50. if 'error' in response:
  51. return {}
  52. features_dict = {}
  53. # Data that we don't need
  54. useless_keys = [
  55. "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
  56. ]
  57. for key, val in response.items():
  58. if key not in useless_keys:
  59. features_dict[key] = val
  60. return features_dict
  61. def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
  62. """Calculates the standard deviation for a sample without storing all data points
  63. Args:
  64. cur_mean: the current mean for N = (sample_size - 1)
  65. cur_std_dev: the current standard deviation for N = (sample_size - 1)
  66. new_data_point: a new data point
  67. sample_size: sample size including the new data point
  68. Returns:
  69. (new_mean, new_std_dev)
  70. """
  71. # This is an implementation of Welford's method
  72. # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
  73. new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
  74. delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean)
  75. new_std_dev = math.sqrt(
  76. (math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / (
  77. sample_size - 1
  78. ))
  79. return new_mean, new_std_dev
  80. def update_audio_feature_stats(feature, new_data_point, sample_size, library_stats):
  81. """Updates the audio feature statistics in library_stats
  82. Args:
  83. feature: the audio feature to be updated (string)
  84. new_data_point: new data to update the stats with
  85. sample_size: sample size including the new data point
  86. library_stats Dictionary containing the data mined from user's Spotify library
  87. Returns:
  88. None
  89. """
  90. # first time the feature is considered
  91. if sample_size < 2:
  92. library_stats['audio_features'][feature] = {
  93. "average": new_data_point,
  94. "std_dev": 0,
  95. }
  96. else:
  97. cur_mean = library_stats['audio_features'][feature]['average']
  98. cur_std_dev = library_stats['audio_features'][feature]['std_dev']
  99. new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size)
  100. library_stats['audio_features'][feature] = {
  101. "average": new_mean,
  102. "std_dev": new_std_dev
  103. }
  104. # increase_nested_key {{{ #
  105. def increase_nested_key(top_key, nested_key, library_stats, amount=1):
  106. """Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes
  107. appropriate action.
  108. :top_key: First key of library_stats.
  109. :nested_key: Key in top_key's dict for which we want to increase value of.
  110. :library_stats: Dictionary containing the data mined from user's Spotify library
  111. :returns: None
  112. """
  113. if nested_key not in library_stats[top_key]:
  114. library_stats[top_key][nested_key] = amount
  115. else:
  116. library_stats[top_key][nested_key] += amount
  117. # }}} increase_nested_key #
  118. # increase_artist_count {{{ #
  119. def increase_artist_count(headers, artist_name, artist_id, library_stats):
  120. """Increases count for artist in library_stats and stores the artist_id.
  121. :headers: For making the API call.
  122. :artist_name: Artist to increase count for.
  123. :artist_id: The Spotify ID for the artist.
  124. :library_stats: Dictionary containing the data mined from user's Spotify library
  125. :returns: None
  126. """
  127. if artist_name not in library_stats['artists']:
  128. library_stats['artists'][artist_name] = {}
  129. library_stats['artists'][artist_name]['count'] = 1
  130. library_stats['artists'][artist_name]['id'] = artist_id
  131. else:
  132. library_stats['artists'][artist_name]['count'] += 1
  133. # }}} increase_artist_count #
  134. def update_popularity_stats(new_data_point, library_stats, sample_size):
  135. """Updates the popularity statistics in library_stats
  136. Args:
  137. new_data_point: new data to update the popularity stats with
  138. library_stats: Dictionary containing data mined from user's Spotify library
  139. sample_size: The sample size including the new data
  140. Returns:
  141. None
  142. """
  143. if sample_size < 2:
  144. library_stats['popularity'] = {
  145. "average": new_data_point,
  146. "std_dev": 0,
  147. }
  148. else :
  149. cur_mean_popularity = library_stats['popularity']['average']
  150. cur_popularity_stdev = library_stats['popularity']['std_dev']
  151. new_mean, new_std_dev = update_std_dev(
  152. cur_mean_popularity, cur_popularity_stdev, new_data_point, sample_size)
  153. library_stats['popularity'] = {
  154. "average": new_mean,
  155. "std_dev": new_std_dev,
  156. }
  157. # get_track_info {{{ #
  158. def get_track_info(track_dict, library_stats, sample_size):
  159. """Get all the info from the track_dict directly returned by the API call in parse_library.
  160. :track_dict: Dict returned from the API call containing the track info.
  161. :library_stats: Dictionary containing the data mined from user's Spotify library
  162. :sample_size: The sample size so far including this track
  163. :returns: None
  164. """
  165. # popularity
  166. update_popularity_stats(track_dict['popularity'], library_stats, sample_size)
  167. # year
  168. year_released = track_dict['album']['release_date'].split('-')[0]
  169. increase_nested_key('year_released', year_released, library_stats)
  170. # artist
  171. # artist_names = [artist['name'] for artist in track_dict['artists']]
  172. # for artist_name in artist_names:
  173. # increase_nested_key('artists', artist_name)
  174. # runtime
  175. library_stats['total_runtime'] += float(track_dict['duration_ms']) / (1000 * 60)
  176. # }}} get_track_info #
  177. # calculate_genres_from_artists {{{ #
  178. def calculate_genres_from_artists(headers, library_stats):
  179. """Tallies up genre counts based on artists in library_stats.
  180. :headers: For making the API call.
  181. :library_stats: Dictionary containing the data mined from user's Spotify library
  182. :returns: None
  183. """
  184. for artist_entry in library_stats['artists'].values():
  185. artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_entry['id'], headers=headers).json()
  186. # increase each genre count by artist count
  187. for genre in artist_response['genres']:
  188. increase_nested_key('genres', genre, library_stats, artist_entry['count'])
  189. # }}} calculate_genres_from_artists #
  190. def process_library_stats(library_stats):
  191. """Processes library_stats into format more suitable for D3 consumption
  192. Args:
  193. library_stats: Dictionary containing the data mined from user's Spotify library
  194. Returns:
  195. A new dictionary that contains the data in library_stats, in a format more suitable for D3 consumption
  196. """
  197. processed_library_stats = {}
  198. for key in library_stats:
  199. if key == 'artists' or key == 'genres' or key == 'year_released':
  200. for inner_key in library_stats[key]:
  201. if key not in processed_library_stats:
  202. processed_library_stats[key] = []
  203. processed_item_key = '' # identifier key for each dict in the list
  204. count = 0
  205. if 'artist' in key:
  206. processed_item_key = 'name'
  207. count = library_stats[key][inner_key]['count']
  208. elif 'genre' in key:
  209. processed_item_key = 'genre'
  210. count = library_stats[key][inner_key]
  211. else:
  212. processed_item_key = 'year'
  213. count = library_stats[key][inner_key]
  214. processed_library_stats[key].append({
  215. processed_item_key: inner_key,
  216. "count": count
  217. })
  218. elif key == 'audio_features':
  219. for audio_feature in library_stats[key]:
  220. if 'audio_features' not in processed_library_stats:
  221. processed_library_stats['audio_features'] = []
  222. processed_library_stats['audio_features'].append({
  223. 'feature': audio_feature,
  224. 'average': library_stats[key][audio_feature]['average'],
  225. 'std_dev': library_stats[key][audio_feature]['std_dev']
  226. })
  227. # TODO: Not sure about final form for 'popularity'
  228. # elif key == 'popularity':
  229. # processed_library_stats[key] = []
  230. # processed_library_stats[key].append({
  231. # })
  232. elif key == 'num_songs' or key == 'total_runtime' or key == 'popularity':
  233. processed_library_stats[key] = library_stats[key]
  234. return processed_library_stats