Graphs and tables for your Spotify account.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

410 lines
15 KiB

  1. # imports {{{ #
  2. import requests
  3. import math
  4. import pprint
  5. from .models import Artist, User, Track, AudioFeatures
  6. from django.db.models import Count
  7. from django.http import JsonResponse
  8. from django.core import serializers
  9. # }}} imports #
  10. # parse_library {{{ #
  11. def parse_library(headers, tracks, library_stats, user):
  12. """Scans user's library for certain number of tracks to update library_stats with.
  13. :headers: For API call.
  14. :tracks: Number of tracks to get from user's library.
  15. :library_stats: Dictionary containing the data mined from user's library
  16. :user: a User object representing the user whose library we are parsing
  17. :returns: None
  18. """
  19. # TODO: implement importing entire library with 0 as tracks param
  20. # number of tracks to get with each call
  21. limit = 5
  22. # keeps track of point to get songs from
  23. offset = 0
  24. payload = {'limit': str(limit)}
  25. # use two separate variables to track, because the average popularity also requires num_samples
  26. num_samples = 0 # number of actual track samples
  27. feature_data_points = 0 # number of feature data analyses (some tracks do not have analyses available)
  28. # iterate until hit requested num of tracks
  29. for _ in range(0, tracks, limit):
  30. payload['offset'] = str(offset)
  31. # get current set of tracks
  32. saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
  33. # TODO: refactor the for loop body into helper function
  34. # iterate through each track
  35. for track_dict in saved_tracks_response['items']:
  36. num_samples += 1
  37. # update artist info before track so that Track object can reference
  38. # Artist object
  39. track_artists = []
  40. for artist_dict in track_dict['track']['artists']:
  41. increase_artist_count(headers, artist_dict['name'],
  42. artist_dict['id'], library_stats)
  43. track_artists.append(Artist.objects.get_or_create(
  44. artist_id=artist_dict['id'],
  45. name=artist_dict['name'],
  46. )[0])
  47. track_obj = save_track_obj(track_dict['track'], track_artists, user)
  48. get_track_info(track_dict['track'], library_stats, num_samples)
  49. audio_features_dict = get_audio_features(headers,
  50. track_dict['track']['id'], track_obj)
  51. if len(audio_features_dict) != 0:
  52. # Track the number of audio analyses for calculating
  53. # audio feature averages and standard deviations on the fly
  54. feature_data_points += 1
  55. for feature, feature_data in audio_features_dict.items():
  56. update_audio_feature_stats(feature, feature_data,
  57. feature_data_points, library_stats)
  58. # calculates num_songs with offset + songs retrieved
  59. library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
  60. offset += limit
  61. calculate_genres_from_artists(headers, library_stats)
  62. # pprint.pprint(library_stats)
  63. # }}} parse_library #
  64. # save_track_obj {{{ #
  65. def save_track_obj(track_dict, artists, user):
  66. """Make an entry in the database for this track if it doesn't exist already.
  67. :track_dict: dictionary from the API call containing track information.
  68. :artists: artists of the song, passed in as a list of Artist objects.
  69. :user: User object for which this Track is to be associated with.
  70. :returns: The created/retrieved Track object.
  71. """
  72. track_obj_query = Track.objects.filter(track_id__exact=track_dict['id'])
  73. if len(track_obj_query) == 0:
  74. new_track = Track.objects.create(
  75. track_id=track_dict['id'],
  76. year=track_dict['album']['release_date'].split('-')[0],
  77. popularity=int(track_dict['popularity']),
  78. runtime=int(float(track_dict['duration_ms']) / 1000),
  79. name=track_dict['name'],
  80. )
  81. # print("pop/run: ", new_track.popularity, new_track.runtime)
  82. # have to add artists and user after saving object since track needs to
  83. # have ID before filling in m2m field
  84. for artist in artists:
  85. new_track.artists.add(artist)
  86. new_track.users.add(user)
  87. new_track.save()
  88. return new_track
  89. elif len(track_obj_query) == 1:
  90. return track_obj_query[0]
  91. # }}} save_track_obj #
  92. # get_audio_features {{{ #
  93. def get_audio_features(headers, track_id, track):
  94. """Returns the audio features of a soundtrack
  95. Args:
  96. headers: headers containing the API token
  97. track_id: the id of the soundtrack, needed to query the Spotify API
  98. track: Track object to associate with the AudioFeatures object
  99. Returns:
  100. A dictionary with the features as its keys, if audio feature data is missing for the track,
  101. an empty dictionary is returned.
  102. """
  103. response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
  104. if 'error' in response:
  105. return {}
  106. features_dict = {}
  107. # Data that we don't need
  108. useless_keys = [
  109. "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
  110. ]
  111. audio_features_entry = AudioFeatures()
  112. audio_features_entry.track = track
  113. for key, val in response.items():
  114. if key not in useless_keys:
  115. features_dict[key] = val
  116. setattr(audio_features_entry, key, val)
  117. audio_features_entry.save()
  118. return features_dict
  119. # }}} get_audio_features #
  120. # update_std_dev {{{ #
  121. def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
  122. """Calculates the standard deviation for a sample without storing all data points
  123. Args:
  124. cur_mean: the current mean for N = (sample_size - 1)
  125. cur_std_dev: the current standard deviation for N = (sample_size - 1)
  126. new_data_point: a new data point
  127. sample_size: sample size including the new data point
  128. Returns:
  129. (new_mean, new_std_dev)
  130. """
  131. # This is an implementation of Welford's method
  132. # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
  133. new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
  134. delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean)
  135. new_std_dev = math.sqrt(
  136. (math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / (
  137. sample_size - 1
  138. ))
  139. return new_mean, new_std_dev
  140. # }}} update_std_dev #
  141. # update_audio_feature_stats {{{ #
  142. def update_audio_feature_stats(feature, new_data_point, sample_size, library_stats):
  143. """Updates the audio feature statistics in library_stats
  144. Args:
  145. feature: the audio feature to be updated (string)
  146. new_data_point: new data to update the stats with
  147. sample_size: sample size including the new data point
  148. library_stats Dictionary containing the data mined from user's Spotify library
  149. Returns:
  150. None
  151. """
  152. # first time the feature is considered
  153. if sample_size < 2:
  154. library_stats['audio_features'][feature] = {
  155. "average": new_data_point,
  156. "std_dev": 0,
  157. }
  158. else:
  159. cur_mean = library_stats['audio_features'][feature]['average']
  160. cur_std_dev = library_stats['audio_features'][feature]['std_dev']
  161. new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size)
  162. library_stats['audio_features'][feature] = {
  163. "average": new_mean,
  164. "std_dev": new_std_dev
  165. }
  166. # }}} update_audio_feature_stats #
  167. # increase_nested_key {{{ #
  168. def increase_nested_key(top_key, nested_key, library_stats, amount=1):
  169. """Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes
  170. appropriate action.
  171. :top_key: First key of library_stats.
  172. :nested_key: Key in top_key's dict for which we want to increase value of.
  173. :library_stats: Dictionary containing the data mined from user's Spotify library
  174. :returns: None
  175. """
  176. if nested_key not in library_stats[top_key]:
  177. library_stats[top_key][nested_key] = amount
  178. else:
  179. library_stats[top_key][nested_key] += amount
  180. # }}} increase_nested_key #
  181. # increase_artist_count {{{ #
  182. def increase_artist_count(headers, artist_name, artist_id, library_stats):
  183. """Increases count for artist in library_stats and stores the artist_id.
  184. :headers: For making the API call.
  185. :artist_name: Artist to increase count for.
  186. :artist_id: The Spotify ID for the artist.
  187. :library_stats: Dictionary containing the data mined from user's Spotify library
  188. :returns: None
  189. """
  190. if artist_name not in library_stats['artists']:
  191. library_stats['artists'][artist_name] = {}
  192. library_stats['artists'][artist_name]['count'] = 1
  193. library_stats['artists'][artist_name]['id'] = artist_id
  194. else:
  195. library_stats['artists'][artist_name]['count'] += 1
  196. # }}} increase_artist_count #
  197. # update_popularity_stats {{{ #
  198. def update_popularity_stats(new_data_point, library_stats, sample_size):
  199. """Updates the popularity statistics in library_stats
  200. Args:
  201. new_data_point: new data to update the popularity stats with
  202. library_stats: Dictionary containing data mined from user's Spotify library
  203. sample_size: The sample size including the new data
  204. Returns:
  205. None
  206. """
  207. if sample_size < 2:
  208. library_stats['popularity'] = {
  209. "average": new_data_point,
  210. "std_dev": 0,
  211. }
  212. else :
  213. cur_mean_popularity = library_stats['popularity']['average']
  214. cur_popularity_stdev = library_stats['popularity']['std_dev']
  215. new_mean, new_std_dev = update_std_dev(
  216. cur_mean_popularity, cur_popularity_stdev, new_data_point, sample_size)
  217. library_stats['popularity'] = {
  218. "average": new_mean,
  219. "std_dev": new_std_dev,
  220. }
  221. # }}} update_popularity_stats #
  222. # get_track_info {{{ #
  223. def get_track_info(track_dict, library_stats, sample_size):
  224. """Get all the info from the track_dict directly returned by the API call in parse_library.
  225. :track_dict: Dict returned from the API call containing the track info.
  226. :library_stats: Dictionary containing the data mined from user's Spotify library
  227. :sample_size: The sample size so far including this track
  228. :returns: None
  229. """
  230. # popularity
  231. update_popularity_stats(track_dict['popularity'], library_stats, sample_size)
  232. # year
  233. year_released = track_dict['album']['release_date'].split('-')[0]
  234. increase_nested_key('year_released', year_released, library_stats)
  235. # runtime
  236. library_stats['total_runtime'] += float(track_dict['duration_ms']) / (1000 * 60)
  237. # }}} get_track_info #
  238. # calculate_genres_from_artists {{{ #
  239. def calculate_genres_from_artists(headers, library_stats):
  240. """Tallies up genre counts based on artists in library_stats.
  241. :headers: For making the API call.
  242. :library_stats: Dictionary containing the data mined from user's Spotify library
  243. :returns: None
  244. """
  245. for artist_entry in library_stats['artists'].values():
  246. artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_entry['id'], headers=headers).json()
  247. # increase each genre count by artist count
  248. for genre in artist_response['genres']:
  249. increase_nested_key('genres', genre, library_stats, artist_entry['count'])
  250. # update genre for artist in database with top genre
  251. Artist.objects.filter(artist_id=artist_entry['id']).update(genre=artist_response['genres'][0])
  252. # }}} calculate_genres_from_artists #
  253. # process_library_stats {{{ #
  254. def process_library_stats(library_stats):
  255. """Processes library_stats into format more suitable for D3 consumption
  256. Args:
  257. library_stats: Dictionary containing the data mined from user's Spotify library
  258. Returns:
  259. A new dictionary that contains the data in library_stats, in a format more suitable for D3 consumption
  260. """
  261. processed_library_stats = {}
  262. for key in library_stats:
  263. if key == 'artists' or key == 'genres' or key == 'year_released':
  264. for inner_key in library_stats[key]:
  265. if key not in processed_library_stats:
  266. processed_library_stats[key] = []
  267. processed_item_key = '' # identifier key for each dict in the list
  268. count = 0
  269. if 'artist' in key:
  270. processed_item_key = 'name'
  271. count = library_stats[key][inner_key]['count']
  272. elif 'genre' in key:
  273. processed_item_key = 'genre'
  274. count = library_stats[key][inner_key]
  275. else:
  276. processed_item_key = 'year'
  277. count = library_stats[key][inner_key]
  278. processed_library_stats[key].append({
  279. processed_item_key: inner_key,
  280. "count": count
  281. })
  282. elif key == 'audio_features':
  283. for audio_feature in library_stats[key]:
  284. if 'audio_features' not in processed_library_stats:
  285. processed_library_stats['audio_features'] = []
  286. processed_library_stats['audio_features'].append({
  287. 'feature': audio_feature,
  288. 'average': library_stats[key][audio_feature]['average'],
  289. 'std_dev': library_stats[key][audio_feature]['std_dev']
  290. })
  291. # TODO: Not sure about final form for 'popularity'
  292. # elif key == 'popularity':
  293. # processed_library_stats[key] = []
  294. # processed_library_stats[key].append({
  295. # })
  296. elif key == 'num_songs' or key == 'total_runtime' or key == 'popularity':
  297. processed_library_stats[key] = library_stats[key]
  298. return processed_library_stats
  299. # }}} process_library_stats #
  300. def get_genre_data(user):
  301. """Return genre data needed to create the graph user.
  302. :user: User object for which to return the data for.
  303. :returns: List of dicts containing counts for each genre.
  304. """
  305. pass
  306. # user_tracks = Track.objects.filter(users__exact=user)
  307. # for track in user_tracks:
  308. # print(track.name)
  309. def get_artist_data(user_id):
  310. """Return artist data needed to create the graph for user.
  311. :user_id: user ID for which to return the data for.
  312. :returns: List of dicts containing counts for each artist.
  313. """
  314. # TODO: not actual artists for user
  315. # PICK UP: figure out how to pass data to D3/frontend
  316. print(user_id)
  317. # user = User.objects.get(user_id=user_id)
  318. artist_counts = Artist.objects.annotate(num_songs=Count('track'))
  319. processed_artist_data = [{'name': artist.name, 'num_songs': artist.num_songs} for artist in artist_counts]
  320. # for artist in artist_counts:
  321. # print(artist.name, artist.num_songs)
  322. return JsonResponse(processed_artist_data, safe=False)
  323. # return serializers.serialize('json', processed_artist_data)
  324. # return processed_artist_data