Graphs and tables for your Spotify account.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

395 lines
14 KiB

  1. # imports {{{ #
  2. import requests
  3. import math
  4. import pprint
  5. from .models import Artist, User, Track, AudioFeatures
  6. from django.db.models import Count
  7. from django.http import JsonResponse
  8. from django.core import serializers
  9. import json
  10. # }}} imports #
  11. # parse_library {{{ #
  12. def parse_library(headers, tracks, user):
  13. """Scans user's library for certain number of tracks to update library_stats with.
  14. :headers: For API call.
  15. :tracks: Number of tracks to get from user's library.
  16. :user: a User object representing the user whose library we are parsing
  17. :returns: None
  18. """
  19. # TODO: implement importing entire library with 0 as tracks param
  20. # number of tracks to get with each call
  21. limit = 5
  22. # keeps track of point to get songs from
  23. offset = 0
  24. payload = {'limit': str(limit)}
  25. # iterate until hit requested num of tracks
  26. for _ in range(0, tracks, limit):
  27. payload['offset'] = str(offset)
  28. # get current set of tracks
  29. saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
  30. # TODO: refactor the for loop body into helper function
  31. # iterate through each track
  32. for track_dict in saved_tracks_response['items']:
  33. # update artist info before track so that Track object can reference
  34. # Artist object
  35. track_artists = []
  36. for artist_dict in track_dict['track']['artists']:
  37. artist_obj, artist_created = Artist.objects.get_or_create(
  38. artist_id=artist_dict['id'],
  39. name=artist_dict['name'],
  40. )
  41. # update_artist_genre(headers, artist_obj)
  42. # get_or_create() returns a tuple (obj, created)
  43. track_artists.append(artist_obj)
  44. top_genre = get_top_genre(headers,
  45. track_dict['track']['artists'][0]['id'])
  46. track_obj, track_created = save_track_obj(track_dict['track'],
  47. track_artists, top_genre, user)
  48. # if a new track is not created, the associated audio feature does not need to be created again
  49. if track_created:
  50. save_audio_features(headers, track_dict['track']['id'], track_obj)
  51. """
  52. TODO: Put this logic in another function
  53. # Audio analysis could be empty if not present in Spotify database
  54. if len(audio_features_dict) != 0:
  55. # Track the number of audio analyses for calculating
  56. # audio feature averages and standard deviations on the fly
  57. feature_data_points += 1
  58. for feature, feature_data in audio_features_dict.items():
  59. update_audio_feature_stats(feature, feature_data,
  60. feature_data_points, library_stats)
  61. """
  62. # calculates num_songs with offset + songs retrieved
  63. offset += limit
  64. # pprint.pprint(library_stats)
  65. # }}} parse_library #
  66. # save_track_obj {{{ #
  67. def save_track_obj(track_dict, artists, top_genre, user):
  68. """Make an entry in the database for this track if it doesn't exist already.
  69. :track_dict: dictionary from the API call containing track information.
  70. :artists: artists of the song, passed in as a list of Artist objects.
  71. :top_genre: top genre associated with this track (see get_top_genre).
  72. :user: User object for which this Track is to be associated with.
  73. :returns: (The created/retrieved Track object, created)
  74. """
  75. track_query = Track.objects.filter(track_id__exact=track_dict['id'])
  76. if len(track_query) != 0:
  77. return track_query[0], False
  78. else:
  79. new_track = Track.objects.create(
  80. track_id=track_dict['id'],
  81. year=track_dict['album']['release_date'].split('-')[0],
  82. popularity=int(track_dict['popularity']),
  83. runtime=int(float(track_dict['duration_ms']) / 1000),
  84. name=track_dict['name'],
  85. genre=top_genre,
  86. )
  87. # have to add artists and user after saving object since track needs to
  88. # have ID before filling in m2m field
  89. for artist in artists:
  90. new_track.artists.add(artist)
  91. new_track.users.add(user)
  92. new_track.save()
  93. return new_track, True
  94. # }}} save_track_obj #
  95. # save_audio_features {{{ #
  96. def save_audio_features(headers, track_id, track):
  97. """Creates and saves a new AudioFeatures object
  98. Args:
  99. headers: headers containing the API token
  100. track_id: the id of the soundtrack, needed to query the Spotify API
  101. track: Track object to associate with the new AudioFeatures object
  102. """
  103. response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
  104. if 'error' in response:
  105. return {}
  106. # Data that we don't need
  107. useless_keys = [
  108. "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
  109. ]
  110. audio_features_entry = AudioFeatures()
  111. audio_features_entry.track = track
  112. for key, val in response.items():
  113. if key not in useless_keys:
  114. setattr(audio_features_entry, key, val)
  115. audio_features_entry.save()
  116. # }}} save_audio_features #
  117. # update_std_dev {{{ #
  118. def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
  119. """Calculates the standard deviation for a sample without storing all data points
  120. Args:
  121. cur_mean: the current mean for N = (sample_size - 1)
  122. cur_std_dev: the current standard deviation for N = (sample_size - 1)
  123. new_data_point: a new data point
  124. sample_size: sample size including the new data point
  125. Returns:
  126. (new_mean, new_std_dev)
  127. """
  128. # This is an implementation of Welford's method
  129. # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
  130. new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
  131. delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean)
  132. new_std_dev = math.sqrt(
  133. (math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / (
  134. sample_size - 1
  135. ))
  136. return new_mean, new_std_dev
  137. # }}} update_std_dev #
  138. # update_audio_feature_stats {{{ #
  139. def update_audio_feature_stats(feature, new_data_point, sample_size, library_stats):
  140. """Updates the audio feature statistics in library_stats
  141. Args:
  142. feature: the audio feature to be updated (string)
  143. new_data_point: new data to update the stats with
  144. sample_size: sample size including the new data point
  145. library_stats Dictionary containing the data mined from user's Spotify library
  146. Returns:
  147. None
  148. """
  149. # first time the feature is considered
  150. if sample_size < 2:
  151. library_stats['audio_features'][feature] = {
  152. "average": new_data_point,
  153. "std_dev": 0,
  154. }
  155. else:
  156. cur_mean = library_stats['audio_features'][feature]['average']
  157. cur_std_dev = library_stats['audio_features'][feature]['std_dev']
  158. new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size)
  159. library_stats['audio_features'][feature] = {
  160. "average": new_mean,
  161. "std_dev": new_std_dev
  162. }
  163. # }}} update_audio_feature_stats #
  164. # increase_nested_key {{{ #
  165. def increase_nested_key(top_key, nested_key, library_stats, amount=1):
  166. """Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes
  167. appropriate action.
  168. :top_key: First key of library_stats.
  169. :nested_key: Key in top_key's dict for which we want to increase value of.
  170. :library_stats: Dictionary containing the data mined from user's Spotify library
  171. :returns: None
  172. """
  173. if nested_key not in library_stats[top_key]:
  174. library_stats[top_key][nested_key] = amount
  175. else:
  176. library_stats[top_key][nested_key] += amount
  177. # }}} increase_nested_key #
  178. # increase_artist_count {{{ #
  179. def increase_artist_count(headers, artist_name, artist_id, library_stats):
  180. """Increases count for artist in library_stats and stores the artist_id.
  181. :headers: For making the API call.
  182. :artist_name: Artist to increase count for.
  183. :artist_id: The Spotify ID for the artist.
  184. :library_stats: Dictionary containing the data mined from user's Spotify library
  185. :returns: None
  186. """
  187. if artist_name not in library_stats['artists']:
  188. library_stats['artists'][artist_name] = {}
  189. library_stats['artists'][artist_name]['count'] = 1
  190. library_stats['artists'][artist_name]['id'] = artist_id
  191. else:
  192. library_stats['artists'][artist_name]['count'] += 1
  193. # }}} increase_artist_count #
  194. # update_popularity_stats {{{ #
  195. def update_popularity_stats(new_data_point, library_stats, sample_size):
  196. """Updates the popularity statistics in library_stats
  197. Args:
  198. new_data_point: new data to update the popularity stats with
  199. library_stats: Dictionary containing data mined from user's Spotify library
  200. sample_size: The sample size including the new data
  201. Returns:
  202. None
  203. """
  204. if sample_size < 2:
  205. library_stats['popularity'] = {
  206. "average": new_data_point,
  207. "std_dev": 0,
  208. }
  209. else :
  210. cur_mean_popularity = library_stats['popularity']['average']
  211. cur_popularity_stdev = library_stats['popularity']['std_dev']
  212. new_mean, new_std_dev = update_std_dev(
  213. cur_mean_popularity, cur_popularity_stdev, new_data_point, sample_size)
  214. library_stats['popularity'] = {
  215. "average": new_mean,
  216. "std_dev": new_std_dev,
  217. }
  218. # }}} update_popularity_stats #
  219. # get_track_info {{{ #
  220. def get_track_info(track_dict, library_stats, sample_size):
  221. """Get all the info from the track_dict directly returned by the API call in parse_library.
  222. :track_dict: Dict returned from the API call containing the track info.
  223. :library_stats: Dictionary containing the data mined from user's Spotify library
  224. :sample_size: The sample size so far including this track
  225. :returns: None
  226. """
  227. # popularity
  228. update_popularity_stats(track_dict['popularity'], library_stats, sample_size)
  229. # year
  230. year_released = track_dict['album']['release_date'].split('-')[0]
  231. increase_nested_key('year_released', year_released, library_stats)
  232. # runtime
  233. library_stats['total_runtime'] += float(track_dict['duration_ms']) / (1000 * 60)
  234. # }}} get_track_info #
  235. # update_artist_genre {{{ #
  236. def update_artist_genre(headers, artist_obj):
  237. """Updates the top genre for an artist by querying the Spotify API
  238. :headers: For making the API call.
  239. :artist_obj: the Artist object whose genre field will be updated
  240. :returns: None
  241. """
  242. artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_obj.artist_id, headers=headers).json()
  243. # update genre for artist in database with top genre
  244. if len(artist_response['genres']) > 0:
  245. artist_obj.genre = artist_response['genres'][0]
  246. artist_obj.save()
  247. # }}} update_artist_genre #
  248. # get_top_genre {{{ #
  249. def get_top_genre(headers, top_artist_id):
  250. """Updates the top genre for a track by querying the Spotify API
  251. :headers: For making the API call.
  252. :top_artist: The first artist's (listed in the track) Spotify ID.
  253. :returns: The first genre listed for the top_artist.
  254. """
  255. artist_response = requests.get('https://api.spotify.com/v1/artists/' +
  256. top_artist_id, headers=headers).json()
  257. if len(artist_response['genres']) > 0:
  258. return artist_response['genres'][0]
  259. else:
  260. return "undefined"
  261. # }}} #
  262. # process_library_stats {{{ #
  263. def process_library_stats(library_stats):
  264. """Processes library_stats into format more suitable for D3 consumption
  265. Args:
  266. library_stats: Dictionary containing the data mined from user's Spotify library
  267. Returns:
  268. A new dictionary that contains the data in library_stats, in a format more suitable for D3 consumption
  269. """
  270. processed_library_stats = {}
  271. for key in library_stats:
  272. if key == 'artists' or key == 'genres' or key == 'year_released':
  273. for inner_key in library_stats[key]:
  274. if key not in processed_library_stats:
  275. processed_library_stats[key] = []
  276. processed_item_key = '' # identifier key for each dict in the list
  277. count = 0
  278. if 'artist' in key:
  279. processed_item_key = 'name'
  280. count = library_stats[key][inner_key]['count']
  281. elif 'genre' in key:
  282. processed_item_key = 'genre'
  283. count = library_stats[key][inner_key]
  284. else:
  285. processed_item_key = 'year'
  286. count = library_stats[key][inner_key]
  287. processed_library_stats[key].append({
  288. processed_item_key: inner_key,
  289. "count": count
  290. })
  291. elif key == 'audio_features':
  292. for audio_feature in library_stats[key]:
  293. if 'audio_features' not in processed_library_stats:
  294. processed_library_stats['audio_features'] = []
  295. processed_library_stats['audio_features'].append({
  296. 'feature': audio_feature,
  297. 'average': library_stats[key][audio_feature]['average'],
  298. 'std_dev': library_stats[key][audio_feature]['std_dev']
  299. })
  300. # TODO: Not sure about final form for 'popularity'
  301. # elif key == 'popularity':
  302. # processed_library_stats[key] = []
  303. # processed_library_stats[key].append({
  304. # })
  305. elif key == 'num_songs' or key == 'total_runtime' or key == 'popularity':
  306. processed_library_stats[key] = library_stats[key]
  307. return processed_library_stats
  308. # }}} process_library_stats #