Graphs and tables for your Spotify account.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

320 lines
11 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. # imports {{{ #
  2. import requests
  3. import math
  4. import pprint
  5. from .models import *
  6. from django.db.models import Count, Q, F
  7. from django.http import JsonResponse
  8. from django.core import serializers
  9. import json
  10. # }}} imports #
  11. # global vars {{{ #
  12. USER_TRACKS_LIMIT = 50
  13. ARTIST_LIMIT = 50
  14. FEATURES_LIMIT = 100
  15. # ARTIST_LIMIT = 25
  16. # FEATURES_LIMIT = 25
  17. console_logging = True
  18. # console_logging = False
  19. artists_genre_processed = 0
  20. features_processed = 0
  21. # }}} global vars #
  22. # parse_library {{{ #
  23. def parse_library(headers, num_tracks, user):
  24. """Scans user's library for num_tracks and store the information in a database
  25. :headers: For API call.
  26. :num_tracks: Number of tracks to get from user's library (0 scans the entire
  27. library).
  28. :user: a User object representing the user whose library we are parsing
  29. :returns: None
  30. """
  31. offset = 0
  32. payload = {'limit': str(USER_TRACKS_LIMIT)}
  33. artist_genre_queue = []
  34. features_queue = []
  35. # create this obj so loop runs at least once
  36. saved_tracks_response = [0]
  37. # scan until reach num_tracks or no tracks left if scanning entire library
  38. while (num_tracks == 0 or offset < num_tracks) and len(saved_tracks_response) > 0:
  39. payload['offset'] = str(offset)
  40. saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks',
  41. headers=headers,
  42. params=payload).json()['items']
  43. if console_logging:
  44. tracks_processed = 0
  45. for track_dict in saved_tracks_response:
  46. # add artists {{{ #
  47. # update artist info before track so that Track object can reference
  48. # Artist object
  49. track_artists = []
  50. for artist_dict in track_dict['track']['artists']:
  51. artist_obj, artist_created = Artist.objects.get_or_create(
  52. artist_id=artist_dict['id'],
  53. name=artist_dict['name'],)
  54. # only add/tally up artist genres if new
  55. if artist_created:
  56. artist_genre_queue.append(artist_obj)
  57. if len(artist_genre_queue) == ARTIST_LIMIT:
  58. add_artist_genres(headers, artist_genre_queue)
  59. artist_genre_queue = []
  60. track_artists.append(artist_obj)
  61. # }}} add artists #
  62. track_obj, track_created = save_track_obj(track_dict['track'],
  63. track_artists, user)
  64. # add audio features {{{ #
  65. # if a new track is not created, the associated audio feature does
  66. # not need to be created again
  67. if track_created:
  68. features_queue.append(track_obj)
  69. if len(features_queue) == FEATURES_LIMIT:
  70. get_audio_features(headers, features_queue)
  71. features_queue = []
  72. # }}} add audio features #
  73. if console_logging:
  74. tracks_processed += 1
  75. print("Added track #{}: {} - {}".format(
  76. offset + tracks_processed,
  77. track_obj.artists.first(),
  78. track_obj.name,
  79. ))
  80. # calculates num_songs with offset + songs retrieved
  81. offset += USER_TRACKS_LIMIT
  82. # clean-up {{{ #
  83. # update remaining artists without genres and songs without features if
  84. # there are any
  85. if len(artist_genre_queue) > 0:
  86. add_artist_genres(headers, artist_genre_queue)
  87. if len(features_queue) > 0:
  88. get_audio_features(headers, features_queue)
  89. # }}} clean-up #
  90. update_track_genres(user)
  91. # }}} parse_library #
  92. # update_track_genres {{{ #
  93. def update_track_genres(user):
  94. """Updates user's tracks with the most common genre associated with the
  95. songs' artist(s).
  96. :user: User object who's tracks are being updated.
  97. :returns: None
  98. """
  99. tracks_processed = 0
  100. user_tracks = Track.objects.filter(users__exact=user)
  101. for track in user_tracks:
  102. # just using this variable to save another call to db
  103. track_artists = track.artists.all()
  104. # set genres to first artist's genres then find intersection with others
  105. shared_genres = track_artists.first().genres.all()
  106. for artist in track_artists:
  107. shared_genres = shared_genres.intersection(artist.genres.all())
  108. shared_genres = shared_genres.order_by('-num_songs')
  109. undefined_genre_obj = Genre.objects.get(name="undefined")
  110. most_common_genre = shared_genres.first() if shared_genres.first() is \
  111. not undefined_genre_obj else shared_genres[1]
  112. track.genre = most_common_genre if most_common_genre is not None \
  113. else undefined_genre_obj
  114. track.save()
  115. tracks_processed += 1
  116. if console_logging:
  117. print("Added '{}' as genre for song #{} - '{}'".format(
  118. track.genre,
  119. tracks_processed,
  120. track.name,
  121. ))
  122. # }}} update_track_genres #
  123. # save_track_obj {{{ #
  124. def save_track_obj(track_dict, artists, user):
  125. """Make an entry in the database for this track if it doesn't exist already.
  126. :track_dict: dictionary from the API call containing track information.
  127. :artists: artists of the song, passed in as a list of Artist objects.
  128. :top_genre: top genre associated with this track (see get_top_genre).
  129. :user: User object for which this Track is to be associated with.
  130. :returns: (The created/retrieved Track object, created)
  131. """
  132. track_query = Track.objects.filter(track_id__exact=track_dict['id'])
  133. if len(track_query) != 0:
  134. return track_query[0], False
  135. else:
  136. new_track = Track.objects.create(
  137. track_id=track_dict['id'],
  138. year=track_dict['album']['release_date'].split('-')[0],
  139. popularity=int(track_dict['popularity']),
  140. runtime=int(float(track_dict['duration_ms']) / 1000),
  141. name=track_dict['name'],
  142. )
  143. # have to add artists and user after saving object since track needs to
  144. # have ID before filling in m2m field
  145. for artist in artists:
  146. new_track.artists.add(artist)
  147. new_track.users.add(user)
  148. new_track.save()
  149. return new_track, True
  150. # }}} save_track_obj #
  151. # get_audio_features {{{ #
  152. def get_audio_features(headers, track_objs):
  153. """Creates and saves a new AudioFeatures objects for the respective
  154. track_objs. track_objs should contain the API limit for a single call
  155. (FEATURES_LIMIT) for maximum efficiency.
  156. :headers: headers containing the API token
  157. :track_objs: Track objects to associate with the new AudioFeatures object
  158. :returns: None
  159. """
  160. track_ids = str.join(",", [track_obj.track_id for track_obj in track_objs])
  161. features_response = requests.get("https://api.spotify.com/v1/audio-features",
  162. headers=headers,
  163. params={'ids': track_ids}
  164. ).json()['audio_features']
  165. # pprint.pprint(features_response)
  166. useless_keys = [ "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature", ]
  167. for i in range(len(track_objs)):
  168. if features_response[i] is not None:
  169. # Data that we don't need
  170. cur_features_obj = AudioFeatures()
  171. cur_features_obj.track = track_objs[i]
  172. for key, val in features_response[i].items():
  173. if key not in useless_keys:
  174. setattr(cur_features_obj, key, val)
  175. cur_features_obj.save()
  176. if console_logging:
  177. global features_processed
  178. features_processed += 1
  179. print("Added features for song #{} - {}".format(
  180. features_processed, track_objs[i].name))
  181. # }}} get_audio_features #
  182. # process_artist_genre {{{ #
  183. def process_artist_genre(genre_name, artist_obj):
  184. """Increase count for correspoding Genre object to genre_name and add that
  185. Genre to artist_obj.
  186. :genre_name: Name of genre.
  187. :artist_obj: Artist object to add Genre object to.
  188. :returns: None
  189. """
  190. genre_obj, created = Genre.objects.get_or_create(name=genre_name,
  191. defaults={'num_songs':1})
  192. if not created:
  193. genre_obj.num_songs = F('num_songs') + 1
  194. genre_obj.save()
  195. artist_obj.genres.add(genre_obj)
  196. artist_obj.save()
  197. # }}} process_artist_genre #
  198. # add_artist_genres {{{ #
  199. def add_artist_genres(headers, artist_objs):
  200. """Adds genres to artist_objs and increases the count the respective Genre
  201. object. artist_objs should contain the API limit for a single call
  202. (ARTIST_LIMIT) for maximum efficiency.
  203. :headers: For making the API call.
  204. :artist_objs: List of Artist objects for which to add/tally up genres for.
  205. :returns: None
  206. """
  207. artist_ids = str.join(",", [artist_obj.artist_id for artist_obj in artist_objs])
  208. artists_response = requests.get('https://api.spotify.com/v1/artists/',
  209. headers=headers,
  210. params={'ids': artist_ids},
  211. ).json()['artists']
  212. for i in range(len(artist_objs)):
  213. if len(artists_response[i]['genres']) == 0:
  214. process_artist_genre("undefined", artist_objs[i])
  215. else:
  216. for genre in artists_response[i]['genres']:
  217. process_artist_genre(genre, artist_objs[i])
  218. if console_logging:
  219. global artists_genre_processed
  220. artists_genre_processed += 1
  221. print("Added genres for artist #{} - {}".format(
  222. artists_genre_processed, artist_objs[i].name))
  223. # }}} add_artist_genres #
  224. # get_artists_in_genre {{{ #
  225. def get_artists_in_genre(user, genre, max_songs):
  226. """Return count of artists in genre.
  227. :user: User object to return data for.
  228. :genre: genre to count artists for.
  229. :max_songs: max total songs to include to prevent overflow due to having
  230. multiple artists on each track.
  231. :returns: dict of artists in the genre along with the number of songs they
  232. have.
  233. """
  234. genre_obj = Genre.objects.get(name=genre)
  235. artist_counts = (Artist.objects.filter(track__users=user)
  236. .filter(genres=genre_obj)
  237. .annotate(num_songs=Count('track', distinct=True))
  238. .order_by('-num_songs')
  239. )
  240. processed_artist_counts = {}
  241. songs_added = 0
  242. for artist in artist_counts:
  243. # hacky way to not have total count overflow due to there being multiple
  244. # artists on a track
  245. if songs_added + artist.num_songs <= max_songs:
  246. processed_artist_counts[artist.name] = artist.num_songs
  247. songs_added += artist.num_songs
  248. # processed_artist_counts = [{'name': artist.name, 'num_songs': artist.num_songs} for artist in artist_counts]
  249. # processed_artist_counts = {artist.name: artist.num_songs for artist in artist_counts}
  250. # pprint.pprint(processed_artist_counts)
  251. return processed_artist_counts
  252. # }}} get_artists_in_genre #