Graphs and tables for your Spotify account.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

338 lines
11 KiB

  1. from django.shortcuts import render, redirect
  2. from django.http import HttpResponse, HttpResponseBadRequest
  3. import math
  4. import random
  5. import requests
  6. import os
  7. import urllib
  8. import json
  9. import pprint
  10. from datetime import datetime
  11. TIME_FORMAT = '%Y-%m-%d-%H-%M-%S'
  12. library_stats = {"audio_features":{}, "genres":{}, "year_released":{}, "artists":{}, "num_songs":0, "popularity":[], "total_runtime":0}
  13. # generate_random_string {{{ #
  14. def generate_random_string(length):
  15. """Generates a random string of a certain length
  16. Args:
  17. length: the desired length of the randomized string
  18. Returns:
  19. A random string
  20. """
  21. rand_str = ""
  22. possible_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  23. for _ in range(length):
  24. rand_str += possible_chars[random.randint(0, len(possible_chars) - 1)]
  25. return rand_str
  26. # }}} generate_random_string #
  27. # token_expired {{{ #
  28. def token_expired(token_obtained_at, valid_for):
  29. """Returns True if token expired, False if otherwise
  30. Args:
  31. token_obtained_at: datetime object representing the date and time when the token was obtained
  32. valid_for: the time duration for which the token is valid, in seconds
  33. """
  34. time_elapsed = (datetime.today() - token_obtained_at).total_seconds()
  35. return time_elapsed >= valid_for
  36. # }}} token_expired #
  37. # index {{{ #
  38. # Create your views here.
  39. def index(request):
  40. return render(request, 'spotifyvis/index.html')
  41. # }}} index #
  42. # login {{{ #
  43. def login(request):
  44. # use a randomly generated state string to prevent cross-site request forgery attacks
  45. state_str = generate_random_string(16)
  46. request.session['state_string'] = state_str
  47. payload = {
  48. 'client_id': os.environ['SPOTIFY_CLIENT_ID'],
  49. 'response_type': 'code',
  50. 'redirect_uri': 'http://localhost:8000/callback',
  51. 'state': state_str,
  52. 'scope': 'user-library-read',
  53. 'show_dialog': False
  54. }
  55. params = urllib.parse.urlencode(payload) # turn the payload dict into a query string
  56. authorize_url = "https://accounts.spotify.com/authorize/?{}".format(params)
  57. return redirect(authorize_url)
  58. # }}} login #
  59. # callback {{{ #
  60. def callback(request):
  61. # Attempt to retrieve the authorization code from the query string
  62. try:
  63. code = request.GET['code']
  64. except KeyError:
  65. return HttpResponseBadRequest("<h1>Problem with login</h1>")
  66. payload = {
  67. 'grant_type': 'authorization_code',
  68. 'code': code,
  69. 'redirect_uri': 'http://localhost:8000/callback',
  70. 'client_id': os.environ['SPOTIFY_CLIENT_ID'],
  71. 'client_secret': os.environ['SPOTIFY_CLIENT_SECRET'],
  72. }
  73. response = requests.post('https://accounts.spotify.com/api/token', data = payload).json()
  74. # despite its name, datetime.today() returns a datetime object, not a date object
  75. # use datetime.strptime() to get a datetime object from a string
  76. request.session['token_obtained_at'] = datetime.strftime(datetime.today(), TIME_FORMAT)
  77. request.session['access_token'] = response['access_token']
  78. request.session['refresh_token'] = response['refresh_token']
  79. request.session['valid_for'] = response['expires_in']
  80. # print(response)
  81. return redirect('user_data')
  82. # }}} callback #
  83. # user_data {{{ #
  84. def user_data(request):
  85. token_obtained_at = datetime.strptime(request.session['token_obtained_at'], TIME_FORMAT)
  86. valid_for = int(request.session['valid_for'])
  87. if token_expired(token_obtained_at, valid_for):
  88. req_body = {
  89. 'grant_type': 'refresh_token',
  90. 'refresh_token': request.session['refresh_token'],
  91. 'client_id': os.environ['SPOTIFY_CLIENT_ID'],
  92. 'client_secret': os.environ['SPOTIFY_CLIENT_SECRET']
  93. }
  94. refresh_token_response = requests.post('https://accounts.spotify.com/api/token', data = req_body).json()
  95. request.session['access_token'] = refresh_token_response['access_token']
  96. request.session['valid_for'] = refresh_token_response['expires_in']
  97. auth_token_str = "Bearer " + request.session['access_token']
  98. headers = {
  99. 'Authorization': auth_token_str
  100. }
  101. user_data_response = requests.get('https://api.spotify.com/v1/me', headers = headers).json()
  102. context = {
  103. 'user_name': user_data_response['display_name'],
  104. 'id': user_data_response['id'],
  105. }
  106. tracks_to_query = 5
  107. parse_library(headers, tracks_to_query)
  108. return render(request, 'spotifyvis/user_data.html', context)
  109. # }}} user_data #
  110. # parse_library {{{ #
  111. def parse_library(headers, tracks):
  112. """Scans user's library for certain number of tracks to update library_stats with.
  113. :headers: For API call.
  114. :tracks: Number of tracks to get from user's library.
  115. :returns: None
  116. """
  117. # TODO: implement importing entire library with 0 as tracks param
  118. # number of tracks to get with each call
  119. limit = 5
  120. # keeps track of point to get songs from
  121. offset = 0
  122. payload = {'limit': str(limit)}
  123. for _ in range(0, tracks, limit):
  124. payload['offset'] = str(offset)
  125. saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
  126. num_samples = offset
  127. for track_dict in saved_tracks_response['items']:
  128. # Track the number of samples for calculating
  129. # audio feature averages and standard deviations on the fly
  130. num_samples += 1
  131. get_track_info(track_dict['track'])
  132. # get_genre(headers, track_dict['track']['album']['id'])
  133. audio_features_dict = get_audio_features(headers, track_dict['track']['id'])
  134. for feature, feature_data in audio_features_dict.items():
  135. update_audio_feature_stats(feature, feature_data, num_samples)
  136. for artist_dict in track_dict['track']['artists']:
  137. increase_artist_count(headers, artist_dict['name'], artist_dict['id'])
  138. # calculates num_songs with offset + songs retrieved
  139. library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
  140. offset += limit
  141. calculate_genres_from_artists(headers)
  142. pprint.pprint(library_stats)
  143. # }}} parse_library #
  144. def get_audio_features(headers, track_id):
  145. """Returns the audio features of a soundtrack
  146. Args:
  147. headers: headers containing the API token
  148. track_id: the id of the soundtrack, needed to query the Spotify API
  149. Returns:
  150. A dictionary with the features as its keys
  151. """
  152. response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
  153. features_dict = {}
  154. # Data that we don't need
  155. useless_keys = [
  156. "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
  157. ]
  158. for key, val in response.items():
  159. if key not in useless_keys:
  160. features_dict[key] = val
  161. return features_dict
  162. def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
  163. """Calculates the standard deviation for a sample without storing all data points
  164. Args:
  165. cur_mean: the current mean for N = (sample_size - 1)
  166. cur_std_dev: the current standard deviation for N = (sample_size - 1)
  167. new_data_point: a new data point
  168. sample_size: sample size including the new data point
  169. Returns:
  170. (new_mean, new_std_dev)
  171. """
  172. # This is an implementation of Welford's method
  173. # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
  174. new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
  175. delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean)
  176. new_std_dev = math.sqrt(
  177. (math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / (
  178. sample_size - 1
  179. ))
  180. return new_mean, new_std_dev
  181. def update_audio_feature_stats(feature, new_data_point, sample_size):
  182. """Updates the audio feature statistics in library_stats
  183. Args:
  184. feature: the audio feature to be updated (string)
  185. new_data_point: new data to update the stats with
  186. sample_size: sample size including the new data point
  187. Returns:
  188. None
  189. """
  190. # first time the feature is considered
  191. if sample_size < 2:
  192. library_stats['audio_features'][feature] = {
  193. "average": new_data_point,
  194. "std_dev": 0,
  195. }
  196. else:
  197. cur_mean = library_stats['audio_features'][feature]['average']
  198. cur_std_dev = library_stats['audio_features'][feature]['std_dev']
  199. new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size)
  200. library_stats['audio_features'][feature]['average'] = new_mean
  201. library_stats['audio_features'][feature]['std_dev'] = new_std_dev
  202. # increase_nested_key {{{ #
  203. def increase_nested_key(top_key, nested_key, amount=1):
  204. """Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes
  205. appropriate action.
  206. :top_key: First key of library_stats.
  207. :nested_key: Key in top_key's dict for which we want to increase value of.
  208. :returns: None
  209. """
  210. if nested_key not in library_stats[top_key]:
  211. library_stats[top_key][nested_key] = amount
  212. else:
  213. library_stats[top_key][nested_key] += amount
  214. # }}} increase_nested_key #
  215. # increase_artist_count {{{ #
  216. def increase_artist_count(headers, artist_name, artist_id):
  217. """Increases count for artist in library_stats and stores the artist_id.
  218. :headers: For making the API call.
  219. :artist_name: Artist to increase count for.
  220. :artist_id: The Spotify ID for the artist.
  221. :returns: None
  222. """
  223. if artist_name not in library_stats['artists']:
  224. library_stats['artists'][artist_name] = {}
  225. library_stats['artists'][artist_name]['count'] = 1
  226. library_stats['artists'][artist_name]['id'] = artist_id
  227. else:
  228. library_stats['artists'][artist_name]['count'] += 1
  229. # }}} increase_artist_count #
  230. # get_track_info {{{ #
  231. def get_track_info(track_dict):
  232. """Get all the info from the track_dict directly returned by the API call in parse_library.
  233. :track_dict: Dict returned from the API call containing the track info.
  234. :returns: None
  235. """
  236. # popularity
  237. library_stats['popularity'].append(track_dict['popularity'])
  238. # year
  239. year_released = track_dict['album']['release_date'].split('-')[0]
  240. increase_nested_key('year_released', year_released)
  241. # artist
  242. # artist_names = [artist['name'] for artist in track_dict['artists']]
  243. # for artist_name in artist_names:
  244. # increase_nested_key('artists', artist_name)
  245. # runtime
  246. library_stats['total_runtime'] += float(track_dict['duration_ms']) / 60
  247. # }}} get_track_info #
  248. # calculate_genres_from_artists {{{ #
  249. def calculate_genres_from_artists(headers):
  250. """Tallies up genre counts based on artists in library_stats.
  251. :headers: For making the API call.
  252. :returns: None
  253. """
  254. for artist_entry in library_stats['artists'].values():
  255. artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_entry['id'], headers=headers).json()
  256. # increase each genre count by artist count
  257. for genre in artist_response['genres']:
  258. increase_nested_key('genres', genre, artist_entry['count'])
  259. # }}} calculate_genres_from_artists #