Graphs and tables for your Spotify account.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

364 lines
12 KiB

  1. # imports {{{ #
  2. from django.shortcuts import render, redirect
  3. from django.http import HttpResponse, HttpResponseBadRequest
  4. import math
  5. import random
  6. import requests
  7. import os
  8. import urllib
  9. import json
  10. import pprint
  11. from datetime import datetime
  12. # }}} imports #
  13. # global vars {{{ #
  14. TIME_FORMAT = '%Y-%m-%d-%H-%M-%S'
  15. library_stats = {"audio_features":{}, "genres":{}, "year_released":{}, "artists":{}, "num_songs":0, "popularity":[], "total_runtime":0}
  16. # }}} global vars #
  17. # generate_random_string {{{ #
  18. def generate_random_string(length):
  19. """Generates a random string of a certain length
  20. Args:
  21. length: the desired length of the randomized string
  22. Returns:
  23. A random string
  24. """
  25. rand_str = ""
  26. possible_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  27. for _ in range(length):
  28. rand_str += possible_chars[random.randint(0, len(possible_chars) - 1)]
  29. return rand_str
  30. # }}} generate_random_string #
  31. # token_expired {{{ #
  32. def token_expired(token_obtained_at, valid_for):
  33. """Returns True if token expired, False if otherwise
  34. Args:
  35. token_obtained_at: datetime object representing the date and time when the token was obtained
  36. valid_for: the time duration for which the token is valid, in seconds
  37. """
  38. time_elapsed = (datetime.today() - token_obtained_at).total_seconds()
  39. return time_elapsed >= valid_for
  40. # }}} token_expired #
  41. # index {{{ #
  42. # Create your views here.
  43. def index(request):
  44. return render(request, 'spotifyvis/index.html')
  45. # }}} index #
  46. # login {{{ #
  47. def login(request):
  48. # use a randomly generated state string to prevent cross-site request forgery attacks
  49. state_str = generate_random_string(16)
  50. request.session['state_string'] = state_str
  51. payload = {
  52. 'client_id': os.environ['SPOTIFY_CLIENT_ID'],
  53. 'response_type': 'code',
  54. 'redirect_uri': 'http://localhost:8000/callback',
  55. 'state': state_str,
  56. 'scope': 'user-library-read',
  57. 'show_dialog': False
  58. }
  59. params = urllib.parse.urlencode(payload) # turn the payload dict into a query string
  60. authorize_url = "https://accounts.spotify.com/authorize/?{}".format(params)
  61. return redirect(authorize_url)
  62. # }}} login #
  63. # callback {{{ #
  64. def callback(request):
  65. # Attempt to retrieve the authorization code from the query string
  66. try:
  67. code = request.GET['code']
  68. except KeyError:
  69. return HttpResponseBadRequest("<h1>Problem with login</h1>")
  70. payload = {
  71. 'grant_type': 'authorization_code',
  72. 'code': code,
  73. 'redirect_uri': 'http://localhost:8000/callback',
  74. 'client_id': os.environ['SPOTIFY_CLIENT_ID'],
  75. 'client_secret': os.environ['SPOTIFY_CLIENT_SECRET'],
  76. }
  77. response = requests.post('https://accounts.spotify.com/api/token', data = payload).json()
  78. # despite its name, datetime.today() returns a datetime object, not a date object
  79. # use datetime.strptime() to get a datetime object from a string
  80. request.session['token_obtained_at'] = datetime.strftime(datetime.today(), TIME_FORMAT)
  81. request.session['access_token'] = response['access_token']
  82. request.session['refresh_token'] = response['refresh_token']
  83. request.session['valid_for'] = response['expires_in']
  84. # print(response)
  85. return redirect('user_data')
  86. # }}} callback #
  87. # user_data {{{ #
  88. def user_data(request):
  89. token_obtained_at = datetime.strptime(request.session['token_obtained_at'], TIME_FORMAT)
  90. valid_for = int(request.session['valid_for'])
  91. if token_expired(token_obtained_at, valid_for):
  92. req_body = {
  93. 'grant_type': 'refresh_token',
  94. 'refresh_token': request.session['refresh_token'],
  95. 'client_id': os.environ['SPOTIFY_CLIENT_ID'],
  96. 'client_secret': os.environ['SPOTIFY_CLIENT_SECRET']
  97. }
  98. refresh_token_response = requests.post('https://accounts.spotify.com/api/token', data = req_body).json()
  99. request.session['access_token'] = refresh_token_response['access_token']
  100. request.session['valid_for'] = refresh_token_response['expires_in']
  101. auth_token_str = "Bearer " + request.session['access_token']
  102. headers = {
  103. 'Authorization': auth_token_str
  104. }
  105. tracks_to_query = 5
  106. parse_library(headers, tracks_to_query)
  107. user_data_response = requests.get('https://api.spotify.com/v1/me', headers = headers).json()
  108. context = {
  109. 'user_name': user_data_response['display_name'],
  110. 'id': user_data_response['id'],
  111. 'genre_dict': library_stats['genres']
  112. }
  113. return render(request, 'spotifyvis/user_data.html', context)
  114. # }}} user_data #
  115. # parse_library {{{ #
  116. def parse_library(headers, tracks):
  117. """Scans user's library for certain number of tracks to update library_stats with.
  118. :headers: For API call.
  119. :tracks: Number of tracks to get from user's library.
  120. :returns: None
  121. """
  122. # TODO: implement importing entire library with 0 as tracks param
  123. # number of tracks to get with each call
  124. limit = 5
  125. # keeps track of point to get songs from
  126. offset = 0
  127. payload = {'limit': str(limit)}
  128. for _ in range(0, tracks, limit):
  129. payload['offset'] = str(offset)
  130. saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
  131. num_samples = offset
  132. for track_dict in saved_tracks_response['items']:
  133. # Track the number of samples for calculating
  134. # audio feature averages and standard deviations on the fly
  135. num_samples += 1
  136. get_track_info(track_dict['track'])
  137. # get_genre(headers, track_dict['track']['album']['id'])
  138. audio_features_dict = get_audio_features(headers, track_dict['track']['id'])
  139. for feature, feature_data in audio_features_dict.items():
  140. update_audio_feature_stats(feature, feature_data, num_samples)
  141. for artist_dict in track_dict['track']['artists']:
  142. increase_artist_count(headers, artist_dict['name'], artist_dict['id'])
  143. # calculates num_songs with offset + songs retrieved
  144. library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
  145. offset += limit
  146. calculate_genres_from_artists(headers)
  147. pprint.pprint(library_stats)
  148. # }}} parse_library #
  149. # get_audio_features {{{ #
  150. def get_audio_features(headers, track_id):
  151. """Returns the audio features of a soundtrack
  152. Args:
  153. headers: headers containing the API token
  154. track_id: the id of the soundtrack, needed to query the Spotify API
  155. Returns:
  156. A dictionary with the features as its keys
  157. """
  158. response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
  159. features_dict = {}
  160. # Data that we don't need
  161. useless_keys = [
  162. "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
  163. ]
  164. for key, val in response.items():
  165. if key not in useless_keys:
  166. features_dict[key] = val
  167. return features_dict
  168. # }}} get_audio_features #
  169. # update_std_dev {{{ #
  170. def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
  171. """Calculates the standard deviation for a sample without storing all data points
  172. Args:
  173. cur_mean: the current mean for N = (sample_size - 1)
  174. cur_std_dev: the current standard deviation for N = (sample_size - 1)
  175. new_data_point: a new data point
  176. sample_size: sample size including the new data point
  177. Returns:
  178. (new_mean, new_std_dev)
  179. """
  180. # This is an implementationof Welford's method
  181. # http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
  182. new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
  183. delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean)
  184. new_std_dev = math.sqrt(
  185. (math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / (
  186. sample_size - 1
  187. ))
  188. return new_mean, new_std_dev
  189. # }}} update_std_dev #
  190. # update_audio_feature_stats {{{ #
  191. def update_audio_feature_stats(feature, new_data_point, sample_size):
  192. """Updates the audio feature statistics in library_stats
  193. Args:
  194. feature: the audio feature to be updated (string)
  195. new_data_point: new data to update the stats with
  196. sample_size: sample size including the new data point
  197. Returns:
  198. None
  199. """
  200. # first time the feature is considered
  201. if sample_size < 2:
  202. library_stats['audio_features'][feature] = {
  203. "average": new_data_point,
  204. "std_dev": 0,
  205. }
  206. else:
  207. cur_mean = library_stats['audio_features'][feature]['average']
  208. cur_std_dev = library_stats['audio_features'][feature]['std_dev']
  209. new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size)
  210. library_stats['audio_features'][feature]['average'] = new_mean
  211. library_stats['audio_features'][feature]['std_dev'] = new_std_dev
  212. # }}} update_audio_feature_stats #
  213. # increase_nested_key {{{ #
  214. def increase_nested_key(top_key, nested_key, amount=1):
  215. """Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes
  216. appropriate action.
  217. :top_key: First key of library_stats.
  218. :nested_key: Key in top_key's dict for which we want to increase value of.
  219. :returns: None
  220. """
  221. if nested_key not in library_stats[top_key]:
  222. library_stats[top_key][nested_key] = amount
  223. else:
  224. library_stats[top_key][nested_key] += amount
  225. # }}} increase_nested_key #
  226. # increase_artist_count {{{ #
  227. def increase_artist_count(headers, artist_name, artist_id):
  228. """Increases count for artist in library_stats and stores the artist_id.
  229. :headers: For making the API call.
  230. :artist_name: Artist to increase count for.
  231. :artist_id: The Spotify ID for the artist.
  232. :returns: None
  233. """
  234. if artist_name not in library_stats['artists']:
  235. library_stats['artists'][artist_name] = {}
  236. library_stats['artists'][artist_name]['count'] = 1
  237. library_stats['artists'][artist_name]['id'] = artist_id
  238. else:
  239. library_stats['artists'][artist_name]['count'] += 1
  240. # }}} increase_artist_count #
  241. # get_track_info {{{ #
  242. def get_track_info(track_dict):
  243. """Get all the info from the track_dict directly returned by the API call in parse_library.
  244. :track_dict: Dict returned from the API call containing the track info.
  245. :returns: None
  246. """
  247. # popularity
  248. library_stats['popularity'].append(track_dict['popularity'])
  249. # year
  250. year_released = track_dict['album']['release_date'].split('-')[0]
  251. increase_nested_key('year_released', year_released)
  252. # artist
  253. # artist_names = [artist['name'] for artist in track_dict['artists']]
  254. # for artist_name in artist_names:
  255. # increase_nested_key('artists', artist_name)
  256. # runtime
  257. library_stats['total_runtime'] += float(track_dict['duration_ms']) / 60
  258. # }}} get_track_info #
  259. # calculate_genres_from_artists {{{ #
  260. def calculate_genres_from_artists(headers):
  261. """Tallies up genre counts based on artists in library_stats.
  262. :headers: For making the API call.
  263. :returns: None
  264. """
  265. for artist_entry in library_stats['artists'].values():
  266. artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_entry['id'], headers=headers).json()
  267. # increase each genre count by artist count
  268. # for genre in artist_response['genres']:
  269. # print(genre, end='')
  270. # increase_nested_key('genres', genre, artist_entry['count'])
  271. # print('')
  272. # only use first genre for simplicity right now
  273. if len(artist_response['genres']) > 0:
  274. print(artist_response['genres'][0])
  275. increase_nested_key('genres', artist_response['genres'][0], artist_entry['count'])
  276. # }}} calculate_genres_from_artists #