From 0ca846805c005d4361a5866e658f119f4027f69c Mon Sep 17 00:00:00 2001 From: Kevin Mok Date: Sat, 19 May 2018 21:19:35 -0400 Subject: [PATCH 1/3] Wrote parse_library and get_track_info (#1) Initialized library_stats with top-level keys and default values. --- .gitignore | 3 + MVPs.txt | 1 - sample-track-obj.json | 250 ++++++++++++++++++++++++++++++++++++++++++ spotifyvis/views.py | 100 ++++++++++++++++- 4 files changed, 352 insertions(+), 2 deletions(-) delete mode 100644 MVPs.txt create mode 100644 sample-track-obj.json diff --git a/.gitignore b/.gitignore index ffa6a85..5f9800c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ db.sqlite3 *.bak .idea/ .vscode/* + +spotify-keys.sh +Pipfile diff --git a/MVPs.txt b/MVPs.txt deleted file mode 100644 index bd44dea..0000000 --- a/MVPs.txt +++ /dev/null @@ -1 +0,0 @@ -- login to Spotify using API and console.log number of songs in library diff --git a/sample-track-obj.json b/sample-track-obj.json new file mode 100644 index 0000000..c3a24ed --- /dev/null +++ b/sample-track-obj.json @@ -0,0 +1,250 @@ +{ + 'added_at':'2018-05-18T19:16:36Z', + 'track':{ + 'album':{ + 'album_type':'single', + 'artists':[ + { + 'external_urls':{ + 'spotify':'https://open.spotify.com/artist/64KEffDW9EtZ1y2vBYgq8T' + }, + 'href':'https://api.spotify.com/v1/artists/64KEffDW9EtZ1y2vBYgq8T', + 'id':'64KEffDW9EtZ1y2vBYgq8T', + 'name':'Marshmello', + 'type':'artist', + 'uri':'spotify:artist:64KEffDW9EtZ1y2vBYgq8T' + }, + { + 'external_urls':{ + 'spotify':'https://open.spotify.com/artist/5gCRApTajqwbnHHPbr2Fpi' + }, + 'href':'https://api.spotify.com/v1/artists/5gCRApTajqwbnHHPbr2Fpi', + 'id':'5gCRApTajqwbnHHPbr2Fpi', + 'name':'Juicy J', + 'type':'artist', + 'uri':'spotify:artist:5gCRApTajqwbnHHPbr2Fpi' + }, + { + 'external_urls':{ + 'spotify':'https://open.spotify.com/artist/4IWBUUAFIplrNtaOHcJPRM' + }, + 'href':'https://api.spotify.com/v1/artists/4IWBUUAFIplrNtaOHcJPRM', + 'id':'4IWBUUAFIplrNtaOHcJPRM', + 'name':'James Arthur', + 'type':'artist', + 'uri':'spotify:artist:4IWBUUAFIplrNtaOHcJPRM' + } + ], + 'available_markets':[ + 'AD', + 'AR', + 'AT', + 'AU', + 'BE', + 'BG', + 'BO', + 'BR', + 'CA', + 'CH', + 'CL', + 'CO', + 'CR', + 'CY', + 'CZ', + 'DE', + 'DK', + 'DO', + 'EC', + 'EE', + 'ES', + 'FI', + 'FR', + 'GB', + 'GR', + 'GT', + 'HK', + 'HN', + 'HU', + 'ID', + 'IE', + 'IL', + 'IS', + 'IT', + 'JP', + 'LI', + 'LT', + 'LU', + 'LV', + 'MC', + 'MT', + 'MX', + 'MY', + 'NI', + 'NL', + 'NO', + 'NZ', + 'PA', + 'PE', + 'PH', + 'PL', + 'PT', + 'PY', + 'RO', + 'SE', + 'SG', + 'SK', + 'SV', + 'TH', + 'TR', + 'TW', + 'US', + 'UY', + 'VN', + 'ZA' + ], + 'external_urls':{ + 'spotify':'https://open.spotify.com/album/6TvqOieExu0IJb9Q1gOoCz' + }, + 'href':'https://api.spotify.com/v1/albums/6TvqOieExu0IJb9Q1gOoCz', + 'id':'6TvqOieExu0IJb9Q1gOoCz', + 'images':[ + { + 'height':640, + 'url':'https://i.scdn.co/image/b3556956b8e4881c85228ada91aa953e5c0458ef', + 'width':640 + }, + { + 'height':300, + 'url':'https://i.scdn.co/image/d76072f5ca739466bd27f42f3356fa1a38c6a92d', + 'width':300 + }, + { + 'height':64, + 'url':'https://i.scdn.co/image/bfd092dfa503566d9c9a3042f213fe02bed8a5cc', + 'width':64 + } + ], + 'name':'You Can Cry', + 'release_date':'2018-05-04', + 'release_date_precision':'day', + 'type':'album', + 'uri':'spotify:album:6TvqOieExu0IJb9Q1gOoCz' + }, + 'artists':[ + { + 'external_urls':{ + 'spotify':'https://open.spotify.com/artist/64KEffDW9EtZ1y2vBYgq8T' + }, + 'href':'https://api.spotify.com/v1/artists/64KEffDW9EtZ1y2vBYgq8T', + 'id':'64KEffDW9EtZ1y2vBYgq8T', + 'name':'Marshmello', + 'type':'artist', + 'uri':'spotify:artist:64KEffDW9EtZ1y2vBYgq8T' + }, + { + 'external_urls':{ + 'spotify':'https://open.spotify.com/artist/5gCRApTajqwbnHHPbr2Fpi' + }, + 'href':'https://api.spotify.com/v1/artists/5gCRApTajqwbnHHPbr2Fpi', + 'id':'5gCRApTajqwbnHHPbr2Fpi', + 'name':'Juicy J', + 'type':'artist', + 'uri':'spotify:artist:5gCRApTajqwbnHHPbr2Fpi' + }, + { + 'external_urls':{ + 'spotify':'https://open.spotify.com/artist/4IWBUUAFIplrNtaOHcJPRM' + }, + 'href':'https://api.spotify.com/v1/artists/4IWBUUAFIplrNtaOHcJPRM', + 'id':'4IWBUUAFIplrNtaOHcJPRM', + 'name':'James Arthur', + 'type':'artist', + 'uri':'spotify:artist:4IWBUUAFIplrNtaOHcJPRM' + } + ], + 'available_markets':[ + 'AD', + 'AR', + 'AT', + 'AU', + 'BE', + 'BG', + 'BO', + 'BR', + 'CA', + 'CH', + 'CL', + 'CO', + 'CR', + 'CY', + 'CZ', + 'DE', + 'DK', + 'DO', + 'EC', + 'EE', + 'ES', + 'FI', + 'FR', + 'GB', + 'GR', + 'GT', + 'HK', + 'HN', + 'HU', + 'ID', + 'IE', + 'IL', + 'IS', + 'IT', + 'JP', + 'LI', + 'LT', + 'LU', + 'LV', + 'MC', + 'MT', + 'MX', + 'MY', + 'NI', + 'NL', + 'NO', + 'NZ', + 'PA', + 'PE', + 'PH', + 'PL', + 'PT', + 'PY', + 'RO', + 'SE', + 'SG', + 'SK', + 'SV', + 'TH', + 'TR', + 'TW', + 'US', + 'UY', + 'VN', + 'ZA' + ], + 'disc_number':1, + 'duration_ms':194533, + 'explicit':False, + 'external_ids':{ + 'isrc':'USQX91800946' + }, + 'external_urls':{ + 'spotify':'https://open.spotify.com/track/3ZbJMlEL4Kcme0ONRO7Slx' + }, + 'href':'https://api.spotify.com/v1/tracks/3ZbJMlEL4Kcme0ONRO7Slx', + 'id':'3ZbJMlEL4Kcme0ONRO7Slx', + 'name':'You Can Cry', + 'popularity':81, + 'preview_url':'https://p.scdn.co/mp3-preview/6c31f3dee18a1e7c452ce9b6948a6e04aa7629d6?cid=aefd4e45060d4f9ba5bea0f6e6d36359', + 'track_number':1, + 'type':'track', + 'uri':'spotify:track:3ZbJMlEL4Kcme0ONRO7Slx' + } +} diff --git a/spotifyvis/views.py b/spotifyvis/views.py index 5a4ee80..d823ebd 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -5,9 +5,14 @@ import random import requests import os import urllib +import json +import pprint from datetime import datetime TIME_FORMAT = '%Y-%m-%d-%H-%M-%S' +library_stats = {"audio_features":{}, "genres":{}, "year_released":{}, "artists":{}, "num_songs":0, "popularity":[], "total_runtime":0} + +# generate_random_string {{{ # def generate_random_string(length): """Generates a random string of a certain length @@ -26,6 +31,9 @@ def generate_random_string(length): return rand_str +# }}} generate_random_string # + +# token_expired {{{ # def token_expired(token_obtained_at, valid_for): """Returns True if token expired, False if otherwise @@ -37,11 +45,17 @@ def token_expired(token_obtained_at, valid_for): time_elapsed = (datetime.today() - token_obtained_at).total_seconds() return time_elapsed >= valid_for +# }}} token_expired # + +# index {{{ # # Create your views here. def index(request): return render(request, 'spotifyvis/index.html') +# }}} index # + +# login {{{ # def login(request): @@ -62,6 +76,10 @@ def login(request): authorize_url = "https://accounts.spotify.com/authorize/?{}".format(params) return redirect(authorize_url) +# }}} login # + +# callback {{{ # + def callback(request): # Attempt to retrieve the authorization code from the query string try: @@ -88,6 +106,9 @@ def callback(request): return redirect('user_data') +# }}} callback # + +# user_data {{{ # def user_data(request): @@ -116,4 +137,81 @@ def user_data(request): 'user_name': user_data_response['display_name'], 'id': user_data_response['id'], } - return render(request, 'spotifyvis/user_data.html', context) \ No newline at end of file + + parse_library(headers, 4) + return render(request, 'spotifyvis/user_data.html', context) + +# }}} user_data # + +# parse_library {{{ # + +def parse_library(headers, tracks): + """Scans user's library for certain number of tracks to update library_stats with. + + :headers: For API call. + :tracks: Number of tracks to get from user's library. + :returns: None + + """ + # TODO: implement importing entire library with 0 as tracks param + # number of tracks to get with each call + limit = 2 + # keeps track of point to get songs from + offset = 0 + payload = {'limit': str(limit)} + for i in range(0, tracks, limit): + payload['offset'] = str(offset) + saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json() + for track_dict in saved_tracks_response['items']: + get_track_info(track_dict['track']) + # calculates num_songs with offset + songs retrieved + library_stats['num_songs'] = offset + len(saved_tracks_response['items']) + offset += limit + + pprint.pprint(library_stats) + +# }}} parse_library # + +# increase_nested_key {{{ # + +def increase_nested_key(top_key, nested_key): + """Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes + appropriate action. + + :top_key: First key of library_stats. + :nested_key: Key in top_key's dict for which we want to increase value of. + :returns: None + + """ + if nested_key not in library_stats[top_key]: + library_stats[top_key][nested_key] = 1 + else: + library_stats[top_key][nested_key] += 1 + +# }}} increase_nested_key # + +# get_track_info {{{ # + +def get_track_info(track_dict): + """Get all the info from the track_dict directly returned by the API call in parse_library. + + :track_dict: Dict returned from the API call containing the track info. + :returns: None + + """ + # popularity + library_stats['popularity'].append(track_dict['popularity']) + + # year + year_released = track_dict['album']['release_date'].split('-')[0] + increase_nested_key('year_released', year_released) + + # artist + artist_names = [artist['name'] for artist in track_dict['artists']] + for artist_name in artist_names: + increase_nested_key('artists', artist_name) + + # runtime + library_stats['total_runtime'] += float(track_dict['duration_ms']) / 60 + +# }}} get_track_info # From cb75a0b2bb576635e4733ea86ec130b72f1bde07 Mon Sep 17 00:00:00 2001 From: Kevin Mok Date: Sat, 19 May 2018 22:56:28 -0400 Subject: [PATCH 2/3] Implemented get_genre functionality (#1) Code actually in increase_artist_count. Updates genre counts with each track from the artist. --- spotifyvis/views.py | 73 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index d823ebd..93c0cf4 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -102,7 +102,7 @@ def callback(request): request.session['access_token'] = response['access_token'] request.session['refresh_token'] = response['refresh_token'] request.session['valid_for'] = response['expires_in'] - print(response) + # print(response) return redirect('user_data') @@ -111,7 +111,6 @@ def callback(request): # user_data {{{ # def user_data(request): - token_obtained_at = datetime.strptime(request.session['token_obtained_at'], TIME_FORMAT) valid_for = int(request.session['valid_for']) @@ -138,7 +137,8 @@ def user_data(request): 'id': user_data_response['id'], } - parse_library(headers, 4) + tracks_to_query = 50 + parse_library(headers, tracks_to_query) return render(request, 'spotifyvis/user_data.html', context) # }}} user_data # @@ -155,7 +155,7 @@ def parse_library(headers, tracks): """ # TODO: implement importing entire library with 0 as tracks param # number of tracks to get with each call - limit = 2 + limit = 50 # keeps track of point to get songs from offset = 0 payload = {'limit': str(limit)} @@ -164,6 +164,9 @@ def parse_library(headers, tracks): saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json() for track_dict in saved_tracks_response['items']: get_track_info(track_dict['track']) + # get_genre(headers, track_dict['track']['album']['id']) + for artist_dict in track_dict['track']['artists']: + increase_artist_count(headers, artist_dict['name'], artist_dict['id']) # calculates num_songs with offset + songs retrieved library_stats['num_songs'] = offset + len(saved_tracks_response['items']) offset += limit @@ -190,6 +193,32 @@ def increase_nested_key(top_key, nested_key): # }}} increase_nested_key # +# increase_artist_count {{{ # + +def increase_artist_count(headers, artist_name, artist_id): + """Increases count for artist and genre in library_stats. Also looks up genre of artist if new key. + + :headers: For making the API call. + :artist_name: Artist to increase count for. + :artist_id: The Spotify ID for the artist. + :returns: None + + """ + if artist_name not in library_stats['artists']: + library_stats['artists'][artist_name] = {} + library_stats['artists'][artist_name]['count'] = 1 + # set genres for artist + artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_id, headers=headers).json() + library_stats['artists'][artist_name]['genres'] = artist_response['genres'] + else: + library_stats['artists'][artist_name]['count'] += 1 + + # update genre counts + for genre in library_stats['artists'][artist_name]['genres']: + increase_nested_key('genres', genre) + +# }}} increase_artist_count # + # get_track_info {{{ # def get_track_info(track_dict): @@ -207,11 +236,41 @@ def get_track_info(track_dict): increase_nested_key('year_released', year_released) # artist - artist_names = [artist['name'] for artist in track_dict['artists']] - for artist_name in artist_names: - increase_nested_key('artists', artist_name) + # artist_names = [artist['name'] for artist in track_dict['artists']] + # for artist_name in artist_names: + # increase_nested_key('artists', artist_name) # runtime library_stats['total_runtime'] += float(track_dict['duration_ms']) / 60 # }}} get_track_info # + +# get_genre {{{ # + +# Deprecated. Will remove in next commit. I queried 300 albums and none of them had genres. +# The organization app gets the genre from the artist, and I've implemented other functions +# to do the same. +def get_genre(headers, album_id): + """Updates library_stats with this track's genre. + + :headers: For making the API call. + :album_id: The Spotify ID for the album. + :returns: None + + """ + album_response = requests.get('https://api.spotify.com/v1/albums/' + album_id, headers=headers).json() + pprint.pprint(album_response['genres']) + for genre in album_response['genres']: + # print(genre) + increase_nested_key('genres', genre); + +# }}} get_genre # + +# def calculate_genres_from_artists(headers): + # """Tallies up genre counts based on artists in library_stats. + + # :headers: For making the API call. + # :returns: None + + # """ + # album_response = requests.get('https://api.spotify.com/v1/albums/' + album_id, headers=headers).json() From f94a861b847bc23aa6108cdfcad3cefbf0e86c8c Mon Sep 17 00:00:00 2001 From: Kevin Mok Date: Sat, 19 May 2018 23:39:10 -0400 Subject: [PATCH 3/3] Calculate genre counts once for each artist (#1) Versus last commit that incremented genres every track. Also now only stores ID for artist rather than all their genres. --- spotifyvis/views.py | 53 +++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/spotifyvis/views.py b/spotifyvis/views.py index 93c0cf4..b83e67e 100644 --- a/spotifyvis/views.py +++ b/spotifyvis/views.py @@ -137,7 +137,7 @@ def user_data(request): 'id': user_data_response['id'], } - tracks_to_query = 50 + tracks_to_query = 5 parse_library(headers, tracks_to_query) return render(request, 'spotifyvis/user_data.html', context) @@ -155,7 +155,7 @@ def parse_library(headers, tracks): """ # TODO: implement importing entire library with 0 as tracks param # number of tracks to get with each call - limit = 50 + limit = 5 # keeps track of point to get songs from offset = 0 payload = {'limit': str(limit)} @@ -170,14 +170,14 @@ def parse_library(headers, tracks): # calculates num_songs with offset + songs retrieved library_stats['num_songs'] = offset + len(saved_tracks_response['items']) offset += limit - + calculate_genres_from_artists(headers) pprint.pprint(library_stats) # }}} parse_library # # increase_nested_key {{{ # -def increase_nested_key(top_key, nested_key): +def increase_nested_key(top_key, nested_key, amount=1): """Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes appropriate action. @@ -187,16 +187,16 @@ def increase_nested_key(top_key, nested_key): """ if nested_key not in library_stats[top_key]: - library_stats[top_key][nested_key] = 1 + library_stats[top_key][nested_key] = amount else: - library_stats[top_key][nested_key] += 1 + library_stats[top_key][nested_key] += amount # }}} increase_nested_key # # increase_artist_count {{{ # def increase_artist_count(headers, artist_name, artist_id): - """Increases count for artist and genre in library_stats. Also looks up genre of artist if new key. + """Increases count for artist in library_stats and stores the artist_id. :headers: For making the API call. :artist_name: Artist to increase count for. @@ -207,16 +207,10 @@ def increase_artist_count(headers, artist_name, artist_id): if artist_name not in library_stats['artists']: library_stats['artists'][artist_name] = {} library_stats['artists'][artist_name]['count'] = 1 - # set genres for artist - artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_id, headers=headers).json() - library_stats['artists'][artist_name]['genres'] = artist_response['genres'] + library_stats['artists'][artist_name]['id'] = artist_id else: library_stats['artists'][artist_name]['count'] += 1 - # update genre counts - for genre in library_stats['artists'][artist_name]['genres']: - increase_nested_key('genres', genre) - # }}} increase_artist_count # # get_track_info {{{ # @@ -245,32 +239,19 @@ def get_track_info(track_dict): # }}} get_track_info # -# get_genre {{{ # +# calculate_genres_from_artists {{{ # -# Deprecated. Will remove in next commit. I queried 300 albums and none of them had genres. -# The organization app gets the genre from the artist, and I've implemented other functions -# to do the same. -def get_genre(headers, album_id): - """Updates library_stats with this track's genre. +def calculate_genres_from_artists(headers): + """Tallies up genre counts based on artists in library_stats. :headers: For making the API call. - :album_id: The Spotify ID for the album. :returns: None """ - album_response = requests.get('https://api.spotify.com/v1/albums/' + album_id, headers=headers).json() - pprint.pprint(album_response['genres']) - for genre in album_response['genres']: - # print(genre) - increase_nested_key('genres', genre); - -# }}} get_genre # - -# def calculate_genres_from_artists(headers): - # """Tallies up genre counts based on artists in library_stats. - - # :headers: For making the API call. - # :returns: None + for artist_entry in library_stats['artists'].values(): + artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_entry['id'], headers=headers).json() + # increase each genre count by artist count + for genre in artist_response['genres']: + increase_nested_key('genres', genre, artist_entry['count']) - # """ - # album_response = requests.get('https://api.spotify.com/v1/albums/' + album_id, headers=headers).json() +# }}} calculate_genres_from_artists #