Use "several" API call to get audio features

Closes #20. Also closes #38, since parse_library is a bit cleaner now with some folds and there's not much left to be refactored.
2018-06-27 00:34:38 -04:00
parent bd597cdc23
commit 3fe52343aa
1 changed files with 67 additions and 34 deletions
--- a/spotifyvis/utils.py
+++ b/spotifyvis/utils.py
@@ -14,7 +14,8 @@ import json
 USER_TRACKS_LIMIT = 50
 #  ARTIST_LIMIT = 50
 ARTIST_LIMIT = 25
-FEATURES_LIMIT = 100
+#  FEATURES_LIMIT = 100
 FEATURES_LIMIT = 25
 #  parse_library {{{ # 
@@ -33,61 +34,66 @@ def parse_library(headers, tracks, user):
    offset = 0
    payload = {'limit': str(USER_TRACKS_LIMIT)}
    artist_genre_queue = []
    features_queue = []
    # iterate until hit requested num of tracks
    for _ in range(0, tracks, USER_TRACKS_LIMIT):
        payload['offset'] = str(offset)
-        # get current set of tracks
+        saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', 
-        saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
+                headers=headers,
                params=payload).json()
        # TODO: refactor the for loop body into helper function
        # iterate through each track
        for track_dict in saved_tracks_response['items']:
            #  add artists {{{ # 
            # update artist info before track so that Track object can reference
            # Artist object
            track_artists = []
            for artist_dict in track_dict['track']['artists']:
                artist_obj, artist_created = Artist.objects.get_or_create(
-                    artist_id=artist_dict['id'],
+                        artist_id=artist_dict['id'],
-                    name=artist_dict['name'],
+                        name=artist_dict['name'],)
-                    )
+                # only add/tally up artist genres if new
                if artist_created:
                    artist_genre_queue.append(artist_obj)
                    if len(artist_genre_queue) == ARTIST_LIMIT:
                        add_artist_genres(headers, artist_genre_queue)
                        artist_genre_queue = []
                #  update_artist_genre(headers, artist_obj)
                # get_or_create() returns a tuple (obj, created)
                track_artists.append(artist_obj)
-            #  top_genre = get_top_genre(headers,
+            #  }}} add artists # 
-                    #  track_dict['track']['artists'][0]['id'])
+            
            # WIP: get most common genre
            top_genre = ""
            track_obj, track_created = save_track_obj(track_dict['track'], 
                    track_artists, top_genre, user)
-            # if a new track is not created, the associated audio feature does not need to be created again
+            #  add audio features {{{ # 
-            #  if track_created:
+            
-            save_audio_features(headers, track_dict['track']['id'], track_obj)
+            # if a new track is not created, the associated audio feature does
-            """
+            # not need to be created again
-            TODO: Put this logic in another function
+            if track_created:
-            # Audio analysis could be empty if not present in Spotify database
+                features_queue.append(track_obj)
-            if len(audio_features_dict) != 0:
+                if len(features_queue) == FEATURES_LIMIT:
-                # Track the number of audio analyses for calculating
+                    get_audio_features(headers, features_queue)
-                # audio feature averages and standard deviations on the fly
+                    features_queue = []
-                feature_data_points += 1
+            
-                for feature, feature_data in audio_features_dict.items():
+            #  }}} add audio features # 
-                    update_audio_feature_stats(feature, feature_data, 
+
                            feature_data_points, library_stats)
            """
        # calculates num_songs with offset + songs retrieved
        offset += USER_TRACKS_LIMIT
    #  pprint.pprint(library_stats)
-    # update artists left in queue since there will be probably be leftover
+    #  clean-up {{{ # 
-    # artists that didn't hit ARTIST_LIMIT
+    
-    add_artist_genres(headers, artist_genre_queue)
+    # update remaining artists without genres and songs without features if
    # there are any
    if len(artist_genre_queue) > 0:
        add_artist_genres(headers, artist_genre_queue)
    if len(features_queue) > 0:
        get_audio_features(headers, features_queue)
    #  }}} clean-up # 
    update_track_genres(user)
 #  }}} parse_library # 
@@ -118,6 +124,7 @@ def save_track_obj(track_dict, artists, top_genre, user):
    :artists: artists of the song, passed in as a list of Artist objects.
    :top_genre: top genre associated with this track (see get_top_genre).
    :user: User object for which this Track is to be associated with.
    :returns: (The created/retrieved Track object, created) 
    """
@@ -144,6 +151,33 @@ def save_track_obj(track_dict, artists, top_genre, user):
 #  }}} save_track_obj # 
 def get_audio_features(headers, track_objs):
    """Creates and saves a new AudioFeatures objects for the respective
    track_objs. track_objs should contain the API limit for a single call
    (FEATURES_LIMIT) for maximum efficiency.
    :headers: headers containing the API token
    :track_objs: Track objects to associate with the new AudioFeatures object
    :returns: None
    """
    track_ids = str.join(",", [track_obj.track_id for track_obj in track_objs])
    params = {'ids': track_ids}
    features_response = requests.get("https://api.spotify.com/v1/audio-features",
            headers=headers,params=params).json()['audio_features']
    #  pprint.pprint(features_response)
    useless_keys = [ "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature", ]
    for i in range(len(track_objs)):
        if features_response[i] is not None:
            # Data that we don't need
            cur_features_obj = AudioFeatures()
            cur_features_obj.track = track_objs[i]
            for key, val in features_response[i].items():
                if key not in useless_keys:
                    setattr(cur_features_obj, key, val)
            cur_features_obj.save()
 #  get_audio_features {{{ # 
 def save_audio_features(headers, track_id, track):
@@ -157,8 +191,6 @@ def save_audio_features(headers, track_id, track):
    """
    response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
    if track_id is '5S1IUPueD0xE0vj4zU3nSf':
        pprint.pprint(response)
    if 'error' in response:
        return
@@ -383,10 +415,11 @@ def add_artist_genres(headers, artist_objs):
    """
    artist_ids = str.join(",", [artist_obj.artist_id for artist_obj in artist_objs])
-    #  print(len(artist_objs), artist_ids)
+    print(len(artist_objs), artist_ids)
    params = {'ids': artist_ids}
    artists_response = requests.get('https://api.spotify.com/v1/artists/',
            headers=headers, params=params).json()['artists']
    #  pprint.pprint(artists_response)
    for i in range(len(artist_objs)):
        for genre in artists_response[i]['genres']:
            genre_obj, created = Genre.objects.get_or_create(name=genre,