Merge branch 'chris/wip' into vis-page

This commit is contained in:
2018-11-17 21:34:14 -05:00
10 changed files with 249 additions and 113 deletions

View File

@@ -1,8 +1,9 @@
from django.contrib import admin
from .models import Track, Artist, AudioFeatures, User
from .models import Track, Artist, AudioFeatures, User, Genre
# Register your models here.
# Expose every API model in the Django admin using the default ModelAdmin.
# register() accepts an iterable of model classes and registers them in order.
admin.site.register([Track, Artist, AudioFeatures, User, Genre])

View File

@@ -30,6 +30,8 @@ class Artist(models.Model):
id = models.CharField(primary_key=True, max_length=MAX_ID)
name = models.CharField(max_length=50)
genres = models.ManyToManyField(Genre, blank=True)
# genre = models.ForeignKey(Genre, on_delete=models.CASCADE, blank=True,
# null=True)
def __str__(self):
return self.name
@@ -52,7 +54,7 @@ class Track(models.Model):
name = models.CharField(max_length=200)
users = models.ManyToManyField(User, blank=True)
genre = models.ForeignKey(Genre, on_delete=models.CASCADE, blank=True,
null=True)
null=True)
def __str__(self):
track_str = "{}, genre: {}, artists: [".format(self.name, self.genre)

89
api/tests.py Normal file
View File

@@ -0,0 +1,89 @@
from django.test import TestCase
from api.models import Track, Genre, Artist
from login.models import User
from api import utils
import math
import pprint
class GenreDataTestCase(TestCase):
    """Check the per-genre artist breakdown returned by utils.get_artists_in_genre.

    Fixture: a single user whose library holds three 'classical' tracks and
    three 'pop' tracks, each credited to two or three artists.
    """

    @staticmethod
    def _add_track(track_id, title, genre_obj, owner, credited_artists):
        """Create one Track owned by ``owner`` and credit it to each artist in order."""
        new_track = Track.objects.create(id=track_id, year=2013,
                                         popularity=5, runtime=20,
                                         name=title,
                                         genre=genre_obj)
        new_track.users.add(owner)
        for credited in credited_artists:
            new_track.artists.add(credited)
        return new_track

    @classmethod
    def setUpTestData(cls):
        listener = User.objects.create(id="chrisshi", refresh_token="blah",
                                       access_token="blah",
                                       access_expires_in=10)

        # classical: Beethoven is on 3 tracks, Mozart and Chopin on 2 each
        classical = Genre.objects.create(name="classical", num_songs=3)
        beethoven = Artist.objects.create(id='art1', name="Beethoven")
        mozart = Artist.objects.create(id='art2', name="Mozart")
        chopin = Artist.objects.create(id='art3', name='Chopin')
        cls._add_track('track1', 'concerto1', classical, listener,
                       [beethoven, mozart])
        cls._add_track('track2', 'concerto2', classical, listener,
                       [mozart, chopin, beethoven])
        cls._add_track('track3', 'concerto3', classical, listener,
                       [beethoven, chopin])

        # pop: Taylor Swift and Justin Bieber are on 3 tracks, Rihanna on 2
        pop = Genre.objects.create(name='pop', num_songs=3)
        swift = Artist.objects.create(id='art4', name="Taylor Swift")
        bieber = Artist.objects.create(id='art5', name="Justin Bieber")
        rihanna = Artist.objects.create(id='art6', name="Rihanna")
        cls._add_track('track4', 'poptrack1', pop, listener,
                       [swift, bieber])
        cls._add_track('track5', 'poptrack2', pop, listener,
                       [rihanna, bieber, swift])
        cls._add_track('track6', 'poptrack3', pop, listener,
                       [rihanna, bieber, swift])

    def test_get_artist_counts_two_genres(self):
        """Each artist's weighted count matches, and each genre's counts sum to 3."""
        listener = User.objects.get(id='chrisshi')
        expected_by_genre = (
            ('classical', (('Beethoven', 1.3),
                           ('Mozart', 0.85),
                           ('Chopin', 0.85))),
            ('pop', (('Taylor Swift', 1.125),
                     ('Justin Bieber', 1.125),
                     ('Rihanna', 0.75))),
        )
        for genre_name, expected_shares in expected_by_genre:
            counts = utils.get_artists_in_genre(listener, genre_name)
            for artist_name, share in expected_shares:
                self.assertTrue(
                    math.isclose(counts[artist_name], share, rel_tol=0.05))
            # the weighted counts must add back up to the 3 tracks in the genre
            self.assertTrue(
                math.isclose(sum(counts.values()), 3, rel_tol=0.01))

View File

@@ -14,6 +14,8 @@ from login.models import User
from pprint import pprint
from dateutil.parser import parse
from datetime import datetime
from django.db.models import FloatField
from django.db.models.functions import Cast
HISTORY_ENDPOINT = 'https://api.spotify.com/v1/me/player/recently-played'
@@ -39,16 +41,13 @@ def update_track_genres(user_obj):
user_tracks = Track.objects.filter(users__exact=user_obj)
for track in user_tracks:
# just using this variable to save another call to db
track_artists = track.artists.all()
# set genres to first artist's genres then find intersection with others
shared_genres = track_artists.first().genres.all()
for artist in track_artists:
shared_genres = shared_genres.intersection(artist.genres.all())
shared_genres = shared_genres.order_by('-num_songs')
track_artists = list(track.artists.all())
# TODO: Use the most popular genre of the first artist as the Track genre
first_artist_genres = track_artists[0].genres.all().order_by('-num_songs')
undefined_genre_obj = Genre.objects.get(name="undefined")
most_common_genre = shared_genres.first() if shared_genres.first() is \
not undefined_genre_obj else shared_genres[1]
most_common_genre = first_artist_genres.first() if first_artist_genres.first() is \
not undefined_genre_obj else first_artist_genres[1]
track.genre = most_common_genre if most_common_genre is not None \
else undefined_genre_obj
track.save()
@@ -153,16 +152,15 @@ def get_audio_features(headers, track_objs):
# process_artist_genre {{{ #
def process_artist_genre(genre_name, artist_obj):
"""Increase count for correspoding Genre object to genre_name and add that
Genre to artist_obj.
"""Increase count for corresponding Genre object to genre_name and associate that
Genre object with artist_obj.
:genre_name: Name of genre.
:artist_obj: Artist object to add Genre object to.
:artist_obj: Artist object to associate Genre object with
:returns: None
"""
genre_obj, created = Genre.objects.get_or_create(name=genre_name,
defaults={'num_songs':1})
genre_obj, created = Genre.objects.get_or_create(name=genre_name, defaults={'num_songs': 1})
if not created:
genre_obj.num_songs = F('num_songs') + 1
genre_obj.save()
@@ -185,7 +183,6 @@ def add_artist_genres(headers, artist_objs):
"""
artist_ids = str.join(",", [artist_obj.id for artist_obj in artist_objs])
params = {'ids': artist_ids}
artists_response = requests.get('https://api.spotify.com/v1/artists/',
headers=headers,
params={'ids': artist_ids},
@@ -208,34 +205,26 @@ def add_artist_genres(headers, artist_objs):
# get_artists_in_genre {{{ #
def get_artists_in_genre(user, genre):
    """Return each artist's weighted song count for the user's tracks in a genre.

    A track credited to several artists would be counted once per artist, so
    every artist's raw track count is scaled by
    (tracks in genre / total artist credits on those tracks). The returned
    values therefore add up to roughly the number of tracks in the genre
    (exactly, before rounding).

    :user: User object to return data for.
    :genre: genre to count artists for. (string)
    :returns: dict mapping artist name to that artist's weighted number of
        songs in the genre, rounded to 2 decimal places. Only artists with at
        least one of the user's tracks in the genre are included; the dict is
        empty when the user has no such tracks.
    :raises: Genre.DoesNotExist if no Genre is named ``genre``.

    """
    genre_obj = Genre.objects.get(name=genre)
    tracks_in_genre = Track.objects.filter(genre=genre_obj, users=user)
    track_count = tracks_in_genre.count()
    # total number of artist credits across the tracks (a track with three
    # artists contributes three)
    total_artist_counts = tracks_in_genre.aggregate(counts=Count('artists'))['counts']

    # nothing to weight, and dividing by zero below must be avoided
    if not total_artist_counts:
        return {}

    # Both conditions sit in one filter() call so they apply to the same
    # related track; the annotation then counts only those tracks, giving
    # every artist's count in a single query.
    artists_in_genre = (Artist.objects
                        .filter(track__genre=genre_obj, track__users=user)
                        .annotate(num_songs=Count('track', distinct=True))
                        )

    return {
        artist.name: round(artist.num_songs * track_count / total_artist_counts, 2)
        for artist in artists_in_genre
    }
# }}} get_artists_in_genre #

View File

@@ -33,9 +33,8 @@ FEATURES_LIMIT = 100
# FEATURES_LIMIT = 25
TRACKS_TO_QUERY = 100
TRACKS_ENDPOINT = 'https://api.spotify.com/v1/tracks'
console_logging = True
# console_logging = False
CONSOLE_LOGGING = True
# CONSOLE_LOGGING = False
# }}} constants #
@@ -66,9 +65,7 @@ def parse_library(request, user_secret):
headers=user_headers,
params=payload).json()['items']
if console_logging:
tracks_processed = 0
tracks_processed = 0
for track_dict in saved_tracks_response:
track_artists = save_track_artists(track_dict['track'], artist_genre_queue,
user_headers)
@@ -76,7 +73,7 @@ def parse_library(request, user_secret):
track_artists, user_obj)
# add audio features {{{ #
# if a new track is not created, the associated audio feature does
# not need to be created again
if track_created:
@@ -87,7 +84,7 @@ def parse_library(request, user_secret):
# }}} add audio features #
if console_logging:
if CONSOLE_LOGGING:
tracks_processed += 1
print("Added track #{}: {} - {}".format(
offset + tracks_processed,
@@ -99,7 +96,7 @@ def parse_library(request, user_secret):
offset += USER_TRACKS_LIMIT
# clean-up {{{ #
# update remaining artists without genres and songs without features if
# there are any
if len(artist_genre_queue) > 0:
@@ -177,19 +174,39 @@ def get_audio_feature_data(request, audio_feature, user_secret):
# get_genre_data {{{ #
def get_genre_data(request, user_secret):
    """Return the genre data needed to create the graph, as JSON.

    :request: incoming HTTP request (not read by this view).
    :user_secret: secret of the User whose tracks are summarised.
    :returns: JsonResponse wrapping a list with one dict per genre, e.g.
        [
            {'genre': 'classical',
             'num_songs': 100,
             'artists': {
                 'Helene Grimaud': 40.5,
                 'Beethoven': 31.2,
                 'Mozart': 22...
             }
            },
            {'genre': 'pop',
             'num_songs': 150,
             'artists': {...}
            },...
        ]

    """
    user = User.objects.get(secret=user_secret)
    # The annotate() applies to each genre and not each Track, due to the
    # earlier values() call. The rows are materialised into a list up front so
    # the 'artists' entries added below are the very dicts that get
    # serialised, instead of relying on the QuerySet's result cache.
    genre_counts = list(Track.objects.filter(users__exact=user)
                        .values('genre')
                        .order_by('genre')
                        .annotate(num_songs=Count('genre'))
                        )
    # genre_counts now has the format
    # [{'genre': 'classical', 'num_songs': 100}, {'genre': 'pop', 'num_songs': 50}...]

    for genre_dict in genre_counts:
        # NOTE(review): values('genre') yields the Genre primary key, while
        # get_artists_in_genre looks the genre up by name - this assumes the
        # two are the same value; confirm against the Genre model.
        genre_dict['artists'] = get_artists_in_genre(user, genre_dict['genre'])

    # debug output follows the same switch as the rest of this module
    if CONSOLE_LOGGING:
        print("*** Genre Breakdown ***")
        pprint(genre_counts)

    return JsonResponse(data=genre_counts, safe=False)
@@ -258,7 +275,7 @@ def import_history(request, upload_id):
history_obj = save_history_obj(upload_obj.user, timestamp,
track_obj)
if console_logging:
if CONSOLE_LOGGING:
print("Processed row #{}: {}".format(
(rows_read - TRACKS_LIMIT) + responses_processed, history_obj,))
responses_processed += 1