From fc6c30ec32f488c33c0f3e2114886c5de8b19af9 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sat, 30 Jun 2018 17:59:34 -0400 Subject: [PATCH 1/6] Refactor audio feat graph Fixes #51. Instead of passing in an array of hard coded values as the interval end points, an object specifying the beginning, the end, and the step size is used. Categories can be more easily defined and modified this way. --- api/utils.py | 2 +- api/views.py | 3 +- .../static/graphs/scripts/audio_feat_graph.js | 29 +++++++++++++------ graphs/templates/graphs/features_graphs.html | 16 +++++----- reset_db.sh | 16 +++++----- 5 files changed, 39 insertions(+), 27 deletions(-) diff --git a/api/utils.py b/api/utils.py index d9db42e..ef0c99f 100644 --- a/api/utils.py +++ b/api/utils.py @@ -170,7 +170,7 @@ def add_artist_genres(headers, artist_objs): params = {'ids': artist_ids} artists_response = requests.get('https://api.spotify.com/v1/artists/', headers=headers, - params={'ids': artist_ids}, + params=params, ).json()['artists'] for i in range(len(artist_objs)): if len(artists_response[i]['genres']) == 0: diff --git a/api/views.py b/api/views.py index 5a482d2..2f8e96c 100644 --- a/api/views.py +++ b/api/views.py @@ -166,8 +166,9 @@ def get_audio_feature_data(request, audio_feature, user_secret): # get_genre_data {{{ # + def get_genre_data(request, user_secret): - """Return genre data needed to create the graph user. + """Return genre data needed to create the graph TODO """ user = User.objects.get(secret=user_secret) diff --git a/graphs/static/graphs/scripts/audio_feat_graph.js b/graphs/static/graphs/scripts/audio_feat_graph.js index 1fab293..2b3d97e 100644 --- a/graphs/static/graphs/scripts/audio_feat_graph.js +++ b/graphs/static/graphs/scripts/audio_feat_graph.js @@ -3,10 +3,10 @@ * a designated parent element * * @param audioFeature: the name of the audio feature (string) - * @param intervalEndPoints: a sorted array of 5 real numbers defining the intervals (categories) of values, + * @param intervalEndPoints: a object defining the intervals (categories) of values, * for example: - * [0, 0.25, 0.5, 0.75, 1.0] for instrumentalness would define ranges - * (0-0.25), (0.25-0.5), (0.5-0.75), (0.75-1.0) + * {begin: 0, end: 1.0, step: 0.25} for instrumentalness would define ranges + * [0-0.25), [0.25-0.5), [0.5-0.75), [0.75-1.0] * @param parentElem: the DOM element to append the graph to (a selector string) * @param userSecret: the user secret string for identification * @return None @@ -18,11 +18,20 @@ function drawAudioFeatGraph(audioFeature, intervalEndPoints, parentElem, userSec height = 270 - margin.top - margin.bottom; let featureData = {}; + let currentEndPoint = intervalEndPoints.begin; // start at beginning // Create the keys first in order - for (let index = 0; index < intervalEndPoints.length - 1; index++) { - let key = `${intervalEndPoints[index]} ~ ${intervalEndPoints[index + 1]}`; + while (currentEndPoint !== intervalEndPoints.end) { + let startOfRange = currentEndPoint; + let endOfRange = startOfRange + intervalEndPoints.step; + + let key = `${startOfRange} ~ ${endOfRange}`; featureData[key] = 0; + currentEndPoint = endOfRange; } + // for (let index = 0; index < intervalEndPoints.length - 1; index++) { + // let key = `${intervalEndPoints[index]} ~ ${intervalEndPoints[index + 1]}`; + // featureData[key] = 0; + // } // define the vertical scaling function let vScale = d3.scaleLinear().range([height, 0]); @@ -31,12 +40,14 @@ function drawAudioFeatGraph(audioFeature, intervalEndPoints, parentElem, userSec // categorize the data points for (let dataPoint of response.data_points) { dataPoint = parseFloat(dataPoint); - let index = intervalEndPoints.length - 2; + let currLowerBound = intervalEndPoints.end - intervalEndPoints.step; + let stepSize = intervalEndPoints.step; // find the index of the first element greater than dataPoint - while (dataPoint < intervalEndPoints[index]) { - index -= 1; + while (dataPoint < currLowerBound) { + currLowerBound -= stepSize; } - let key = `${intervalEndPoints[index]} ~ ${intervalEndPoints[index + 1]}`; + let upperBound = currLowerBound + stepSize; + let key = `${currLowerBound} ~ ${upperBound}`; featureData[key] += 1; } diff --git a/graphs/templates/graphs/features_graphs.html b/graphs/templates/graphs/features_graphs.html index ea01793..d8273af 100644 --- a/graphs/templates/graphs/features_graphs.html +++ b/graphs/templates/graphs/features_graphs.html @@ -24,14 +24,14 @@ diff --git a/reset_db.sh b/reset_db.sh index b833b8a..b4805d6 100755 --- a/reset_db.sh +++ b/reset_db.sh @@ -1,15 +1,15 @@ # check if in virtual environment # https://stackoverflow.com/questions/15454174/how-can-a-shell-function-know-if-it-is-running-within-a-virtualenv/15454916 -# python -c 'import sys; print(sys.real_prefix)' 2>/dev/null && INVENV=1 || INVENV=0 +python -c 'import sys; print(sys.real_prefix)' 2>/dev/null && INVENV=1 || INVENV=0 # INVENV=$(python -c 'import sys; print ("1" if hasattr(sys, "real_prefix") else "0")') # if $INVENV is 1, then in virtualenv # echo $INVENV -# if [ $INVENV -eq 1 ]; then -rm login/migrations/0* api/migrations/0* -sudo -u postgres psql -f reset_db.sql -python manage.py makemigrations -python manage.py migrate -python manage.py runserver -# fi +if [ $INVENV -eq 1 ]; then + rm login/migrations/0* api/migrations/0* + sudo -u postgres psql -f reset_db.sql + python manage.py makemigrations login api + python manage.py migrate + python manage.py runserver +fi From 01759c59b304cb1faffaf2eb172a56c7d4bccd6e Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sat, 30 Jun 2018 18:51:52 -0400 Subject: [PATCH 2/6] Fix floating point precision issue in audio feat The last commit (fc6c30ec32f488c33c0f3e2114886c5de8b19af9) was affected by a floating point addition/subtraction precision bug. The bug caused nonsensical categories to appear on the audio feature bar charts. Now fixed. --- .../static/graphs/scripts/audio_feat_graph.js | 22 ++++++++++++++----- graphs/templates/graphs/features_graphs.html | 14 ++++++------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/graphs/static/graphs/scripts/audio_feat_graph.js b/graphs/static/graphs/scripts/audio_feat_graph.js index 2b3d97e..5be85fb 100644 --- a/graphs/static/graphs/scripts/audio_feat_graph.js +++ b/graphs/static/graphs/scripts/audio_feat_graph.js @@ -20,9 +20,9 @@ function drawAudioFeatGraph(audioFeature, intervalEndPoints, parentElem, userSec let featureData = {}; let currentEndPoint = intervalEndPoints.begin; // start at beginning // Create the keys first in order - while (currentEndPoint !== intervalEndPoints.end) { + while (currentEndPoint < intervalEndPoints.end) { let startOfRange = currentEndPoint; - let endOfRange = startOfRange + intervalEndPoints.step; + let endOfRange = precise(startOfRange + intervalEndPoints.step); let key = `${startOfRange} ~ ${endOfRange}`; featureData[key] = 0; @@ -40,13 +40,14 @@ function drawAudioFeatGraph(audioFeature, intervalEndPoints, parentElem, userSec // categorize the data points for (let dataPoint of response.data_points) { dataPoint = parseFloat(dataPoint); - let currLowerBound = intervalEndPoints.end - intervalEndPoints.step; + let currLowerBound = precise(intervalEndPoints.end - intervalEndPoints.step); let stepSize = intervalEndPoints.step; // find the index of the first element greater than dataPoint - while (dataPoint < currLowerBound) { - currLowerBound -= stepSize; + while (dataPoint < currLowerBound && currLowerBound >= intervalEndPoints.begin) { + currLowerBound = precise(currLowerBound - stepSize); } - let upperBound = currLowerBound + stepSize; + let upperBound = precise(currLowerBound + stepSize); + currLowerBound = precise(currLowerBound); let key = `${currLowerBound} ~ ${upperBound}`; featureData[key] += 1; } @@ -113,4 +114,13 @@ function drawAudioFeatGraph(audioFeature, intervalEndPoints, parentElem, userSec */ function capFeatureStr(audioFeature) { return audioFeature.charAt(0).toUpperCase() + audioFeature.slice(1); +} + +/** + * Converts a number to a floating point value with 2 significant figures + * @param number: the number to be converted + * @returns the input converted to two significant digits + */ +function precise(number) { + return Number.parseFloat(number.toPrecision(2)); } \ No newline at end of file diff --git a/graphs/templates/graphs/features_graphs.html b/graphs/templates/graphs/features_graphs.html index d8273af..cf8f01a 100644 --- a/graphs/templates/graphs/features_graphs.html +++ b/graphs/templates/graphs/features_graphs.html @@ -24,14 +24,14 @@ From 6665532feaac3ab8fc300862f1f53ff6e71e05a4 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Sun, 1 Jul 2018 21:09:30 -0400 Subject: [PATCH 3/6] Made minor fixes to docstrings --- api/utils.py | 9 ++++----- api/views.py | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/api/utils.py b/api/utils.py index ef0c99f..f2c16b3 100644 --- a/api/utils.py +++ b/api/utils.py @@ -135,11 +135,11 @@ def get_audio_features(headers, track_objs): # process_artist_genre {{{ # def process_artist_genre(genre_name, artist_obj): - """Increase count for correspoding Genre object to genre_name and add that - Genre to artist_obj. + """Increase count for corresponding Genre object to genre_name and associate that + Genre object with artist_obj. :genre_name: Name of genre. - :artist_obj: Artist object to add Genre object to. + :artist_obj: Artist object to associate Genre object with :returns: None """ @@ -167,10 +167,9 @@ def add_artist_genres(headers, artist_objs): """ artist_ids = str.join(",", [artist_obj.id for artist_obj in artist_objs]) - params = {'ids': artist_ids} artists_response = requests.get('https://api.spotify.com/v1/artists/', headers=headers, - params=params, + params={'ids': artist_ids}, ).json()['artists'] for i in range(len(artist_objs)): if len(artists_response[i]['genres']) == 0: diff --git a/api/views.py b/api/views.py index 2f8e96c..5e44c3f 100644 --- a/api/views.py +++ b/api/views.py @@ -56,6 +56,7 @@ def parse_library(request, user_secret): if console_logging: tracks_processed = 0 + tracks_processed = 0 for track_dict in saved_tracks_response: # add artists {{{ # @@ -103,7 +104,7 @@ def parse_library(request, user_secret): offset += USER_TRACKS_LIMIT # clean-up {{{ # - + # update remaining artists without genres and songs without features if # there are any if len(artist_genre_queue) > 0: From 54c541426c029433dc6e05ce2499eeec47634157 Mon Sep 17 00:00:00 2001 From: Chris Shyi Date: Tue, 24 Jul 2018 00:24:47 -0400 Subject: [PATCH 4/6] Genre Artist Breakdown Implemented the generation of genre artist breakdown data for the genre graphs. More test cases needed. --- api/admin.py | 3 +- api/models.py | 5 ++- api/tests.py | 50 +++++++++++++++++++++ api/utils.py | 43 +++++++++--------- api/views.py | 13 +++--- graphs/static/graphs/scripts/genre_graph.js | 14 +++--- graphs/templates/graphs/genre_graph.html | 6 +-- 7 files changed, 93 insertions(+), 41 deletions(-) create mode 100644 api/tests.py diff --git a/api/admin.py b/api/admin.py index bd71265..31e6347 100644 --- a/api/admin.py +++ b/api/admin.py @@ -1,8 +1,9 @@ from django.contrib import admin -from .models import Track, Artist, AudioFeatures, User +from .models import Track, Artist, AudioFeatures, User, Genre # Register your models here. admin.site.register(Track) admin.site.register(Artist) admin.site.register(AudioFeatures) admin.site.register(User) +admin.site.register(Genre) diff --git a/api/models.py b/api/models.py index 41daa01..886b6fb 100644 --- a/api/models.py +++ b/api/models.py @@ -30,6 +30,8 @@ class Artist(models.Model): id = models.CharField(primary_key=True, max_length=MAX_ID) name = models.CharField(max_length=50) genres = models.ManyToManyField(Genre, blank=True) + # genre = models.ForeignKey(Genre, on_delete=models.CASCADE, blank=True, + # null=True) def __str__(self): return self.name @@ -45,7 +47,6 @@ class Track(models.Model): verbose_name_plural = "Tracks" id = models.CharField(primary_key=True, max_length=MAX_ID) - # artist = models.ForeignKey(Artist, on_delete=models.CASCADE) artists = models.ManyToManyField(Artist, blank=True) year = models.PositiveSmallIntegerField() popularity = models.PositiveSmallIntegerField() @@ -53,7 +54,7 @@ class Track(models.Model): name = models.CharField(max_length=200) users = models.ManyToManyField(User, blank=True) genre = models.ForeignKey(Genre, on_delete=models.CASCADE, blank=True, - null=True) + null=True) def __str__(self): track_str = "{}, genre: {}, artists: [".format(self.name, self.genre) diff --git a/api/tests.py b/api/tests.py new file mode 100644 index 0000000..436d3ee --- /dev/null +++ b/api/tests.py @@ -0,0 +1,50 @@ +from django.test import TestCase +from api.models import Track, Genre, Artist +from login.models import User +from api import utils +import math +import pprint + +class GenreDataTestCase(TestCase): + + def setUp(self): + test_user = User.objects.create(id="chrisshi", refresh_token="blah", access_token="blah", + access_expires_in=10) + genre = Genre.objects.create(name="classical", num_songs=3) + artist_1 = Artist.objects.create(id='art1', name="Beethoven") + artist_2 = Artist.objects.create(id='art2', name="Mozart") + artist_3 = Artist.objects.create(id='art3', name='Chopin') + + track_1 = Track.objects.create(id='track1', year=2013, + popularity=5, runtime=20, + name='concerto1', + genre=genre) + track_1.users.add(test_user) + track_1.artists.add(artist_1) + track_1.artists.add(artist_2) + + track_2 = Track.objects.create(id='track2', year=2013, + popularity=5, runtime=20, + name='concerto2', + genre=genre) + track_2.users.add(test_user) + track_2.artists.add(artist_2) + track_2.artists.add(artist_3) + track_2.artists.add(artist_1) + + track_3 = Track.objects.create(id='track3', year=2013, + popularity=5, runtime=20, + name='concerto3', + genre=genre) + track_3.users.add(test_user) + track_3.artists.add(artist_1) + track_3.artists.add(artist_3) + + def test_get_artist_counts_in_genre(self): + test_user = User.objects.get(id='chrisshi') + artist_counts = utils.get_artists_in_genre(test_user, 'classical', 10) + # pprint.pprint(artist_counts) + self.assertTrue(math.isclose(artist_counts['Beethoven'], 1.3, rel_tol=0.05)) + self.assertTrue(math.isclose(artist_counts['Mozart'], 0.85, rel_tol=0.05)) + self.assertTrue(math.isclose(artist_counts['Chopin'], 0.85, rel_tol=0.05)) + self.assertTrue(math.isclose(sum(artist_counts.values()), 3, rel_tol=0.01)) \ No newline at end of file diff --git a/api/utils.py b/api/utils.py index f2c16b3..5da8c7e 100644 --- a/api/utils.py +++ b/api/utils.py @@ -11,6 +11,8 @@ from django.core import serializers from django.utils import timezone from .models import * from login.models import User +from django.db.models import FloatField +from django.db.models.functions import Cast # }}} imports # @@ -34,16 +36,13 @@ def update_track_genres(user_obj): user_tracks = Track.objects.filter(users__exact=user_obj) for track in user_tracks: # just using this variable to save another call to db - track_artists = track.artists.all() - # set genres to first artist's genres then find intersection with others - shared_genres = track_artists.first().genres.all() - for artist in track_artists: - shared_genres = shared_genres.intersection(artist.genres.all()) - shared_genres = shared_genres.order_by('-num_songs') + track_artists = list(track.artists.all()) + # TODO: Use the most popular genre of the first artist as the Track genre + first_artist_genres = track_artists[0].genres.all().order_by('-num_songs') undefined_genre_obj = Genre.objects.get(name="undefined") - most_common_genre = shared_genres.first() if shared_genres.first() is \ - not undefined_genre_obj else shared_genres[1] + most_common_genre = first_artist_genres.first() if first_artist_genres.first() is \ + not undefined_genre_obj else first_artist_genres[1] track.genre = most_common_genre if most_common_genre is not None \ else undefined_genre_obj track.save() @@ -143,8 +142,7 @@ def process_artist_genre(genre_name, artist_obj): :returns: None """ - genre_obj, created = Genre.objects.get_or_create(name=genre_name, - defaults={'num_songs':1}) + genre_obj, created = Genre.objects.get_or_create(name=genre_name, defaults={'num_songs': 1}) if not created: genre_obj.num_songs = F('num_songs') + 1 genre_obj.save() @@ -192,7 +190,7 @@ def get_artists_in_genre(user, genre, max_songs): """Return count of artists in genre. :user: User object to return data for. - :genre: genre to count artists for. + :genre: genre to count artists for. (string) :max_songs: max total songs to include to prevent overflow due to having multiple artists on each track. @@ -200,19 +198,22 @@ def get_artists_in_genre(user, genre, max_songs): have. """ genre_obj = Genre.objects.get(name=genre) - artist_counts = (Artist.objects.filter(track__users=user) - .filter(genres=genre_obj) - .annotate(num_songs=Count('track', distinct=True)) - .order_by('-num_songs') - ) + tracks_in_genre = Track.objects.filter(genre=genre_obj, users=user) + track_count = tracks_in_genre.count() + user_artists = Artist.objects.filter(track__users=user) # use this variable to save on db queries + total_artist_counts = tracks_in_genre.aggregate(counts=Count('artists'))['counts'] + processed_artist_counts = {} - songs_added = 0 - for artist in artist_counts: + # songs_added = 0 + for artist in user_artists: # hacky way to not have total count overflow due to there being multiple # artists on a track - if songs_added + artist.num_songs <= max_songs: - processed_artist_counts[artist.name] = artist.num_songs - songs_added += artist.num_songs + # if songs_added + artist.num_songs <= max_songs: + # processed_artist_counts[artist.name] = artist.num_songs + # songs_added += artist.num_songs + processed_artist_counts[artist.name] = round(artist.track_set + .filter(genre=genre_obj, users=user) + .count() * track_count / total_artist_counts, 2) # processed_artist_counts = [{'name': artist.name, 'num_songs': artist.num_songs} for artist in artist_counts] # processed_artist_counts = {artist.name: artist.num_songs for artist in artist_counts} # pprint.pprint(processed_artist_counts) diff --git a/api/views.py b/api/views.py index 5e44c3f..5433080 100644 --- a/api/views.py +++ b/api/views.py @@ -81,7 +81,7 @@ def parse_library(request, user_secret): track_artists, user_obj) # add audio features {{{ # - + # if a new track is not created, the associated audio feature does # not need to be created again if track_created: @@ -174,13 +174,12 @@ def get_genre_data(request, user_secret): """ user = User.objects.get(secret=user_secret) genre_counts = (Track.objects.filter(users__exact=user) - .values('genre') - .order_by('genre') - .annotate(num_songs=Count('genre')) - ) + .values('genre') + .order_by('genre') + .annotate(num_songs=Count('genre')) + ) for genre_dict in genre_counts: - genre_dict['artists'] = get_artists_in_genre(user, genre_dict['genre'], - genre_dict['num_songs']) + genre_dict['artists'] = get_artists_in_genre(user, genre_dict['genre'], genre_dict['num_songs']) print("*** Genre Breakdown ***") pprint.pprint(list(genre_counts)) return JsonResponse(data=list(genre_counts), safe=False) diff --git a/graphs/static/graphs/scripts/genre_graph.js b/graphs/static/graphs/scripts/genre_graph.js index 6422d22..6c5e02e 100644 --- a/graphs/static/graphs/scripts/genre_graph.js +++ b/graphs/static/graphs/scripts/genre_graph.js @@ -4,7 +4,7 @@ function create_genre_graph(data) { data.forEach(function(d) { d.num_songs = +d.num_songs; console.log(d.genre, d.num_songs); - var artist_names = Object.keys(d.artists); + let artist_names = Object.keys(d.artists); artist_names.forEach(function(e) { d.artists[e] = +d.artists[e]; console.log(e, d.artists[e]); @@ -31,22 +31,22 @@ function create_genre_graph(data) { // setup bar colors {{{ // - var max_artists = d3.max(data, function(d) { + let max_artists = d3.max(data, function(d) { return Object.keys(d.artists).length; }); - var z = d3.scaleOrdinal().range(randomColor({ + let z = d3.scaleOrdinal().range(randomColor({ count: max_artists, luminosity: 'light', })); // }}} setup bar colors // - for (var genre_dict of data) { + for (let genre_dict of data) { // process artist breakdown {{{ // - var keys = Object.keys(genre_dict.artists); - var stack = d3.stack() + let keys = Object.keys(genre_dict.artists); + let stack = d3.stack() //.order(d3.stackOrderAscending) .order(d3.stackOrderDescending) .keys(keys)([genre_dict.artists]) @@ -112,7 +112,7 @@ function create_genre_graph(data) { // https://gist.github.com/guypursey/f47d8cd11a8ff24854305505dbbd8c07#file-index-html function wrap(text, width) { text.each(function() { - var text = d3.select(this), + let text = d3.select(this), words = text.text().split(/\s+/).reverse(), word, line = [], diff --git a/graphs/templates/graphs/genre_graph.html b/graphs/templates/graphs/genre_graph.html index bf7324e..7f2ba76 100644 --- a/graphs/templates/graphs/genre_graph.html +++ b/graphs/templates/graphs/genre_graph.html @@ -26,16 +26,16 @@