Browse Source

Merge branch 'database' of https://github.com/Kevin-Mok/spotify-lib-vis into database

master
Chris Shyi 6 years ago
parent
commit
77662a6a5e
  1. 5
      .gitignore
  2. 2
      musicvis/settings.py
  3. 26
      spotifyvis/migrations/0001_initial.py
  4. 53
      spotifyvis/migrations/0002_auto_20180606_0523.py
  5. 23
      spotifyvis/migrations/0003_auto_20180606_0525.py
  6. 50
      spotifyvis/models.py
  7. 1
      spotifyvis/templates/spotifyvis/user_data.html
  8. 100
      spotifyvis/utils.py
  9. 28
      spotifyvis/views.py

5
.gitignore

@ -6,6 +6,5 @@ db.sqlite3
api-keys.sh
Pipfile
super-pass.txt
*.js
*.ini
*.txt
graph.js

2
musicvis/settings.py

@ -110,7 +110,7 @@ AUTH_PASSWORD_VALIDATORS = [
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
TIME_ZONE = 'America/Toronto'
USE_I18N = True

26
spotifyvis/migrations/0001_initial.py

@ -1,4 +1,4 @@
# Generated by Django 2.0.5 on 2018-06-03 23:01
# Generated by Django 2.0.5 on 2018-06-06 07:26
from django.db import migrations, models
import django.db.models.deletion
@ -20,34 +20,32 @@ class Migration(migrations.Migration):
('genre', models.CharField(max_length=20)),
],
options={
'verbose_name': 'Artist',
'verbose_name_plural': 'Artists',
'verbose_name': 'Artist',
},
),
migrations.CreateModel(
name='Track',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('track_id', models.CharField(max_length=30)),
('track_id', models.CharField(max_length=30, primary_key=True, serialize=False)),
('year', models.PositiveSmallIntegerField()),
('popularity', models.DecimalField(decimal_places=2, max_digits=2)),
('popularity', models.PositiveSmallIntegerField()),
('runtime', models.PositiveSmallIntegerField()),
('name', models.CharField(max_length=75)),
],
options={
'verbose_name': 'Track',
'verbose_name_plural': 'Tracks',
'verbose_name': 'Track',
},
),
migrations.CreateModel(
name='User',
fields=[
('user_id', models.CharField(max_length=30, primary_key=True, serialize=False)),
('username', models.CharField(max_length=30)),
],
options={
'verbose_name': 'User',
'verbose_name_plural': 'Users',
'verbose_name': 'User',
},
),
migrations.CreateModel(
@ -64,22 +62,18 @@ class Migration(migrations.Migration):
('tempo', models.DecimalField(decimal_places=2, max_digits=2)),
],
options={
'verbose_name': 'AudioFeatures',
'verbose_name_plural': 'AudioFeatures',
'verbose_name': 'AudioFeatures',
},
),
migrations.AddField(
model_name='track',
name='artist',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='spotifyvis.Artist'),
name='artists',
field=models.ManyToManyField(blank=True, to='spotifyvis.Artist'),
),
migrations.AddField(
model_name='track',
name='users',
field=models.ManyToManyField(to='spotifyvis.User'),
),
migrations.AlterUniqueTogether(
name='track',
unique_together={('track_id', 'artist')},
field=models.ManyToManyField(blank=True, to='spotifyvis.User'),
),
]

53
spotifyvis/migrations/0002_auto_20180606_0523.py

@ -0,0 +1,53 @@
# Generated by Django 2.0.5 on 2018-06-06 09:23
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter every numeric AudioFeatures column to DecimalField(3, 3).

    Each listed field on the ``audiofeatures`` model is redefined with
    ``max_digits=3`` and ``decimal_places=3``. The operations are emitted in
    the order the field names appear below.
    """

    dependencies = [
        ('spotifyvis', '0001_initial'),
    ]

    # One AlterField per audio feature; all share the same field definition.
    operations = [
        migrations.AlterField(
            model_name='audiofeatures',
            name=feature_name,
            field=models.DecimalField(decimal_places=3, max_digits=3),
        )
        for feature_name in (
            'acousticness',
            'danceability',
            'energy',
            'instrumentalness',
            'loudness',
            'speechiness',
            'tempo',
            'valence',
        )
    ]

23
spotifyvis/migrations/0003_auto_20180606_0525.py

@ -0,0 +1,23 @@
# Generated by Django 2.0.5 on 2018-06-06 09:25
from django.db import migrations, models
class Migration(migrations.Migration):
    """Widen ``loudness`` and ``tempo`` on AudioFeatures to max_digits=6.

    Both fields keep ``decimal_places=3``; only the total digit count is
    raised relative to migration 0002, which this migration depends on.
    """

    dependencies = [
        ('spotifyvis', '0002_auto_20180606_0523'),
    ]

    # Same field definition applied to each of the two columns, in order.
    operations = [
        migrations.AlterField(
            model_name='audiofeatures',
            name=feature_name,
            field=models.DecimalField(decimal_places=3, max_digits=6),
        )
        for feature_name in ('loudness', 'tempo')
    ]

50
spotifyvis/models.py

@ -1,12 +1,16 @@
from django.db import models
# id's are 22 in length in examples but set to 30 for buffer
id_length=30
# Artist {{{ #
class Artist(models.Model):
class Meta:
verbose_name = "Artist"
verbose_name_plural = "Artists"
artist_id = models.CharField(primary_key=True, max_length=30)
artist_id = models.CharField(primary_key=True, max_length=id_length)
# unique since only storing one genre per artist right now
name = models.CharField(unique=True, max_length=50)
genre = models.CharField(max_length=20)
@ -14,37 +18,47 @@ class Artist(models.Model):
def __str__(self):
return self.name
# }}} Artist #
# User {{{ #
class User(models.Model):
class Meta:
verbose_name = "User"
verbose_name_plural = "Users"
user_id = models.CharField(primary_key=True, max_length=30) # the user's Spotify ID
user_name = models.CharField(max_length=30, blank=True) # User's Spotify user name, if set
user_id = models.CharField(primary_key=True, max_length=id_length) # the user's Spotify ID
# username = models.CharField(max_length=30) # User's Spotify user name, if set
def __str__(self):
return self.username
return self.user_id
# }}} User #
# Track {{{ #
class Track(models.Model):
class Meta:
verbose_name = "Track"
verbose_name_plural = "Tracks"
unique_together = ('track_id', 'artist',)
# unique_together = ('track_id', 'artist',)
track_id = models.CharField(max_length=30)
artist = models.ForeignKey(Artist, on_delete=models.CASCADE)
track_id = models.CharField(primary_key=True, max_length=id_length)
# artist = models.ForeignKey(Artist, on_delete=models.CASCADE)
artists = models.ManyToManyField(Artist, blank=True)
year = models.PositiveSmallIntegerField()
popularity = models.DecimalField(decimal_places=2, max_digits=2)
popularity = models.PositiveSmallIntegerField()
runtime = models.PositiveSmallIntegerField()
name = models.CharField(max_length=75)
users = models.ManyToManyField(User)
users = models.ManyToManyField(User, blank=True)
def __str__(self):
return self.name
# }}} Track #
# AudioFeatures {{{ #
class AudioFeatures(models.Model):
@ -53,14 +67,16 @@ class AudioFeatures(models.Model):
verbose_name_plural = "AudioFeatures"
track = models.OneToOneField(Track, on_delete=models.CASCADE, primary_key=True,)
danceability = models.DecimalField(decimal_places=2, max_digits=2)
energy = models.DecimalField(decimal_places=2, max_digits=2)
loudness = models.DecimalField(decimal_places=2, max_digits=2)
speechiness = models.DecimalField(decimal_places=2, max_digits=2)
acousticness = models.DecimalField(decimal_places=2, max_digits=2)
instrumentalness = models.DecimalField(decimal_places=2, max_digits=2)
valence = models.DecimalField(decimal_places=2, max_digits=2)
tempo = models.DecimalField(decimal_places=2, max_digits=2)
acousticness = models.DecimalField(decimal_places=3, max_digits=3)
danceability = models.DecimalField(decimal_places=3, max_digits=3)
energy = models.DecimalField(decimal_places=3, max_digits=3)
instrumentalness = models.DecimalField(decimal_places=3, max_digits=3)
loudness = models.DecimalField(decimal_places=3, max_digits=6)
speechiness = models.DecimalField(decimal_places=3, max_digits=3)
tempo = models.DecimalField(decimal_places=3, max_digits=6)
valence = models.DecimalField(decimal_places=3, max_digits=3)
def __str__(self):
return super(AudioFeatures, self).__str__()
# }}} AudioFeatures #

1
spotifyvis/templates/spotifyvis/user_data.html

@ -22,6 +22,5 @@
<li>{{ genre_name }} - {{ genre_count }}</li>
{% endfor %}
</ul>
<script src="{% static 'spotifyvis/scripts/user_data.js' %}"></script>
</body>
</html>

100
spotifyvis/utils.py

@ -1,8 +1,12 @@
# imports {{{ #
import requests
import math
import pprint
from .models import Artist, User, Track, AudioFeatures
# }}} imports #
# parse_library {{{ #
def parse_library(headers, tracks, library_stats, user):
@ -26,37 +30,90 @@ def parse_library(headers, tracks, library_stats, user):
num_samples = 0 # number of actual track samples
feature_data_points = 0 # number of feature data analyses (some tracks do not have analyses available)
# iterate until hit requested num of tracks
for _ in range(0, tracks, limit):
payload['offset'] = str(offset)
# get current set of tracks
saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
# TODO: refactor the for loop body into helper function
# iterate through each track
for track_dict in saved_tracks_response['items']:
num_samples += 1
# update artist info before track so that Track object can reference
# Artist object
track_artists = []
for artist_dict in track_dict['track']['artists']:
increase_artist_count(headers, artist_dict['name'],
artist_dict['id'], library_stats)
track_artists.append(Artist.objects.get_or_create(
artist_id=artist_dict['id'],
name=artist_dict['name'],
)[0])
track_obj = save_track_obj(track_dict['track'], track_artists, user)
get_track_info(track_dict['track'], library_stats, num_samples)
# get_genre(headers, track_dict['track']['album']['id'])
audio_features_dict = get_audio_features(headers, track_dict['track']['id'])
audio_features_dict = get_audio_features(headers,
track_dict['track']['id'], track_obj)
if len(audio_features_dict) != 0:
# Track the number of audio analyses for calculating
# audio feature averages and standard deviations on the fly
feature_data_points += 1
for feature, feature_data in audio_features_dict.items():
update_audio_feature_stats(feature, feature_data, feature_data_points, library_stats)
for artist_dict in track_dict['track']['artists']:
increase_artist_count(headers, artist_dict['name'], artist_dict['id'], library_stats)
update_audio_feature_stats(feature, feature_data,
feature_data_points, library_stats)
# calculates num_songs with offset + songs retrieved
library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
offset += limit
calculate_genres_from_artists(headers, library_stats)
pprint.pprint(library_stats)
# pprint.pprint(library_stats)
# }}} parse_library #
def get_audio_features(headers, track_id):
# save_track_obj {{{ #
def save_track_obj(track_dict, artists, user):
    """Fetch or create the database entry for a track and link it to a user.

    :track_dict: dictionary from the API call containing track information.
        Reads the keys 'id', 'name', 'popularity', 'duration_ms' and
        track_dict['album']['release_date'].
    :artists: artists of the song, passed in as a list of Artist objects.
    :user: User object for which this Track is to be associated with.
    :returns: The created/retrieved Track object.

    """
    # get_or_create does the lookup and the insert in one call, so the
    # function always returns a Track and never falls through to None.
    track_obj, created = Track.objects.get_or_create(
        track_id=track_dict['id'],
        defaults={
            # release_date is split on '-' and its first component is the year
            'year': int(track_dict['album']['release_date'].split('-')[0]),
            'popularity': int(track_dict['popularity']),
            # duration_ms is in milliseconds; runtime is stored in whole seconds
            'runtime': int(float(track_dict['duration_ms']) / 1000),
            'name': track_dict['name'],
        },
    )

    # m2m relations can only be filled in once the Track row exists; the
    # artists only need to be attached the first time the track is stored.
    if created:
        track_obj.artists.add(*artists)

    # Always associate the user: previously an already-stored track was
    # returned without linking it to the current user. Adding an existing
    # relation again is a no-op, so this is safe to repeat.
    track_obj.users.add(user)

    return track_obj
# }}} save_track_obj #
# get_audio_features {{{ #
def get_audio_features(headers, track_id, track):
"""Returns the audio features of a soundtrack
Args:
headers: headers containing the API token
track_id: the id of the soundtrack, needed to query the Spotify API
track: Track object to associate with the AudioFeatures object
Returns:
A dictionary with the features as its keys, if audio feature data is missing for the track,
@ -72,12 +129,19 @@ def get_audio_features(headers, track_id):
useless_keys = [
"key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
]
audio_features_entry = AudioFeatures()
audio_features_entry.track = track
for key, val in response.items():
if key not in useless_keys:
features_dict[key] = val
setattr(audio_features_entry, key, val)
audio_features_entry.save()
return features_dict
# }}} get_audio_features #
# update_std_dev {{{ #
def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
"""Calculates the standard deviation for a sample without storing all data points
@ -101,6 +165,9 @@ def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
))
return new_mean, new_std_dev
# }}} update_std_dev #
# update_audio_feature_stats {{{ #
def update_audio_feature_stats(feature, new_data_point, sample_size, library_stats):
"""Updates the audio feature statistics in library_stats
@ -131,6 +198,7 @@ def update_audio_feature_stats(feature, new_data_point, sample_size, library_sta
"std_dev": new_std_dev
}
# }}} update_audio_feature_stats #
# increase_nested_key {{{ #
@ -174,6 +242,8 @@ def increase_artist_count(headers, artist_name, artist_id, library_stats):
# }}} increase_artist_count #
# update_popularity_stats {{{ #
def update_popularity_stats(new_data_point, library_stats, sample_size):
"""Updates the popularity statistics in library_stats
@ -200,6 +270,8 @@ def update_popularity_stats(new_data_point, library_stats, sample_size):
"std_dev": new_std_dev,
}
# }}} update_popularity_stats #
# get_track_info {{{ #
def get_track_info(track_dict, library_stats, sample_size):
@ -219,11 +291,6 @@ def get_track_info(track_dict, library_stats, sample_size):
year_released = track_dict['album']['release_date'].split('-')[0]
increase_nested_key('year_released', year_released, library_stats)
# artist
# artist_names = [artist['name'] for artist in track_dict['artists']]
# for artist_name in artist_names:
# increase_nested_key('artists', artist_name)
# runtime
library_stats['total_runtime'] += float(track_dict['duration_ms']) / (1000 * 60)
@ -246,8 +313,13 @@ def calculate_genres_from_artists(headers, library_stats):
for genre in artist_response['genres']:
increase_nested_key('genres', genre, library_stats, artist_entry['count'])
# update genre for artist in database with top genre
Artist.objects.filter(artist_id=artist_entry['id']).update(genre=artist_response['genres'][0])
# }}} calculate_genres_from_artists #
# process_library_stats {{{ #
def process_library_stats(library_stats):
"""Processes library_stats into format more suitable for D3 consumption
@ -298,3 +370,5 @@ def process_library_stats(library_stats):
processed_library_stats[key] = library_stats[key]
return processed_library_stats
# }}} process_library_stats #

28
spotifyvis/views.py

@ -1,3 +1,5 @@
# imports {{{ #
from django.shortcuts import render, redirect
from django.http import HttpResponse, HttpResponseBadRequest
import math
@ -11,6 +13,8 @@ from datetime import datetime
from .utils import parse_library, process_library_stats
from .models import User, Track, AudioFeatures, Artist
# }}} imports #
TIME_FORMAT = '%Y-%m-%d-%H-%M-%S'
TRACKS_TO_QUERY = 5
@ -135,20 +139,12 @@ def user_data(request):
user_data_response = requests.get('https://api.spotify.com/v1/me', headers = headers).json()
request.session['user_id'] = user_data_response['id'] # store the user_id so it may be used to create model
display_name = user_data_response['display_name']
if display_name is not None:
request.session['user_name'] = display_name
else:
request.session['user_name'] = ""
user = None # will be set to the current user object later
try:
user = User.objects.get(user_id=request.session['user_id'])
except User.DoesNotExist:
user = User.objects.create(user_id=request.session['user_id'], user_name=request.session['user_name'])
# request.session['user_name'] = user_data_response['display_name']
user = User.objects.get_or_create(user_id=user_data_response['id'])[0]
context = {
'user_name': user_data_response['display_name'],
'id': user_data_response['id'],
'user_name': user_data_response['display_name'],
'id': user_data_response['id'],
}
library_stats = {
@ -165,9 +161,9 @@ def user_data(request):
}
parse_library(headers, TRACKS_TO_QUERY, library_stats, user)
processed_library_stats = process_library_stats(library_stats)
print("================================================")
print("Processed data follows\n")
pprint.pprint(processed_library_stats)
# print("================================================")
# print("Processed data follows\n")
# pprint.pprint(processed_library_stats)
return render(request, 'spotifyvis/user_data.html', context)
# }}} user_data #
# }}} user_data #
Loading…
Cancel
Save