Browse Source

Merge pull request #42 from Kevin-Mok/database

Merge database into master
master
Chris Shyi 6 years ago
committed by GitHub
parent
commit
3af1ce852e
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 6
      .gitignore
  2. 2
      musicvis/settings.py
  3. 8
      recreate-db.txt
  4. 2
      requirements.txt
  5. 0
      sample-track-obj.py
  6. 5
      spotifyvis/admin.py
  7. 85
      spotifyvis/migrations/0001_initial.py
  8. 78
      spotifyvis/models.py
  9. 8
      spotifyvis/static/spotifyvis/css/dark_bg.css
  10. 137
      spotifyvis/static/spotifyvis/scripts/genre_graph.js
  11. 0
      spotifyvis/static/spotifyvis/scripts/user_data.js
  12. 141
      spotifyvis/templates/spotifyvis/audio_features.html
  13. 44
      spotifyvis/templates/spotifyvis/genre_graph.html
  14. 13
      spotifyvis/templates/spotifyvis/index.html
  15. 17
      spotifyvis/templates/spotifyvis/logged_in.html
  16. 9
      spotifyvis/templates/spotifyvis/user_data.html
  17. 22
      spotifyvis/urls.py
  18. 481
      spotifyvis/utils.py
  19. 175
      spotifyvis/views.py

6
.gitignore

@ -3,9 +3,9 @@ db.sqlite3
*.bak
.idea/
.vscode/*
*/migrations/*
api-keys.sh
Pipfile
super-pass.txt
*.js
*.ini
*.txt
scrap.py

2
musicvis/settings.py

@ -110,7 +110,7 @@ AUTH_PASSWORD_VALIDATORS = [
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
TIME_ZONE = 'America/Toronto'
USE_I18N = True

8
recreate-db.txt

@ -0,0 +1,8 @@
# https://stackoverflow.com/a/34576062/8811872
sudo su postgres
psql
drop database spotifyvis;
create database spotifyvis with owner django;
\q
exit

2
requirements.txt

@ -7,7 +7,7 @@ idna==2.6
isort==4.3.4
lazy-object-proxy==1.3.1
mccabe==0.6.1
psycopg2==2.7.4
psycopg2-binary==2.7.4
pylint==1.8.4
pytz==2018.4
requests==2.18.4

0
sample-track-obj.json → sample-track-obj.py

5
spotifyvis/admin.py

@ -1,3 +1,8 @@
from django.contrib import admin
from .models import Track, Artist, AudioFeatures, User
# Register your models here.
# Expose every spotifyvis model in the Django admin; register() accepts an
# iterable and registers the models in the order given.
admin.site.register([Track, Artist, AudioFeatures, User])

85
spotifyvis/migrations/0001_initial.py

@ -1,85 +0,0 @@
# Generated by Django 2.0.5 on 2018-06-03 23:01
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Initial schema migration for the spotifyvis app.

    Creates the Artist, Track, User and AudioFeatures models, then adds the
    Track -> Artist foreign key, the Track <-> User many-to-many relation and
    a uniqueness constraint on the (track_id, artist) pair.
    """

    initial = True

    # No earlier migrations are listed as prerequisites.
    dependencies = [
    ]

    operations = [
        # Artist: keyed by artist_id; name is additionally unique.
        migrations.CreateModel(
            name='Artist',
            fields=[
                ('artist_id', models.CharField(max_length=30, primary_key=True, serialize=False)),
                ('name', models.CharField(max_length=50, unique=True)),
                ('genre', models.CharField(max_length=20)),
            ],
            options={
                'verbose_name': 'Artist',
                'verbose_name_plural': 'Artists',
            },
        ),
        # Track: uses an auto-incrementing surrogate key; track_id is a plain
        # column here and only unique together with the artist (see below).
        # NOTE(review): max_digits=2 with decimal_places=2 leaves no integer
        # digits, so only magnitudes below 1 fit -- confirm for popularity.
        migrations.CreateModel(
            name='Track',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('track_id', models.CharField(max_length=30)),
                ('year', models.PositiveSmallIntegerField()),
                ('popularity', models.DecimalField(decimal_places=2, max_digits=2)),
                ('runtime', models.PositiveSmallIntegerField()),
                ('name', models.CharField(max_length=75)),
            ],
            options={
                'verbose_name': 'Track',
                'verbose_name_plural': 'Tracks',
            },
        ),
        # User: keyed by user_id.
        migrations.CreateModel(
            name='User',
            fields=[
                ('user_id', models.CharField(max_length=30, primary_key=True, serialize=False)),
                ('username', models.CharField(max_length=30)),
            ],
            options={
                'verbose_name': 'User',
                'verbose_name_plural': 'Users',
            },
        ),
        # AudioFeatures: one row per Track; the one-to-one link is also the
        # primary key and rows are deleted together with their Track.
        # NOTE(review): every feature shares max_digits=2/decimal_places=2, so
        # values with an integer part (presumably loudness, tempo) would not
        # fit -- confirm.
        migrations.CreateModel(
            name='AudioFeatures',
            fields=[
                ('track', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, serialize=False, to='spotifyvis.Track')),
                ('danceability', models.DecimalField(decimal_places=2, max_digits=2)),
                ('energy', models.DecimalField(decimal_places=2, max_digits=2)),
                ('loudness', models.DecimalField(decimal_places=2, max_digits=2)),
                ('speechiness', models.DecimalField(decimal_places=2, max_digits=2)),
                ('acousticness', models.DecimalField(decimal_places=2, max_digits=2)),
                ('instrumentalness', models.DecimalField(decimal_places=2, max_digits=2)),
                ('valence', models.DecimalField(decimal_places=2, max_digits=2)),
                ('tempo', models.DecimalField(decimal_places=2, max_digits=2)),
            ],
            options={
                'verbose_name': 'AudioFeatures',
                'verbose_name_plural': 'AudioFeatures',
            },
        ),
        # Each track points at exactly one artist; deleting the artist
        # cascades to its tracks.
        migrations.AddField(
            model_name='track',
            name='artist',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='spotifyvis.Artist'),
        ),
        # A track can belong to many users and vice versa.
        migrations.AddField(
            model_name='track',
            name='users',
            field=models.ManyToManyField(to='spotifyvis.User'),
        ),
        # The same track_id may appear once per artist, but not more.
        migrations.AlterUniqueTogether(
            name='track',
            unique_together={('track_id', 'artist')},
        ),
    ]

78
spotifyvis/models.py

@ -1,50 +1,86 @@
from django.db import models
# id's are 22 in length in examples but set to 30 for buffer
MAX_ID = 30
# Genre {{{ #
class Genre(models.Model):
    """A music genre, identified by its name."""

    # The genre name doubles as the primary key (at most 50 characters).
    name = models.CharField(max_length=50, primary_key=True)
    # Non-negative counter; presumably the number of songs tagged with this
    # genre -- it is maintained by code outside this class.
    num_songs = models.PositiveIntegerField()

    class Meta:
        verbose_name_plural = "Genres"
        verbose_name = "Genre"

    def __str__(self):
        """Return the genre name as the display string."""
        return self.name
# }}} Genre #
# Artist {{{ #
class Artist(models.Model):
class Meta:
verbose_name = "Artist"
verbose_name_plural = "Artists"
artist_id = models.CharField(primary_key=True, max_length=30)
artist_id = models.CharField(primary_key=True, max_length=MAX_ID)
# unique since only storing one genre per artist right now
name = models.CharField(unique=True, max_length=50)
genre = models.CharField(max_length=20)
genres = models.ManyToManyField(Genre, blank=True)
def __str__(self):
return self.name
# }}} Artist #
# User {{{ #
class User(models.Model):
class Meta:
verbose_name = "User"
verbose_name_plural = "Users"
user_id = models.CharField(primary_key=True, max_length=30) # the user's Spotify ID
username = models.CharField(max_length=30) # User's Spotify user name, if set
user_id = models.CharField(primary_key=True, max_length=MAX_ID) # the user's Spotify ID
user_secret = models.CharField(max_length=50, default='')
def __str__(self):
return self.username
return self.user_id
# }}} User #
# Track {{{ #
class Track(models.Model):
class Meta:
verbose_name = "Track"
verbose_name_plural = "Tracks"
unique_together = ('track_id', 'artist',)
track_id = models.CharField(max_length=30)
artist = models.ForeignKey(Artist, on_delete=models.CASCADE)
track_id = models.CharField(primary_key=True, max_length=MAX_ID)
# artist = models.ForeignKey(Artist, on_delete=models.CASCADE)
artists = models.ManyToManyField(Artist, blank=True)
year = models.PositiveSmallIntegerField()
popularity = models.DecimalField(decimal_places=2, max_digits=2)
popularity = models.PositiveSmallIntegerField()
runtime = models.PositiveSmallIntegerField()
name = models.CharField(max_length=75)
users = models.ManyToManyField(User)
name = models.CharField(max_length=200)
users = models.ManyToManyField(User, blank=True)
genre = models.ForeignKey(Genre, on_delete=models.CASCADE, blank=True,
null=True)
def __str__(self):
return self.name
track_str = "{}, genre: {}, artists: [".format(self.name, self.genre)
for artist in self.artists.all():
track_str += "{}, ".format(artist.name)
track_str += "]"
return track_str
# }}} Track #
# AudioFeatures {{{ #
class AudioFeatures(models.Model):
@ -53,14 +89,16 @@ class AudioFeatures(models.Model):
verbose_name_plural = "AudioFeatures"
track = models.OneToOneField(Track, on_delete=models.CASCADE, primary_key=True,)
danceability = models.DecimalField(decimal_places=2, max_digits=2)
energy = models.DecimalField(decimal_places=2, max_digits=2)
loudness = models.DecimalField(decimal_places=2, max_digits=2)
speechiness = models.DecimalField(decimal_places=2, max_digits=2)
acousticness = models.DecimalField(decimal_places=2, max_digits=2)
instrumentalness = models.DecimalField(decimal_places=2, max_digits=2)
valence = models.DecimalField(decimal_places=2, max_digits=2)
tempo = models.DecimalField(decimal_places=2, max_digits=2)
acousticness = models.DecimalField(decimal_places=3, max_digits=3)
danceability = models.DecimalField(decimal_places=3, max_digits=3)
energy = models.DecimalField(decimal_places=3, max_digits=3)
instrumentalness = models.DecimalField(decimal_places=3, max_digits=3)
loudness = models.DecimalField(decimal_places=3, max_digits=6)
speechiness = models.DecimalField(decimal_places=3, max_digits=3)
tempo = models.DecimalField(decimal_places=3, max_digits=6)
valence = models.DecimalField(decimal_places=3, max_digits=3)
def __str__(self):
return super(AudioFeatures, self).__str__()
# }}} AudioFeatures #

8
spotifyvis/static/spotifyvis/css/dark_bg.css

@ -0,0 +1,8 @@
/* Dark page background for the templates that link this stylesheet. */
body {
    background-color: #1e1e1e;
}
/* Grey text for top-level headings and paragraphs on the dark background. */
h1,p {
    color: grey;
}

137
spotifyvis/static/spotifyvis/scripts/genre_graph.js

@ -0,0 +1,137 @@
/**
 * Draws a stacked bar chart of songs per genre: one bar per genre, split
 * into one segment per artist.
 *
 * Relies on globals that the hosting page must define before calling: the
 * band scale `x`, the linear scale `y`, the chart group `g`, the inner chart
 * `height`, and the `d3` and `randomColor` libraries.
 *
 * @param data: array of objects with the keys `genre` (string), `num_songs`
 *     (number or numeric string) and `artists` (object mapping artist name to
 *     a song count). The array is sorted in place and its counts are coerced
 *     to numbers.
 * @return None
 */
function create_genre_graph(data) {
    // Nothing to plot; without this the scales would get undefined extents.
    if (!data || data.length === 0) {
        return;
    }

    // convert strings to nums {{{ //
    data.forEach(function(d) {
        d.num_songs = +d.num_songs;
        Object.keys(d.artists).forEach(function(name) {
            d.artists[name] = +d.artists[name];
        });
    });
    // }}} convert strings to nums //

    // domains {{{ //
    // largest genre first, so bars run from tallest to shortest
    data.sort(function(a, b) {
        return b.num_songs - a.num_songs;
    });
    x.domain(data.map(function(d) {
        return d.genre;
    }));
    y.domain([0, d3.max(data, function(d) {
        return d.num_songs;
    })]).nice();
    // }}} domains //

    // setup bar colors {{{ //
    // one colour per segment position, enough for the genre with most artists
    var max_artists = d3.max(data, function(d) {
        return Object.keys(d.artists).length;
    });
    var z = d3.scaleOrdinal().range(randomColor({
        count: max_artists,
        luminosity: 'light',
    }));
    // }}} setup bar colors //

    for (const genre_dict of data) {
        // process artist breakdown {{{ //
        const keys = Object.keys(genre_dict.artists);
        const stack = d3.stack()
            .order(d3.stackOrderDescending)
            .keys(keys)([genre_dict.artists])
            // unpack the column: each series holds a single [y0, y1] pair
            .map((d, i) => {
                return {
                    key: keys[i],
                    data: d[0]
                };
            });
        // }}} process artist breakdown //

        // add bars {{{ //
        g.append("g")
            .selectAll("rect")
            .data(stack)
            .enter().append("rect")
            .attr("x", x(genre_dict.genre))
            .attr("y", function(d) {
                return y(d.data[1]);
            })
            .attr("height", d => y(d.data[0]) - y(d.data[1]))
            .attr("width", x.bandwidth())
            .attr('fill', (d, i) => z(i))
            // tooltip: "<artist>: <song count>"
            .append('title').text(d => d.key + ': ' + (d.data[1] - d.data[0]));
        // }}} add bars //
    }

    // The axes are drawn once, after all bars. Appending them inside the
    // loop above would stack one identical copy of each axis per genre.

    // x-axis {{{ //
    g.append("g")
        .attr("class", "axis")
        .attr("transform", "translate(0," + height + ")")
        .call(d3.axisBottom(x))
        .selectAll(".tick text")
        .call(wrap, x.bandwidth());
    // }}} x-axis //

    // y-axis {{{ //
    g.append("g")
        .attr("class", "axis")
        .call(d3.axisLeft(y).ticks(null, "s"))
        .append("text")
        .attr("x", 2)
        .attr("y", y(y.ticks().pop()) + 0.5)
        .attr("dy", "0.32em")
        .attr("fill", "#000")
        .attr("font-weight", "bold")
        .attr("text-anchor", "start")
        .text("Songs");
    // }}} y-axis //
}
// wrap text {{{ //
// https://gist.github.com/guypursey/f47d8cd11a8ff24854305505dbbd8c07#file-index-html
/**
 * Re-flows the text of every selected SVG <text> element into stacked
 * <tspan> lines so that no line is rendered wider than `width`.
 *
 * @param text: d3 selection of <text> elements to wrap
 * @param width: maximum line width, compared against getComputedTextLength()
 * @return None
 */
function wrap(text, width) {
    text.each(function() {
        var label = d3.select(this),
            // Drop empty tokens: leading whitespace yields "" from split(),
            // which is falsy and would end the loop below before any word
            // is placed.
            words = label.text().split(/\s+/).filter(Boolean).reverse(),
            word,
            line = [],
            lineNumber = 0,
            lineHeight = 1.1, // ems
            y = label.attr("y"),
            // A missing or unparseable dy would otherwise render as "NaNem".
            dy = parseFloat(label.attr("dy")) || 0,
            tspan = label.text(null).append("tspan").attr("x", 0).attr("y", y).attr("dy", dy + "em");
        while (word = words.pop()) {
            line.push(word);
            tspan.text(line.join(" "));
            // Break only when there is an earlier word to keep on this line;
            // a lone over-wide word stays put instead of leaving an empty
            // tspan behind and shifting the label down a line.
            if (line.length > 1 && tspan.node().getComputedTextLength() > width) {
                line.pop();
                tspan.text(line.join(" "));
                line = [word];
                tspan = label.append("tspan").attr("x", 0).attr("y", y).attr("dy", `${++lineNumber * lineHeight + dy}em`).text(word);
            }
        }
    });
}
// }}} wrap text //

0
spotifyvis/static/spotifyvis/scripts/user_data.js

141
spotifyvis/templates/spotifyvis/audio_features.html

@ -0,0 +1,141 @@
{% load static %}
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<title>User Spotify Data</title>
<meta name="description" content="">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
.tick {
font-size: 15px;
}
</style>
</head>
<body>
<!--[if lt IE 7]>
<p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="#">upgrade your browser</a> to improve your experience.</p>
<![endif]-->
<p>Logged in as {{ user_id }}</p>
<script src="https://d3js.org/d3.v5.js"></script>
<script type="text/javascript">
/** Queries the backend for audio feature data, draws a bar chart of how
 * many values fall into each interval, and appends the chart to a
 * designated parent element.
 *
 * Values above the last end point are counted in the last interval and
 * values below the first end point in the first interval; values that do
 * not parse as numbers are skipped.
 *
 * @param audioFeature: the name of the audio feature (string)
 * @param intervalEndPoints: an ascending array of at least 2 numbers defining
 *      the intervals (categories) of values, for example:
 *      [0, 0.25, 0.5, 0.75, 1.0] for instrumentalness would define ranges
 *      (0-0.25), (0.25-0.5), (0.5-0.75), (0.75-1.0)
 * @param parentElem: the DOM element to append the graph to (a selector string)
 * @return None
 */
function drawAudioFeatGraph(audioFeature, intervalEndPoints, parentElem) {
    let margin = {top: 20, right: 30, bottom: 30, left: 40};
    let width = 480 - margin.left - margin.right,
        height = 270 - margin.top - margin.bottom;

    let featureData = {};
    // Create the keys first, in order, so the bars keep the interval order
    for (let index = 0; index < intervalEndPoints.length - 1; index++) {
        let key = `${intervalEndPoints[index]} ~ ${intervalEndPoints[index + 1]}`;
        featureData[key] = 0;
    }
    // define the vertical scaling function
    let vScale = d3.scaleLinear().range([height, 0]);

    d3.json(`/audio_features/${audioFeature}/{{ user_secret }}`)
        .then(function(response) {
            // categorize the data points
            const lastBucket = intervalEndPoints.length - 2;
            for (let dataPoint of response.data_points) {
                dataPoint = parseFloat(dataPoint);
                // NaN compares false against every end point and would be
                // counted in the last interval; leave it out instead.
                if (Number.isNaN(dataPoint)) {
                    continue;
                }
                let index = lastBucket;
                // Walk down to the last interval whose lower end point is
                // <= dataPoint. Stopping at 0 clamps values below the first
                // end point into the first interval instead of producing an
                // "undefined ~ ..." key with a NaN count.
                while (index > 0 && dataPoint < intervalEndPoints[index]) {
                    index -= 1;
                }
                let key = `${intervalEndPoints[index]} ~ ${intervalEndPoints[index + 1]}`;
                featureData[key] += 1;
            }

            let dataSet = Object.values(featureData);
            let dataRanges = Object.keys(featureData); // Ranges of audio features, e.g. 0-0.25, 0.25-0.5, etc
            let dataArr = [];
            // turn the counts into an array of objects, e.g. {range: "0-0.25", counts: 5}
            for (let i = 0; i < dataRanges.length; i++) {
                dataArr.push({
                    range: dataRanges[i],
                    counts: featureData[dataRanges[i]]
                });
            }

            vScale.domain([0, d3.max(dataSet)]).nice();
            let hScale = d3.scaleBand().domain(dataRanges).rangeRound([0, width]).padding(0.5);
            let xAxis = d3.axisBottom().scale(hScale);
            let yAxis = d3.axisLeft().scale(vScale);

            let featureSVG = d3.select(parentElem)
                .append('svg').attr('width', width + margin.left + margin.right)
                .attr('height', height + margin.top + margin.bottom);
            let featureGraph = featureSVG.append("g")
                .attr("transform", `translate(${margin.left}, ${margin.top})`)
                .attr("fill", "teal");

            // one bar per interval, its height proportional to the count
            featureGraph.selectAll(".bar")
                .data(dataArr)
                .enter().append('rect')
                .attr('class', 'bar')
                .attr('x', function(d) { return hScale(d.range); })
                .attr('y', function(d) { return vScale(d.counts); })
                .attr("height", function(d) { return height - vScale(d.counts); })
                .attr("width", hScale.bandwidth());

            featureGraph.append('g')
                .attr('class', 'axis')
                .attr('transform', `translate(0, ${height})`)
                .call(xAxis);
            featureGraph.append('g')
                .attr('class', 'axis')
                .call(yAxis);

            // chart title: the capitalized feature name
            featureSVG.append("text")
                .attr('x', (width / 2))
                .attr('y', (margin.top / 2))
                .attr('text-anchor', 'middle')
                .style('font-size', '14px')
                .text(`${capFeatureStr(audioFeature)}`);
        });
}
/**
 * Upper-cases the first character of an audio feature name and leaves the
 * rest of the string untouched.
 * @param audioFeature: the name of the audio feature (string)
 * @returns the name with its first character capitalized
 */
function capFeatureStr(audioFeature) {
    const firstChar = audioFeature.charAt(0);
    const remainder = audioFeature.substring(1);
    return `${firstChar.toUpperCase()}${remainder}`;
}
drawAudioFeatGraph("instrumentalness", [0, 0.25, 0.5, 0.75, 1.0], 'body');
drawAudioFeatGraph("valence", [0, 0.25, 0.5, 0.75, 1.0], 'body');
drawAudioFeatGraph("energy", [0, 0.25, 0.5, 0.75, 1.0], 'body');
drawAudioFeatGraph("tempo", [40, 80, 120, 160, 200], 'body');
drawAudioFeatGraph("danceability", [0, 0.25, 0.5, 0.75, 1.0], 'body');
drawAudioFeatGraph("acousticness", [0, 0.25, 0.5, 0.75, 1.0], 'body');
drawAudioFeatGraph("loudness", [-60, -45, -30, -15, 0], 'body');
drawAudioFeatGraph("speechiness", [0, 0.25, 0.5, 0.75, 1.0], 'body');
</script>
</body>
</html>

44
spotifyvis/templates/spotifyvis/genre_graph.html

@ -0,0 +1,44 @@
<!-- header {{{ -->
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
{% load static %}
<html class="no-js"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<title>Test DB Page</title>
<meta name="description" content="">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="{% static 'spotifyvis/css/dark_bg.css' %}">
</head>
<!-- }}} header -->
<body>
<script src="https://d3js.org/d3.v5.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/randomcolor/0.5.2/randomColor.min.js"></script>
{% load static %}
<script src="{% static "spotifyvis/scripts/genre_graph.js" %}"></script>
<svg width="1920" height="740"></svg>
<script>
// Chart scaffolding. These are deliberately globals: create_genre_graph in
// genre_graph.js reads x, y, g and height without declaring them.
var svg = d3.select("svg"),
    margin = {top: 20, right: 20, bottom: 30, left: 40},
    // inner drawing area = the <svg> element's size minus the margins
    width = +svg.attr("width") - margin.left - margin.right,
    height = +svg.attr("height") - margin.top - margin.bottom,
    // group shifted by the left/top margins; everything is drawn inside it
    g = svg.append("g").attr("transform", "translate(" + margin.left + "," + margin.top + ")");
// horizontal band scale; its domain is set once the data has loaded
var x = d3.scaleBand()
    .rangeRound([0, width])
    .paddingInner(0.05)
    .align(0.1);
// vertical linear scale, inverted so larger values are drawn higher up
var y = d3.scaleLinear()
    .rangeRound([height, 0]);
// fetch the JSON from the get_genre_data URL and hand it to the chart builder
d3.json("{% url "get_genre_data" user_secret %}").then(create_genre_graph);
</script>
</body>
</html>

13
spotifyvis/templates/spotifyvis/index.html

@ -4,6 +4,7 @@
<head>
<title>User Login</title>
<link rel="stylesheet" href="//netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css">
<link rel="stylesheet" href="{% static 'spotifyvis/css/dark_bg.css' %}">
<style type="text/css">
.text-overflow {
overflow: hidden;
@ -18,15 +19,9 @@
<body>
<div class="container">
<div id="login">
<h1>This is an example of the Authorization Code flow</h1>
<a href="/login" class="btn btn-primary">Log In (Original)</a>
<button id="login-btn">Log In</button>
</div>
<div id="data-container">
<ul id="data-list">
</ul>
<h1>spotify-lib-vis</h1>
<a href="/login" class="btn btn-primary">Scan Library</a>
<a href="{% url "admin_graphs" %}" class="btn btn-primary">Admin Graphs</a>
</div>
</div>

17
spotifyvis/templates/spotifyvis/logged_in.html

@ -0,0 +1,17 @@
<!DOCTYPE html>
{% load static %}
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Logged In</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
<link rel="stylesheet" href="{% static 'spotifyvis/css/dark_bg.css' %}">
</head>
<body>
<h1>{{ user_id }}'s Graphs</h1>
<a class="btn btn-primary" href="/audio_features/{{ user_secret }}"
role="button">Audio Features</a>
<a class="btn btn-primary" href="{% url "display_genre_graph" user_secret %}"
role="button">Genres</a>
</body>
</html>

9
spotifyvis/templates/spotifyvis/user_data.html

@ -1,3 +1,4 @@
{% load static %}
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
@ -9,18 +10,12 @@
<title>User Spotify Data</title>
<meta name="description" content="">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="{% static 'spotifyvis/css/dark_bg.css' %}">
</head>
<body>
<!--[if lt IE 7]>
<p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="#">upgrade your browser</a> to improve your experience.</p>
<![endif]-->
<p>Logged in as {{ id }}</p>
<h2>Display name {{ user_name }}</h2>
<ul>
{% for genre_name, genre_count in genre_dict.items %}
<li>{{ genre_name }} - {{ genre_count }}</li>
{% endfor %}
</ul>
<script src="{% static 'spotifyvis/scripts/user_data.js' %}"></script>
</body>
</html>

22
spotifyvis/urls.py

@ -1,9 +1,19 @@
from django.urls import path, include
from . import views
from django.conf.urls import url
from .views import *
urlpatterns = [
path('', views.index, name='index'),
path('login', views.login, name='login'),
path('callback', views.callback, name='callback'),
path('user_data', views.user_data, name='user_data'),
]
path('', index, name='index'),
path('login', login, name='login'),
path('callback', callback, name='callback'),
path('user_data', user_data, name='user_data'),
path('admin_graphs', admin_graphs, name='admin_graphs'),
path('user_artists/<str:user_id>', get_artist_data, name='get_artist_data'),
path('api/user_genres/<str:user_secret>', get_genre_data, name='get_genre_data'),
path('graphs/genre/<str:client_secret>', display_genre_graph,
name='display_genre_graph'),
path('audio_features/<str:client_secret>', audio_features, name='audio_features'),
path('audio_features/<str:audio_feature>/<str:client_secret>',
get_audio_feature_data, name='get_audio_feature_data'),
]

481
spotifyvis/utils.py

@ -1,300 +1,281 @@
# imports {{{ #
import requests
import math
import pprint
from .models import Artist, User, Track, AudioFeatures
from .models import *
from django.db.models import Count, Q, F
from django.http import JsonResponse
from django.core import serializers
import json
# }}} imports #
USER_TRACKS_LIMIT = 50
ARTIST_LIMIT = 50
FEATURES_LIMIT = 100
# ARTIST_LIMIT = 25
# FEATURES_LIMIT = 25
# parse_library {{{ #
def parse_library(headers, tracks, library_stats, user):
"""Scans user's library for certain number of tracks to update library_stats with.
def parse_library(headers, tracks, user):
"""Scans user's library for certain number of tracks and store the information in a database
:headers: For API call.
:tracks: Number of tracks to get from user's library.
:library_stats: Dictionary containing the data mined from user's library
:user: a User object representing the user whose library we are parsing
:returns: None
"""
# TODO: implement importing entire library with 0 as tracks param
# number of tracks to get with each call
limit = 5
# keeps track of point to get songs from
offset = 0
payload = {'limit': str(limit)}
# use two separate variables to track, because the average popularity also requires num_samples
num_samples = 0 # number of actual track samples
feature_data_points = 0 # number of feature data analyses (some tracks do not have analyses available)
payload = {'limit': str(USER_TRACKS_LIMIT)}
artist_genre_queue = []
features_queue = []
for _ in range(0, tracks, limit):
# iterate until hit requested num of tracks
for i in range(0, tracks, USER_TRACKS_LIMIT):
payload['offset'] = str(offset)
saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers, params=payload).json()
saved_tracks_response = requests.get('https://api.spotify.com/v1/me/tracks',
headers=headers,
params=payload).json()
for track_dict in saved_tracks_response['items']:
num_samples += 1
get_track_info(track_dict['track'], library_stats, num_samples)
# get_genre(headers, track_dict['track']['album']['id'])
audio_features_dict = get_audio_features(headers, track_dict['track']['id'])
if len(audio_features_dict) != 0:
# Track the number of audio analyses for calculating
# audio feature averages and standard deviations on the fly
feature_data_points += 1
for feature, feature_data in audio_features_dict.items():
update_audio_feature_stats(feature, feature_data, feature_data_points, library_stats)
# add artists {{{ #
# update artist info before track so that Track object can reference
# Artist object
track_artists = []
for artist_dict in track_dict['track']['artists']:
increase_artist_count(headers, artist_dict['name'], artist_dict['id'], library_stats)
# calculates num_songs with offset + songs retrieved
library_stats['num_songs'] = offset + len(saved_tracks_response['items'])
offset += limit
calculate_genres_from_artists(headers, library_stats)
pprint.pprint(library_stats)
artist_obj, artist_created = Artist.objects.get_or_create(
artist_id=artist_dict['id'],
name=artist_dict['name'],)
# only add/tally up artist genres if new
if artist_created:
artist_genre_queue.append(artist_obj)
if len(artist_genre_queue) == ARTIST_LIMIT:
add_artist_genres(headers, artist_genre_queue)
artist_genre_queue = []
track_artists.append(artist_obj)
# }}} add artists #
# TODO: fix this, don't need any more
top_genre = ""
track_obj, track_created = save_track_obj(track_dict['track'],
track_artists, top_genre, user)
# add audio features {{{ #
# if a new track is not created, the associated audio feature does
# not need to be created again
if track_created:
features_queue.append(track_obj)
if len(features_queue) == FEATURES_LIMIT:
get_audio_features(headers, features_queue)
features_queue = []
# }}} add audio features #
# temporary console logging
print("#{}-{}: {} - {}".format(offset + 1,
offset + USER_TRACKS_LIMIT,
track_obj.artists.first(),
track_obj.name))
# }}} parse_library #
def get_audio_features(headers, track_id):
"""Returns the audio features of a soundtrack
# calculates num_songs with offset + songs retrieved
offset += USER_TRACKS_LIMIT
Args:
headers: headers containing the API token
track_id: the id of the soundtrack, needed to query the Spotify API
Returns:
A dictionary with the features as its keys, if audio feature data is missing for the track,
an empty dictionary is returned.
"""
# clean-up {{{ #
response = requests.get("https://api.spotify.com/v1/audio-features/{}".format(track_id), headers = headers).json()
if 'error' in response:
return {}
features_dict = {}
# Data that we don't need
useless_keys = [
"key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature",
]
for key, val in response.items():
if key not in useless_keys:
features_dict[key] = val
return features_dict
def update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size):
"""Calculates the standard deviation for a sample without storing all data points
Args:
cur_mean: the current mean for N = (sample_size - 1)
cur_std_dev: the current standard deviation for N = (sample_size - 1)
new_data_point: a new data point
sample_size: sample size including the new data point
# update remaining artists without genres and songs without features if
# there are any
if len(artist_genre_queue) > 0:
add_artist_genres(headers, artist_genre_queue)
if len(features_queue) > 0:
get_audio_features(headers, features_queue)
Returns:
(new_mean, new_std_dev)
"""
# This is an implementation of Welford's method
# http://jonisalonen.com/2013/deriving-welfords-method-for-computing-variance/
new_mean = ((sample_size - 1) * cur_mean + new_data_point) / sample_size
delta_variance = (new_data_point - new_mean) * (new_data_point - cur_mean)
new_std_dev = math.sqrt(
(math.pow(cur_std_dev, 2) * (sample_size - 2) + delta_variance) / (
sample_size - 1
))
return new_mean, new_std_dev
def update_audio_feature_stats(feature, new_data_point, sample_size, library_stats):
"""Updates the audio feature statistics in library_stats
Args:
feature: the audio feature to be updated (string)
new_data_point: new data to update the stats with
sample_size: sample size including the new data point
library_stats Dictionary containing the data mined from user's Spotify library
Returns:
None
"""
# first time the feature is considered
if sample_size < 2:
library_stats['audio_features'][feature] = {
"average": new_data_point,
"std_dev": 0,
}
else:
cur_mean = library_stats['audio_features'][feature]['average']
cur_std_dev = library_stats['audio_features'][feature]['std_dev']
new_mean, new_std_dev = update_std_dev(cur_mean, cur_std_dev, new_data_point, sample_size)
# }}} clean-up #
library_stats['audio_features'][feature] = {
"average": new_mean,
"std_dev": new_std_dev
}
update_track_genres(user)
# }}} parse_library #
# increase_nested_key {{{ #
# update_track_genres {{{ #
def increase_nested_key(top_key, nested_key, library_stats, amount=1):
"""Increases count for the value of library_stats[top_key][nested_key]. Checks if nested_key exists already and takes
appropriate action.
def update_track_genres(user):
"""Updates user's tracks with the most common genre associated with the
songs' artist(s).
:top_key: First key of library_stats.
:nested_key: Key in top_key's dict for which we want to increase value of.
:library_stats: Dictionary containing the data mined from user's Spotify library
:user: User object who's tracks are being updated.
:returns: None
"""
if nested_key not in library_stats[top_key]:
library_stats[top_key][nested_key] = amount
else:
library_stats[top_key][nested_key] += amount
# }}} increase_nested_key #
# increase_artist_count {{{ #
def increase_artist_count(headers, artist_name, artist_id, library_stats):
"""Increases count for artist in library_stats and stores the artist_id.
:headers: For making the API call.
:artist_name: Artist to increase count for.
:artist_id: The Spotify ID for the artist.
:library_stats: Dictionary containing the data mined from user's Spotify library
:returns: None
user_tracks = Track.objects.filter(users__exact=user)
for track in user_tracks:
# just using this variable to save another call to db
track_artists = track.artists.all()
# set genres to first artist's genres then find intersection with others
shared_genres = track_artists.first().genres.all()
for artist in track_artists:
shared_genres = shared_genres.intersection(artist.genres.all())
shared_genres = shared_genres.order_by('-num_songs')
undefined_genre_obj = Genre.objects.get(name="undefined")
most_common_genre = shared_genres.first() if shared_genres.first() is \
not undefined_genre_obj else shared_genres[1]
track.genre = most_common_genre if most_common_genre is not None \
else undefined_genre_obj
track.save()
# print(track.name, track.genre)
# }}} update_track_genres #
# save_track_obj {{{ #
def save_track_obj(track_dict, artists, top_genre, user):
"""Make an entry in the database for this track if it doesn't exist already.
:track_dict: dictionary from the API call containing track information.
:artists: artists of the song, passed in as a list of Artist objects.
:top_genre: top genre associated with this track (see get_top_genre).
:user: User object for which this Track is to be associated with.
:returns: (The created/retrieved Track object, created)
"""
if artist_name not in library_stats['artists']:
library_stats['artists'][artist_name] = {}
library_stats['artists'][artist_name]['count'] = 1
library_stats['artists'][artist_name]['id'] = artist_id
track_query = Track.objects.filter(track_id__exact=track_dict['id'])
if len(track_query) != 0:
return track_query[0], False
else:
library_stats['artists'][artist_name]['count'] += 1
# }}} increase_artist_count #
def update_popularity_stats(new_data_point, library_stats, sample_size):
"""Updates the popularity statistics in library_stats
Args:
new_data_point: new data to update the popularity stats with
library_stats: Dictionary containing data mined from user's Spotify library
sample_size: The sample size including the new data
Returns:
None
new_track = Track.objects.create(
track_id=track_dict['id'],
year=track_dict['album']['release_date'].split('-')[0],
popularity=int(track_dict['popularity']),
runtime=int(float(track_dict['duration_ms']) / 1000),
name=track_dict['name'],
# genre=top_genre,
)
# have to add artists and user after saving object since track needs to
# have ID before filling in m2m field
for artist in artists:
new_track.artists.add(artist)
new_track.users.add(user)
new_track.save()
return new_track, True
# }}} save_track_obj #
# get_audio_features {{{ #
def get_audio_features(headers, track_objs):
"""Creates and saves new AudioFeatures objects for the respective
track_objs. track_objs should contain the API limit for a single call
(FEATURES_LIMIT) for maximum efficiency.
:headers: headers containing the API token
:track_objs: Track objects to associate with the new AudioFeatures object
:returns: None
"""
if sample_size < 2:
library_stats['popularity'] = {
"average": new_data_point,
"std_dev": 0,
}
else :
cur_mean_popularity = library_stats['popularity']['average']
cur_popularity_stdev = library_stats['popularity']['std_dev']
new_mean, new_std_dev = update_std_dev(
cur_mean_popularity, cur_popularity_stdev, new_data_point, sample_size)
library_stats['popularity'] = {
"average": new_mean,
"std_dev": new_std_dev,
}
# get_track_info {{{ #
def get_track_info(track_dict, library_stats, sample_size):
"""Get all the info from the track_dict directly returned by the API call in parse_library.
:track_dict: Dict returned from the API call containing the track info.
:library_stats: Dictionary containing the data mined from user's Spotify library
:sample_size: The sample size so far including this track
track_ids = str.join(",", [track_obj.track_id for track_obj in track_objs])
params = {'ids': track_ids}
features_response = requests.get("https://api.spotify.com/v1/audio-features",
headers=headers,params=params).json()['audio_features']
# pprint.pprint(features_response)
useless_keys = [ "key", "mode", "type", "liveness", "id", "uri", "track_href", "analysis_url", "time_signature", ]
for i in range(len(track_objs)):
if features_response[i] is not None:
# Data that we don't need
cur_features_obj = AudioFeatures()
cur_features_obj.track = track_objs[i]
for key, val in features_response[i].items():
if key not in useless_keys:
setattr(cur_features_obj, key, val)
cur_features_obj.save()
# }}} get_audio_features #
def process_artist_genre(genre_name, artist_obj):
"""Increase count for corresponding Genre object to genre_name and add that
Genre to artist_obj.
:genre_name: Name of genre.
:artist_obj: Artist object to add Genre object to.
:returns: None
"""
# popularity
update_popularity_stats(track_dict['popularity'], library_stats, sample_size)
# year
year_released = track_dict['album']['release_date'].split('-')[0]
increase_nested_key('year_released', year_released, library_stats)
# artist
# artist_names = [artist['name'] for artist in track_dict['artists']]
# for artist_name in artist_names:
# increase_nested_key('artists', artist_name)
# runtime
library_stats['total_runtime'] += float(track_dict['duration_ms']) / (1000 * 60)
# }}} get_track_info #
genre_obj, created = Genre.objects.get_or_create(name=genre_name,
defaults={'num_songs':1})
if not created:
genre_obj.num_songs = F('num_songs') + 1
genre_obj.save()
artist_obj.genres.add(genre_obj)
artist_obj.save()
# calculate_genres_from_artists {{{ #
# add_artist_genres {{{ #
def calculate_genres_from_artists(headers, library_stats):
"""Tallies up genre counts based on artists in library_stats.
def add_artist_genres(headers, artist_objs):
"""Adds genres to artist_objs and increases the count of the respective Genre
object. artist_objs should contain the API limit for a single call
(ARTIST_LIMIT) for maximum efficiency.
:headers: For making the API call.
:library_stats: Dictionary containing the data mined from user's Spotify library
:artist_objs: List of Artist objects for which to add/tally up genres for.
:returns: None
"""
for artist_entry in library_stats['artists'].values():
artist_response = requests.get('https://api.spotify.com/v1/artists/' + artist_entry['id'], headers=headers).json()
# increase each genre count by artist count
for genre in artist_response['genres']:
increase_nested_key('genres', genre, library_stats, artist_entry['count'])
# }}} calculate_genres_from_artists #
def process_library_stats(library_stats):
"""Processes library_stats into format more suitable for D3 consumption
Args:
library_stats: Dictionary containing the data mined from user's Spotify library
Returns:
A new dictionary that contains the data in library_stats, in a format more suitable for D3 consumption
artist_ids = str.join(",", [artist_obj.artist_id for artist_obj in artist_objs])
params = {'ids': artist_ids}
artists_response = requests.get('https://api.spotify.com/v1/artists/',
headers=headers, params=params).json()['artists']
# pprint.pprint(artists_response)
for i in range(len(artist_objs)):
if len(artists_response[i]['genres']) == 0:
process_artist_genre("undefined", artist_objs[i])
else:
for genre in artists_response[i]['genres']:
process_artist_genre(genre, artist_objs[i])
# }}} add_artist_genres #
# get_artists_in_genre {{{ #
def get_artists_in_genre(user, genre, max_songs):
"""Return count of artists in genre.
:user: User object to return data for.
:genre: genre to count artists for.
:max_songs: max total songs to include to prevent overflow due to having
multiple artists on each track.
:returns: dict of artists in the genre along with the number of songs they
have.
"""
processed_library_stats = {}
for key in library_stats:
if key == 'artists' or key == 'genres' or key == 'year_released':
for inner_key in library_stats[key]:
if key not in processed_library_stats:
processed_library_stats[key] = []
processed_item_key = '' # identifier key for each dict in the list
count = 0
if 'artist' in key:
processed_item_key = 'name'
count = library_stats[key][inner_key]['count']
elif 'genre' in key:
processed_item_key = 'genre'
count = library_stats[key][inner_key]
else:
processed_item_key = 'year'
count = library_stats[key][inner_key]
processed_library_stats[key].append({
processed_item_key: inner_key,
"count": count
})
elif key == 'audio_features':
for audio_feature in library_stats[key]:
if 'audio_features' not in processed_library_stats:
processed_library_stats['audio_features'] = []
processed_library_stats['audio_features'].append({
'feature': audio_feature,
'average': library_stats[key][audio_feature]['average'],
'std_dev': library_stats[key][audio_feature]['std_dev']
})
# TODO: Not sure about final form for 'popularity'
# elif key == 'popularity':
# processed_library_stats[key] = []
# processed_library_stats[key].append({
# })
elif key == 'num_songs' or key == 'total_runtime' or key == 'popularity':
processed_library_stats[key] = library_stats[key]
return processed_library_stats
genre_obj = Genre.objects.get(name=genre)
artist_counts = (Artist.objects.filter(track__users=user)
.filter(genres=genre_obj)
.annotate(num_songs=Count('track', distinct=True))
.order_by('-num_songs')
)
processed_artist_counts = {}
songs_added = 0
for artist in artist_counts:
# hacky way to not have total count overflow due to there being multiple
# artists on a track
if songs_added + artist.num_songs <= max_songs:
processed_artist_counts[artist.name] = artist.num_songs
songs_added += artist.num_songs
# processed_artist_counts = [{'name': artist.name, 'num_songs': artist.num_songs} for artist in artist_counts]
# processed_artist_counts = {artist.name: artist.num_songs for artist in artist_counts}
# pprint.pprint(processed_artist_counts)
return processed_artist_counts
# }}} get_artists_in_genre #

175
spotifyvis/views.py

@ -1,21 +1,29 @@
from django.shortcuts import render, redirect
from django.http import HttpResponse, HttpResponseBadRequest
# imports {{{ #
import math
import random
import requests
import os
import urllib
import json
import secrets
import pprint
import string
from datetime import datetime
from .utils import parse_library, process_library_stats
from django.shortcuts import render, redirect
from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse
from django.db.models import Count, Q
from .utils import parse_library, get_artists_in_genre, update_track_genres
from .models import User, Track, AudioFeatures, Artist
# }}} imports #
TIME_FORMAT = '%Y-%m-%d-%H-%M-%S'
TRACKS_TO_QUERY = 5
TRACKS_TO_QUERY = 200
# generate_random_string {{{ #
def generate_random_string(length):
"""Generates a random string of a certain length
@ -25,11 +33,8 @@ def generate_random_string(length):
Returns:
A random string
"""
rand_str = ""
possible_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
for _ in range(length):
rand_str += possible_chars[random.randint(0, len(possible_chars) - 1)]
all_chars = string.ascii_letters + string.digits
rand_str = "".join(random.choice(all_chars) for _ in range(length))
return rand_str
@ -59,8 +64,8 @@ def index(request):
# login {{{ #
# uses Authorization Code flow
def login(request):
# use a randomly generated state string to prevent cross-site request forgery attacks
state_str = generate_random_string(16)
request.session['state_string'] = state_str
@ -97,7 +102,7 @@ def callback(request):
'client_secret': os.environ['SPOTIFY_CLIENT_SECRET'],
}
response = requests.post('https://accounts.spotify.com/api/token', data = payload).json()
response = requests.post('https://accounts.spotify.com/api/token', data=payload).json()
# despite its name, datetime.today() returns a datetime object, not a date object
# use datetime.strptime() to get a datetime object from a string
request.session['token_obtained_at'] = datetime.strftime(datetime.today(), TIME_FORMAT)
@ -112,7 +117,11 @@ def callback(request):
# user_data {{{ #
def user_data(request):
# get user token {{{ #
token_obtained_at = datetime.strptime(request.session['token_obtained_at'], TIME_FORMAT)
valid_for = int(request.session['valid_for'])
@ -124,9 +133,11 @@ def user_data(request):
'client_secret': os.environ['SPOTIFY_CLIENT_SECRET']
}
refresh_token_response = requests.post('https://accounts.spotify.com/api/token', data = req_body).json()
refresh_token_response = requests.post('https://accounts.spotify.com/api/token', data=req_body).json()
request.session['access_token'] = refresh_token_response['access_token']
request.session['valid_for'] = refresh_token_response['expires_in']
# }}} get user token #
auth_token_str = "Bearer " + request.session['access_token']
headers = {
@ -134,35 +145,115 @@ def user_data(request):
}
user_data_response = requests.get('https://api.spotify.com/v1/me', headers = headers).json()
request.session['user_id'] = user_data_response['id'] # store the user_id so it may be used to create model
request.session['user_name'] = user_data_response['display_name']
user = None # will be set to the current user object later
# store the user_id so it may be used to create model
request.session['user_id'] = user_data_response['id']
# create user obj {{{ #
try:
user = User.objects.get(user_id=request.session['user_id'])
user = User.objects.get(user_id=user_data_response['id'])
except User.DoesNotExist:
user = User.objects.create(user_id=request.session['user_id'], user_name=request.session['user_name'])
# context = {
# 'user_name': user_data_response['display_name'],
# 'id': user_data_response['id'],
# }
library_stats = {
"audio_features":{},
"genres":{},
"year_released":{},
"artists":{},
"num_songs": 0,
"popularity": {
"average": 0,
"std_dev": 0,
},
"total_runtime": 0
# Python docs recommend 32 bytes of randomness against brute force attacks
user = User(user_id=user_data_response['id'], user_secret=secrets.token_urlsafe(32))
request.session['user_secret'] = user.user_secret
user.save()
# }}} create user obj #
context = {
'user_id': user.user_id,
'user_secret': user.user_secret,
}
parse_library(headers, TRACKS_TO_QUERY, library_stats, user)
processed_library_stats = process_library_stats(library_stats)
print("================================================")
print("Processed data follows\n")
pprint.pprint(processed_library_stats)
return render(request, 'spotifyvis/user_data.html', context)
# }}} user_data #
parse_library(headers, TRACKS_TO_QUERY, user)
return render(request, 'spotifyvis/logged_in.html', context)
# }}} user_data #
def admin_graphs(request):
    """Render the logged-in page for a fixed, hard-coded user.

    Fetches the User whose user_id is hard-coded below, calls
    update_track_genres for that user, and renders
    spotifyvis/logged_in.html with the user's id and secret.

    :request: the HTTP request.
    :returns: the rendered logged_in.html response.
    """
    # NOTE(review): the account is hard-coded, presumably for development
    # use only -- confirm before exposing this view.
    user_id = "polarbier"
    user_obj = User.objects.get(user_id=user_id)
    # Template variables are collected before the genres are updated, in the
    # same order as the statements were originally written.
    context = dict(user_id=user_id, user_secret=user_obj.user_secret)
    update_track_genres(user_obj)
    return render(request, 'spotifyvis/logged_in.html', context)
# get_artist_data {{{ #
def get_artist_data(request, user_secret):
    """Return the number of songs each artist has in the user's library.

    :request: the HTTP request.
    :user_secret: secret used to identify the user whose library is queried.
    :returns: JsonResponse wrapping a list with one
        {'name': <artist name>, 'num_songs': <count>} dict per Artist.
    """
    # The argument is the user's secret, so it has to be matched against the
    # user_secret field (as every other view does), not against user_id.
    user = User.objects.get(user_secret=user_secret)
    # Only tracks belonging to this user are counted for each artist.
    artist_counts = Artist.objects.annotate(
        num_songs=Count('track', filter=Q(track__users=user)))
    processed_artist_counts = [
        {'name': artist.name, 'num_songs': artist.num_songs}
        for artist in artist_counts
    ]
    # safe=False because the top-level JSON value is a list, not a dict.
    return JsonResponse(data=processed_artist_counts, safe=False)
# }}} get_artist_data #
def display_genre_graph(request, client_secret):
    """Render the genre graph page for the user identified by client_secret.

    :request: the HTTP request.
    :client_secret: secret used to identify the user.
    :returns: the rendered genre_graph.html response.
    """
    # The fetched object itself is not needed; the lookup is kept because it
    # raises when no user matches the given secret.
    User.objects.get(user_secret=client_secret)
    return render(request, "spotifyvis/genre_graph.html",
                  {'user_secret': client_secret})
def audio_features(request, client_secret):
    """Render the audio features page for the user owning client_secret.

    :request: the HTTP request.
    :client_secret: secret used to identify the user.
    :returns: the rendered audio_features.html response, given the user's
        id and secret as template variables.
    """
    matched_user = User.objects.get(user_secret=client_secret)
    template_vars = dict(
        user_id=matched_user.user_id,
        user_secret=client_secret,
    )
    return render(request, "spotifyvis/audio_features.html", template_vars)
# get_audio_feature_data {{{ #
def get_audio_feature_data(request, audio_feature, client_secret):
    """Returns all data points for a given audio feature

    Args:
        request: the HTTP request
        audio_feature: The audio feature to be queried; read as an attribute
            name from each AudioFeatures object
        client_secret: client secret, used to identify the user

    Returns:
        JsonResponse of the form {'data_points': [...]}, holding the value
        of audio_feature for every track of the user that has an
        AudioFeatures entry. Tracks without one contribute nothing.
    """
    user = User.objects.get(user_secret=client_secret)
    user_tracks = Track.objects.filter(users=user)
    # Fetch the features of all the user's tracks with a single query rather
    # than issuing one AudioFeatures lookup per track.
    features = AudioFeatures.objects.filter(track__in=user_tracks)
    response_payload = {
        'data_points': [
            getattr(features_obj, audio_feature) for features_obj in features
        ],
    }
    return JsonResponse(response_payload)
# }}} get_audio_feature_data #
# get_genre_data {{{ #
def get_genre_data(request, user_secret):
    """Return the data needed to draw the user's genre graph.

    :request: the HTTP request.
    :user_secret: secret used to identify the user.
    :returns: JsonResponse wrapping a list of dicts, one per genre among the
        user's tracks, each with the keys 'genre', 'num_songs' and
        'artists' (the result of get_artists_in_genre for that genre).
    """
    user = User.objects.get(user_secret=user_secret)
    per_genre = (
        Track.objects.filter(users__exact=user)
        .values('genre')
        .order_by('genre')
        .annotate(num_songs=Count('genre'))
    )
    # Materialise the rows once so the dicts extended below are the very
    # ones that get printed and serialised.
    genre_rows = list(per_genre)
    for row in genre_rows:
        # num_songs doubles as the cap on songs counted across artists.
        row['artists'] = get_artists_in_genre(
            user, row['genre'], row['num_songs'])
    print("*** Genre Breakdown ***")
    pprint.pprint(genre_rows)
    return JsonResponse(data=genre_rows, safe=False)
# }}} get_genre_data #
Loading…
Cancel
Save