
Commit 4c9da56

Working model and tensorflow
0 parents  commit 4c9da56

19 files changed: +3007 -0 lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
.idea/
venv/
bazel-0.19.2-installer-linux-x86_64.sh

README.md

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
# Music Genre Classification with Deep Learning

## Abstract

In this project we adapt the model from [Choi et al.](https://github.com/keunwoochoi/music-auto_tagging-keras) to train a custom music genre classification system with our own genres and data. The model takes as input the spectrogram of a music frame and analyzes the image using a Convolutional Neural Network (CNN) followed by a Recurrent Neural Network (RNN). The output of the system is a vector of predicted genres for the song.

We fine-tuned their model on a small dataset (30 songs per genre) and tested it on the GTZAN dataset, achieving a final accuracy of 80%.
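For orientation, below is a minimal sketch of a CRNN of this kind in Keras 1.x with the Theano backend ('th' dimension ordering). The filter counts, pooling sizes, and GRU widths are illustrative assumptions, not the authors' exact configuration; only the (1, 96, 1366) input shape is taken from this repository's audio_processor.py.

```
# Sketch only: an assumed CRNN configuration in the spirit of Choi et al.
from keras.models import Sequential
from keras.layers import (Convolution2D, MaxPooling2D, BatchNormalization,
                          Activation, Permute, Reshape, GRU, Dense)

model = Sequential()
model.add(Convolution2D(64, 3, 3, border_mode='same', input_shape=(1, 96, 1366)))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))       # -> (64, 48, 683)

# Three more conv blocks collapse the mel axis and shorten the time axis.
for n_filters, pool in [(128, (3, 3)), (128, (4, 4)), (128, (4, 4))]:
    model.add(Convolution2D(n_filters, 3, 3, border_mode='same'))
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=pool))     # -> (128, 1, 14) after the last block

model.add(Permute((3, 1, 2)))                   # -> (14, 128, 1): time steps first
model.add(Reshape((14, 128)))                   # 14-step sequence of 128-d vectors
model.add(GRU(32, return_sequences=True))       # the RNN part reads the sequence
model.add(GRU(32))
model.add(Dense(10, activation='sigmoid'))      # one score per genre
model.compile(optimizer='adam', loss='binary_crossentropy')
```
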
## Slides and Report

- [Slides](https://github.com/jsalbert/music-genre-classification/blob/master/Slides.pdf)
- [Report](https://github.com/jsalbert/music-genre-classification/blob/master/Music_genre_recognition.pdf)

## Code

In this repository we provide scripts to fine-tune the pre-trained model and to run a quick music genre prediction using our own weights.

Currently the supported genres are the [GTZAN dataset](http://marsyasweb.appspot.com/download/data_sets/) tags (a short score-to-tag sketch follows this list):
- Blues
- Classical
- Country
- Disco
- HipHop
- Jazz
- Metal
- Pop
- Reggae
- Rock
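
A hypothetical sketch of pairing one output score vector of the network with these tags (the label order here is an assumption for illustration):

```
# Hypothetical: map one output vector of the network to the GTZAN tags above.
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

def top_genres(scores, k=3):
    """Return the k highest-scoring (genre, score) pairs."""
    return sorted(zip(GENRES, scores), key=lambda p: p[1], reverse=True)[:k]

print(top_genres([0.02, 0.01, 0.05, 0.1, 0.6, 0.03, 0.02, 0.4, 0.3, 0.05]))
# [('hiphop', 0.6), ('pop', 0.4), ('reggae', 0.3)]
```
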
### Prerequisites

We used Keras running on top of Theano to perform the experiments. This was done before Keras 2.0, so we are not sure whether it will work with the new version. It should work on both CPU and GPU. (Keras selects its backend through `~/.keras/keras.json` or the `KERAS_BACKEND` environment variable.)

- Have [pip](https://pip.pypa.io/en/stable/installing/) installed
- Suggested: install [virtualenv](https://virtualenv.pypa.io/en/stable/)

The necessary Python packages are specified in *requirements.txt*. To set up the environment, run:
```
# Create environment
virtualenv env_song
# Activate environment
source env_song/bin/activate
# Install dependencies
pip install -r requirements.txt
```

### Example Code

Fill the *music* folder with songs and add their paths (e.g. `music/example.mp3`) to *list_example.txt*. Then run:

```
python quick_test.py
```

## Results
### Sea of Dreams - Oberhofer

[![Sea of Dreams - Oberhofer](https://github.com/jsalbert/Music-Genre-Classification-with-Deep-Learning/blob/master/figs/sea.png?raw=true)](https://www.youtube.com/watch?v=mIDWsTwstgs)
![fig_sea](https://github.com/jsalbert/Music-Genre-Classification-with-Deep-Learning/blob/master/figs/seaofdreams.png?raw=true)
![Results](https://github.com/jsalbert/Music-Genre-Classification-with-Deep-Learning/blob/master/figs/output.png?raw=true)

### Sky Full of Stars - Coldplay

[![Sky Full of Stars - Coldplay](https://github.com/jsalbert/Music-Genre-Classification-with-Deep-Learning/blob/master/figs/sky.png?raw=true)](https://www.youtube.com/watch?v=zp7NtW_hKJI)
![fig_sky](https://github.com/jsalbert/Music-Genre-Classification-with-Deep-Learning/blob/master/figs/skyfullofstars.png?raw=true)

Slides.pdf

1.57 MB (binary file not shown)

audio_processor.py

Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
import librosa
import numpy as np


def compute_melgram(audio_path):
    '''Compute a mel-spectrogram and return it with shape (1, 1, 96, 1366),
    where 96 == #mel-bins and 1366 == #time frames.

    parameters
    ----------
    audio_path: path to the audio file.
        Any format supported by audioread will work.
        More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load
    '''

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # 29.12 s * 12000 Hz / 256 hop + 1 ~= 1366 frames

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA * SR)

    if n_sample < n_sample_fit:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_fit - n_sample,))))
    elif n_sample > n_sample_fit:  # too long: keep the central DURA seconds
        src = src[(n_sample - n_sample_fit) // 2:(n_sample + n_sample_fit) // 2]

    # note: logamplitude/ref_power is the pre-0.6 librosa API
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS)**2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret


def compute_melgram_multiframe(audio_path, all_song=True):
    '''Compute mel-spectrograms over multiple frames of the song and return
    them with shape (N, 1, 96, 1366), where 96 == #mel-bins,
    1366 == #time frames and N == #frames.

    parameters
    ----------
    audio_path: path to the audio file.
        Any format supported by audioread will work.
        More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load
    '''

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frames
    DURA_TRASH = 0 if all_song else 20  # seconds to discard at each end

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA * SR)
    n_sample_trash = int(DURA_TRASH * SR)

    # remove the trash at the beginning and at the end
    src = src[n_sample_trash:(n_sample - n_sample_trash)]
    n_sample = n_sample - 2 * n_sample_trash

    ret = np.zeros((0, 1, 96, 1366), dtype=np.float32)

    if n_sample < n_sample_fit:  # too short: zero-pad and return a single frame
        src = np.hstack((src, np.zeros((n_sample_fit - n_sample,))))
        logam = librosa.logamplitude
        melgram = librosa.feature.melspectrogram
        ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                            n_fft=N_FFT, n_mels=N_MELS)**2,
                    ref_power=1.0)
        ret = ret[np.newaxis, np.newaxis, :]

    elif n_sample > n_sample_fit:  # too long: one melgram per DURA-second chunk
        N = n_sample // n_sample_fit
        src_total = src

        for i in range(N):
            src = src_total[i * n_sample_fit:(i + 1) * n_sample_fit]

            logam = librosa.logamplitude
            melgram = librosa.feature.melspectrogram
            retI = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                                 n_fft=N_FFT, n_mels=N_MELS)**2,
                         ref_power=1.0)
            retI = retI[np.newaxis, np.newaxis, :]
            ret = np.concatenate((ret, retI), axis=0)

    return ret
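
A minimal usage sketch for the two helpers above (it assumes this file is importable as `audio_processor` and that `music/example.mp3` from *list_example.txt* exists):

```
# Usage sketch: build network-ready 4-D arrays from one song.
from audio_processor import compute_melgram, compute_melgram_multiframe

mel = compute_melgram('music/example.mp3')
print(mel.shape)   # (1, 1, 96, 1366): frame, channel, mel bins, time steps

mels = compute_melgram_multiframe('music/example.mp3', all_song=True)
print(mels.shape)  # (N, 1, 96, 1366): one row per 29.12-second chunk
```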

list_example.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
music/example.mp3
