In [1]:
import numpy as np
import librosa, librosa.display
from scipy import signal
import matplotlib.pyplot as plt
from fastai import *
from fastai.vision import *
import os
from scipy.io.wavfile import read
import jams

In [2]:
# Root folder of the GuitarSet data inside the fastai data directory.
# It is created if it does not exist yet.
path = Config.data_path() / 'GuitarSet'
path.mkdir(exist_ok=True, parents=True)

# Inputs: the .wav files are listed from audio_path, the .jams files from anno_path.
audio_path = path / "audio/audio_mic"
anno_path = path / 'annotation'

# Output: the generated spectrogram images are written here.
save_path = path / "spectrogram"
save_path.mkdir(exist_ok=True, parents=True)

In [3]:
# --- Audio block parameters ---
THRESHOLD = 40            # dB
RATE = 44100
INPUT_BLOCK_TIME = 0.4    # .4s
# Number of samples in one input block at RATE.
INPUT_FRAMES_PER_BLOCK = int(INPUT_BLOCK_TIME * RATE)

# --- Onset detection ---
diff_thresh = 1000

# --- Frequency band kept when slicing the spectrogram ---
fmin, fmax = 10, 600      # Hz

# --- Block buffers (two independent, initially empty lists) ---
prev_block, store_block = [], []

In [4]:
def get_rms(block):
    """Return the root-mean-square value of a block of samples.

    Parameters
    ----------
    block : array-like of numbers
        Samples to measure. Integer input (e.g. int16 PCM) is accepted.

    Returns
    -------
    numpy.float64
        sqrt(mean(block ** 2)).
    """
    # Square in float64: squaring in a narrow integer dtype such as int16
    # silently wraps around and produces a wrong (too small) RMS.
    samples = np.asarray(block, dtype=np.float64)
    return np.sqrt(np.mean(np.square(samples)))

def detect_onset(snd_block, diff_thresh):
    """Report whether any sample-to-sample increase in a block exceeds a threshold.

    Parameters
    ----------
    snd_block : array-like of numbers
        Block of values; consecutive differences are taken along the last axis.
    diff_thresh : number
        An onset is reported when the largest difference is strictly greater
        than this value.

    Returns
    -------
    bool
        True if the largest consecutive difference exceeds ``diff_thresh``;
        False otherwise, including when the block has fewer than two values.
    """
    # Work in float64 so that differences of narrow integer samples
    # (int16 / uint8 PCM) cannot wrap around and hide a large jump.
    samples = np.atleast_1d(np.asarray(snd_block, dtype=np.float64))
    rms_diff = np.diff(samples)
    if rms_diff.size == 0:
        # Nothing to compare; calling .max() on an empty array would raise.
        return False
    return bool(rms_diff.max() > diff_thresh)

def save_spectrogram(y_path, sr, save_path, t1, t2, anno):
    """Render a slice of a WAV file as a band-limited spectrogram image.

    Parameters
    ----------
    y_path : path-like
        WAV file, read with ``scipy.io.wavfile.read``.
    sr : number
        Sampling rate used both for the time conversion and as the sampling
        frequency of the spectrogram.
    save_path : path-like
        Destination of the image, passed to ``plt.savefig``.
    t1, t2 : number
        Start and end of the slice. They are converted with
        ``librosa.core.time_to_frames`` and the results are used as slice
        indices into the sample array.
    anno :
        Not used by this function; kept so existing callers keep working.

    Only the rows whose frequency lies in the module-level ``[fmin, fmax]``
    range are drawn. The figure is closed before returning.
    """
    # Pass sr by keyword: it is keyword-only in newer librosa releases.
    start, stop = librosa.core.time_to_frames([t1, t2], sr=sr)

    # NOTE(review): `read` also returns the file's own sample rate, which is
    # discarded here in favour of the caller's `sr` — confirm they agree.
    _, samples = read(y_path)
    segment = samples[start:stop]

    # NOTE(review): nfft=4094 is not a power of two — confirm 4096 was not intended.
    f, t, Sxx = signal.spectrogram(segment, sr, nfft=4094, nperseg=2048, window='blackman')

    # Keep only the frequency band of interest.
    in_band = (f >= fmin) & (f <= fmax)
    f = f[in_band]
    Sxx = Sxx[in_band, :]

    fig = plt.figure(figsize=(12, 4))
    try:
        plt.pcolormesh(t, f, Sxx)
        plt.axis('off')
        plt.savefig(save_path, bbox_inches='tight')
    finally:
        # The original `plt.close` (no parentheses) never closed anything.
        plt.close(fig)

In [6]:
# Disabled clean-up cell: the loop that empties save_path is wrapped in a string
# literal, so none of it is executed when this cell runs.
# NOTE(review): the "removed ..." output stored under this cell cannot be produced
# by a string literal — presumably it is left over from a run with the loop enabled.
'''
for fname in os.listdir(save_path):
    os.remove(save_path/fname)
    print("removed "+ str(fname))
'''

removed 03_BN2-131-B_solo_mic9800-10200.png
removed 03_BN2-131-B_solo_mic800-1200.png
removed 05_Jazz2-110-Bb_solo_mic4200-4600.png
removed 05_SS3-84-Bb_solo_mic40000-40400.png
removed 05_Funk2-108-Eb_solo_mic28800-29200.png
removed 05_Funk2-108-Eb_solo_mic18000-18400.png
removed 05_SS3-84-Bb_solo_mic32000-32400.png
removed 04_Jazz2-110-Bb_solo_mic33000-33400.png
removed 05_SS3-84-Bb_solo_mic30400-30800.png
removed 05_SS3-84-Bb_solo_mic15000-15400.png
removed 05_Funk2-108-Eb_solo_mic24600-25000.png
removed 04_Jazz2-110-Bb_solo_mic15000-15400.png
removed 03_BN2-131-B_solo_mic11400-11800.png
removed 04_Jazz2-110-Bb_solo_mic30200-30600.png
removed 05_Jazz2-110-Bb_solo_mic13600-14000.png
removed 05_SS3-84-Bb_solo_mic9200-9600.png
removed 04_Jazz2-110-Bb_solo_mic4000-4400.png
removed 04_Jazz2-110-Bb_solo_mic10200-10600.png
removed 05_Funk2-108-Eb_solo_mic32800-33200.png
removed 04_Jazz2-110-Bb_solo_mic16200-16600.png
removed 05_Funk2-108-Eb_solo_mic33400-33800.png
removed 05_SS3-84-Bb_solo_

In [11]:
# Show what is currently in the spectrogram output directory.
# NOTE(review): `.ls()` is not defined in this notebook — presumably the Path
# helper that comes in through the fastai star-imports; confirm.
save_path.ls()

[PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/03_Rock2-142-D_comp_mic2200-2600.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/00_Rock2-142-D_comp_mic1400-1800.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/00_Funk2-108-Eb_solo_mic3000-3400.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/05_Funk3-112-C#_comp_mic2400-2800.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/01_Rock3-117-Bb_comp_mic2600-3000.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/03_Funk1-114-Ab_solo_mic400-800.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/05_Rock1-130-A_solo_mic1400-1800.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/03_Jazz3-137-Eb_solo_mic400-800.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/02_BN1-147-Gb_solo_mic600-1000.png'),
 PosixPath('/home/ubuntu/.fastai/data/GuitarSet/spectrogram/00_Funk1-97-C_comp_mic1600-2000.png'),
 P

In [10]:
# Cut each recording into overlapping windows and save one spectrogram per window.
spec_length = 400  # window length; consecutive windows advance by half of this
for fname in os.listdir(audio_path):
    if fname.endswith('.wav'):
        y, sr = librosa.load(audio_path/fname)
        track_duration = librosa.get_duration(y=y, sr=sr)*1000
        t1 = 0
        t2 = spec_length
        # Load the annotation that belongs to THIS track instead of one
        # hard-coded file. fname[:-8] drops the trailing "_mic.wav"; str() is
        # used as in the annotation-inspection cell, which passes jams.load a string.
        anno = jams.load(str(anno_path/(fname[:-8]+".jams")))
        # NOTE(review): the bound is track_duration//10, so the windows stop well
        # before the end of the recording — confirm this is intentional.
        while t2<track_duration//10:
            # Output name: audio file name without ".wav", then "<t1>-<t2>.png".
            outpath = save_path/(fname[:-4]+str(t1)+"-"+str(t2)+".png")
            save_spectrogram(audio_path/fname, sr, outpath, t1, t2, anno)
            t1 = t1 + spec_length//2 #Works in milliseconds
            t2 = t2 + spec_length//2
            # Release any figure left open so memory does not grow over the loop.
            plt.close('all')
        print(fname)
    else:
        print("Not a .wav file: " + fname)

05_Jazz2-110-Bb_solo_mic.wav
05_SS3-84-Bb_solo_mic.wav
04_Jazz2-110-Bb_solo_mic.wav
03_BN2-131-B_solo_mic.wav
05_Funk2-108-Eb_solo_mic.wav
04_Jazz2-187-F#_solo_mic.wav
04_BN1-129-Eb_comp_mic.wav
04_BN3-154-E_comp_mic.wav
00_BN3-119-G_comp_mic.wav
02_BN1-147-Gb_solo_mic.wav
01_Funk1-114-Ab_comp_mic.wav
03_Rock3-148-C_solo_mic.wav
04_Rock1-130-A_comp_mic.wav
01_Jazz1-200-B_solo_mic.wav
01_BN3-119-G_solo_mic.wav
00_Rock3-117-Bb_solo_mic.wav
01_SS2-88-F_comp_mic.wav
04_SS3-84-Bb_solo_mic.wav
04_BN2-131-B_comp_mic.wav
05_Jazz1-130-D_comp_mic.wav
04_BN3-119-G_solo_mic.wav
01_BN3-154-E_solo_mic.wav
00_Jazz1-200-B_solo_mic.wav
02_Funk1-114-Ab_solo_mic.wav
03_Funk1-97-C_solo_mic.wav
00_Funk2-119-G_solo_mic.wav
02_SS2-88-F_comp_mic.wav
03_Funk2-119-G_comp_mic.wav
03_BN1-147-Gb_solo_mic.wav
01_BN1-129-Eb_comp_mic.wav
05_BN2-166-Ab_solo_mic.wav
00_Rock2-142-D_comp_mic.wav
02_Jazz1-200-B_solo_mic.wav
05_Funk1-97-C_solo_mic.wav
05_Rock3-148-C_comp_mic.wav
02_BN3-119-G_comp_mic.wav
05_SS2-88-F_solo_m

04_Funk3-98-A_solo_mic.wav
02_SS3-84-Bb_comp_mic.wav
04_SS2-107-Ab_comp_mic.wav
00_Rock1-130-A_solo_mic.wav
02_Jazz1-130-D_comp_mic.wav
00_SS2-107-Ab_solo_mic.wav
00_Rock1-90-C#_solo_mic.wav
01_Jazz1-130-D_solo_mic.wav
02_Jazz2-187-F#_comp_mic.wav
01_Rock2-142-D_comp_mic.wav
05_Jazz1-200-B_comp_mic.wav
01_SS3-98-C_solo_mic.wav
01_BN1-147-Gb_solo_mic.wav
00_Jazz1-200-B_comp_mic.wav
04_Rock3-117-Bb_solo_mic.wav
05_SS2-88-F_comp_mic.wav
03_BN2-131-B_comp_mic.wav
04_Funk3-112-C#_solo_mic.wav
05_Rock2-142-D_solo_mic.wav
05_Funk2-119-G_solo_mic.wav
05_Funk2-108-Eb_comp_mic.wav
04_Jazz1-200-B_comp_mic.wav
03_Rock1-90-C#_comp_mic.wav
04_Rock2-85-F_comp_mic.wav
05_Funk2-119-G_comp_mic.wav
02_BN1-129-Eb_solo_mic.wav
05_Rock1-130-A_comp_mic.wav
02_Funk1-97-C_comp_mic.wav
05_Funk1-114-Ab_solo_mic.wav
05_SS1-68-E_comp_mic.wav
04_BN1-129-Eb_solo_mic.wav
00_Rock1-90-C#_comp_mic.wav
03_Funk3-98-A_comp_mic.wav
01_BN2-166-Ab_comp_mic.wav
05_Rock3-117-Bb_comp_mic.wav
04_BN1-147-Gb_solo_mic.wav
04_Rock1-9

In [10]:
# Load a single annotation file and print it to inspect the JAMS structure.
# NOTE(review): this rebinds the notebook-level name `path` (the GuitarSet root
# set in the setup cell) to a plain string; re-run the setup cell before using
# `path` as a directory again.
import jams

path = str(anno_path.joinpath('05_Jazz2-110-Bb_solo.jams'))
jam_obj = jams.load(path)
print(jam_obj)

{
  "annotations": [
    {
      "annotation_metadata": {
        "curator": {
          "name": "",
          "email": ""
        },
        "annotator": {},
        "version": "",
        "corpus": "",
        "annotation_tools": "",
        "annotation_rules": "",
        "validation": "",
        "data_source": "0"
      },
      "namespace": "pitch_contour",
      "data": {
        "time": [],
        "duration": [],
        "value": [],
        "confidence": []
      },
      "sandbox": {},
      "time": 811.7436,
      "duration": 34.90319999999997
    },
    {
      "annotation_metadata": {
        "curator": {
          "name": "",
          "email": ""
        },
        "annotator": {},
        "version": "",
        "corpus": "",
        "annotation_tools": "",
        "annotation_rules": "",
        "validation": "",
        "data_source": "0"
      },
      "namespace": "note_midi",
      "data": [],
      "sandbox": {},
      "time": 811.7436,
      "duration": 34.903199