import os, sys, json from scipy.io import wavfile from scipy.signal import stft import matplotlib.pyplot as plt import numpy as np def is_nanV( xV ): for i in range(xV.shape[0]): if np.isnan( xV[i] ): return True return False def calc_harm_bins( srate, binHz, midiPitch, harmN ): semi_tone = 1.0/12 quarter_tone = 1.0/24 eigth_tone = 1.0/48 band_width_st = 3.0/48 # 3/8 tone fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12)) fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)] fund_m_binL = [int(round(fundHz * i/binHz)) for i in range(1,harmN+1)] fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)] for i in range(len(fund_m_binL)): if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0: fund_l_binL[i] = fund_m_binL[i] - 1 if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1: fund_u_binL[i] = fund_m_binL[i] + 1 return fund_l_binL, fund_m_binL, fund_u_binL def audio_rms( srate, xV, rmsWndMs, hopMs ): wndSmpN = int(round( rmsWndMs * srate / 1000.0)) hopSmpN = int(round( hopMs * srate / 1000.0)) xN = xV.shape[0] yN = int(((xN - wndSmpN) / hopSmpN) + 1) assert( yN > 0) yV = np.zeros( (yN, ) ) assert( wndSmpN > 1 ) i = 0 j = 0 while i < xN and j < yN: if i == 0: yV[j] = np.sqrt(xV[0]*xV[0]) elif i < wndSmpN: yV[j] = np.sqrt( np.mean( xV[0:i] * xV[0:i] ) ) else: yV[j] = np.sqrt( np.mean( xV[i-wndSmpN:i] * xV[i-wndSmpN:i] ) ) i += hopSmpN j += 1 return yV, srate / hopSmpN def audio_db_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs ): rmsV, rms_srate = audio_rms( srate, xV, rmsWndMs, hopMs ) dbWndN = int(round(dbRefWndMs * rms_srate / 1000.0)) dbRef = ref = np.mean(rmsV[0:dbWndN]) return 20.0 * np.log10( rmsV / dbRef ), rms_srate def audio_stft_rms( srate, xV, rmsWndMs, hopMs, spectrumIdx ): wndSmpN = int(round( rmsWndMs * srate / 1000.0)) hopSmpN = int(round( hopMs * srate / 1000.0)) binHz = srate / wndSmpN f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True ) specHopIdx = int(round( spectrumIdx )) specV = np.sqrt(np.abs(xM[:, specHopIdx ])) mV = np.zeros((xM.shape[1])) for i in range(xM.shape[1]): mV[i] = np.max(np.sqrt(np.abs(xM[:,i]))) return mV, srate / hopSmpN, specV, specHopIdx, binHz def audio_stft_db_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, spectrumIdx ): rmsV, rms_srate, specV, specHopIdx, binHz = audio_stft_rms( srate, xV, rmsWndMs, hopMs, spectrumIdx ) dbWndN = int(round(dbRefWndMs * rms_srate / 1000.0)) dbRef = ref = np.mean(rmsV[0:dbWndN]) rmsDbV = 20.0 * np.log10( rmsV / dbRef ) return rmsDbV, rms_srate, specV, specHopIdx, binHz def audio_harm_rms( srate, xV, rmsWndMs, hopMs, midiPitch, harmCandN, harmN ): wndSmpN = int(round( rmsWndMs * srate / 1000.0)) hopSmpN = int(round( hopMs * srate / 1000.0)) binHz = srate / wndSmpN f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True ) harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN ) rmsV = np.zeros((xM.shape[1],)) for i in range(xM.shape[1]): mV = np.sqrt(np.abs(xM[:,i])) pV = np.zeros((len(harmLBinL,))) for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )): pV[j] = np.max(mV[b0i:b1i]) rmsV[i] = np.mean( sorted(pV)[-harmN:] ) return rmsV, srate / hopSmpN, binHz def audio_harm_db_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ): rmsV, rms_srate, binHz = audio_harm_rms( srate, xV, rmsWndMs, hopMs, midiPitch, harmCandN, harmN ) dbWndN = int(round(dbRefWndMs * rms_srate / 1000.0)) dbRef = ref = np.mean(rmsV[0:dbWndN]) rmsDbV = 20.0 * np.log10( rmsV / dbRef ) return rmsDbV, rms_srate, binHz def locate_peak_indexes( xV, xV_srate, eventMsL ): pkIdxL = [] for begMs, endMs in eventMsL: begSmpIdx = int(begMs * xV_srate / 1000.0) endSmpIdx = int(endMs * xV_srate / 1000.0) pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) ) return pkIdxL def plot_spectrum( ax, srate, binHz, specV, midiPitch, harmN ): binN = specV.shape[0] harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmN ) fundHz = harmMBinL[0] * binHz maxPlotHz = fundHz * (harmN+1) maxPlotBinN = int(round(maxPlotHz/binHz)) hzV = np.arange(binN) * (srate/(binN*2)) specV = 20.0 * np.log10(specV) ax.plot(hzV[0:maxPlotBinN], specV[0:maxPlotBinN] ) for h0,h1,h2 in zip(harmLBinL,harmMBinL,harmUBinL): ax.axvline( x=h0 * binHz, color="blue") ax.axvline( x=h1 * binHz, color="black") ax.axvline( x=h2 * binHz, color="blue") ax.set_ylabel(str(midiPitch)) def plot_spectral_ranges( inDir, pitchL, rmsWndMs=300, rmsHopMs=30, harmN=5, dbRefWndMs=500 ): plotN = len(pitchL) fig,axL = plt.subplots(plotN,1) for plot_idx,midiPitch in enumerate(pitchL): # get the audio and meta-data file names seqFn = os.path.join( inDir, str(midiPitch), "seq.json") audioFn = os.path.join( inDir, str(midiPitch), "audio.wav") # read the meta data object with open( seqFn, "rb") as f: r = json.load(f) # read the audio file srate, signalM = wavfile.read(audioFn) sigV = signalM / float(0x7fff) # calc. the RMS envelope in the time domain rms0DbV, rms0_srate = audio_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs ) # locate the sample index of the peak of each note attack pkIdx0L = locate_peak_indexes( rms0DbV, rms0_srate, r['eventTimeL'] ) # select the 7th to last note for spectrum measurement # # TODO: come up with a better way to select the note to measure # spectrumSmpIdx = pkIdx0L[ len(pkIdx0L) - 7 ] # calc. the RMS envelope by taking the max spectral peak in each STFT window rmsDbV, rms_srate, specV, specHopIdx, binHz = audio_stft_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, spectrumSmpIdx) # specV[] is the spectrum of the note at spectrumSmpIdx # plot the spectrum and the harmonic selection ranges plot_spectrum( axL[plot_idx], srate, binHz, specV, midiPitch, harmN ) plt.show() def do_td_plot( inDir ): rmsWndMs = 300 rmsHopMs = 30 dbRefWndMs = 500 harmCandN = 5 harmN = 3 seqFn = os.path.join( inDir, "seq.json") audioFn = os.path.join( inDir, "audio.wav") midiPitch = int(inDir.split("/")[-1]) with open( seqFn, "rb") as f: r = json.load(f) srate, signalM = wavfile.read(audioFn) sigV = signalM / float(0x7fff) rms0DbV, rms0_srate = audio_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs ) rmsDbV, rms_srate, binHz = audio_harm_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midiPitch, harmCandN, harmN ) pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, r['eventTimeL'] ) fig,ax = plt.subplots() fig.set_size_inches(18.5, 10.5, forward=True) secV = np.arange(0,len(rmsDbV)) / rms_srate ax.plot( secV, rmsDbV ) ax.plot( np.arange(0,len(rms0DbV)) / rms0_srate, rms0DbV, color="black" ) for begMs, endMs in r['eventTimeL']: ax.axvline( x=begMs/1000.0, color="green") ax.axvline( x=endMs/1000.0, color="red") for i,pki in enumerate(pkIdxL): ax.plot( [pki / rms_srate], [ rmsDbV[pki] ], marker='.', color="black") plt.show() if __name__ == "__main__": inDir = sys.argv[1] do_td_plot(inDir) #plot_spectral_ranges( inDir, [ 24, 36, 48, 60, 72, 84, 96, 104] )