280 lines
8.1 KiB
Python
280 lines
8.1 KiB
Python
import os, sys, json
|
|
from scipy.io import wavfile
|
|
from scipy.signal import stft
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
|
|
def is_nanV( xV ):
|
|
|
|
for i in range(xV.shape[0]):
|
|
if np.isnan( xV[i] ):
|
|
return True
|
|
|
|
return False
|
|
|
|
def calc_harm_bins( srate, binHz, midiPitch, harmN ):
|
|
|
|
semi_tone = 1.0/12
|
|
quarter_tone = 1.0/24
|
|
eigth_tone = 1.0/48
|
|
band_width_st = 3.0/48 # 3/8 tone
|
|
|
|
fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))
|
|
fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)]
|
|
fund_m_binL = [int(round(fundHz * i/binHz)) for i in range(1,harmN+1)]
|
|
fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)]
|
|
|
|
for i in range(len(fund_m_binL)):
|
|
if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
|
|
fund_l_binL[i] = fund_m_binL[i] - 1
|
|
|
|
if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1:
|
|
fund_u_binL[i] = fund_m_binL[i] + 1
|
|
|
|
return fund_l_binL, fund_m_binL, fund_u_binL
|
|
|
|
|
|
|
|
def audio_rms( srate, xV, rmsWndMs, hopMs ):
|
|
|
|
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
|
|
hopSmpN = int(round( hopMs * srate / 1000.0))
|
|
|
|
xN = xV.shape[0]
|
|
yN = int(((xN - wndSmpN) / hopSmpN) + 1)
|
|
assert( yN > 0)
|
|
yV = np.zeros( (yN, ) )
|
|
|
|
assert( wndSmpN > 1 )
|
|
|
|
i = 0
|
|
j = 0
|
|
while i < xN and j < yN:
|
|
|
|
if i == 0:
|
|
yV[j] = np.sqrt(xV[0]*xV[0])
|
|
elif i < wndSmpN:
|
|
yV[j] = np.sqrt( np.mean( xV[0:i] * xV[0:i] ) )
|
|
else:
|
|
yV[j] = np.sqrt( np.mean( xV[i-wndSmpN:i] * xV[i-wndSmpN:i] ) )
|
|
|
|
i += hopSmpN
|
|
j += 1
|
|
|
|
return yV, srate / hopSmpN
|
|
|
|
def audio_db_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs ):
|
|
|
|
rmsV, rms_srate = audio_rms( srate, xV, rmsWndMs, hopMs )
|
|
dbWndN = int(round(dbRefWndMs * rms_srate / 1000.0))
|
|
dbRef = ref = np.mean(rmsV[0:dbWndN])
|
|
return 20.0 * np.log10( rmsV / dbRef ), rms_srate
|
|
|
|
|
|
|
|
def audio_stft_rms( srate, xV, rmsWndMs, hopMs, spectrumIdx ):
|
|
|
|
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
|
|
hopSmpN = int(round( hopMs * srate / 1000.0))
|
|
binHz = srate / wndSmpN
|
|
|
|
f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
|
|
|
|
specHopIdx = int(round( spectrumIdx ))
|
|
specV = np.sqrt(np.abs(xM[:, specHopIdx ]))
|
|
|
|
mV = np.zeros((xM.shape[1]))
|
|
|
|
for i in range(xM.shape[1]):
|
|
mV[i] = np.max(np.sqrt(np.abs(xM[:,i])))
|
|
|
|
return mV, srate / hopSmpN, specV, specHopIdx, binHz
|
|
|
|
def audio_stft_db_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, spectrumIdx ):
|
|
rmsV, rms_srate, specV, specHopIdx, binHz = audio_stft_rms( srate, xV, rmsWndMs, hopMs, spectrumIdx )
|
|
|
|
dbWndN = int(round(dbRefWndMs * rms_srate / 1000.0))
|
|
dbRef = ref = np.mean(rmsV[0:dbWndN])
|
|
rmsDbV = 20.0 * np.log10( rmsV / dbRef )
|
|
|
|
return rmsDbV, rms_srate, specV, specHopIdx, binHz
|
|
|
|
def audio_harm_rms( srate, xV, rmsWndMs, hopMs, midiPitch, harmCandN, harmN ):
|
|
|
|
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
|
|
hopSmpN = int(round( hopMs * srate / 1000.0))
|
|
|
|
binHz = srate / wndSmpN
|
|
|
|
f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
|
|
|
|
harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )
|
|
|
|
rmsV = np.zeros((xM.shape[1],))
|
|
|
|
|
|
for i in range(xM.shape[1]):
|
|
mV = np.sqrt(np.abs(xM[:,i]))
|
|
|
|
pV = np.zeros((len(harmLBinL,)))
|
|
|
|
for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )):
|
|
pV[j] = np.max(mV[b0i:b1i])
|
|
|
|
rmsV[i] = np.mean( sorted(pV)[-harmN:] )
|
|
|
|
|
|
|
|
|
|
return rmsV, srate / hopSmpN, binHz
|
|
|
|
|
|
|
|
def audio_harm_db_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ):
|
|
|
|
rmsV, rms_srate, binHz = audio_harm_rms( srate, xV, rmsWndMs, hopMs, midiPitch, harmCandN, harmN )
|
|
|
|
dbWndN = int(round(dbRefWndMs * rms_srate / 1000.0))
|
|
dbRef = ref = np.mean(rmsV[0:dbWndN])
|
|
rmsDbV = 20.0 * np.log10( rmsV / dbRef )
|
|
|
|
return rmsDbV, rms_srate, binHz
|
|
|
|
|
|
|
|
def locate_peak_indexes( xV, xV_srate, eventMsL ):
|
|
|
|
pkIdxL = []
|
|
for begMs, endMs in eventMsL:
|
|
|
|
begSmpIdx = int(begMs * xV_srate / 1000.0)
|
|
endSmpIdx = int(endMs * xV_srate / 1000.0)
|
|
|
|
pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )
|
|
|
|
return pkIdxL
|
|
|
|
|
|
def plot_spectrum( ax, srate, binHz, specV, midiPitch, harmN ):
|
|
|
|
binN = specV.shape[0]
|
|
harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmN )
|
|
|
|
fundHz = harmMBinL[0] * binHz
|
|
maxPlotHz = fundHz * (harmN+1)
|
|
maxPlotBinN = int(round(maxPlotHz/binHz))
|
|
|
|
hzV = np.arange(binN) * (srate/(binN*2))
|
|
|
|
specV = 20.0 * np.log10(specV)
|
|
|
|
ax.plot(hzV[0:maxPlotBinN], specV[0:maxPlotBinN] )
|
|
|
|
for h0,h1,h2 in zip(harmLBinL,harmMBinL,harmUBinL):
|
|
ax.axvline( x=h0 * binHz, color="blue")
|
|
ax.axvline( x=h1 * binHz, color="black")
|
|
ax.axvline( x=h2 * binHz, color="blue")
|
|
|
|
ax.set_ylabel(str(midiPitch))
|
|
|
|
def plot_spectral_ranges( inDir, pitchL, rmsWndMs=300, rmsHopMs=30, harmN=5, dbRefWndMs=500 ):
|
|
|
|
plotN = len(pitchL)
|
|
fig,axL = plt.subplots(plotN,1)
|
|
|
|
for plot_idx,midiPitch in enumerate(pitchL):
|
|
|
|
# get the audio and meta-data file names
|
|
seqFn = os.path.join( inDir, str(midiPitch), "seq.json")
|
|
audioFn = os.path.join( inDir, str(midiPitch), "audio.wav")
|
|
|
|
# read the meta data object
|
|
with open( seqFn, "rb") as f:
|
|
r = json.load(f)
|
|
|
|
# read the audio file
|
|
srate, signalM = wavfile.read(audioFn)
|
|
sigV = signalM / float(0x7fff)
|
|
|
|
# calc. the RMS envelope in the time domain
|
|
rms0DbV, rms0_srate = audio_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )
|
|
|
|
# locate the sample index of the peak of each note attack
|
|
pkIdx0L = locate_peak_indexes( rms0DbV, rms0_srate, r['eventTimeL'] )
|
|
|
|
# select the 7th to last note for spectrum measurement
|
|
|
|
#
|
|
# TODO: come up with a better way to select the note to measure
|
|
#
|
|
spectrumSmpIdx = pkIdx0L[ len(pkIdx0L) - 7 ]
|
|
|
|
|
|
# calc. the RMS envelope by taking the max spectral peak in each STFT window
|
|
rmsDbV, rms_srate, specV, specHopIdx, binHz = audio_stft_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, spectrumSmpIdx)
|
|
|
|
# specV[] is the spectrum of the note at spectrumSmpIdx
|
|
|
|
# plot the spectrum and the harmonic selection ranges
|
|
plot_spectrum( axL[plot_idx], srate, binHz, specV, midiPitch, harmN )
|
|
plt.show()
|
|
|
|
|
|
|
|
def do_td_plot( inDir ):
|
|
|
|
rmsWndMs = 300
|
|
rmsHopMs = 30
|
|
dbRefWndMs = 500
|
|
harmCandN = 5
|
|
harmN = 3
|
|
|
|
seqFn = os.path.join( inDir, "seq.json")
|
|
audioFn = os.path.join( inDir, "audio.wav")
|
|
midiPitch = int(inDir.split("/")[-1])
|
|
|
|
|
|
with open( seqFn, "rb") as f:
|
|
r = json.load(f)
|
|
|
|
|
|
srate, signalM = wavfile.read(audioFn)
|
|
sigV = signalM / float(0x7fff)
|
|
|
|
rms0DbV, rms0_srate = audio_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )
|
|
|
|
rmsDbV, rms_srate, binHz = audio_harm_db_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midiPitch, harmCandN, harmN )
|
|
|
|
pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, r['eventTimeL'] )
|
|
|
|
fig,ax = plt.subplots()
|
|
fig.set_size_inches(18.5, 10.5, forward=True)
|
|
|
|
secV = np.arange(0,len(rmsDbV)) / rms_srate
|
|
|
|
ax.plot( secV, rmsDbV )
|
|
ax.plot( np.arange(0,len(rms0DbV)) / rms0_srate, rms0DbV, color="black" )
|
|
|
|
for begMs, endMs in r['eventTimeL']:
|
|
ax.axvline( x=begMs/1000.0, color="green")
|
|
ax.axvline( x=endMs/1000.0, color="red")
|
|
|
|
|
|
for i,pki in enumerate(pkIdxL):
|
|
ax.plot( [pki / rms_srate], [ rmsDbV[pki] ], marker='.', color="black")
|
|
|
|
plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
inDir = sys.argv[1]
|
|
|
|
do_td_plot(inDir)
|
|
|
|
#plot_spectral_ranges( inDir, [ 24, 36, 48, 60, 72, 84, 96, 104] )
|