# piccal/rms_analysis.py


import os, types, json, pickle
from scipy.io import wavfile
from scipy.signal import stft
import numpy as np
from common import parse_yaml_cfg
def calc_harm_bins( srate, binHz, midiPitch, harmN ):
    """Return lower/center/upper STFT bin indexes for the first harmN harmonics of midiPitch."""
    semi_tone     = 1.0/12
    quarter_tone  = 1.0/24
    eighth_tone   = 1.0/48
    band_width_st = 3.0/48  # 3/8 tone

    fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))

    fund_l_binL = [ int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1) ]
    fund_m_binL = [ int(round(fundHz *                           i/binHz)) for i in range(1,harmN+1) ]
    fund_u_binL = [ int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1) ]

    for i in range(len(fund_m_binL)):
        # force the lower/upper band edges to lie at least one bin away from the center bin
        if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
            fund_l_binL[i] = fund_m_binL[i] - 1

        if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1:
            fund_u_binL[i] = fund_m_binL[i] + 1

    return fund_l_binL, fund_m_binL, fund_u_binL

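# Worked example (illustrative only, not from the original source): the fundHz
# expression above is the standard MIDI-to-Hz mapping, e.g. midiPitch=69 (A4)
# gives (13.75 * 2**(-9/12)) * 2**(69/12) = 13.75 * 2**5 = 440.0 Hz. Assuming
# srate=44100 and rmsWndMs=300 (wndSmpN=13230, binHz ~= 3.33 Hz), the center
# bin of the first harmonic is round(440 / 3.33) ~= 132.
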
def rms_to_db( xV, rms_srate, refWndMs ):
    #dbWndN = int(round(refWndMs * rms_srate / 1000.0))
    #dbRef = ref = np.mean(xV[0:dbWndN])

    # HACK: refWndMs is currently used directly as the linear dB reference value
    # rather than as a reference window length in milliseconds (see the
    # commented-out window-based reference above).
    dbRef = refWndMs

    rmsDbV = 20.0 * np.log10( xV / dbRef )

    return rmsDbV

def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs ):
    """Running RMS envelope of xV in dB: rmsWndMs window, hopMs between output samples."""
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs    * srate / 1000.0))

    xN = xV.shape[0]
    yN = int(((xN - wndSmpN) / hopSmpN) + 1)
    assert( yN > 0)

    yV = np.zeros( (yN, ) )

    assert( wndSmpN > 1 )

    i = 0
    j = 0
    while i < xN and j < yN:

        if i == 0:
            yV[j] = np.sqrt(xV[0]*xV[0])
        elif i < wndSmpN:
            # the analysis window is not yet full - use a partial window
            yV[j] = np.sqrt( np.mean( xV[0:i] * xV[0:i] ) )
        else:
            yV[j] = np.sqrt( np.mean( xV[i-wndSmpN:i] * xV[i-wndSmpN:i] ) )

        i += hopSmpN
        j += 1

    rms_srate = srate / hopSmpN

    return rms_to_db( yV, rms_srate, refWndMs ), rms_srate

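# A minimal usage sketch (the file name and parameter values are assumptions,
# not from the original source):
#
#   srate, sig = wavfile.read("audio.wav")
#   rmsDbV, rms_srate = audio_rms( srate, sig/float(0x7fff), rmsWndMs=300, hopMs=30, refWndMs=0.002 )
#
# With srate=44100 and hopMs=30 the envelope rate is rms_srate = 44100/1323 ~= 33.3 Hz,
# and refWndMs=0.002 matches the fixed dB reference used elsewhere in this file.
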
def audio_stft_rms( srate, xV, rmsWndMs, hopMs, refWndMs, spectrumIdx ):
    """Per-frame spectral peak envelope in dB, plus the square-root magnitude spectrum at frame spectrumIdx."""
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs    * srate / 1000.0))
    binHz   = srate / wndSmpN

    f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )

    specHopIdx = int(round( spectrumIdx ))
    specV      = np.sqrt(np.abs(xM[:, specHopIdx ]))

    mV = np.zeros((xM.shape[1]))

    # per-frame peak of the square-root magnitude spectrum
    for i in range(xM.shape[1]):
        mV[i] = np.max(np.sqrt(np.abs(xM[:,i])))

    rms_srate = srate / hopSmpN
    mV = rms_to_db( mV, rms_srate, refWndMs )

    return mV, rms_srate, specV, specHopIdx, binHz

def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ):
    """Harmonic RMS envelope in dB: per STFT frame, average the harmN strongest of harmCandN candidate harmonic peaks."""
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs    * srate / 1000.0))
    binHz   = srate / wndSmpN

    #print( "STFT:", rmsWndMs, hopMs, wndSmpN, hopSmpN, wndSmpN-hopSmpN )

    f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )

    harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )

    rmsV = np.zeros((xM.shape[1],))

    for i in range(xM.shape[1]):
        mV = np.sqrt(np.abs(xM[:,i]))

        pV = np.zeros((len(harmLBinL),))

        # take the peak magnitude within each candidate harmonic band
        for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )):
            pV[j] = np.max(mV[b0i:b1i])

        # average the strongest harmN harmonic peaks
        rmsV[i] = np.mean( sorted(pV)[-harmN:] )

    rms_srate = srate / hopSmpN
    rmsV = rms_to_db( rmsV, rms_srate, dbRefWndMs )

    return rmsV, rms_srate, binHz

def measure_duration_ms( rmsV, rms_srate, peak_idx, end_idx, decay_pct ):
    """
    Calculate the time it takes for a note to decay from the peak at
    rmsV[peak_idx] dB to 'decay_pct' percent of the peak value.
    """
    pkRmsDb = rmsV[ peak_idx ]

    # calc the note turn-off (offset) db as a percentage of the peak amplitude
    offsetRmsDb = pkRmsDb * decay_pct / 100.0

    # calc the sample index where the note is off
    offset_idx = peak_idx + np.argmin( np.abs(rmsV[peak_idx:end_idx] - offsetRmsDb) )

    # calc the duration of the note
    dur_ms = int(round((offset_idx - peak_idx) * 1000.0 / rms_srate))

    #print(pkRmsDb, offsetRmsDb, peak_idx, offset_idx, end_idx, dur_ms, rms_srate)

    return dur_ms

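# Worked example (a sketch with assumed numbers): for a peak of +40 dB and
# decay_pct=50 the offset threshold is 40 * 50/100 = 20 dB; if the envelope
# sample closest to 20 dB lies 40 hops after the peak at rms_srate ~= 33.3 Hz,
# then dur_ms = round(40 * 1000 / 33.3) ~= 1200 ms.
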
def select_first_stable_note_by_dur( durMsL, minDurMs=800 ):
    """Return the index of the first note in the final unbroken run of durations above minDurMs, or None."""
    first_stable_idx = None

    for i,durMs in enumerate(durMsL):
        if durMs > minDurMs and first_stable_idx is None:
            first_stable_idx = i
        else:
            if durMs < minDurMs:
                first_stable_idx = None

    return first_stable_idx

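# Worked example (illustrative values): with minDurMs=800,
#   select_first_stable_note_by_dur([200, 900, 950, 700, 850]) == 4
# because the short note at index 3 breaks the earlier run; the returned index
# is the start of the final unbroken run of durations above minDurMs.
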
def select_first_stable_note_by_delta_db_1( pkDbL, pkUsL, maxPulseUs=0.1, maxDeltaDb=0.1 ):
    # maxDeltaDb: threshold on the mean absolute relative change in peak level
    # (matches the fixed 10% threshold used by select_first_stable_note_by_delta_db() below)
    wndN = 5
    aL   = []

    # mean absolute relative change in peak level over a wndN wide window
    dV = np.diff(pkDbL) / pkDbL[1:]
    for ei in range(wndN,len(pkDbL)):
        xV  = dV[ei-wndN:ei]
        avg = np.mean(np.abs(xV))
        aL.append(avg)

    k = np.argmin(np.abs(np.array(pkUsL) - maxPulseUs))

    print(aL)
    print(k)

    # search backward from k for the last window whose mean change exceeds the threshold
    for i in range(k,0,-1):
        if aL[i] > maxDeltaDb:
            return i + 1

    return None

def select_first_stable_note_by_delta_db( pkDbL, pkUsL=None, maxPulseUs=0.1 ):
    wndN = 5

    # relative change in peak level between successive notes
    dV = np.diff(pkDbL) / pkDbL[1:]

    # return the first note index at which the mean absolute change over a
    # wndN wide window falls below 10%
    for ei in range(wndN,len(pkDbL)):
        xV  = dV[ei-wndN:ei]
        avg = np.mean(np.abs(xV))
        if avg < .1:
            return (ei-wndN)+1

    return None

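# Sketch of the criterion (illustrative numbers): dV[i] = (pkDbL[i+1]-pkDbL[i]) / pkDbL[i+1]
# is the relative change between successive peak levels, and the first index is
# reported once the mean absolute change over a 5 note window drops below 10%.
# For example pkDbL = [10, 15, 21, 22, 22.5, 23, 23.2, 23.3] returns 2, while a
# sequence whose level keeps changing by more than 10% per note returns None.
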
def note_stats( r, decay_pct=50.0, extraDurSearchMs=500 ):
    """Per-note duration, peak level and quality statistics from an analysis record like the one built by rms_analysis_main()."""
    statsL = []
    srate  = r.rms_srate
    qmax   = 0

    for i,(begSmpMs, endSmpMs) in enumerate(r.eventTimeL):

        begSmpIdx = int(round(srate * begSmpMs / 1000.0))
        endSmpIdx = int(round(srate * (endSmpMs + extraDurSearchMs) / 1000.0))

        pkSmpIdx = r.pkIdxL[i]

        durMs = measure_duration_ms( r.rmsDbV, srate, pkSmpIdx, endSmpIdx, decay_pct )

        bi = pkSmpIdx
        ei = pkSmpIdx + int(round(durMs * srate / 1000.0))

        qualityCoeff = np.sum(r.rmsDbV[bi:ei]) + np.sum(r.tdRmsDbV[bi:ei])
        if qualityCoeff > qmax:
            qmax = qualityCoeff

        durAvgDb = (np.mean(r.rmsDbV[bi:ei]) + np.mean(r.tdRmsDbV[bi:ei]))/2.0

        statsL.append( types.SimpleNamespace(**{'begSmpSec':begSmpIdx/srate,'endSmpSec':endSmpIdx/srate,'pkSmpSec':pkSmpIdx/srate,'durMs':durMs, 'pkDb':r.pkDbL[i], 'pulse_us':r.pkUsL[i], 'quality':qualityCoeff, 'durAvgDb':durAvgDb }))

    # normalize the quality coefficient to the highest-quality note
    for i in range(len(statsL)):
        statsL[i].quality /= qmax

    return statsL

def locate_peak_indexes( xV, xV_srate, eventMsL ):
    """Return the index of the maximum of xV within each (begMs,endMs) event window."""
    pkIdxL = []
    for begMs, endMs in eventMsL:

        begSmpIdx = int(begMs * xV_srate / 1000.0)
        endSmpIdx = int(endMs * xV_srate / 1000.0)

        pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )

    return pkIdxL

def key_info_dictionary( keyMapL=None, yamlCfgFn=None):
    """Build a MIDI-pitch keyed dictionary of key map records, from keyMapL or from a YAML config file."""
    if yamlCfgFn is not None:
        cfg = parse_yaml_cfg(yamlCfgFn)
        keyMapL = cfg.key_mapL

    kmD = {}
    for d in keyMapL:
        kmD[ d['midi'] ] = types.SimpleNamespace(**d)

    return kmD

def rms_analyze_one_rt_note( sigV, srate, begMs, endMs, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3, durDecayPct=40 ):
    """Time-domain and harmonic RMS analysis of a single real-time note between begMs and endMs."""
    sigV = np.squeeze(sigV)

    # HACK: override the caller's dbRefWndMs - rms_to_db() currently treats it
    # as a linear dB reference value, not a window length
    dbRefWndMs = 0.002

    # time-domain RMS envelope, peak and duration
    td_rmsDbV, td_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )

    begSmpIdx = int(round(begMs * td_srate/1000))
    endSmpIdx = int(round(endMs * td_srate/1000))
    td_pk_idx = begSmpIdx + np.argmax(td_rmsDbV[begSmpIdx:endSmpIdx])

    td_durMs = measure_duration_ms( td_rmsDbV, td_srate, td_pk_idx, len(sigV)-1, durDecayPct )

    # HACK: the harmonic analysis uses a different fixed dB reference
    dbRefWndMs = 0.01

    # harmonic RMS envelope, peak and duration
    hm_rmsDbV, hm_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midi_pitch, harmCandN, harmN )

    begSmpIdx = int(round(begMs * hm_srate/1000))
    endSmpIdx = int(round(endMs * hm_srate/1000))
    hm_pk_idx = begSmpIdx + np.argmax(hm_rmsDbV[begSmpIdx:endSmpIdx])

    hm_durMs = measure_duration_ms( hm_rmsDbV, hm_srate, hm_pk_idx, len(sigV)-1, durDecayPct )

    tdD = { "rmsDbV":td_rmsDbV.tolist(), "srate":td_srate, "pk_idx":int(td_pk_idx), "db":float(td_rmsDbV[td_pk_idx]), "durMs":td_durMs }
    hmD = { "rmsDbV":hm_rmsDbV.tolist(), "srate":hm_srate, "pk_idx":int(hm_pk_idx), "db":float(hm_rmsDbV[hm_pk_idx]), "durMs":hm_durMs }

    return { "td":tdD, "hm":hmD }

def calibrate_rms( sigV, srate, beg_ms, end_ms ):
    """RMS level of sigV between beg_ms and end_ms, in dB re the 0.002 reference."""
    bi  = int(round(beg_ms * srate / 1000))
    ei  = int(round(end_ms * srate / 1000))
    rms = np.sqrt( np.mean( sigV[bi:ei] * sigV[bi:ei] ))

    return 20.0*np.log10( rms / 0.002 )

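# Worked example (illustrative numbers): a steady calibration segment with an
# RMS amplitude of 0.02 (relative to full scale 1.0) measures
#   20*log10(0.02/0.002) = +20 dB
# re the fixed 0.002 reference used throughout this file.
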
def calibrate_recording_analysis( inDir ):
    """Measure the level of each annotated calibration note in inDir/audio.wav and return a per-pitch dictionary of results."""
    jsonFn  = os.path.join(inDir, "meas.json" )
    audioFn = os.path.join(inDir, "audio.wav" )

    with open(jsonFn,"r") as f:
        r = json.load(f)

    measD = r['measD']
    cfg   = types.SimpleNamespace(**r['cfg'])
    annL  = r['annoteL']
    anlD  = {}

    n = 0
    for midi_pitch,measL in measD.items():
        n += len(measL)
        anlD[int(midi_pitch)] = []

    srate, signalM = wavfile.read(audioFn)
    sigV = signalM / float(0x7fff)

    anlr = types.SimpleNamespace(**cfg.analysisD)

    # HACK: this value is passed to rms_to_db() as a linear dB reference, not a window length
    dbRefWndMs = 0.002

    tdRmsDbV, td_srate = audio_rms( srate, sigV, anlr.rmsWndMs, anlr.rmsHopMs, dbRefWndMs )

    # for each measured pitch
    for midi_pitch,measL in measD.items():

        # for each measured note at this pitch
        for mi,d in enumerate(measL):
            mr = types.SimpleNamespace(**d)

            # locate the associated annotation record
            for annD in annL:
                ar = types.SimpleNamespace(**annD)

                if ar.midi_pitch == mr.midi_pitch and ar.beg_ms==mr.beg_ms and ar.end_ms==mr.end_ms:
                    assert( ar.pulse_us == mr.pulse_us )

                    bi = int(round(ar.beg_ms * td_srate / 1000))
                    ei = int(round(ar.end_ms * td_srate / 1000))

                    #db = np.mean(tdRmsDbV[bi:ei])  # envelope based estimate (unused - superseded by the direct measurement below)
                    db = calibrate_rms( sigV, srate, ar.beg_ms, ar.end_ms )

                    anlD[int(midi_pitch)].append({ 'pulse_us':ar.pulse_us, 'db':db, 'meas_idx':mi })
                    break

    return anlD

def rms_analysis_main( inDir, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3, durDecayPct=40 ):
    """Analyze one pitch folder: time-domain and harmonic RMS envelopes, per-event peaks, and note statistics."""
    seqFn   = os.path.join( inDir, "seq.json")
    audioFn = os.path.join( inDir, "audio.wav")

    with open( seqFn, "rb") as f:
        r = json.load(f)

    srate, signalM = wavfile.read(audioFn)
    sigV = signalM / float(0x7fff)

    # time-domain RMS envelope and per-event peak indexes
    tdRmsDbV, rms0_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )
    tdPkIdxL = locate_peak_indexes( tdRmsDbV, rms0_srate, r['eventTimeL'])

    # harmonic RMS envelope and per-event peak indexes
    rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midi_pitch, harmCandN, harmN )
    pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, r['eventTimeL'] )

    holdDutyPctL = None
    if 'holdDutyPct' in r:
        holdDutyPctL = [ (0, r['holdDutyPct']) ]
    else:
        holdDutyPctL = r['holdDutyPctL']

    r = types.SimpleNamespace(**{
        "audio_srate":srate,
        "tdRmsDbV": tdRmsDbV,
        "tdPkIdxL": tdPkIdxL,
        "tdPkDbL": [ tdRmsDbV[i] for i in tdPkIdxL ],
        "binHz": binHz,
        "rmsDbV":rmsDbV,
        "rms_srate":rms_srate,
        "pkIdxL":pkIdxL,              # pkIdxL[ len(pulseUsL) ] - indexes into rmsDbV[] of peaks
        #"min_pk_idx":min_pk_idx,
        #"max_pk_idx":max_pk_idx,
        "eventTimeL":r['eventTimeL'],
        "holdDutyPctL":holdDutyPctL,
        'pkDbL': [ rmsDbV[ i ] for i in pkIdxL ],
        'pkUsL':r['pulseUsL'] })

    statsL = note_stats(r,durDecayPct)
    setattr(r,"statsL", statsL )

    return r

def rms_analysis_main_all( inDir, cacheFn, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3, durDecayPct=40 ):
    """Run rms_analysis_main() on every pitch folder under inDir, caching the combined result dictionary in cacheFn."""
    if os.path.isfile(cacheFn):
        print("READING analysis cache file: %s" % (cacheFn))
        with open(cacheFn,"rb") as f:
            rD = pickle.load(f)
        return rD

    folderL = os.listdir(inDir)

    rD = {}

    # analyze each pitch folder (folders are named by MIDI pitch)
    for folder in folderL:

        pathL = folder.split(os.sep)
        midi_pitch = int(pathL[-1])

        print(midi_pitch)

        path = os.path.join(inDir,folder,'0')

        if os.path.isdir(path) and os.path.isfile(os.path.join(path,"seq.json")):

            r = rms_analysis_main( path, midi_pitch, rmsWndMs=rmsWndMs, rmsHopMs=rmsHopMs, dbRefWndMs=dbRefWndMs, harmCandN=harmCandN, harmN=harmN, durDecayPct=durDecayPct )
            rD[ midi_pitch ] = r

    with open(cacheFn,"wb") as f:
        pickle.dump(rD,f)

    return rD
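
# A minimal command line sketch (not part of the original module; the default
# cache file name and the use of sys.argv here are assumptions). It expects the
# per-pitch folder layout that rms_analysis_main_all() requires:
# inDir/<midi_pitch>/0/ containing seq.json and audio.wav.
if __name__ == "__main__":
    import sys

    inDir   = sys.argv[1] if len(sys.argv) > 1 else "."
    cacheFn = sys.argv[2] if len(sys.argv) > 2 else "rms_analysis_cache.pickle"

    rD = rms_analysis_main_all( inDir, cacheFn )

    for midi_pitch,r in rD.items():
        print("pitch:%3i  notes:%3i  binHz:%5.2f" % (midi_pitch, len(r.statsL), r.binHz))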