"""RMS / harmonic level analysis of recorded note sequences.

Naming convention used throughout: the suffix ``V`` marks a vector, ``M`` a
matrix, ``L`` a list, ``D`` a dictionary and ``N`` a count.
"""
import json
import os
import pickle
import types

import numpy as np
from scipy.io import wavfile
from scipy.signal import stft

try:
    from common import parse_yaml_cfg
    _common_import_err = None
except ImportError as _err:
    # 'common' is project-local and only key_info_dictionary(yamlCfgFn=...)
    # needs it, so the numeric routines stay importable without it.  The
    # failure is re-raised when the YAML path is actually requested.
    parse_yaml_cfg = None
    _common_import_err = _err


def calc_harm_bins( srate, binHz, midiPitch, harmN ):
    """Return the spectrum bin ranges surrounding the first harmN harmonics.

    Args:
        srate:     Audio sample rate in Hz (used to locate the Nyquist bin).
        binHz:     Width of one spectrum bin in Hz.
        midiPitch: MIDI pitch number of the fundamental.
        harmN:     Count of harmonics (the fundamental is harmonic 1).

    Returns:
        (lowerBinL, centerBinL, upperBinL): three lists of harmN bin indexes.
        The lower/upper edges sit 3/48 of an octave below/above each
        harmonic.  Where rounding collapses an edge onto the center bin the
        edge is pushed one bin outward so that the half-open slice
        [lower:upper) always contains the center bin.
    """
    band_width_st = 3.0/48  # band half-width as a fraction of an octave

    # MIDI pitch 0 is 13.75 * 2^(-9/12) Hz (pitch 69 evaluates to 440 Hz).
    fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))

    harmL = range(1,harmN+1)
    fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in harmL]
    fund_m_binL = [int(round(fundHz * i/binHz)) for i in harmL]
    fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in harmL]

    # Index of the Nyquist bin.  The small offset absorbs floating point
    # error when binHz was itself computed as srate / window-length.
    maxBin = int(srate / (2.0 * binHz) + 1e-6)

    for i in range(len(fund_m_binL)):
        if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
            fund_l_binL[i] = fund_m_binL[i] - 1

        # The original compared against len(fund_u_binL)-1 (the harmonic
        # count) which left the upper edge un-widened for almost every bin.
        if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < maxBin:
            fund_u_binL[i] = fund_m_binL[i] + 1

    return fund_l_binL, fund_m_binL, fund_u_binL


def rms_to_db( xV, rms_srate, refWndMs ):
    """Convert a linear envelope to dB relative to the mean of its beginning.

    Args:
        xV:        Linear envelope values.
        rms_srate: Sample rate of xV in Hz.
        refWndMs:  Length in milliseconds of the leading span of xV whose
                   mean is used as the 0 dB reference.

    Returns:
        20*log10(xV / reference) as a numpy array.
    """
    xV = np.asarray(xV)

    # Use at least one sample so the reference is never the mean of nothing.
    dbWndN = max(1, int(round(refWndMs * rms_srate / 1000.0)))
    dbRef = np.mean(xV[0:dbWndN])

    return 20.0 * np.log10( xV / dbRef )


def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs ):
    """Compute the time-domain RMS envelope of xV in dB.

    Frame j is measured at sample i = j*hop over the (up to) rmsWndMs of
    signal preceding sample i; frame 0 is the magnitude of the first sample.

    Args:
        srate:    Audio sample rate in Hz.
        xV:       Audio signal vector.
        rmsWndMs: RMS window length in milliseconds.
        hopMs:    Hop between frames in milliseconds.
        refWndMs: dB reference window length (see rms_to_db()).

    Returns:
        (rmsDbV, rms_srate): the envelope in dB and its sample rate in Hz.
    """
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs * srate / 1000.0))

    xN = xV.shape[0]
    yN = int(((xN - wndSmpN) / hopSmpN) + 1)

    assert( yN > 0)
    assert( wndSmpN > 1 )

    yV = np.zeros( (yN, ) )

    # j*hopSmpN never exceeds xN-wndSmpN here, so every frame start is in
    # range and the frame index alone drives the loop.
    for j in range(yN):
        i = j * hopSmpN

        if i == 0:
            yV[j] = np.sqrt(xV[0]*xV[0])
        else:
            # Window of samples preceding i, shortened at the signal start.
            wV = xV[max(0, i-wndSmpN):i]
            yV[j] = np.sqrt( np.mean( wV * wV ) )

    rms_srate = srate / hopSmpN

    return rms_to_db( yV, rms_srate, refWndMs ), rms_srate


def audio_stft_rms( srate, xV, rmsWndMs, hopMs, refWndMs, spectrumIdx ):
    """Compute a dB envelope from the largest STFT bin of each frame.

    The per-bin level is the square root of the STFT magnitude.

    Args:
        srate:       Audio sample rate in Hz.
        xV:          Audio signal vector.
        rmsWndMs:    STFT window length in milliseconds.
        hopMs:       STFT hop in milliseconds.
        refWndMs:    dB reference window length (see rms_to_db()).
        spectrumIdx: Frame index (rounded to an integer) of the spectrum to
                     return.

    Returns:
        (dbV, rms_srate, specV, specHopIdx, binHz): the envelope in dB, its
        sample rate, the spectrum at frame specHopIdx, that frame index and
        the bin width in Hz.
    """
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs * srate / 1000.0))
    binHz = srate / wndSmpN

    _, _, xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )

    magM = np.sqrt(np.abs(xM))

    specHopIdx = int(round( spectrumIdx ))
    specV = magM[:, specHopIdx ]

    # Largest bin of every frame (columns of xM are frames).
    mV = np.max(magM, axis=0)

    rms_srate = srate / hopSmpN
    mV = rms_to_db( mV, rms_srate, refWndMs )

    return mV, rms_srate, specV, specHopIdx, binHz


def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ):
    """Compute a dB envelope from the strongest harmonics of a known pitch.

    For every STFT frame the peak level (square root of the STFT magnitude)
    inside the band around each of the first harmCandN harmonics is found
    and the harmN largest of those peaks are averaged.

    Args:
        srate:      Audio sample rate in Hz.
        xV:         Audio signal vector.
        rmsWndMs:   STFT window length in milliseconds.
        hopMs:      STFT hop in milliseconds.
        dbRefWndMs: dB reference window length (see rms_to_db()).
        midiPitch:  MIDI pitch of the note being measured.
        harmCandN:  Count of harmonics examined.
        harmN:      Count of strongest harmonics averaged.

    Returns:
        (rmsDbV, rms_srate, binHz): the envelope in dB, its sample rate in
        Hz and the bin width in Hz.
    """
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs * srate / 1000.0))
    binHz = srate / wndSmpN

    _, _, xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )

    harmLBinL, _, harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )

    magM = np.sqrt(np.abs(xM))
    binN, frameN = magM.shape

    # pM[j,i] is the peak level of harmonic band j in frame i.
    pM = np.zeros((len(harmLBinL), frameN))

    for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )):
        # Keep the band at least one bin wide and inside the spectrum; a
        # band lying entirely above the last bin contributes a level of 0
        # (np.max of the resulting empty slice would raise).
        b1i = min(max(b1i, b0i + 1), binN)
        if b0i < b1i:
            pM[j,:] = np.max(magM[b0i:b1i,:], axis=0)

    # Mean of the harmN largest band peaks of each frame.
    rmsV = np.mean( np.sort(pM, axis=0)[-harmN:,:], axis=0 )

    rms_srate = srate / hopSmpN
    rmsV = rms_to_db( rmsV, rms_srate, dbRefWndMs )

    return rmsV, rms_srate, binHz


def measure_duration_ms( rmsV, rms_srate, peak_idx, end_idx, decay_pct ):
    """
    Calculate the time it takes for a note to decay from the peak at
    rmsV[peak_idx] dB to 'decay_pct' percent of the peak value.

    The offset is the sample in rmsV[peak_idx:end_idx] whose value is
    closest to the target level.  Returns the duration in integer
    milliseconds, or 0 when the search window is empty.
    """
    pkRmsDb = rmsV[ peak_idx ]

    # calc the note turn-off (offset) db as a percentage of the peak amplitude
    offsetRmsDb = pkRmsDb * decay_pct / 100.0

    wndV = rmsV[peak_idx:end_idx]
    if len(wndV) == 0:
        # nothing to search: the note has no measurable duration
        return 0

    # calc the sample index where the note is off
    offset_idx = peak_idx + np.argmin( np.abs(wndV - offsetRmsDb) )

    # calc the duration of the note
    return int(round((offset_idx - peak_idx) * 1000.0 / rms_srate))


def select_first_stable_note_by_dur( durMsL, minDurMs=800 ):
    """Return the index at which the final run of long notes begins.

    A run starts at the first duration greater than minDurMs and is
    cancelled by any later duration less than minDurMs.  Returns None when
    no run is open at the end of durMsL.
    """
    first_stable_idx = None

    for i,durMs in enumerate(durMsL):
        if durMs > minDurMs and first_stable_idx is None:
            first_stable_idx = i
        elif durMs < minDurMs:
            first_stable_idx = None

    return first_stable_idx


def select_first_stable_note_by_delta_db_1( pkDbL, pkUsL, maxPulseUs=0.1, maxDeltaDb=0.1 ):
    """Search backward from a given pulse length for an unstable peak level.

    The relative change between successive peaks is averaged over a sliding
    window of 5 changes.  Beginning at the window whose index matches the
    pulse closest to maxPulseUs the windows are walked toward the start of
    the list.

    Args:
        pkDbL:      Peak levels, one per note.
        pkUsL:      Pulse lengths, one per note.
        maxPulseUs: Pulse length at which the backward search begins.
        maxDeltaDb: Average relative change above which a window counts as
                    unstable.  (The original body read this name without
                    defining it; 0.1 matches the threshold used by
                    select_first_stable_note_by_delta_db().)

    Returns:
        i+1 for the first window index i (searching backward, index 0
        excluded) whose average exceeds maxDeltaDb, or None.
    """
    wndN = 5

    pkDbV = np.asarray(pkDbL, dtype=float)
    dV = np.diff(pkDbV) / pkDbV[1:]

    aL = [ np.mean(np.abs(dV[ei-wndN:ei])) for ei in range(wndN,len(pkDbV)) ]

    k = int(np.argmin(np.abs(np.array(pkUsL) - maxPulseUs)))

    # aL has wndN fewer entries than pkUsL: keep the start index in range.
    k = min(k, len(aL)-1)

    for i in range(k,0,-1):
        if aL[i] > maxDeltaDb:
            return i + 1

    return None


def select_first_stable_note_by_delta_db( pkDbL, pkUsL=None, maxPulseUs=0.1 ):
    """Return the index of the first note whose peak level has settled.

    The relative change between successive peaks is averaged over a sliding
    window of 5 changes; the first window whose average is below 0.1
    selects the note.  pkUsL and maxPulseUs are accepted but not used.

    Returns:
        The note index, or None when no window qualifies.
    """
    wndN = 5

    pkDbV = np.asarray(pkDbL, dtype=float)
    dV = np.diff(pkDbV) / pkDbV[1:]

    for ei in range(wndN,len(pkDbV)):
        if np.mean(np.abs(dV[ei-wndN:ei])) < .1:
            return (ei-wndN)+1

    return None


def note_stats( r, decay_pct=50.0, extraDurSearchMs=500 ):
    """Measure per-note statistics of an analysis record.

    Args:
        r:                Record providing rms_srate, eventTimeL, pkIdxL,
                          rmsDbV, tdRmsDbV, pkDbL and pkUsL (as built by
                          rms_analysis_main()).
        decay_pct:        Decay percentage handed to measure_duration_ms().
        extraDurSearchMs: Milliseconds past the end of each event in which
                          the note offset may still be located.

    Returns:
        A list with one SimpleNamespace per event holding begSmpSec,
        endSmpSec, pkSmpSec, durMs, pkDb, pulse_us, quality and durAvgDb.
        'quality' is scaled so the best note is 1.0; when no note has a
        positive quality the raw values are left unscaled.
    """
    statsL = []
    srate = r.rms_srate
    qmax = 0

    for i,(begSmpMs, endSmpMs) in enumerate(r.eventTimeL):

        begSmpIdx = int(round(srate * begSmpMs / 1000.0))
        endSmpIdx = int(round(srate * (endSmpMs + extraDurSearchMs) / 1000.0))
        pkSmpIdx = r.pkIdxL[i]

        durMs = measure_duration_ms( r.rmsDbV, srate, pkSmpIdx, endSmpIdx, decay_pct )

        # span from the peak to the measured note offset
        bi = pkSmpIdx
        ei = pkSmpIdx + int(round(durMs * srate / 1000.0))

        hmV = r.rmsDbV[bi:ei]
        tdV = r.tdRmsDbV[bi:ei]

        qualityCoeff = np.sum(hmV) + np.sum(tdV)
        if qualityCoeff > qmax:
            qmax = qualityCoeff

        if len(hmV) and len(tdV):
            durAvgDb = (np.mean(hmV) + np.mean(tdV))/2.0
        else:
            # zero length note: there is nothing to average
            durAvgDb = float('nan')

        statsL.append( types.SimpleNamespace(**{
            'begSmpSec':begSmpIdx/srate,
            'endSmpSec':endSmpIdx/srate,
            'pkSmpSec':pkSmpIdx/srate,
            'durMs':durMs,
            'pkDb':r.pkDbL[i],
            'pulse_us':r.pkUsL[i],
            'quality':qualityCoeff,
            'durAvgDb':durAvgDb }))

    # Dividing by a zero qmax would turn every quality into inf/nan.
    if qmax > 0:
        for stats in statsL:
            stats.quality /= qmax

    return statsL


def locate_peak_indexes( xV, xV_srate, eventMsL ):
    """Return the index of the maximum of xV inside each event.

    Args:
        xV:       Envelope vector.
        xV_srate: Sample rate of xV in Hz.
        eventMsL: List of (begin, end) event times in milliseconds.

    Returns:
        A list with one index into xV per event.
    """
    pkIdxL = []

    for begMs, endMs in eventMsL:
        begSmpIdx = int(begMs * xV_srate / 1000.0)
        endSmpIdx = int(endMs * xV_srate / 1000.0)

        # Keep the search window inside xV and at least one sample wide;
        # argmax of an empty window would raise.
        begSmpIdx = max(0, min(begSmpIdx, len(xV)-1))
        endSmpIdx = max(endSmpIdx, begSmpIdx+1)

        pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )

    return pkIdxL


def key_info_dictionary( keyMapL=None, yamlCfgFn=None):
    """Build a dictionary of key records indexed by MIDI pitch.

    Args:
        keyMapL:   List of dictionaries, each with at least a 'midi' entry.
        yamlCfgFn: Optional YAML configuration file name.  When given,
                   keyMapL is replaced by the 'key_mapL' attribute of the
                   parsed configuration.

    Returns:
        {midi: SimpleNamespace(**keyRecord)}

    Raises:
        TypeError:   Neither keyMapL nor yamlCfgFn was given.
        ImportError: yamlCfgFn was given but the 'common' module could not
                     be imported.
    """
    if yamlCfgFn is not None:
        if parse_yaml_cfg is None:
            raise ImportError("the 'common' module is required to read '%s'" % (yamlCfgFn)) from _common_import_err

        cfg = parse_yaml_cfg(yamlCfgFn)
        keyMapL = cfg.key_mapL

    if keyMapL is None:
        raise TypeError("key_info_dictionary() requires keyMapL or yamlCfgFn")

    return { d['midi']: types.SimpleNamespace(**d) for d in keyMapL }


def rms_analysis_main( inDir, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3, durDecayPct=40 ):
    """Analyze the recording of one note sequence.

    Reads 'seq.json' and 'audio.wav' from inDir, computes the time-domain
    and harmonic dB envelopes, locates the peak of every event and attaches
    the per-note statistics.

    Args:
        inDir:       Directory holding 'seq.json' and 'audio.wav'.
        midi_pitch:  MIDI pitch of the recorded note.
        rmsWndMs:    Analysis window length in milliseconds.
        rmsHopMs:    Analysis hop in milliseconds.
        dbRefWndMs:  dB reference window length in milliseconds.
        harmCandN:   Count of harmonics examined by audio_harm_rms().
        harmN:       Count of strongest harmonics averaged.
        durDecayPct: Decay percentage used to measure note durations.

    Returns:
        A SimpleNamespace holding the envelopes, peak indexes/levels, event
        times, hold duty cycle list, pulse lengths and 'statsL'.
    """
    seqFn = os.path.join( inDir, "seq.json")
    audioFn = os.path.join( inDir, "audio.wav")

    with open( seqFn, "rb") as f:
        seqD = json.load(f)

    srate, signalM = wavfile.read(audioFn)

    # NOTE(review): the 0x7fff scale presumably assumes 16 bit PCM - confirm.
    sigV = signalM / float(0x7fff)

    tdRmsDbV, rms0_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )
    tdPkIdxL = locate_peak_indexes( tdRmsDbV, rms0_srate, seqD['eventTimeL'])

    rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midi_pitch, harmCandN, harmN )
    pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, seqD['eventTimeL'] )

    # A single 'holdDutyPct' value is promoted to the list form.
    if 'holdDutyPct' in seqD:
        holdDutyPctL = [ (0, seqD['holdDutyPct']) ]
    else:
        holdDutyPctL = seqD['holdDutyPctL']

    r = types.SimpleNamespace(**{
        "audio_srate":srate,
        "tdRmsDbV": tdRmsDbV,
        "tdPkIdxL": tdPkIdxL,
        "tdPkDbL": [ tdRmsDbV[i] for i in tdPkIdxL ],
        "binHz": binHz,
        "rmsDbV":rmsDbV,
        "rms_srate":rms_srate,
        "pkIdxL":pkIdxL,   # indexes into rmsDbV[] of the event peaks
        "eventTimeL":seqD['eventTimeL'],
        "holdDutyPctL":holdDutyPctL,
        'pkDbL': [ rmsDbV[ i ] for i in pkIdxL ],
        'pkUsL':seqD['pulseUsL'] })

    statsL = note_stats(r,durDecayPct)

    setattr(r,"statsL", statsL )

    return r


def rms_analysis_main_all( inDir, cacheFn, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3, durDecayPct=40 ):
    """Analyze every pitch folder of inDir, caching the result in cacheFn.

    When cacheFn exists it is loaded and returned without any analysis.
    Otherwise every entry of inDir whose name is an integer MIDI pitch and
    which contains '0/seq.json' is analyzed with rms_analysis_main() and
    the resulting dictionary is pickled to cacheFn.  Entries whose name is
    not an integer are skipped.

    Returns:
        {midi_pitch: analysis record}
    """
    if os.path.isfile(cacheFn):
        print("READING analysis cache file: %s" % (cacheFn))
        # NOTE: unpickling executes arbitrary code - only load cache files
        # that were written by this function.
        with open(cacheFn,"rb") as f:
            return pickle.load(f)

    rD = {}

    for folder in os.listdir(inDir):

        try:
            midi_pitch = int(folder)
        except ValueError:
            # not a pitch folder (e.g. a stray file): ignore it
            continue

        print(midi_pitch)

        path = os.path.join(inDir,folder,'0')

        if os.path.isdir(path) and os.path.isfile(os.path.join(path,"seq.json")):
            rD[ midi_pitch ] = rms_analysis_main( path, midi_pitch, rmsWndMs=rmsWndMs, rmsHopMs=rmsHopMs, dbRefWndMs=dbRefWndMs, harmCandN=harmCandN, harmN=harmN, durDecayPct=durDecayPct )

    with open(cacheFn,"wb") as f:
        pickle.dump(rD,f)

    return rD