picadae calibration programs
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rms_analysis.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. ##| Copyright: (C) 2019-2020 Kevin Larke <contact AT larke DOT org>
  2. ##| License: GNU GPL version 3.0 or above. See the accompanying LICENSE file.
  3. import os,types,json,pickle,types
  4. from scipy.io import wavfile
  5. from scipy.signal import stft
  6. import numpy as np
  7. from common import parse_yaml_cfg
  8. def calc_harm_bins( srate, binHz, midiPitch, harmN ):
  9. semi_tone = 1.0/12
  10. quarter_tone = 1.0/24
  11. eigth_tone = 1.0/48
  12. band_width_st = 3.0/48 # 3/8 tone
  13. fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))
  14. fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)]
  15. fund_m_binL = [int(round(fundHz * i/binHz)) for i in range(1,harmN+1)]
  16. fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)]
  17. for i in range(len(fund_m_binL)):
  18. if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
  19. fund_l_binL[i] = fund_m_binL[i] - 1
  20. if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1:
  21. fund_u_binL[i] = fund_m_binL[i] + 1
  22. return fund_l_binL, fund_m_binL, fund_u_binL
  23. def rms_to_db( xV, rms_srate, dbLinRef ):
  24. #dbWndN = int(round(refWndMs * rms_srate / 1000.0))
  25. #dbRef = ref = np.mean(xV[0:dbWndN])
  26. #print("DB REF:",dbLinRef, min(xV), np.argmin(xV))
  27. rmsDbV = 20.0 * np.log10( (xV+np.nextafter(0,1)) / dbLinRef )
  28. return rmsDbV
  29. def audio_rms( srate, xV, rmsWndMs, hopMs, dbLinRef ):
  30. wndSmpN = int(round( rmsWndMs * srate / 1000.0))
  31. hopSmpN = int(round( hopMs * srate / 1000.0))
  32. xN = xV.shape[0]
  33. yN = int(((xN - wndSmpN) / hopSmpN) + 1)
  34. assert( yN > 0)
  35. yV = np.zeros( (yN, ) )
  36. assert( wndSmpN > 1 )
  37. i = 0
  38. j = 0
  39. while i < xN and j < yN:
  40. if i == 0:
  41. yV[j] = np.sqrt(xV[0]*xV[0])
  42. elif i < wndSmpN:
  43. yV[j] = np.sqrt( np.mean( xV[0:i] * xV[0:i] ) )
  44. else:
  45. yV[j] = np.sqrt( np.mean( xV[i-wndSmpN:i] * xV[i-wndSmpN:i] ) )
  46. i += hopSmpN
  47. j += 1
  48. rms_srate = srate / hopSmpN
  49. return rms_to_db( yV[0:j], rms_srate, dbLinRef ), rms_srate
  50. def audio_stft_rms( srate, xV, rmsWndMs, hopMs, dbLinRef, spectrumIdx ):
  51. wndSmpN = int(round( rmsWndMs * srate / 1000.0))
  52. hopSmpN = int(round( hopMs * srate / 1000.0))
  53. binHz = srate / wndSmpN
  54. f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
  55. specHopIdx = int(round( spectrumIdx ))
  56. specV = np.sqrt(np.abs(xM[:, specHopIdx ]))
  57. mV = np.zeros((xM.shape[1]))
  58. for i in range(xM.shape[1]):
  59. mV[i] = np.max(np.sqrt(np.abs(xM[:,i])))
  60. rms_srate = srate / hopSmpN
  61. mV = rms_to_db( mV, rms_srate, dbLinRef )
  62. return mV, rms_srate, specV, specHopIdx, binHz
  63. def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbLinRef, midiPitch, harmCandN, harmN ):
  64. wndSmpN = int(round( rmsWndMs * srate / 1000.0))
  65. hopSmpN = int(round( hopMs * srate / 1000.0))
  66. binHz = srate / wndSmpN
  67. #print( "STFT:", rmsWndMs, hopMs, wndSmpN, hopSmpN, wndSmpN-hopSmpN )
  68. f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
  69. harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )
  70. rmsV = np.zeros((xM.shape[1],))
  71. for i in range(xM.shape[1]):
  72. mV = np.sqrt(np.abs(xM[:,i]))
  73. pV = np.zeros((len(harmLBinL,)))
  74. for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )):
  75. pV[j] = np.max(mV[b0i:b1i])
  76. rmsV[i] = np.mean( sorted(pV)[-harmN:] )
  77. rms_srate = srate / hopSmpN
  78. rmsV = rms_to_db( rmsV, rms_srate, dbLinRef )
  79. return rmsV, rms_srate, binHz
  80. def measure_duration_ms( rmsV, rms_srate, peak_idx, end_idx, decay_pct ):
  81. """
  82. Calcuate the time it takes for a note to decay from the peak at
  83. rmsV[peak_idx] dB to 'decay_pct' percent of the peak value.
  84. """
  85. pkRmsDb = rmsV[ peak_idx ]
  86. # calc the note turn-off (offset) db as a percentage of the peak amplitude
  87. offsetRmsDb = pkRmsDb * decay_pct / 100.0
  88. # calc the sample index where the note is off
  89. offset_idx = peak_idx + np.argmin( np.abs(rmsV[peak_idx:end_idx] - offsetRmsDb) )
  90. # calc the duration of the note
  91. dur_ms = int(round((offset_idx - peak_idx) * 1000.0 / rms_srate))
  92. #print(pkRmsDb, offsetRmsDb, peak_idx, offset_idx, end_idx, dur_ms, rms_srate)
  93. return dur_ms
  94. def select_first_stable_note_by_dur( durMsL, minDurMs=800 ):
  95. first_stable_idx = None
  96. for i,durMs in enumerate(durMsL):
  97. if durMs > minDurMs and first_stable_idx is None:
  98. first_stable_idx = i
  99. else:
  100. if durMs < minDurMs:
  101. first_stable_idx = None
  102. return first_stable_idx
  103. def select_first_stable_note_by_delta_db_1( pkDbL, pkUsL, maxPulseUs=0.1 ):
  104. wndN = 5
  105. aL = []
  106. dV = np.diff(pkDbL) / pkDbL[1:]
  107. for ei in range(wndN,len(pkDbL)):
  108. xV = dV[ei-wndN:ei]
  109. avg = np.mean(np.abs(xV))
  110. aL.append(avg)
  111. k = np.argmin(np.abs(np.array(pkUsL) - maxPulseUs))
  112. print(aL)
  113. print(k)
  114. for i in range(k,0,-1):
  115. if aL[i] > maxDeltaDb:
  116. return i + 1
  117. return None
  118. def select_first_stable_note_by_delta_db( pkDbL, pkUsL=None, maxPulseUs=0.1 ):
  119. wndN = 5
  120. dV = np.diff(pkDbL) / pkDbL[1:]
  121. for ei in range(wndN,len(pkDbL)):
  122. xV = dV[ei-wndN:ei]
  123. avg = np.mean(np.abs(xV))
  124. if avg < .1:
  125. return (ei-wndN)+1
  126. return None
  127. def note_stats( r, decay_pct=50.0, extraDurSearchMs=500 ):
  128. statsL = []
  129. srate = r.rms_srate
  130. qmax = 0
  131. for i,(begSmpMs, endSmpMs) in enumerate(r.eventTimeL):
  132. begSmpIdx = int(round(srate * begSmpMs / 1000.0))
  133. endSmpIdx = int(round(srate * (endSmpMs + extraDurSearchMs) / 1000.0))
  134. pkSmpIdx = r.pkIdxL[i]
  135. durMs = measure_duration_ms( r.rmsDbV, srate, pkSmpIdx, endSmpIdx, decay_pct )
  136. bi = pkSmpIdx
  137. ei = pkSmpIdx + int(round(durMs * srate / 1000.0))
  138. qualityCoeff = np.sum(r.rmsDbV[bi:ei]) + np.sum(r.tdRmsDbV[bi:ei])
  139. if qualityCoeff > qmax:
  140. qmax = qualityCoeff
  141. if ei-bi == 0:
  142. tdRmsDb_v = 0.0 if bi >= len(r.tdRmsDbV) else np.mean(r.tdRmsDbV[bi])
  143. hmRmsDb_v = 0.0 if bi >= len(r.rmsDbV) else np.mean(r.rmsDbV[bi])
  144. durAvgDb = (hmRmsDb_v + tdRmsDb_v)/2.0
  145. else:
  146. tdRmsDb_u = 0.0 if ei >= len(r.tdRmsDbV) else np.mean(r.tdRmsDbV[bi:ei])
  147. hmRmsDb_u = 0.0 if ei >= len(r.rmsDbV) else np.mean(r.rmsDbV[bi:ei])
  148. durAvgDb = (hmRmsDb_u + tdRmsDb_u)/2.0
  149. statsL.append( types.SimpleNamespace(**{'begSmpSec':begSmpIdx/srate,'endSmpSec':endSmpIdx/srate,'pkSmpSec':pkSmpIdx/srate,'durMs':durMs, 'pkDb':r.pkDbL[i], 'pulse_us':r.pkUsL[i], 'quality':qualityCoeff, 'durAvgDb':durAvgDb }))
  150. for i,r in enumerate(statsL):
  151. statsL[i].quality = 0 if qmax <= 0 else statsL[i].quality / qmax
  152. return statsL
  153. def locate_peak_indexes( xV, xV_srate, eventMsL ):
  154. pkIdxL = []
  155. for begMs, endMs in eventMsL:
  156. begSmpIdx = int(begMs * xV_srate / 1000.0)
  157. endSmpIdx = int(endMs * xV_srate / 1000.0)
  158. pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )
  159. return pkIdxL
  160. def key_info_dictionary( keyMapL=None, yamlCfgFn=None):
  161. if yamlCfgFn is not None:
  162. cfg = parse_yaml_cfg(yamlCfgFn)
  163. keyMapL = cfg.key_mapL
  164. kmD = {}
  165. for d in keyMapL:
  166. kmD[ d['midi'] ] = types.SimpleNamespace(**d)
  167. return kmD
  168. def rms_analyze_one_rt_note( sigV, srate, begMs, endMs, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbLinRef=0.001, harmCandN=5, harmN=3, durDecayPct=40 ):
  169. sigV = np.squeeze(sigV)
  170. td_rmsDbV, td_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef )
  171. begSmpIdx = int(round(begMs * td_srate/1000))
  172. endSmpIdx = int(round(endMs * td_srate/1000))
  173. td_pk_idx = begSmpIdx + np.argmax(td_rmsDbV[begSmpIdx:endSmpIdx])
  174. td_durMs = measure_duration_ms( td_rmsDbV, td_srate, td_pk_idx, len(sigV)-1, durDecayPct )
  175. hm_rmsDbV, hm_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef, midi_pitch, harmCandN, harmN )
  176. begSmpIdx = int(round(begMs * hm_srate/1000))
  177. endSmpIdx = int(round(endMs * hm_srate/1000))
  178. hm_pk_idx = begSmpIdx + np.argmax(hm_rmsDbV[begSmpIdx:endSmpIdx])
  179. hm_durMs = measure_duration_ms( hm_rmsDbV, hm_srate, hm_pk_idx, len(sigV)-1, durDecayPct )
  180. tdD = { "rmsDbV":td_rmsDbV.tolist(), "srate":td_srate, "pk_idx":int(td_pk_idx), "db":float(td_rmsDbV[td_pk_idx]), "durMs":td_durMs }
  181. hmD = { "rmsDbV":hm_rmsDbV.tolist(), "srate":hm_srate, "pk_idx":int(hm_pk_idx), "db":float(hm_rmsDbV[hm_pk_idx]), "durMs":hm_durMs }
  182. return { "td":tdD, "hm":hmD }
  183. def rms_analyze_one_rt_note_wrap( audioDev, annBegMs, annEndMs, midi_pitch, noteOffDurMs, rmsAnalysisD ):
  184. resD = None
  185. buf_result = audioDev.linear_buffer()
  186. if buf_result:
  187. sigV = buf_result.value
  188. # get the annotated begin and end of the note as sample indexes into sigV
  189. bi = int(round(annBegMs * audioDev.srate / 1000))
  190. ei = int(round(annEndMs * audioDev.srate / 1000))
  191. # calculate half the length of the note-off duration in samples
  192. noteOffSmp_o_2 = int(round( (noteOffDurMs/2) * audioDev.srate / 1000))
  193. # widen the note analysis space noteOffSmp_o_2 samples pre/post the annotated begin/end of the note
  194. bi = max(0,bi - noteOffSmp_o_2)
  195. ei = min(ei+noteOffSmp_o_2,sigV.shape[0]-1)
  196. ar = types.SimpleNamespace(**rmsAnalysisD)
  197. # shift the annotatd begin/end of the note to be relative to index bi
  198. begMs = noteOffSmp_o_2 * 1000 / audioDev.srate
  199. endMs = begMs + (annEndMs - annBegMs)
  200. #print("MEAS:",begMs,endMs,bi,ei,sigV.shape,audioDev.is_recording_enabled(),ar)
  201. # analyze the note
  202. resD = rms_analyze_one_rt_note( sigV[bi:ei], audioDev.srate, begMs, endMs, midi_pitch, rmsWndMs=ar.rmsWndMs, rmsHopMs=ar.rmsHopMs, dbLinRef=ar.dbLinRef, harmCandN=ar.harmCandN, harmN=ar.harmN, durDecayPct=ar.durDecayPct )
  203. #print( "hm:%4.1f %4i td:%4.1f %4i" % (resD['hm']['db'], resD['hm']['durMs'], resD['td']['db'], resD['td']['durMs']))
  204. return resD
  205. def calibrate_rms( sigV, srate, beg_ms, end_ms ):
  206. bi = int(round(beg_ms * srate / 1000))
  207. ei = int(round(end_ms * srate / 1000))
  208. rms = np.sqrt( np.mean( sigV[bi:ei] * sigV[bi:ei] ))
  209. return 20.0*np.log10( rms / 0.002 )
  210. def calibrate_recording_analysis( inDir ):
  211. jsonFn = os.path.join(inDir, "meas.json" )
  212. audioFn = os.path.join(inDir, "audio.wav" )
  213. with open(jsonFn,"r") as f:
  214. r = json.load(f)
  215. measD = r['measD']
  216. cfg = types.SimpleNamespace(**r['cfg'])
  217. annL = r['annoteL']
  218. anlD = {}
  219. n = 0
  220. for midi_pitch,measL in measD.items():
  221. n += len(measL)
  222. anlD[int(midi_pitch)] = []
  223. srate, signalM = wavfile.read(audioFn)
  224. sigV = signalM / float(0x7fff)
  225. anlr = types.SimpleNamespace(**cfg.analysisD)
  226. tdRmsDbV, td_srate = audio_rms( srate, sigV, anlr.rmsWndMs, anlr.rmsHopMs, anlr.dbLinRef )
  227. # for each measured pitch
  228. for midi_pitch,measL in measD.items():
  229. # for each measured note at this pitch
  230. for mi,d in enumerate(measL):
  231. mr = types.SimpleNamespace(**d)
  232. # locate the associated annotation reocrd
  233. for annD in annL:
  234. ar = types.SimpleNamespace(**annD)
  235. if ar.midi_pitch == mr.midi_pitch and ar.beg_ms==mr.beg_ms and ar.end_ms==mr.end_ms:
  236. assert( ar.pulse_us == mr.pulse_us )
  237. bi = int(round(ar.beg_ms * td_srate / 1000))
  238. ei = int(round(ar.end_ms * td_srate / 1000))
  239. db = np.mean(tdRmsDbV[bi:ei])
  240. db = calibrate_rms( sigV, srate, ar.beg_ms, ar.end_ms )
  241. anlD[int(midi_pitch)].append({ 'pulse_us':ar.pulse_us, 'db':db, 'meas_idx':mi })
  242. break
  243. return anlD
  244. def rms_analysis_main( inDir, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbLinRef=0.001, harmCandN=5, harmN=3, durDecayPct=40 ):
  245. seqFn = os.path.join( inDir, "seq.json")
  246. audioFn = os.path.join( inDir, "audio.wav")
  247. with open( seqFn, "rb") as f:
  248. r = json.load(f)
  249. srate, signalM = wavfile.read(audioFn)
  250. sigV = signalM / float(0x7fff)
  251. tdRmsDbV, rms0_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef )
  252. tdPkIdxL = locate_peak_indexes( tdRmsDbV, rms0_srate, r['eventTimeL'])
  253. rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef, midi_pitch, harmCandN, harmN )
  254. pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, r['eventTimeL'] )
  255. holdDutyPctL = None
  256. if 'holdDutyPct' in r:
  257. holdDutyPctL = [ (0, r['holdDutyPct']) ]
  258. else:
  259. holdDutyPctL = r['holdDutyPctL']
  260. r = types.SimpleNamespace(**{
  261. "audio_srate":srate,
  262. "eventTimeMsL":r['eventTimeL'],
  263. "tdRmsDbV": tdRmsDbV,
  264. "tdPkIdxL": tdPkIdxL,
  265. "tdPkDbL": [ tdRmsDbV[i] for i in tdPkIdxL ],
  266. "binHz": binHz,
  267. "rmsDbV":rmsDbV,
  268. "rms_srate":rms_srate,
  269. "pkIdxL":pkIdxL, # pkIdxL[ len(pulsUsL) ] - indexes into rmsDbV[] of peaks
  270. "eventTimeL":r['eventTimeL'],
  271. "holdDutyPctL":holdDutyPctL,
  272. 'pkDbL': [ rmsDbV[ i ] for i in pkIdxL ],
  273. 'pkUsL':r['pulseUsL'] })
  274. statsL = note_stats(r,durDecayPct)
  275. setattr(r,"statsL", statsL )
  276. return r
  277. def rms_analysis_main_all( inDir, cacheFn, rmsWndMs=300, rmsHopMs=30, dbLinRef=0.001, harmCandN=5, harmN=3, durDecayPct=40 ):
  278. if os.path.isfile(cacheFn):
  279. print("READING analysis cache file: %s" % (cacheFn))
  280. with open(cacheFn,"rb") as f:
  281. rD = pickle.load(f)
  282. return rD
  283. folderL = os.listdir(inDir)
  284. rD = {}
  285. for folder in folderL:
  286. pathL = folder.split(os.sep)
  287. midi_pitch = int(pathL[-1])
  288. print(midi_pitch)
  289. path = os.path.join(inDir,folder,'0')
  290. if os.path.isdir(path) and os.path.isfile(os.path.join(os.path.join(path,"seq.json"))):
  291. r = rms_analysis_main( path, midi_pitch, rmsWndMs=rmsWndMs, rmsHopMs=rmsHopMs, dbLinRef=dbLinRef, harmCandN=harmCandN, harmN=harmN, durDecayPct=durDecayPct )
  292. rD[ midi_pitch ] = r
  293. with open(cacheFn,"wb") as f:
  294. pickle.dump(rD,f)
  295. return rD
  296. def samples_to_linear_residual( usL, dbL, pointsPerLine=5 ):
  297. # Score the quality of each sampled point by measuring the
  298. # quality of fit to a local line.
  299. scoreD = { us:[] for us in usL }
  300. pointsPerLine = 5
  301. i = pointsPerLine
  302. # for each sampled point
  303. while i < len(usL):
  304. # beginning with sample at index 'pointsPerLine'
  305. if i >= pointsPerLine:
  306. k = i - pointsPerLine
  307. # get the x (us) and y (db) sample values
  308. xL,yL = zip(*[ ((usL[k+j],1.0), dbL[k+j]) for j in range(pointsPerLine)])
  309. xV = np.array(xL)
  310. yV = np.array(yL)
  311. # fit the sampled point to a line
  312. m,c = np.linalg.lstsq(xV,yV,rcond=None)[0]
  313. # calc the residual of the fit at each point
  314. resV = (m*xV+c)[:,0] - yV
  315. # assign the residual to the associated point in scoreD[x]
  316. for j in range(pointsPerLine):
  317. scoreD[usL[k+j]].append(resV[j])
  318. i += 1
  319. scoreL = []
  320. # calc the mean of the residuals for each point
  321. # (most points were used in 'pointsPerLine' line estimations
  322. # and so they will have 'pointsPerLine' residual values)
  323. for us in usL:
  324. resL = scoreD[us]
  325. if len(resL) == 0:
  326. scoreL.append(0.0)
  327. elif len(resL) == 1:
  328. scoreL.append(resL[0])
  329. else:
  330. scoreL.append(np.mean(resL))
  331. # their should be one mean resid. value for each sampled point
  332. assert( len(scoreL) == len(usL) )
  333. return np.array(scoreL)
  334. def write_audacity_label_files( inDir, analysisArgsD, reverseFl=True ):
  335. pitchDirL = os.listdir(inDir)
  336. for pitchDir in pitchDirL:
  337. folderL = pitchDir.split(os.sep)
  338. midi_pitch = int(folderL[-1])
  339. pitchDir = os.path.join(inDir,pitchDir)
  340. takeDirL = os.listdir(pitchDir)
  341. for takeFolder in takeDirL:
  342. takeDir = os.path.join(pitchDir,takeFolder)
  343. r = rms_analysis_main( takeDir, midi_pitch, **analysisArgsD )
  344. labelFn = os.path.join(takeDir,"audacity.txt")
  345. print("Writing:",labelFn)
  346. with open(labelFn,"w") as f:
  347. for i,s in enumerate(r.statsL):
  348. noteIndex = len(r.statsL)-(i+1) if reverseFl else i
  349. label = "%i %4.1f %6.1f" % (noteIndex, s.pkDb, s.durMs )
  350. f.write("%f\t%f\t%s\n" % ( s.begSmpSec, s.endSmpSec, label ))