picadae calibration programs
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

rms_analysis.py 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. import os,types,json,pickle,types
  2. from scipy.io import wavfile
  3. from scipy.signal import stft
  4. import numpy as np
  5. from common import parse_yaml_cfg
  6. def calc_harm_bins( srate, binHz, midiPitch, harmN ):
  7. semi_tone = 1.0/12
  8. quarter_tone = 1.0/24
  9. eigth_tone = 1.0/48
  10. band_width_st = 3.0/48 # 3/8 tone
  11. fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))
  12. fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)]
  13. fund_m_binL = [int(round(fundHz * i/binHz)) for i in range(1,harmN+1)]
  14. fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)]
  15. for i in range(len(fund_m_binL)):
  16. if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
  17. fund_l_binL[i] = fund_m_binL[i] - 1
  18. if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1:
  19. fund_u_binL[i] = fund_m_binL[i] + 1
  20. return fund_l_binL, fund_m_binL, fund_u_binL
  21. def rms_to_db( xV, rms_srate, dbLinRef ):
  22. #dbWndN = int(round(refWndMs * rms_srate / 1000.0))
  23. #dbRef = ref = np.mean(xV[0:dbWndN])
  24. #print("DB REF:",dbLinRef, min(xV), np.argmin(xV))
  25. rmsDbV = 20.0 * np.log10( (xV+np.nextafter(0,1)) / dbLinRef )
  26. return rmsDbV
  27. def audio_rms( srate, xV, rmsWndMs, hopMs, dbLinRef ):
  28. wndSmpN = int(round( rmsWndMs * srate / 1000.0))
  29. hopSmpN = int(round( hopMs * srate / 1000.0))
  30. xN = xV.shape[0]
  31. yN = int(((xN - wndSmpN) / hopSmpN) + 1)
  32. assert( yN > 0)
  33. yV = np.zeros( (yN, ) )
  34. assert( wndSmpN > 1 )
  35. i = 0
  36. j = 0
  37. while i < xN and j < yN:
  38. if i == 0:
  39. yV[j] = np.sqrt(xV[0]*xV[0])
  40. elif i < wndSmpN:
  41. yV[j] = np.sqrt( np.mean( xV[0:i] * xV[0:i] ) )
  42. else:
  43. yV[j] = np.sqrt( np.mean( xV[i-wndSmpN:i] * xV[i-wndSmpN:i] ) )
  44. i += hopSmpN
  45. j += 1
  46. rms_srate = srate / hopSmpN
  47. return rms_to_db( yV[0:j], rms_srate, dbLinRef ), rms_srate
  48. def audio_stft_rms( srate, xV, rmsWndMs, hopMs, dbLinRef, spectrumIdx ):
  49. wndSmpN = int(round( rmsWndMs * srate / 1000.0))
  50. hopSmpN = int(round( hopMs * srate / 1000.0))
  51. binHz = srate / wndSmpN
  52. f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
  53. specHopIdx = int(round( spectrumIdx ))
  54. specV = np.sqrt(np.abs(xM[:, specHopIdx ]))
  55. mV = np.zeros((xM.shape[1]))
  56. for i in range(xM.shape[1]):
  57. mV[i] = np.max(np.sqrt(np.abs(xM[:,i])))
  58. rms_srate = srate / hopSmpN
  59. mV = rms_to_db( mV, rms_srate, dbLinRef )
  60. return mV, rms_srate, specV, specHopIdx, binHz
  61. def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbLinRef, midiPitch, harmCandN, harmN ):
  62. wndSmpN = int(round( rmsWndMs * srate / 1000.0))
  63. hopSmpN = int(round( hopMs * srate / 1000.0))
  64. binHz = srate / wndSmpN
  65. #print( "STFT:", rmsWndMs, hopMs, wndSmpN, hopSmpN, wndSmpN-hopSmpN )
  66. f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
  67. harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )
  68. rmsV = np.zeros((xM.shape[1],))
  69. for i in range(xM.shape[1]):
  70. mV = np.sqrt(np.abs(xM[:,i]))
  71. pV = np.zeros((len(harmLBinL,)))
  72. for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )):
  73. pV[j] = np.max(mV[b0i:b1i])
  74. rmsV[i] = np.mean( sorted(pV)[-harmN:] )
  75. rms_srate = srate / hopSmpN
  76. rmsV = rms_to_db( rmsV, rms_srate, dbLinRef )
  77. return rmsV, rms_srate, binHz
  78. def measure_duration_ms( rmsV, rms_srate, peak_idx, end_idx, decay_pct ):
  79. """
  80. Calcuate the time it takes for a note to decay from the peak at
  81. rmsV[peak_idx] dB to 'decay_pct' percent of the peak value.
  82. """
  83. pkRmsDb = rmsV[ peak_idx ]
  84. # calc the note turn-off (offset) db as a percentage of the peak amplitude
  85. offsetRmsDb = pkRmsDb * decay_pct / 100.0
  86. # calc the sample index where the note is off
  87. offset_idx = peak_idx + np.argmin( np.abs(rmsV[peak_idx:end_idx] - offsetRmsDb) )
  88. # calc the duration of the note
  89. dur_ms = int(round((offset_idx - peak_idx) * 1000.0 / rms_srate))
  90. #print(pkRmsDb, offsetRmsDb, peak_idx, offset_idx, end_idx, dur_ms, rms_srate)
  91. return dur_ms
  92. def select_first_stable_note_by_dur( durMsL, minDurMs=800 ):
  93. first_stable_idx = None
  94. for i,durMs in enumerate(durMsL):
  95. if durMs > minDurMs and first_stable_idx is None:
  96. first_stable_idx = i
  97. else:
  98. if durMs < minDurMs:
  99. first_stable_idx = None
  100. return first_stable_idx
  101. def select_first_stable_note_by_delta_db_1( pkDbL, pkUsL, maxPulseUs=0.1 ):
  102. wndN = 5
  103. aL = []
  104. dV = np.diff(pkDbL) / pkDbL[1:]
  105. for ei in range(wndN,len(pkDbL)):
  106. xV = dV[ei-wndN:ei]
  107. avg = np.mean(np.abs(xV))
  108. aL.append(avg)
  109. k = np.argmin(np.abs(np.array(pkUsL) - maxPulseUs))
  110. print(aL)
  111. print(k)
  112. for i in range(k,0,-1):
  113. if aL[i] > maxDeltaDb:
  114. return i + 1
  115. return None
  116. def select_first_stable_note_by_delta_db( pkDbL, pkUsL=None, maxPulseUs=0.1 ):
  117. wndN = 5
  118. dV = np.diff(pkDbL) / pkDbL[1:]
  119. for ei in range(wndN,len(pkDbL)):
  120. xV = dV[ei-wndN:ei]
  121. avg = np.mean(np.abs(xV))
  122. if avg < .1:
  123. return (ei-wndN)+1
  124. return None
  125. def note_stats( r, decay_pct=50.0, extraDurSearchMs=500 ):
  126. statsL = []
  127. srate = r.rms_srate
  128. qmax = 0
  129. for i,(begSmpMs, endSmpMs) in enumerate(r.eventTimeL):
  130. begSmpIdx = int(round(srate * begSmpMs / 1000.0))
  131. endSmpIdx = int(round(srate * (endSmpMs + extraDurSearchMs) / 1000.0))
  132. pkSmpIdx = r.pkIdxL[i]
  133. durMs = measure_duration_ms( r.rmsDbV, srate, pkSmpIdx, endSmpIdx, decay_pct )
  134. bi = pkSmpIdx
  135. ei = pkSmpIdx + int(round(durMs * srate / 1000.0))
  136. qualityCoeff = np.sum(r.rmsDbV[bi:ei]) + np.sum(r.tdRmsDbV[bi:ei])
  137. if qualityCoeff > qmax:
  138. qmax = qualityCoeff
  139. if ei-bi == 0:
  140. tdRmsDb_v = 0.0 if bi >= len(r.tdRmsDbV) else np.mean(r.tdRmsDbV[bi])
  141. hmRmsDb_v = 0.0 if bi >= len(r.rmsDbV) else np.mean(r.rmsDbV[bi])
  142. durAvgDb = (hmRmsDb_v + tdRmsDb_v)/2.0
  143. else:
  144. tdRmsDb_u = 0.0 if ei >= len(r.tdRmsDbV) else np.mean(r.tdRmsDbV[bi:ei])
  145. hmRmsDb_u = 0.0 if ei >= len(r.rmsDbV) else np.mean(r.rmsDbV[bi:ei])
  146. durAvgDb = (hmRmsDb_u + tdRmsDb_u)/2.0
  147. statsL.append( types.SimpleNamespace(**{'begSmpSec':begSmpIdx/srate,'endSmpSec':endSmpIdx/srate,'pkSmpSec':pkSmpIdx/srate,'durMs':durMs, 'pkDb':r.pkDbL[i], 'pulse_us':r.pkUsL[i], 'quality':qualityCoeff, 'durAvgDb':durAvgDb }))
  148. for i,r in enumerate(statsL):
  149. statsL[i].quality = 0 if qmax <= 0 else statsL[i].quality / qmax
  150. return statsL
  151. def locate_peak_indexes( xV, xV_srate, eventMsL ):
  152. pkIdxL = []
  153. for begMs, endMs in eventMsL:
  154. begSmpIdx = int(begMs * xV_srate / 1000.0)
  155. endSmpIdx = int(endMs * xV_srate / 1000.0)
  156. pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )
  157. return pkIdxL
  158. def key_info_dictionary( keyMapL=None, yamlCfgFn=None):
  159. if yamlCfgFn is not None:
  160. cfg = parse_yaml_cfg(yamlCfgFn)
  161. keyMapL = cfg.key_mapL
  162. kmD = {}
  163. for d in keyMapL:
  164. kmD[ d['midi'] ] = types.SimpleNamespace(**d)
  165. return kmD
  166. def rms_analyze_one_rt_note( sigV, srate, begMs, endMs, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbLinRef=0.001, harmCandN=5, harmN=3, durDecayPct=40 ):
  167. sigV = np.squeeze(sigV)
  168. td_rmsDbV, td_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef )
  169. begSmpIdx = int(round(begMs * td_srate/1000))
  170. endSmpIdx = int(round(endMs * td_srate/1000))
  171. td_pk_idx = begSmpIdx + np.argmax(td_rmsDbV[begSmpIdx:endSmpIdx])
  172. td_durMs = measure_duration_ms( td_rmsDbV, td_srate, td_pk_idx, len(sigV)-1, durDecayPct )
  173. hm_rmsDbV, hm_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef, midi_pitch, harmCandN, harmN )
  174. begSmpIdx = int(round(begMs * hm_srate/1000))
  175. endSmpIdx = int(round(endMs * hm_srate/1000))
  176. hm_pk_idx = begSmpIdx + np.argmax(hm_rmsDbV[begSmpIdx:endSmpIdx])
  177. hm_durMs = measure_duration_ms( hm_rmsDbV, hm_srate, hm_pk_idx, len(sigV)-1, durDecayPct )
  178. tdD = { "rmsDbV":td_rmsDbV.tolist(), "srate":td_srate, "pk_idx":int(td_pk_idx), "db":float(td_rmsDbV[td_pk_idx]), "durMs":td_durMs }
  179. hmD = { "rmsDbV":hm_rmsDbV.tolist(), "srate":hm_srate, "pk_idx":int(hm_pk_idx), "db":float(hm_rmsDbV[hm_pk_idx]), "durMs":hm_durMs }
  180. return { "td":tdD, "hm":hmD }
  181. def rms_analyze_one_rt_note_wrap( audioDev, annBegMs, annEndMs, midi_pitch, noteOffDurMs, rmsAnalysisD ):
  182. resD = None
  183. buf_result = audioDev.linear_buffer()
  184. if buf_result:
  185. sigV = buf_result.value
  186. # get the annotated begin and end of the note as sample indexes into sigV
  187. bi = int(round(annBegMs * audioDev.srate / 1000))
  188. ei = int(round(annEndMs * audioDev.srate / 1000))
  189. # calculate half the length of the note-off duration in samples
  190. noteOffSmp_o_2 = int(round( (noteOffDurMs/2) * audioDev.srate / 1000))
  191. # widen the note analysis space noteOffSmp_o_2 samples pre/post the annotated begin/end of the note
  192. bi = max(0,bi - noteOffSmp_o_2)
  193. ei = min(ei+noteOffSmp_o_2,sigV.shape[0]-1)
  194. ar = types.SimpleNamespace(**rmsAnalysisD)
  195. # shift the annotatd begin/end of the note to be relative to index bi
  196. begMs = noteOffSmp_o_2 * 1000 / audioDev.srate
  197. endMs = begMs + (annEndMs - annBegMs)
  198. #print("MEAS:",begMs,endMs,bi,ei,sigV.shape,audioDev.is_recording_enabled(),ar)
  199. # analyze the note
  200. resD = rms_analyze_one_rt_note( sigV[bi:ei], audioDev.srate, begMs, endMs, midi_pitch, rmsWndMs=ar.rmsWndMs, rmsHopMs=ar.rmsHopMs, dbLinRef=ar.dbLinRef, harmCandN=ar.harmCandN, harmN=ar.harmN, durDecayPct=ar.durDecayPct )
  201. #print( "hm:%4.1f %4i td:%4.1f %4i" % (resD['hm']['db'], resD['hm']['durMs'], resD['td']['db'], resD['td']['durMs']))
  202. return resD
  203. def calibrate_rms( sigV, srate, beg_ms, end_ms ):
  204. bi = int(round(beg_ms * srate / 1000))
  205. ei = int(round(end_ms * srate / 1000))
  206. rms = np.sqrt( np.mean( sigV[bi:ei] * sigV[bi:ei] ))
  207. return 20.0*np.log10( rms / 0.002 )
  208. def calibrate_recording_analysis( inDir ):
  209. jsonFn = os.path.join(inDir, "meas.json" )
  210. audioFn = os.path.join(inDir, "audio.wav" )
  211. with open(jsonFn,"r") as f:
  212. r = json.load(f)
  213. measD = r['measD']
  214. cfg = types.SimpleNamespace(**r['cfg'])
  215. annL = r['annoteL']
  216. anlD = {}
  217. n = 0
  218. for midi_pitch,measL in measD.items():
  219. n += len(measL)
  220. anlD[int(midi_pitch)] = []
  221. srate, signalM = wavfile.read(audioFn)
  222. sigV = signalM / float(0x7fff)
  223. anlr = types.SimpleNamespace(**cfg.analysisD)
  224. tdRmsDbV, td_srate = audio_rms( srate, sigV, anlr.rmsWndMs, anlr.rmsHopMs, anlr.dbLinRef )
  225. # for each measured pitch
  226. for midi_pitch,measL in measD.items():
  227. # for each measured note at this pitch
  228. for mi,d in enumerate(measL):
  229. mr = types.SimpleNamespace(**d)
  230. # locate the associated annotation reocrd
  231. for annD in annL:
  232. ar = types.SimpleNamespace(**annD)
  233. if ar.midi_pitch == mr.midi_pitch and ar.beg_ms==mr.beg_ms and ar.end_ms==mr.end_ms:
  234. assert( ar.pulse_us == mr.pulse_us )
  235. bi = int(round(ar.beg_ms * td_srate / 1000))
  236. ei = int(round(ar.end_ms * td_srate / 1000))
  237. db = np.mean(tdRmsDbV[bi:ei])
  238. db = calibrate_rms( sigV, srate, ar.beg_ms, ar.end_ms )
  239. anlD[int(midi_pitch)].append({ 'pulse_us':ar.pulse_us, 'db':db, 'meas_idx':mi })
  240. break
  241. return anlD
  242. def rms_analysis_main( inDir, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbLinRef=0.001, harmCandN=5, harmN=3, durDecayPct=40 ):
  243. seqFn = os.path.join( inDir, "seq.json")
  244. audioFn = os.path.join( inDir, "audio.wav")
  245. with open( seqFn, "rb") as f:
  246. r = json.load(f)
  247. srate, signalM = wavfile.read(audioFn)
  248. sigV = signalM / float(0x7fff)
  249. tdRmsDbV, rms0_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef )
  250. tdPkIdxL = locate_peak_indexes( tdRmsDbV, rms0_srate, r['eventTimeL'])
  251. rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbLinRef, midi_pitch, harmCandN, harmN )
  252. pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, r['eventTimeL'] )
  253. holdDutyPctL = None
  254. if 'holdDutyPct' in r:
  255. holdDutyPctL = [ (0, r['holdDutyPct']) ]
  256. else:
  257. holdDutyPctL = r['holdDutyPctL']
  258. r = types.SimpleNamespace(**{
  259. "audio_srate":srate,
  260. "eventTimeMsL":r['eventTimeL'],
  261. "tdRmsDbV": tdRmsDbV,
  262. "tdPkIdxL": tdPkIdxL,
  263. "tdPkDbL": [ tdRmsDbV[i] for i in tdPkIdxL ],
  264. "binHz": binHz,
  265. "rmsDbV":rmsDbV,
  266. "rms_srate":rms_srate,
  267. "pkIdxL":pkIdxL, # pkIdxL[ len(pulsUsL) ] - indexes into rmsDbV[] of peaks
  268. "eventTimeL":r['eventTimeL'],
  269. "holdDutyPctL":holdDutyPctL,
  270. 'pkDbL': [ rmsDbV[ i ] for i in pkIdxL ],
  271. 'pkUsL':r['pulseUsL'] })
  272. statsL = note_stats(r,durDecayPct)
  273. setattr(r,"statsL", statsL )
  274. return r
  275. def rms_analysis_main_all( inDir, cacheFn, rmsWndMs=300, rmsHopMs=30, dbLinRef=0.001, harmCandN=5, harmN=3, durDecayPct=40 ):
  276. if os.path.isfile(cacheFn):
  277. print("READING analysis cache file: %s" % (cacheFn))
  278. with open(cacheFn,"rb") as f:
  279. rD = pickle.load(f)
  280. return rD
  281. folderL = os.listdir(inDir)
  282. rD = {}
  283. for folder in folderL:
  284. pathL = folder.split(os.sep)
  285. midi_pitch = int(pathL[-1])
  286. print(midi_pitch)
  287. path = os.path.join(inDir,folder,'0')
  288. if os.path.isdir(path) and os.path.isfile(os.path.join(os.path.join(path,"seq.json"))):
  289. r = rms_analysis_main( path, midi_pitch, rmsWndMs=rmsWndMs, rmsHopMs=rmsHopMs, dbLinRef=dbLinRef, harmCandN=harmCandN, harmN=harmN, durDecayPct=durDecayPct )
  290. rD[ midi_pitch ] = r
  291. with open(cacheFn,"wb") as f:
  292. pickle.dump(rD,f)
  293. return rD
  294. def samples_to_linear_residual( usL, dbL, pointsPerLine=5 ):
  295. # Score the quality of each sampled point by measuring the
  296. # quality of fit to a local line.
  297. scoreD = { us:[] for us in usL }
  298. pointsPerLine = 5
  299. i = pointsPerLine
  300. # for each sampled point
  301. while i < len(usL):
  302. # beginning with sample at index 'pointsPerLine'
  303. if i >= pointsPerLine:
  304. k = i - pointsPerLine
  305. # get the x (us) and y (db) sample values
  306. xL,yL = zip(*[ ((usL[k+j],1.0), dbL[k+j]) for j in range(pointsPerLine)])
  307. xV = np.array(xL)
  308. yV = np.array(yL)
  309. # fit the sampled point to a line
  310. m,c = np.linalg.lstsq(xV,yV,rcond=None)[0]
  311. # calc the residual of the fit at each point
  312. resV = (m*xV+c)[:,0] - yV
  313. # assign the residual to the associated point in scoreD[x]
  314. for j in range(pointsPerLine):
  315. scoreD[usL[k+j]].append(resV[j])
  316. i += 1
  317. scoreL = []
  318. # calc the mean of the residuals for each point
  319. # (most points were used in 'pointsPerLine' line estimations
  320. # and so they will have 'pointsPerLine' residual values)
  321. for us in usL:
  322. resL = scoreD[us]
  323. if len(resL) == 0:
  324. scoreL.append(0.0)
  325. elif len(resL) == 1:
  326. scoreL.append(resL[0])
  327. else:
  328. scoreL.append(np.mean(resL))
  329. # their should be one mean resid. value for each sampled point
  330. assert( len(scoreL) == len(usL) )
  331. return np.array(scoreL)
  332. def write_audacity_label_files( inDir, analysisArgsD, reverseFl=True ):
  333. pitchDirL = os.listdir(inDir)
  334. for pitchDir in pitchDirL:
  335. folderL = pitchDir.split(os.sep)
  336. midi_pitch = int(folderL[-1])
  337. pitchDir = os.path.join(inDir,pitchDir)
  338. takeDirL = os.listdir(pitchDir)
  339. for takeFolder in takeDirL:
  340. takeDir = os.path.join(pitchDir,takeFolder)
  341. r = rms_analysis_main( takeDir, midi_pitch, **analysisArgsD )
  342. labelFn = os.path.join(takeDir,"audacity.txt")
  343. print("Writing:",labelFn)
  344. with open(labelFn,"w") as f:
  345. for i,s in enumerate(r.statsL):
  346. noteIndex = len(r.statsL)-(i+1) if reverseFl else i
  347. label = "%i %4.1f %6.1f" % (noteIndex, s.pkDb, s.durMs )
  348. f.write("%f\t%f\t%s\n" % ( s.begSmpSec, s.endSmpSec, label ))