libcm is a C development framework with an emphasis on audio signal processing applications.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

cmOnset.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. //| Copyright: (C) 2009-2020 Kevin Larke <contact AT larke DOT org>
  2. //| License: GNU GPL version 3.0 or above. See the accompanying LICENSE file.
  3. #include "cmGlobal.h"
  4. #include "cmFloatTypes.h"
  5. #include "cmComplexTypes.h"
  6. #include "cmRpt.h"
  7. #include "cmErr.h"
  8. #include "cmCtx.h"
  9. #include "cmMem.h"
  10. #include "cmMallocDebug.h"
  11. #include "cmLinkedHeap.h"
  12. #include "cmSymTbl.h"
  13. #include "cmAudioFile.h"
  14. #include "cmTime.h"
  15. #include "cmMidi.h"
  16. #include "cmFile.h"
  17. #include "cmMath.h"
  18. #include "cmProcObj.h"
  19. #include "cmProcTemplateMain.h"
  20. #include "cmProc.h"
  21. #include "cmProc2.h"
  22. #include "cmVectOps.h"
  23. #include "cmOnset.h"
  24. typedef struct
  25. {
  26. cmErr_t err;
  27. cmOnsetCfg_t cfg;
  28. cmCtx* ctxPtr;
  29. cmAudioFileRd* afRdPtr;
  30. cmPvAnl* pvocPtr;
  31. cmAudioFileH_t afH; // output audio file
  32. cmFileH_t txH; // output text file
  33. unsigned frmCnt; // spectral frame count
  34. cmReal_t* sfV; // sfV[frmCnt] spectral flux vector
  35. cmReal_t* dfV; // dfV[frmCnt] onset function vector
  36. cmReal_t maxSf;
  37. cmAudioFileInfo_t afInfo;
  38. unsigned fftSmpCnt;
  39. unsigned hopSmpCnt;
  40. unsigned binCnt;
  41. unsigned medFiltFrmCnt;
  42. unsigned preDelaySmpCnt;
  43. } _cmOn_t;
  44. cmOnH_t cmOnsetNullHandle = cmSTATIC_NULL_HANDLE;
  45. _cmOn_t* _cmOnsetHandleToPtr( cmOnH_t h )
  46. {
  47. _cmOn_t* p = (_cmOn_t*)h.h;
  48. assert(p!=NULL);
  49. return p;
  50. }
  51. cmOnRC_t _cmOnsetFinalize( _cmOn_t* p )
  52. {
  53. cmOnRC_t rc = kOkOnRC;
  54. if( cmPvAnlFree(&p->pvocPtr) != cmOkRC )
  55. {
  56. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Phase voocoder free failed.");
  57. goto errLabel;
  58. }
  59. if( cmAudioFileRdFree(&p->afRdPtr) != cmOkRC )
  60. {
  61. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Audio file reader failed.");
  62. goto errLabel;
  63. }
  64. if( cmCtxFree(&p->ctxPtr) != cmOkRC )
  65. {
  66. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Context proc failed.");
  67. goto errLabel;
  68. }
  69. cmMemPtrFree(&p->sfV);
  70. cmMemPtrFree(&p->dfV);
  71. cmMemPtrFree(&p);
  72. errLabel:
  73. return rc;
  74. }
  75. cmOnRC_t cmOnsetInitialize( cmCtx_t* c, cmOnH_t* hp )
  76. {
  77. cmOnRC_t rc;
  78. if((rc = cmOnsetFinalize(hp)) != kOkOnRC )
  79. return rc;
  80. _cmOn_t* p = cmMemAllocZ(_cmOn_t,1);
  81. cmErrSetup(&p->err,&c->rpt,"Onset");
  82. // create the proc context object
  83. if((p->ctxPtr = cmCtxAlloc(NULL,&c->rpt,cmLHeapNullHandle,cmSymTblNullHandle)) == NULL )
  84. {
  85. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The ctx compoenent allocation failed.");
  86. goto errLabel;
  87. }
  88. // create the audio file reader
  89. if((p->afRdPtr = cmAudioFileRdAlloc( p->ctxPtr, NULL, 0, NULL, cmInvalidIdx, 0, cmInvalidIdx )) == NULL )
  90. {
  91. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader allocation failed.");
  92. goto errLabel;
  93. }
  94. // create the phase vocoder
  95. if((p->pvocPtr = cmPvAnlAlloc( p->ctxPtr, NULL, 0, 0, 0, 0, 0 )) == NULL )
  96. {
  97. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The phase vocoder allocation failed.");
  98. goto errLabel;
  99. }
  100. hp->h = p;
  101. errLabel:
  102. if( rc != kOkOnRC )
  103. _cmOnsetFinalize(p);
  104. return rc;
  105. }
  106. cmOnRC_t cmOnsetFinalize( cmOnH_t* hp )
  107. {
  108. cmOnRC_t rc = kOkOnRC;
  109. if( hp==NULL || cmOnsetIsValid(*hp)==false )
  110. return kOkOnRC;
  111. _cmOn_t* p = _cmOnsetHandleToPtr(*hp);
  112. rc = _cmOnsetFinalize(p);
  113. return rc;
  114. }
  115. bool cmOnsetIsValid( cmOnH_t h )
  116. { return h.h!=NULL; }
  117. cmOnRC_t _cmOnsetExec( _cmOn_t* p )
  118. {
  119. cmOnRC_t rc = kOkOnRC;
  120. int fi = 0;
  121. unsigned binCnt = p->binCnt; //p->pvocPtr->binCnt;
  122. cmReal_t mag0V[ binCnt ];
  123. double prog = 0.1;
  124. cmReal_t b0 = 1;
  125. cmReal_t b[] = {1 };
  126. cmReal_t a[] = {p->cfg.filtCoeff};
  127. cmReal_t d[] = {0};
  128. cmReal_t maxVal = 0;
  129. cmVOR_Zero(mag0V,binCnt);
  130. // for each frame - read the next block of audio
  131. for(; fi<p->frmCnt && cmAudioFileRdRead(p->afRdPtr) != cmEofRC; ++fi )
  132. {
  133. // calc the spectrum
  134. if( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ))
  135. {
  136. unsigned i;
  137. // calc the spectral flux into sfV[fi].
  138. cmReal_t sf = 0;
  139. for(i=0; i<binCnt; ++i)
  140. {
  141. cmReal_t m1 = p->pvocPtr->magV[i] * 2.0;
  142. if( m1 > maxVal )
  143. maxVal = m1;
  144. cmReal_t dif = m1 - mag0V[i]; // calc. spectral flux
  145. if( dif > 0 )
  146. sf += dif; // accum. flux
  147. mag0V[i] = m1; // store magn. for next frame
  148. }
  149. p->sfV[fi] = sf;
  150. // filter the spectral flux
  151. switch( p->cfg.filterId)
  152. {
  153. case kNoneFiltId:
  154. break;
  155. case kSmoothFiltId:
  156. cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
  157. break;
  158. case kMedianFiltId:
  159. {
  160. cmReal_t* mfb = p->sfV + cmMax(0,fi-17);
  161. if( mfb < p->sfV-3 )
  162. p->sfV[fi] = cmVOR_Median(mfb,p->sfV-mfb);
  163. }
  164. break;
  165. default:
  166. { assert(0); }
  167. }
  168. if( fi >= prog*p->frmCnt )
  169. {
  170. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  171. prog += 0.1;
  172. }
  173. }
  174. }
  175. p->frmCnt = fi;
  176. // normalize the spectral flux vector
  177. cmReal_t mean = cmVOR_Mean(p->sfV,p->frmCnt);
  178. cmReal_t stdDev = sqrt(cmVOR_Variance(p->sfV, p->frmCnt, &mean ));
  179. unsigned detectCnt = 0;
  180. cmVOR_SubVS(p->sfV,p->frmCnt,mean);
  181. cmVOR_DivVS(p->sfV,p->frmCnt,stdDev);
  182. p->maxSf = cmVOR_Max(p->sfV,p->frmCnt,1);
  183. prog = 0.1;
  184. cmRptPrintf(p->err.rpt,"magn. max:%f flux mean:%f max:%f sd:%f\n",maxVal,mean,p->maxSf,stdDev);
  185. // Pick peaks from the onset detection function using a subset
  186. // of the rules from Dixon, 2006, Onset Detection Revisited.
  187. // locate the onsets and store them in dfV[]
  188. for(fi=0; fi<p->frmCnt; ++fi)
  189. {
  190. int bi = cmMax(0, fi - p->cfg.wndFrmCnt); // begin wnd index
  191. int ei = cmMin(p->frmCnt, fi + p->cfg.wndFrmCnt); // end wnd index
  192. int nn = ei - bi; // wnd frm cnt
  193. int wi = fi < p->cfg.wndFrmCnt ? fi : p->cfg.wndFrmCnt; // cur wnd index
  194. p->dfV[fi] = 0;
  195. // if cur index is a peak in the window
  196. if( cmVOR_MaxIndex(p->sfV + bi, nn, 1 ) == wi )
  197. {
  198. // calc an extended window going backwards in time
  199. bi = cmMax(0, fi - p->cfg.wndFrmCnt * p->cfg.preWndMult );
  200. nn = ei - bi;
  201. // if the cur value is greater than the mean of the extended window plus a threshold
  202. if( p->sfV[fi] > cmVOR_Mean(p->sfV + bi, nn ) + p->cfg.threshold )
  203. {
  204. p->dfV[fi] = p->sfV[fi];
  205. ++detectCnt;
  206. }
  207. }
  208. if( fi >= prog*p->frmCnt )
  209. {
  210. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  211. prog += 0.1;
  212. }
  213. }
  214. cmRptPrintf(p->err.rpt,"Detect Count:%i\n",detectCnt);
  215. return rc;
  216. }
  217. cmOnRC_t cmOnsetProc( cmOnH_t h, const cmOnsetCfg_t* cfg, const cmChar_t* inAudioFn )
  218. {
  219. cmOnRC_t rc = kOkOnRC;
  220. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  221. p->cfg = *cfg;
  222. // get the audio file header information
  223. if( cmAudioFileGetInfo(inAudioFn, &p->afInfo, p->err.rpt ) != kOkAfRC )
  224. {
  225. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The audio file open failed on '%s'.",cmStringNullGuard(inAudioFn));
  226. goto errLabel;
  227. }
  228. p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
  229. p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
  230. p->binCnt = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
  231. p->frmCnt = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
  232. p->sfV = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
  233. p->dfV = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
  234. p->medFiltFrmCnt = cmMax(3,floor(cfg->medFiltWndMs * p->afInfo.srate / (1000.0 * p->hopSmpCnt)));
  235. p->preDelaySmpCnt= floor(cfg->preDelayMs * p->afInfo.srate / 1000.0);
  236. cmRptPrintf(p->err.rpt,"wndFrmCnt:%i preWndMult:%f thresh:%f maxHz:%f filtCoeff:%f filterId:%i preDelayMs:%f\n",cfg->wndFrmCnt,cfg->preWndMult,cfg->threshold,cfg->maxFrqHz,cfg->filtCoeff,cfg->medFiltWndMs,cfg->filterId,cfg->preDelayMs );
  237. cmRptPrintf(p->err.rpt,"Analysis Hop Duration: %8.2f ms %i smp\n",(double)p->hopSmpCnt*1000/p->afInfo.srate,p->hopSmpCnt);
  238. cmRptPrintf(p->err.rpt,"Median Filter Window: %8.2f ms %i frames\n",cfg->medFiltWndMs,p->medFiltFrmCnt);
  239. cmRptPrintf(p->err.rpt,"Detection Pre-delay: %8.2f ms %i smp\n",cfg->preDelayMs, p->preDelaySmpCnt);
  240. // initialize the audio file reader
  241. if( cmAudioFileRdOpen( p->afRdPtr, p->hopSmpCnt, inAudioFn, p->cfg.audioChIdx, 0, 0 ) != cmOkRC )
  242. {
  243. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader open failed.");
  244. goto errLabel;
  245. }
  246. // initialize the phase vocoder
  247. if( cmPvAnlInit( p->pvocPtr, p->hopSmpCnt, p->afInfo.srate, p->fftSmpCnt, p->hopSmpCnt, kNoCalcHzPvaFl ) != cmOkRC )
  248. {
  249. rc = cmErrMsg(&p->err,kDspProcFailOnRC," The phase vocoder initialization failed.");
  250. goto errLabel;
  251. }
  252. rc = _cmOnsetExec(p);
  253. errLabel:
  254. return rc;
  255. }
  256. unsigned cmOnsetCount( cmOnH_t h )
  257. {
  258. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  259. unsigned i;
  260. unsigned n = 0;
  261. for(i=0; i<p->frmCnt; ++i)
  262. if( p->dfV[i] > 0 )
  263. ++n;
  264. return n;
  265. }
  266. unsigned cmOnsetSampleIndex( cmOnH_t h, unsigned idx )
  267. {
  268. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  269. unsigned i;
  270. unsigned n = 0;
  271. for(i=0; i<p->frmCnt; ++i)
  272. if( p->dfV[i] > 0 )
  273. {
  274. if( n == idx )
  275. {
  276. unsigned r = i * p->hopSmpCnt;
  277. if( r > p->preDelaySmpCnt )
  278. return r-p->preDelaySmpCnt;
  279. return 0;
  280. }
  281. ++n;
  282. }
  283. return cmInvalidIdx;
  284. }
  285. unsigned cmOnsetHopSampleCount( cmOnH_t h )
  286. {
  287. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  288. return p->hopSmpCnt;
  289. }
  290. cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* outTextFn)
  291. {
  292. enum { kChCnt = 2 };
  293. cmOnRC_t rc = kOkOnRC;
  294. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  295. cmSample_t out0V[ p->hopSmpCnt ];
  296. cmSample_t out1V[ p->hopSmpCnt ];
  297. cmSample_t* aoutV[kChCnt];
  298. unsigned pdn = 0;
  299. aoutV[0] = out0V;
  300. aoutV[1] = out1V;
  301. // initalize the audio output file
  302. if( outAudioFn != NULL )
  303. if( cmAudioFileIsValid( p->afH = cmAudioFileNewCreate( outAudioFn, p->afInfo.srate, p->afInfo.bits, kChCnt, NULL, p->err.rpt)) == false )
  304. {
  305. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC, "The audio output file '%s' could not be opened.", outAudioFn);
  306. goto errLabel;
  307. }
  308. // open the text output file
  309. if( outTextFn != NULL )
  310. {
  311. if( cmFileOpen( &p->txH, outTextFn, kWriteFileFl, p->err.rpt ) != kOkFileRC )
  312. {
  313. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC, "The text output file '%s' could not be opened.",outTextFn);
  314. goto errLabel;
  315. }
  316. cmFilePrint(p->txH,"{\n onsetArray : \n[\n");
  317. }
  318. unsigned fi;
  319. for(fi=0; fi<p->frmCnt; ++fi)
  320. {
  321. // count of samples to write to the audio output file
  322. unsigned osn = p->hopSmpCnt;
  323. // audio channel 1 is filled with the spectral flux
  324. // initialize the out
  325. cmVOS_Fill(out1V,p->hopSmpCnt,p->sfV[fi]/p->maxSf);
  326. cmVOS_Zero(out0V,p->hopSmpCnt);
  327. if( p->dfV[fi] > 0 )
  328. {
  329. // audio channel 0 is set with the detection indicators
  330. unsigned smpIdx = fi * p->hopSmpCnt + p->hopSmpCnt/2;
  331. out0V[ p->hopSmpCnt/2 ] = p->sfV[fi]/p->maxSf;
  332. // if the pre-delay is still active
  333. if( pdn < p->preDelaySmpCnt )
  334. {
  335. osn = 0;
  336. pdn += p->hopSmpCnt;
  337. if( pdn > p->preDelaySmpCnt )
  338. osn = pdn - p->preDelaySmpCnt;
  339. }
  340. // write the output text file
  341. if( cmFileIsValid(p->txH) )
  342. if( cmFilePrintf(p->txH, "[ %i, %f ]\n", smpIdx, p->sfV[fi] ) != kOkFileRC )
  343. {
  344. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"Text output write to '%s' failed.", cmFileName(p->txH));
  345. goto errLabel;
  346. }
  347. }
  348. // write the output audio file
  349. if( osn > 0 && cmAudioFileIsValid(p->afH) )
  350. {
  351. if( cmAudioFileWriteFloat(p->afH, osn, kChCnt, aoutV ) != kOkAfRC )
  352. {
  353. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"Audio file write to '%s' failed.",cmAudioFileName(p->afH));
  354. goto errLabel;
  355. }
  356. }
  357. }
  358. // close the output audio file
  359. if( cmAudioFileDelete(&p->afH) != kOkAfRC )
  360. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"The audio file close failed.");
  361. // close the text file
  362. if( cmFileIsValid(p->txH) )
  363. {
  364. cmFilePrint(p->txH,"]\n}\n");
  365. if( cmFileClose(&p->txH) != kOkFileRC )
  366. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"The text file close failed.");
  367. }
  368. errLabel:
  369. return rc;
  370. }
  371. cmOnRC_t cmOnsetTest( cmCtx_t* c )
  372. {
  373. cmOnsetCfg_t cfg;
  374. cmOnH_t h = cmOnsetNullHandle;
  375. cmOnRC_t rc = kOkOnRC;
  376. const cmChar_t* inAudioFn = "/home/kevin/media/audio/20110723-Kriesberg/Audio Files/Piano 3_15.wav";
  377. const cmChar_t* outAudioFn = "/home/kevin/temp/ons/ons0.aif";
  378. const cmChar_t* outTextFn = "/home/kevin/temp/ons/ons0.txt";
  379. cfg.wndMs = 42;
  380. cfg.hopFact = 4;
  381. cfg.audioChIdx = 0;
  382. cfg.wndFrmCnt = 3;
  383. cfg.preWndMult = 3;
  384. cfg.threshold = 0.6;
  385. cfg.maxFrqHz = 24000;
  386. cfg.filtCoeff = -0.7;
  387. cfg.medFiltWndMs = 50;
  388. cfg.filterId = kMedianFiltId;
  389. cfg.preDelayMs = 20;
  390. if((rc = cmOnsetInitialize(c,&h)) != kOkOnRC )
  391. goto errLabel;
  392. if((rc = cmOnsetProc(h,&cfg,inAudioFn)) == kOkOnRC )
  393. cmOnsetWrite(h,outAudioFn,outTextFn);
  394. errLabel:
  395. cmOnsetFinalize(&h);
  396. return rc;
  397. }