libcm is a C development framework with an emphasis on audio signal processing applications.
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

cmOnset.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. //| Copyright: (C) 2009-2020 Kevin Larke <contact AT larke DOT org>
  2. //| License: GNU GPL version 3.0 or above. See the accompanying LICENSE file.
  3. #include "cmGlobal.h"
  4. #include "cmFloatTypes.h"
  5. #include "cmComplexTypes.h"
  6. #include "cmRpt.h"
  7. #include "cmErr.h"
  8. #include "cmCtx.h"
  9. #include "cmMem.h"
  10. #include "cmMallocDebug.h"
  11. #include "cmLinkedHeap.h"
  12. #include "cmSymTbl.h"
  13. #include "cmAudioFile.h"
  14. #include "cmTime.h"
  15. #include "cmMidi.h"
  16. #include "cmFile.h"
  17. #include "cmMath.h"
  18. #include "cmProcObj.h"
  19. #include "cmProcTemplateMain.h"
  20. #include "cmProc.h"
  21. #include "cmProc2.h"
  22. #include "cmVectOps.h"
  23. #include "cmOnset.h"
  24. typedef struct
  25. {
  26. cmErr_t err;
  27. cmOnsetCfg_t cfg;
  28. cmCtx* ctxPtr;
  29. cmAudioFileRd* afRdPtr;
  30. cmPvAnl* pvocPtr;
  31. cmAudioFileH_t afH; // output audio file
  32. cmFileH_t txH; // output text file
  33. unsigned frmCnt; // spectral frame count
  34. cmReal_t* sfV; // sfV[frmCnt] spectral flux vector
  35. cmReal_t* dfV; // dfV[frmCnt] onset function vector
  36. cmReal_t maxSf;
  37. cmAudioFileInfo_t afInfo;
  38. unsigned fftSmpCnt;
  39. unsigned hopSmpCnt;
  40. unsigned binCnt;
  41. unsigned medFiltFrmCnt;
  42. unsigned preDelaySmpCnt;
  43. } _cmOn_t;
  44. cmOnH_t cmOnsetNullHandle = cmSTATIC_NULL_HANDLE;
  45. _cmOn_t* _cmOnsetHandleToPtr( cmOnH_t h )
  46. {
  47. _cmOn_t* p = (_cmOn_t*)h.h;
  48. assert(p!=NULL);
  49. return p;
  50. }
  51. cmOnRC_t _cmOnsetFinalize( _cmOn_t* p )
  52. {
  53. cmOnRC_t rc = kOkOnRC;
  54. if( cmPvAnlFree(&p->pvocPtr) != cmOkRC )
  55. {
  56. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Phase voocoder free failed.");
  57. goto errLabel;
  58. }
  59. if( cmAudioFileRdFree(&p->afRdPtr) != cmOkRC )
  60. {
  61. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Audio file reader failed.");
  62. goto errLabel;
  63. }
  64. if( cmCtxFree(&p->ctxPtr) != cmOkRC )
  65. {
  66. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Context proc failed.");
  67. goto errLabel;
  68. }
  69. cmMemPtrFree(&p->sfV);
  70. cmMemPtrFree(&p->dfV);
  71. cmMemPtrFree(&p);
  72. errLabel:
  73. return rc;
  74. }
  75. cmOnRC_t cmOnsetInitialize( cmCtx_t* c, cmOnH_t* hp )
  76. {
  77. cmOnRC_t rc;
  78. if((rc = cmOnsetFinalize(hp)) != kOkOnRC )
  79. return rc;
  80. _cmOn_t* p = cmMemAllocZ(_cmOn_t,1);
  81. cmErrSetup(&p->err,&c->rpt,"Onset");
  82. // create the proc context object
  83. if((p->ctxPtr = cmCtxAlloc(NULL,&c->rpt,cmLHeapNullHandle,cmSymTblNullHandle)) == NULL )
  84. {
  85. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The ctx compoenent allocation failed.");
  86. goto errLabel;
  87. }
  88. // create the audio file reader
  89. if((p->afRdPtr = cmAudioFileRdAlloc( p->ctxPtr, NULL, 0, NULL, cmInvalidIdx, 0, cmInvalidIdx )) == NULL )
  90. {
  91. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader allocation failed.");
  92. goto errLabel;
  93. }
  94. // create the phase vocoder
  95. if((p->pvocPtr = cmPvAnlAlloc( p->ctxPtr, NULL, 0, 0, 0, 0, 0 )) == NULL )
  96. {
  97. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The phase vocoder allocation failed.");
  98. goto errLabel;
  99. }
  100. hp->h = p;
  101. errLabel:
  102. if( rc != kOkOnRC )
  103. _cmOnsetFinalize(p);
  104. return rc;
  105. }
  106. cmOnRC_t cmOnsetFinalize( cmOnH_t* hp )
  107. {
  108. cmOnRC_t rc = kOkOnRC;
  109. if( hp==NULL || cmOnsetIsValid(*hp)==false )
  110. return kOkOnRC;
  111. _cmOn_t* p = _cmOnsetHandleToPtr(*hp);
  112. rc = _cmOnsetFinalize(p);
  113. return rc;
  114. }
  115. bool cmOnsetIsValid( cmOnH_t h )
  116. { return h.h!=NULL; }
  117. cmOnRC_t _cmOnsetExec( _cmOn_t* p )
  118. {
  119. cmOnRC_t rc = kOkOnRC;
  120. int fi = 0;
  121. unsigned binCnt = p->binCnt; //p->pvocPtr->binCnt;
  122. cmReal_t mag0V[ binCnt ];
  123. double prog = 0.1;
  124. cmReal_t b0 = 1;
  125. cmReal_t b[] = {1 };
  126. cmReal_t a[] = {p->cfg.filtCoeff};
  127. cmReal_t d[] = {0};
  128. cmReal_t maxVal = 0;
  129. cmVOR_Zero(mag0V,binCnt);
  130. // for each frame - read the next block of audio
  131. for(; fi<p->frmCnt && cmAudioFileRdRead(p->afRdPtr) != cmEofRC; ++fi )
  132. {
  133. // calc the spectrum
  134. if( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ))
  135. {
  136. unsigned i;
  137. // calc the spectral flux into sfV[fi].
  138. cmReal_t sf = 0;
  139. for(i=0; i<binCnt; ++i)
  140. {
  141. cmReal_t m1 = p->pvocPtr->magV[i] * 2.0;
  142. if( m1 > maxVal )
  143. maxVal = m1;
  144. cmReal_t dif = m1 - mag0V[i]; // calc. spectral flux
  145. if( dif > 0 )
  146. sf += dif; // accum. flux
  147. mag0V[i] = m1; // store magn. for next frame
  148. }
  149. p->sfV[fi] = sf;
  150. // filter the spectral flux
  151. switch( p->cfg.filterId)
  152. {
  153. case kNoneFiltId:
  154. break;
  155. case kSmoothFiltId:
  156. cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
  157. break;
  158. case kMedianFiltId:
  159. {
  160. cmReal_t* mfb = p->sfV + cmMax(0,fi-17);
  161. if( mfb < p->sfV-3 )
  162. p->sfV[fi] = cmVOR_Median(mfb,p->sfV-mfb);
  163. }
  164. break;
  165. default:
  166. { assert(0); }
  167. }
  168. if( fi >= prog*p->frmCnt )
  169. {
  170. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  171. prog += 0.1;
  172. }
  173. }
  174. }
  175. p->frmCnt = fi;
  176. // normalize the spectral flux vector
  177. cmReal_t mean = cmVOR_Mean(p->sfV,p->frmCnt);
  178. cmReal_t stdDev = sqrt(cmVOR_Variance(p->sfV, p->frmCnt, &mean ));
  179. unsigned detectCnt = 0;
  180. cmVOR_SubVS(p->sfV,p->frmCnt,mean);
  181. cmVOR_DivVS(p->sfV,p->frmCnt,stdDev);
  182. p->maxSf = cmVOR_Max(p->sfV,p->frmCnt,1);
  183. prog = 0.1;
  184. cmRptPrintf(p->err.rpt,"magn. max:%f flux mean:%f max:%f sd:%f\n",maxVal,mean,p->maxSf,stdDev);
  185. // Pick peaks from the onset detection function using a subset
  186. // of the rules from Dixon, 2006, Onset Detection Revisited.
  187. // locate the onsets and store them in dfV[]
  188. for(fi=0; fi<p->frmCnt; ++fi)
  189. {
  190. int bi = cmMax(0, fi - p->cfg.wndFrmCnt); // begin wnd index
  191. int ei = cmMin(p->frmCnt, fi + p->cfg.wndFrmCnt); // end wnd index
  192. int nn = ei - bi; // wnd frm cnt
  193. int wi = fi < p->cfg.wndFrmCnt ? fi : p->cfg.wndFrmCnt; // cur wnd index
  194. p->dfV[fi] = 0;
  195. // if cur index is a peak in the window
  196. if( cmVOR_MaxIndex(p->sfV + bi, nn, 1 ) == wi )
  197. {
  198. // calc an extended window going backwards in time
  199. bi = cmMax(0, fi - p->cfg.wndFrmCnt * p->cfg.preWndMult );
  200. nn = ei - bi;
  201. // if the cur value is greater than the mean of the extended window plus a threshold
  202. if( p->sfV[fi] > cmVOR_Mean(p->sfV + bi, nn ) + p->cfg.threshold )
  203. {
  204. p->dfV[fi] = p->sfV[fi];
  205. ++detectCnt;
  206. }
  207. }
  208. if( fi >= prog*p->frmCnt )
  209. {
  210. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  211. prog += 0.1;
  212. }
  213. }
  214. cmRptPrintf(p->err.rpt,"Detect Count:%i\n",detectCnt);
  215. return rc;
  216. }
  217. cmOnRC_t cmOnsetProc( cmOnH_t h, const cmOnsetCfg_t* cfg, const cmChar_t* inAudioFn )
  218. {
  219. cmOnRC_t rc = kOkOnRC;
  220. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  221. p->cfg = *cfg;
  222. // get the audio file header information
  223. if( cmAudioFileGetInfo(inAudioFn, &p->afInfo, p->err.rpt ) != kOkAfRC )
  224. {
  225. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The audio file open failed on '%s'.",cmStringNullGuard(inAudioFn));
  226. goto errLabel;
  227. }
  228. p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
  229. p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
  230. p->binCnt = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
  231. p->frmCnt = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
  232. p->sfV = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
  233. p->dfV = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
  234. p->medFiltFrmCnt = cmMax(3,floor(cfg->medFiltWndMs * p->afInfo.srate / (1000.0 * p->hopSmpCnt)));
  235. p->preDelaySmpCnt= floor(cfg->preDelayMs * p->afInfo.srate / 1000.0);
  236. cmRptPrintf(p->err.rpt,"wndFrmCnt:%i preWndMult:%f thresh:%f maxHz:%f filtCoeff:%f filterId:%i preDelayMs:%f\n",cfg->wndFrmCnt,cfg->preWndMult,cfg->threshold,cfg->maxFrqHz,cfg->filtCoeff,cfg->medFiltWndMs,cfg->filterId,cfg->preDelayMs );
  237. cmRptPrintf(p->err.rpt,"Analysis Hop Duration: %8.2f ms %i smp\n",(double)p->hopSmpCnt*1000/p->afInfo.srate,p->hopSmpCnt);
  238. cmRptPrintf(p->err.rpt,"Median Filter Window: %8.2f ms %i frames\n",cfg->medFiltWndMs,p->medFiltFrmCnt);
  239. cmRptPrintf(p->err.rpt,"Detection Pre-delay: %8.2f ms %i smp\n",cfg->preDelayMs, p->preDelaySmpCnt);
  240. // initialize the audio file reader
  241. if( cmAudioFileRdOpen( p->afRdPtr, p->hopSmpCnt, inAudioFn, p->cfg.audioChIdx, 0, 0 ) != cmOkRC )
  242. {
  243. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader open failed.");
  244. goto errLabel;
  245. }
  246. // initialize the phase vocoder
  247. if( cmPvAnlInit( p->pvocPtr, p->hopSmpCnt, p->afInfo.srate, p->fftSmpCnt, p->hopSmpCnt, kNoCalcHzPvaFl ) != cmOkRC )
  248. {
  249. rc = cmErrMsg(&p->err,kDspProcFailOnRC," The phase vocoder initialization failed.");
  250. goto errLabel;
  251. }
  252. rc = _cmOnsetExec(p);
  253. errLabel:
  254. return rc;
  255. }
  256. unsigned cmOnsetCount( cmOnH_t h )
  257. {
  258. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  259. unsigned i;
  260. unsigned n = 0;
  261. for(i=0; i<p->frmCnt; ++i)
  262. if( p->dfV[i] > 0 )
  263. ++n;
  264. return n;
  265. }
  266. unsigned cmOnsetSampleIndex( cmOnH_t h, unsigned idx )
  267. {
  268. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  269. unsigned i;
  270. unsigned n = 0;
  271. for(i=0; i<p->frmCnt; ++i)
  272. if( p->dfV[i] > 0 )
  273. {
  274. if( n == idx )
  275. {
  276. unsigned r = i * p->hopSmpCnt;
  277. if( r > p->preDelaySmpCnt )
  278. return r-p->preDelaySmpCnt;
  279. return 0;
  280. }
  281. ++n;
  282. }
  283. return cmInvalidIdx;
  284. }
  285. unsigned cmOnsetHopSampleCount( cmOnH_t h )
  286. {
  287. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  288. return p->hopSmpCnt;
  289. }
  290. cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* outTextFn)
  291. {
  292. enum { kChCnt = 2 };
  293. cmOnRC_t rc = kOkOnRC;
  294. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  295. cmSample_t out0V[ p->hopSmpCnt ];
  296. cmSample_t out1V[ p->hopSmpCnt ];
  297. cmSample_t* aoutV[kChCnt];
  298. unsigned pdn = 0;
  299. aoutV[0] = out0V;
  300. aoutV[1] = out1V;
  301. // initalize the audio output file
  302. if( outAudioFn != NULL )
  303. if( cmAudioFileIsValid( p->afH = cmAudioFileNewCreate( outAudioFn, p->afInfo.srate, p->afInfo.bits, kChCnt, NULL, p->err.rpt)) == false )
  304. {
  305. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC, "The audio output file '%s' could not be opened.", outAudioFn);
  306. goto errLabel;
  307. }
  308. // open the text output file
  309. if( outTextFn != NULL )
  310. {
  311. if( cmFileOpen( &p->txH, outTextFn, kWriteFileFl, p->err.rpt ) != kOkFileRC )
  312. {
  313. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC, "The text output file '%s' could not be opened.",outTextFn);
  314. goto errLabel;
  315. }
  316. cmFilePrint(p->txH,"{\n onsetArray : \n[\n");
  317. }
  318. unsigned fi;
  319. for(fi=0; fi<p->frmCnt; ++fi)
  320. {
  321. // count of samples to write to the audio output file
  322. unsigned osn = p->hopSmpCnt;
  323. // audio channel 1 is filled with the spectral flux
  324. // initialize the out
  325. cmVOS_Fill(out1V,p->hopSmpCnt,p->sfV[fi]/p->maxSf);
  326. cmVOS_Zero(out0V,p->hopSmpCnt);
  327. if( p->dfV[fi] > 0 )
  328. {
  329. // audio channel 0 is set with the detection indicators
  330. unsigned smpIdx = fi * p->hopSmpCnt + p->hopSmpCnt/2;
  331. out0V[ p->hopSmpCnt/2 ] = p->sfV[fi]/p->maxSf;
  332. // if the pre-delay is still active
  333. if( pdn < p->preDelaySmpCnt )
  334. {
  335. osn = 0;
  336. pdn += p->hopSmpCnt;
  337. if( pdn > p->preDelaySmpCnt )
  338. osn = pdn - p->preDelaySmpCnt;
  339. }
  340. // write the output text file
  341. if( cmFileIsValid(p->txH) )
  342. if( cmFilePrintf(p->txH, "[ %i, %f ]\n", smpIdx, p->sfV[fi] ) != kOkFileRC )
  343. {
  344. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"Text output write to '%s' failed.", cmFileName(p->txH));
  345. goto errLabel;
  346. }
  347. }
  348. // write the output audio file
  349. if( osn > 0 && cmAudioFileIsValid(p->afH) )
  350. {
  351. if( cmAudioFileWriteFloat(p->afH, osn, kChCnt, aoutV ) != kOkAfRC )
  352. {
  353. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"Audio file write to '%s' failed.",cmAudioFileName(p->afH));
  354. goto errLabel;
  355. }
  356. }
  357. }
  358. // close the output audio file
  359. if( cmAudioFileDelete(&p->afH) != kOkAfRC )
  360. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"The audio file close failed.");
  361. // close the text file
  362. if( cmFileIsValid(p->txH) )
  363. {
  364. cmFilePrint(p->txH,"]\n}\n");
  365. if( cmFileClose(&p->txH) != kOkFileRC )
  366. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"The text file close failed.");
  367. }
  368. errLabel:
  369. return rc;
  370. }
  371. cmOnRC_t cmOnsetTest( cmCtx_t* c )
  372. {
  373. cmOnsetCfg_t cfg;
  374. cmOnH_t h = cmOnsetNullHandle;
  375. cmOnRC_t rc = kOkOnRC;
  376. const cmChar_t* inAudioFn = "/home/kevin/media/audio/20110723-Kriesberg/Audio Files/Piano 3_15.wav";
  377. const cmChar_t* outAudioFn = "/home/kevin/temp/ons/ons0.aif";
  378. const cmChar_t* outTextFn = "/home/kevin/temp/ons/ons0.txt";
  379. cfg.wndMs = 42;
  380. cfg.hopFact = 4;
  381. cfg.audioChIdx = 0;
  382. cfg.wndFrmCnt = 3;
  383. cfg.preWndMult = 3;
  384. cfg.threshold = 0.6;
  385. cfg.maxFrqHz = 24000;
  386. cfg.filtCoeff = -0.7;
  387. cfg.medFiltWndMs = 50;
  388. cfg.filterId = kMedianFiltId;
  389. cfg.preDelayMs = 20;
  390. if((rc = cmOnsetInitialize(c,&h)) != kOkOnRC )
  391. goto errLabel;
  392. if((rc = cmOnsetProc(h,&cfg,inAudioFn)) == kOkOnRC )
  393. cmOnsetWrite(h,outAudioFn,outTextFn);
  394. errLabel:
  395. cmOnsetFinalize(&h);
  396. return rc;
  397. }