libcm is a C development framework with an emphasis on audio signal processing applications.
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

cmOnset.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. #include "cmGlobal.h"
  2. #include "cmFloatTypes.h"
  3. #include "cmComplexTypes.h"
  4. #include "cmRpt.h"
  5. #include "cmErr.h"
  6. #include "cmCtx.h"
  7. #include "cmMem.h"
  8. #include "cmMallocDebug.h"
  9. #include "cmLinkedHeap.h"
  10. #include "cmSymTbl.h"
  11. #include "cmAudioFile.h"
  12. #include "cmTime.h"
  13. #include "cmMidi.h"
  14. #include "cmFile.h"
  15. #include "cmMath.h"
  16. #include "cmProcObj.h"
  17. #include "cmProcTemplateMain.h"
  18. #include "cmProc.h"
  19. #include "cmProc2.h"
  20. #include "cmVectOps.h"
  21. #include "cmOnset.h"
  22. typedef struct
  23. {
  24. cmErr_t err;
  25. cmOnsetCfg_t cfg;
  26. cmCtx* ctxPtr;
  27. cmAudioFileRd* afRdPtr;
  28. cmPvAnl* pvocPtr;
  29. cmAudioFileH_t afH; // output audio file
  30. cmFileH_t txH; // output text file
  31. unsigned frmCnt; // spectral frame count
  32. cmReal_t* sfV; // sfV[frmCnt] spectral flux vector
  33. cmReal_t* dfV; // dfV[frmCnt] onset function vector
  34. cmReal_t maxSf;
  35. cmAudioFileInfo_t afInfo;
  36. unsigned fftSmpCnt;
  37. unsigned hopSmpCnt;
  38. unsigned binCnt;
  39. unsigned medFiltFrmCnt;
  40. unsigned preDelaySmpCnt;
  41. } _cmOn_t;
  42. cmOnH_t cmOnsetNullHandle = cmSTATIC_NULL_HANDLE;
  43. _cmOn_t* _cmOnsetHandleToPtr( cmOnH_t h )
  44. {
  45. _cmOn_t* p = (_cmOn_t*)h.h;
  46. assert(p!=NULL);
  47. return p;
  48. }
  49. cmOnRC_t _cmOnsetFinalize( _cmOn_t* p )
  50. {
  51. cmOnRC_t rc = kOkOnRC;
  52. if( cmPvAnlFree(&p->pvocPtr) != cmOkRC )
  53. {
  54. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Phase voocoder free failed.");
  55. goto errLabel;
  56. }
  57. if( cmAudioFileRdFree(&p->afRdPtr) != cmOkRC )
  58. {
  59. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Audio file reader failed.");
  60. goto errLabel;
  61. }
  62. if( cmCtxFree(&p->ctxPtr) != cmOkRC )
  63. {
  64. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Context proc failed.");
  65. goto errLabel;
  66. }
  67. cmMemPtrFree(&p->sfV);
  68. cmMemPtrFree(&p->dfV);
  69. cmMemPtrFree(&p);
  70. errLabel:
  71. return rc;
  72. }
  73. cmOnRC_t cmOnsetInitialize( cmCtx_t* c, cmOnH_t* hp )
  74. {
  75. cmOnRC_t rc;
  76. if((rc = cmOnsetFinalize(hp)) != kOkOnRC )
  77. return rc;
  78. _cmOn_t* p = cmMemAllocZ(_cmOn_t,1);
  79. cmErrSetup(&p->err,&c->rpt,"Onset");
  80. // create the proc context object
  81. if((p->ctxPtr = cmCtxAlloc(NULL,&c->rpt,cmLHeapNullHandle,cmSymTblNullHandle)) == NULL )
  82. {
  83. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The ctx compoenent allocation failed.");
  84. goto errLabel;
  85. }
  86. // create the audio file reader
  87. if((p->afRdPtr = cmAudioFileRdAlloc( p->ctxPtr, NULL, 0, NULL, cmInvalidIdx, 0, cmInvalidIdx )) == NULL )
  88. {
  89. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader allocation failed.");
  90. goto errLabel;
  91. }
  92. // create the phase vocoder
  93. if((p->pvocPtr = cmPvAnlAlloc( p->ctxPtr, NULL, 0, 0, 0, 0, 0 )) == NULL )
  94. {
  95. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The phase vocoder allocation failed.");
  96. goto errLabel;
  97. }
  98. hp->h = p;
  99. errLabel:
  100. if( rc != kOkOnRC )
  101. _cmOnsetFinalize(p);
  102. return rc;
  103. }
  104. cmOnRC_t cmOnsetFinalize( cmOnH_t* hp )
  105. {
  106. cmOnRC_t rc = kOkOnRC;
  107. if( hp==NULL || cmOnsetIsValid(*hp)==false )
  108. return kOkOnRC;
  109. _cmOn_t* p = _cmOnsetHandleToPtr(*hp);
  110. rc = _cmOnsetFinalize(p);
  111. return rc;
  112. }
  113. bool cmOnsetIsValid( cmOnH_t h )
  114. { return h.h!=NULL; }
  115. cmOnRC_t _cmOnsetExec( _cmOn_t* p )
  116. {
  117. cmOnRC_t rc = kOkOnRC;
  118. int fi = 0;
  119. unsigned binCnt = p->binCnt; //p->pvocPtr->binCnt;
  120. cmReal_t mag0V[ binCnt ];
  121. double prog = 0.1;
  122. cmReal_t b0 = 1;
  123. cmReal_t b[] = {1 };
  124. cmReal_t a[] = {p->cfg.filtCoeff};
  125. cmReal_t d[] = {0};
  126. cmReal_t maxVal = 0;
  127. cmVOR_Zero(mag0V,binCnt);
  128. // for each frame - read the next block of audio
  129. for(; fi<p->frmCnt && cmAudioFileRdRead(p->afRdPtr) != cmEofRC; ++fi )
  130. {
  131. // calc the spectrum
  132. if( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ))
  133. {
  134. unsigned i;
  135. // calc the spectral flux into sfV[fi].
  136. cmReal_t sf = 0;
  137. for(i=0; i<binCnt; ++i)
  138. {
  139. cmReal_t m1 = p->pvocPtr->magV[i] * 2.0;
  140. if( m1 > maxVal )
  141. maxVal = m1;
  142. cmReal_t dif = m1 - mag0V[i]; // calc. spectral flux
  143. if( dif > 0 )
  144. sf += dif; // accum. flux
  145. mag0V[i] = m1; // store magn. for next frame
  146. }
  147. p->sfV[fi] = sf;
  148. // filter the spectral flux
  149. switch( p->cfg.filterId)
  150. {
  151. case kNoneFiltId:
  152. break;
  153. case kSmoothFiltId:
  154. cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
  155. break;
  156. case kMedianFiltId:
  157. {
  158. cmReal_t* mfb = p->sfV + cmMax(0,fi-17);
  159. if( mfb < p->sfV-3 )
  160. p->sfV[fi] = cmVOR_Median(mfb,p->sfV-mfb);
  161. }
  162. break;
  163. default:
  164. { assert(0); }
  165. }
  166. if( fi >= prog*p->frmCnt )
  167. {
  168. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  169. prog += 0.1;
  170. }
  171. }
  172. }
  173. p->frmCnt = fi;
  174. // normalize the spectral flux vector
  175. cmReal_t mean = cmVOR_Mean(p->sfV,p->frmCnt);
  176. cmReal_t stdDev = sqrt(cmVOR_Variance(p->sfV, p->frmCnt, &mean ));
  177. unsigned detectCnt = 0;
  178. cmVOR_SubVS(p->sfV,p->frmCnt,mean);
  179. cmVOR_DivVS(p->sfV,p->frmCnt,stdDev);
  180. p->maxSf = cmVOR_Max(p->sfV,p->frmCnt,1);
  181. prog = 0.1;
  182. cmRptPrintf(p->err.rpt,"magn. max:%f flux mean:%f max:%f sd:%f\n",maxVal,mean,p->maxSf,stdDev);
  183. // Pick peaks from the onset detection function using a subset
  184. // of the rules from Dixon, 2006, Onset Detection Revisited.
  185. // locate the onsets and store them in dfV[]
  186. for(fi=0; fi<p->frmCnt; ++fi)
  187. {
  188. int bi = cmMax(0, fi - p->cfg.wndFrmCnt); // begin wnd index
  189. int ei = cmMin(p->frmCnt, fi + p->cfg.wndFrmCnt); // end wnd index
  190. int nn = ei - bi; // wnd frm cnt
  191. int wi = fi < p->cfg.wndFrmCnt ? fi : p->cfg.wndFrmCnt; // cur wnd index
  192. p->dfV[fi] = 0;
  193. // if cur index is a peak in the window
  194. if( cmVOR_MaxIndex(p->sfV + bi, nn, 1 ) == wi )
  195. {
  196. // calc an extended window going backwards in time
  197. bi = cmMax(0, fi - p->cfg.wndFrmCnt * p->cfg.preWndMult );
  198. nn = ei - bi;
  199. // if the cur value is greater than the mean of the extended window plus a threshold
  200. if( p->sfV[fi] > cmVOR_Mean(p->sfV + bi, nn ) + p->cfg.threshold )
  201. {
  202. p->dfV[fi] = p->sfV[fi];
  203. ++detectCnt;
  204. }
  205. }
  206. if( fi >= prog*p->frmCnt )
  207. {
  208. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  209. prog += 0.1;
  210. }
  211. }
  212. cmRptPrintf(p->err.rpt,"Detect Count:%i\n",detectCnt);
  213. return rc;
  214. }
  215. cmOnRC_t cmOnsetProc( cmOnH_t h, const cmOnsetCfg_t* cfg, const cmChar_t* inAudioFn )
  216. {
  217. cmOnRC_t rc = kOkOnRC;
  218. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  219. p->cfg = *cfg;
  220. // get the audio file header information
  221. if( cmAudioFileGetInfo(inAudioFn, &p->afInfo, p->err.rpt ) != kOkAfRC )
  222. {
  223. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The audio file open failed on '%s'.",cmStringNullGuard(inAudioFn));
  224. goto errLabel;
  225. }
  226. p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
  227. p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
  228. p->binCnt = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
  229. p->frmCnt = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
  230. p->sfV = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
  231. p->dfV = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
  232. p->medFiltFrmCnt = cmMax(3,floor(cfg->medFiltWndMs * p->afInfo.srate / (1000.0 * p->hopSmpCnt)));
  233. p->preDelaySmpCnt= floor(cfg->preDelayMs * p->afInfo.srate / 1000.0);
  234. cmRptPrintf(p->err.rpt,"Analysis Hop Duration: %8.2f ms %i smp\n",(double)p->hopSmpCnt*1000/p->afInfo.srate,p->hopSmpCnt);
  235. cmRptPrintf(p->err.rpt,"Median Filter Window: %8.2f ms %i frames\n",cfg->medFiltWndMs,p->medFiltFrmCnt);
  236. cmRptPrintf(p->err.rpt,"Detection Pre-delay: %8.2f ms %i smp\n",cfg->preDelayMs, p->preDelaySmpCnt);
  237. // initialize the audio file reader
  238. if( cmAudioFileRdOpen( p->afRdPtr, p->hopSmpCnt, inAudioFn, p->cfg.audioChIdx, 0, cmInvalidIdx ) != cmOkRC )
  239. {
  240. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader open failed.");
  241. goto errLabel;
  242. }
  243. // initialize the phase vocoder
  244. if( cmPvAnlInit( p->pvocPtr, p->hopSmpCnt, p->afInfo.srate, p->fftSmpCnt, p->hopSmpCnt, kNoCalcHzPvaFl ) != cmOkRC )
  245. {
  246. rc = cmErrMsg(&p->err,kDspProcFailOnRC," The phase vocoder initialization failed.");
  247. goto errLabel;
  248. }
  249. rc = _cmOnsetExec(p);
  250. errLabel:
  251. return rc;
  252. }
  253. unsigned cmOnsetCount( cmOnH_t h )
  254. {
  255. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  256. unsigned i;
  257. unsigned n = 0;
  258. for(i=0; i<p->frmCnt; ++i)
  259. if( p->dfV[i] > 0 )
  260. ++n;
  261. return n;
  262. }
  263. unsigned cmOnsetSampleIndex( cmOnH_t h, unsigned idx )
  264. {
  265. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  266. unsigned i;
  267. unsigned n = 0;
  268. for(i=0; i<p->frmCnt; ++i)
  269. if( p->dfV[i] > 0 )
  270. {
  271. if( n == idx )
  272. {
  273. unsigned r = i * p->hopSmpCnt;
  274. if( r > p->preDelaySmpCnt )
  275. return r-p->preDelaySmpCnt;
  276. return 0;
  277. }
  278. ++n;
  279. }
  280. return cmInvalidIdx;
  281. }
  282. unsigned cmOnsetHopSampleCount( cmOnH_t h )
  283. {
  284. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  285. return p->hopSmpCnt;
  286. }
  287. cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* outTextFn)
  288. {
  289. enum { kChCnt = 2 };
  290. cmOnRC_t rc = kOkOnRC;
  291. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  292. cmSample_t out0V[ p->hopSmpCnt ];
  293. cmSample_t out1V[ p->hopSmpCnt ];
  294. cmSample_t* aoutV[kChCnt];
  295. unsigned pdn = 0;
  296. aoutV[0] = out0V;
  297. aoutV[1] = out1V;
  298. // initalize the audio output file
  299. if( outAudioFn != NULL )
  300. if( cmAudioFileIsValid( p->afH = cmAudioFileNewCreate( outAudioFn, p->afInfo.srate, p->afInfo.bits, kChCnt, NULL, p->err.rpt)) == false )
  301. {
  302. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC, "The audio output file '%s' could not be opened.", outAudioFn);
  303. goto errLabel;
  304. }
  305. // open the text output file
  306. if( outTextFn != NULL )
  307. {
  308. if( cmFileOpen( &p->txH, outTextFn, kWriteFileFl, p->err.rpt ) != kOkFileRC )
  309. {
  310. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC, "The text output file '%s' could not be opened.",outTextFn);
  311. goto errLabel;
  312. }
  313. cmFilePrint(p->txH,"{\n onsetArray : \n[\n");
  314. }
  315. unsigned fi;
  316. for(fi=0; fi<p->frmCnt; ++fi)
  317. {
  318. // count of samples to write to the audio output file
  319. unsigned osn = p->hopSmpCnt;
  320. // audio channel 1 is filled with the spectral flux
  321. // initialize the out
  322. cmVOS_Fill(out1V,p->hopSmpCnt,p->sfV[fi]/p->maxSf);
  323. cmVOS_Zero(out0V,p->hopSmpCnt);
  324. if( p->dfV[fi] > 0 )
  325. {
  326. // audio channel 0 is set with the detection indicators
  327. unsigned smpIdx = fi * p->hopSmpCnt + p->hopSmpCnt/2;
  328. out0V[ p->hopSmpCnt/2 ] = p->sfV[fi]/p->maxSf;
  329. // if the pre-delay is still active
  330. if( pdn < p->preDelaySmpCnt )
  331. {
  332. osn = 0;
  333. pdn += p->hopSmpCnt;
  334. if( pdn > p->preDelaySmpCnt )
  335. osn = pdn - p->preDelaySmpCnt;
  336. }
  337. // write the output text file
  338. if( cmFileIsValid(p->txH) )
  339. if( cmFilePrintf(p->txH, "[ %i, %f ]\n", smpIdx, p->sfV[fi] ) != kOkFileRC )
  340. {
  341. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"Text output write to '%s' failed.", cmFileName(p->txH));
  342. goto errLabel;
  343. }
  344. }
  345. // write the output audio file
  346. if( osn > 0 && cmAudioFileIsValid(p->afH) )
  347. {
  348. if( cmAudioFileWriteFloat(p->afH, osn, kChCnt, aoutV ) != kOkAfRC )
  349. {
  350. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"Audio file write to '%s' failed.",cmAudioFileName(p->afH));
  351. goto errLabel;
  352. }
  353. }
  354. }
  355. // close the output audio file
  356. if( cmAudioFileDelete(&p->afH) != kOkAfRC )
  357. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"The audio file close failed.");
  358. // close the text file
  359. if( cmFileIsValid(p->txH) )
  360. {
  361. cmFilePrint(p->txH,"]\n}\n");
  362. if( cmFileClose(&p->txH) != kOkFileRC )
  363. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"The text file close failed.");
  364. }
  365. errLabel:
  366. return rc;
  367. }
  368. cmOnRC_t cmOnsetTest( cmCtx_t* c )
  369. {
  370. cmOnsetCfg_t cfg;
  371. cmOnH_t h = cmOnsetNullHandle;
  372. cmOnRC_t rc = kOkOnRC;
  373. const cmChar_t* inAudioFn = "/home/kevin/media/audio/20110723-Kriesberg/Audio Files/Piano 3_15.wav";
  374. const cmChar_t* outAudioFn = "/home/kevin/temp/ons/ons0.aif";
  375. const cmChar_t* outTextFn = "/home/kevin/temp/ons/ons0.txt";
  376. cfg.wndMs = 42;
  377. cfg.hopFact = 4;
  378. cfg.audioChIdx = 0;
  379. cfg.wndFrmCnt = 3;
  380. cfg.preWndMult = 3;
  381. cfg.threshold = 0.6;
  382. cfg.maxFrqHz = 24000;
  383. cfg.filtCoeff = -0.7;
  384. cfg.medFiltWndMs = 50;
  385. cfg.filterId = kMedianFiltId;
  386. cfg.preDelayMs = 20;
  387. if((rc = cmOnsetInitialize(c,&h)) != kOkOnRC )
  388. goto errLabel;
  389. if((rc = cmOnsetProc(h,&cfg,inAudioFn)) == kOkOnRC )
  390. cmOnsetWrite(h,outAudioFn,outTextFn);
  391. errLabel:
  392. cmOnsetFinalize(&h);
  393. return rc;
  394. }