libcm is a C development framework with an emphasis on audio signal processing applications.
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

cmOnset.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. #include "cmGlobal.h"
  2. #include "cmFloatTypes.h"
  3. #include "cmComplexTypes.h"
  4. #include "cmRpt.h"
  5. #include "cmErr.h"
  6. #include "cmCtx.h"
  7. #include "cmMem.h"
  8. #include "cmMallocDebug.h"
  9. #include "cmLinkedHeap.h"
  10. #include "cmSymTbl.h"
  11. #include "cmAudioFile.h"
  12. #include "cmMidi.h"
  13. #include "cmFile.h"
  14. #include "cmMath.h"
  15. #include "cmProcObj.h"
  16. #include "cmProcTemplateMain.h"
  17. #include "cmProc.h"
  18. #include "cmProc2.h"
  19. #include "cmVectOps.h"
  20. #include "cmOnset.h"
  21. typedef struct
  22. {
  23. cmErr_t err;
  24. cmOnsetCfg_t cfg;
  25. cmCtx* ctxPtr;
  26. cmAudioFileRd* afRdPtr;
  27. cmPvAnl* pvocPtr;
  28. cmAudioFileH_t afH; // output audio file
  29. cmFileH_t txH; // output text file
  30. unsigned frmCnt; // spectral frame count
  31. cmReal_t* sfV; // sfV[frmCnt] spectral flux vector
  32. cmReal_t* dfV; // dfV[frmCnt] onset function vector
  33. cmReal_t maxSf;
  34. cmAudioFileInfo_t afInfo;
  35. unsigned fftSmpCnt;
  36. unsigned hopSmpCnt;
  37. unsigned binCnt;
  38. unsigned medFiltFrmCnt;
  39. unsigned preDelaySmpCnt;
  40. } _cmOn_t;
  41. cmOnH_t cmOnsetNullHandle = cmSTATIC_NULL_HANDLE;
  42. _cmOn_t* _cmOnsetHandleToPtr( cmOnH_t h )
  43. {
  44. _cmOn_t* p = (_cmOn_t*)h.h;
  45. assert(p!=NULL);
  46. return p;
  47. }
  48. cmOnRC_t _cmOnsetFinalize( _cmOn_t* p )
  49. {
  50. cmOnRC_t rc = kOkOnRC;
  51. if( cmPvAnlFree(&p->pvocPtr) != cmOkRC )
  52. {
  53. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Phase voocoder free failed.");
  54. goto errLabel;
  55. }
  56. if( cmAudioFileRdFree(&p->afRdPtr) != cmOkRC )
  57. {
  58. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Audio file reader failed.");
  59. goto errLabel;
  60. }
  61. if( cmCtxFree(&p->ctxPtr) != cmOkRC )
  62. {
  63. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Context proc failed.");
  64. goto errLabel;
  65. }
  66. cmMemPtrFree(&p->sfV);
  67. cmMemPtrFree(&p->dfV);
  68. cmMemPtrFree(&p);
  69. errLabel:
  70. return rc;
  71. }
  72. cmOnRC_t cmOnsetInitialize( cmCtx_t* c, cmOnH_t* hp )
  73. {
  74. cmOnRC_t rc;
  75. if((rc = cmOnsetFinalize(hp)) != kOkOnRC )
  76. return rc;
  77. _cmOn_t* p = cmMemAllocZ(_cmOn_t,1);
  78. cmErrSetup(&p->err,&c->rpt,"Onset");
  79. // create the proc context object
  80. if((p->ctxPtr = cmCtxAlloc(NULL,&c->rpt,cmLHeapNullHandle,cmSymTblNullHandle)) == NULL )
  81. {
  82. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The ctx compoenent allocation failed.");
  83. goto errLabel;
  84. }
  85. // create the audio file reader
  86. if((p->afRdPtr = cmAudioFileRdAlloc( p->ctxPtr, NULL, 0, NULL, cmInvalidIdx, 0, cmInvalidIdx )) == NULL )
  87. {
  88. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader allocation failed.");
  89. goto errLabel;
  90. }
  91. // create the phase vocoder
  92. if((p->pvocPtr = cmPvAnlAlloc( p->ctxPtr, NULL, 0, 0, 0, 0, 0 )) == NULL )
  93. {
  94. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The phase vocoder allocation failed.");
  95. goto errLabel;
  96. }
  97. hp->h = p;
  98. errLabel:
  99. if( rc != kOkOnRC )
  100. _cmOnsetFinalize(p);
  101. return rc;
  102. }
  103. cmOnRC_t cmOnsetFinalize( cmOnH_t* hp )
  104. {
  105. cmOnRC_t rc = kOkOnRC;
  106. if( hp==NULL || cmOnsetIsValid(*hp)==false )
  107. return kOkOnRC;
  108. _cmOn_t* p = _cmOnsetHandleToPtr(*hp);
  109. rc = _cmOnsetFinalize(p);
  110. return rc;
  111. }
  112. bool cmOnsetIsValid( cmOnH_t h )
  113. { return h.h!=NULL; }
  114. cmOnRC_t _cmOnsetExec( _cmOn_t* p )
  115. {
  116. cmOnRC_t rc = kOkOnRC;
  117. int fi = 0;
  118. unsigned binCnt = p->binCnt; //p->pvocPtr->binCnt;
  119. cmReal_t mag0V[ binCnt ];
  120. double prog = 0.1;
  121. cmReal_t b0 = 1;
  122. cmReal_t b[] = {1 };
  123. cmReal_t a[] = {p->cfg.filtCoeff};
  124. cmReal_t d[] = {0};
  125. cmReal_t maxVal = 0;
  126. cmVOR_Zero(mag0V,binCnt);
  127. // for each frame - read the next block of audio
  128. for(; fi<p->frmCnt && cmAudioFileRdRead(p->afRdPtr) != cmEofRC; ++fi )
  129. {
  130. // calc the spectrum
  131. if( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ))
  132. {
  133. unsigned i;
  134. // calc the spectral flux into sfV[fi].
  135. cmReal_t sf = 0;
  136. for(i=0; i<binCnt; ++i)
  137. {
  138. cmReal_t m1 = p->pvocPtr->magV[i] * 2.0;
  139. if( m1 > maxVal )
  140. maxVal = m1;
  141. cmReal_t dif = m1 - mag0V[i]; // calc. spectral flux
  142. if( dif > 0 )
  143. sf += dif; // accum. flux
  144. mag0V[i] = m1; // store magn. for next frame
  145. }
  146. p->sfV[fi] = sf;
  147. // filter the spectral flux
  148. switch( p->cfg.filterId)
  149. {
  150. case kNoneFiltId:
  151. break;
  152. case kSmoothFiltId:
  153. cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
  154. break;
  155. case kMedianFiltId:
  156. {
  157. cmReal_t* mfb = p->sfV + cmMax(0,fi-17);
  158. if( mfb < p->sfV-3 )
  159. p->sfV[fi] = cmVOR_Median(mfb,p->sfV-mfb);
  160. }
  161. break;
  162. default:
  163. { assert(0); }
  164. }
  165. if( fi >= prog*p->frmCnt )
  166. {
  167. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  168. prog += 0.1;
  169. }
  170. }
  171. }
  172. p->frmCnt = fi;
  173. // normalize the spectral flux vector
  174. cmReal_t mean = cmVOR_Mean(p->sfV,p->frmCnt);
  175. cmReal_t stdDev = sqrt(cmVOR_Variance(p->sfV, p->frmCnt, &mean ));
  176. unsigned detectCnt = 0;
  177. cmVOR_SubVS(p->sfV,p->frmCnt,mean);
  178. cmVOR_DivVS(p->sfV,p->frmCnt,stdDev);
  179. p->maxSf = cmVOR_Max(p->sfV,p->frmCnt,1);
  180. prog = 0.1;
  181. cmRptPrintf(p->err.rpt,"magn. max:%f flux mean:%f max:%f sd:%f\n",maxVal,mean,p->maxSf,stdDev);
  182. // Pick peaks from the onset detection function using a subset
  183. // of the rules from Dixon, 2006, Onset Detection Revisited.
  184. // locate the onsets and store them in dfV[]
  185. for(fi=0; fi<p->frmCnt; ++fi)
  186. {
  187. int bi = cmMax(0, fi - p->cfg.wndFrmCnt); // begin wnd index
  188. int ei = cmMin(p->frmCnt, fi + p->cfg.wndFrmCnt); // end wnd index
  189. int nn = ei - bi; // wnd frm cnt
  190. int wi = fi < p->cfg.wndFrmCnt ? fi : p->cfg.wndFrmCnt; // cur wnd index
  191. p->dfV[fi] = 0;
  192. // if cur index is a peak in the window
  193. if( cmVOR_MaxIndex(p->sfV + bi, nn, 1 ) == wi )
  194. {
  195. // calc an extended window going backwards in time
  196. bi = cmMax(0, fi - p->cfg.wndFrmCnt * p->cfg.preWndMult );
  197. nn = ei - bi;
  198. // if the cur value is greater than the mean of the extended window plus a threshold
  199. if( p->sfV[fi] > cmVOR_Mean(p->sfV + bi, nn ) + p->cfg.threshold )
  200. {
  201. p->dfV[fi] = p->sfV[fi];
  202. ++detectCnt;
  203. }
  204. }
  205. if( fi >= prog*p->frmCnt )
  206. {
  207. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  208. prog += 0.1;
  209. }
  210. }
  211. cmRptPrintf(p->err.rpt,"Detect Count:%i\n",detectCnt);
  212. return rc;
  213. }
  214. cmOnRC_t cmOnsetProc( cmOnH_t h, const cmOnsetCfg_t* cfg, const cmChar_t* inAudioFn )
  215. {
  216. cmOnRC_t rc = kOkOnRC;
  217. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  218. p->cfg = *cfg;
  219. // get the audio file header information
  220. if( cmAudioFileGetInfo(inAudioFn, &p->afInfo, p->err.rpt ) != kOkAfRC )
  221. {
  222. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The audio file open failed on '%s'.",cmStringNullGuard(inAudioFn));
  223. goto errLabel;
  224. }
  225. p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
  226. p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
  227. p->binCnt = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
  228. p->frmCnt = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
  229. p->sfV = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
  230. p->dfV = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
  231. p->medFiltFrmCnt = cmMax(3,floor(cfg->medFiltWndMs * p->afInfo.srate / (1000.0 * p->hopSmpCnt)));
  232. p->preDelaySmpCnt= floor(cfg->preDelayMs * p->afInfo.srate / 1000.0);
  233. cmRptPrintf(p->err.rpt,"Analysis Hop Duration: %8.2f ms %i smp\n",(double)p->hopSmpCnt*1000/p->afInfo.srate,p->hopSmpCnt);
  234. cmRptPrintf(p->err.rpt,"Median Filter Window: %8.2f ms %i frames\n",cfg->medFiltWndMs,p->medFiltFrmCnt);
  235. cmRptPrintf(p->err.rpt,"Detection Pre-delay: %8.2f ms %i smp\n",cfg->preDelayMs, p->preDelaySmpCnt);
  236. // initialize the audio file reader
  237. if( cmAudioFileRdOpen( p->afRdPtr, p->hopSmpCnt, inAudioFn, p->cfg.audioChIdx, 0, cmInvalidIdx ) != cmOkRC )
  238. {
  239. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader open failed.");
  240. goto errLabel;
  241. }
  242. // initialize the phase vocoder
  243. if( cmPvAnlInit( p->pvocPtr, p->hopSmpCnt, p->afInfo.srate, p->fftSmpCnt, p->hopSmpCnt, kNoCalcHzPvaFl ) != cmOkRC )
  244. {
  245. rc = cmErrMsg(&p->err,kDspProcFailOnRC," The phase vocoder initialization failed.");
  246. goto errLabel;
  247. }
  248. rc = _cmOnsetExec(p);
  249. errLabel:
  250. return rc;
  251. }
  252. unsigned cmOnsetCount( cmOnH_t h )
  253. {
  254. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  255. unsigned i;
  256. unsigned n = 0;
  257. for(i=0; i<p->frmCnt; ++i)
  258. if( p->dfV[i] > 0 )
  259. ++n;
  260. return n;
  261. }
  262. unsigned cmOnsetSampleIndex( cmOnH_t h, unsigned idx )
  263. {
  264. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  265. unsigned i;
  266. unsigned n = 0;
  267. for(i=0; i<p->frmCnt; ++i)
  268. if( p->dfV[i] > 0 )
  269. {
  270. if( n == idx )
  271. {
  272. unsigned r = i * p->hopSmpCnt;
  273. if( r > p->preDelaySmpCnt )
  274. return r-p->preDelaySmpCnt;
  275. return 0;
  276. }
  277. ++n;
  278. }
  279. return cmInvalidIdx;
  280. }
  281. unsigned cmOnsetHopSampleCount( cmOnH_t h )
  282. {
  283. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  284. return p->hopSmpCnt;
  285. }
  286. cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* outTextFn)
  287. {
  288. enum { kChCnt = 2 };
  289. cmOnRC_t rc = kOkOnRC;
  290. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  291. cmSample_t out0V[ p->hopSmpCnt ];
  292. cmSample_t out1V[ p->hopSmpCnt ];
  293. cmSample_t* aoutV[kChCnt];
  294. unsigned pdn = 0;
  295. aoutV[0] = out0V;
  296. aoutV[1] = out1V;
  297. // initalize the audio output file
  298. if( outAudioFn != NULL )
  299. if( cmAudioFileIsValid( p->afH = cmAudioFileNewCreate( outAudioFn, p->afInfo.srate, p->afInfo.bits, kChCnt, NULL, p->err.rpt)) == false )
  300. {
  301. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC, "The audio output file '%s' could not be opened.", outAudioFn);
  302. goto errLabel;
  303. }
  304. // open the text output file
  305. if( outTextFn != NULL )
  306. {
  307. if( cmFileOpen( &p->txH, outTextFn, kWriteFileFl, p->err.rpt ) != kOkFileRC )
  308. {
  309. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC, "The text output file '%s' could not be opened.",outTextFn);
  310. goto errLabel;
  311. }
  312. cmFilePrint(p->txH,"{\n onsetArray : \n[\n");
  313. }
  314. unsigned fi;
  315. for(fi=0; fi<p->frmCnt; ++fi)
  316. {
  317. // count of samples to write to the audio output file
  318. unsigned osn = p->hopSmpCnt;
  319. // audio channel 1 is filled with the spectral flux
  320. // initialize the out
  321. cmVOS_Fill(out1V,p->hopSmpCnt,p->sfV[fi]/p->maxSf);
  322. cmVOS_Zero(out0V,p->hopSmpCnt);
  323. if( p->dfV[fi] > 0 )
  324. {
  325. // audio channel 0 is set with the detection indicators
  326. unsigned smpIdx = fi * p->hopSmpCnt + p->hopSmpCnt/2;
  327. out0V[ p->hopSmpCnt/2 ] = p->sfV[fi]/p->maxSf;
  328. // if the pre-delay is still active
  329. if( pdn < p->preDelaySmpCnt )
  330. {
  331. osn = 0;
  332. pdn += p->hopSmpCnt;
  333. if( pdn > p->preDelaySmpCnt )
  334. osn = pdn - p->preDelaySmpCnt;
  335. }
  336. // write the output text file
  337. if( cmFileIsValid(p->txH) )
  338. if( cmFilePrintf(p->txH, "[ %i, %f ]\n", smpIdx, p->sfV[fi] ) != kOkFileRC )
  339. {
  340. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"Text output write to '%s' failed.", cmFileName(p->txH));
  341. goto errLabel;
  342. }
  343. }
  344. // write the output audio file
  345. if( osn > 0 && cmAudioFileIsValid(p->afH) )
  346. {
  347. if( cmAudioFileWriteFloat(p->afH, osn, kChCnt, aoutV ) != kOkAfRC )
  348. {
  349. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"Audio file write to '%s' failed.",cmAudioFileName(p->afH));
  350. goto errLabel;
  351. }
  352. }
  353. }
  354. // close the output audio file
  355. if( cmAudioFileDelete(&p->afH) != kOkAfRC )
  356. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"The audio file close failed.");
  357. // close the text file
  358. if( cmFileIsValid(p->txH) )
  359. {
  360. cmFilePrint(p->txH,"]\n}\n");
  361. if( cmFileClose(&p->txH) != kOkFileRC )
  362. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"The text file close failed.");
  363. }
  364. errLabel:
  365. return rc;
  366. }
  367. cmOnRC_t cmOnsetTest( cmCtx_t* c )
  368. {
  369. cmOnsetCfg_t cfg;
  370. cmOnH_t h = cmOnsetNullHandle;
  371. cmOnRC_t rc = kOkOnRC;
  372. const cmChar_t* inAudioFn = "/home/kevin/media/audio/20110723-Kriesberg/Audio Files/Piano 3_15.wav";
  373. const cmChar_t* outAudioFn = "/home/kevin/temp/ons/ons0.aif";
  374. const cmChar_t* outTextFn = "/home/kevin/temp/ons/ons0.txt";
  375. cfg.wndMs = 42;
  376. cfg.hopFact = 4;
  377. cfg.audioChIdx = 0;
  378. cfg.wndFrmCnt = 3;
  379. cfg.preWndMult = 3;
  380. cfg.threshold = 0.6;
  381. cfg.maxFrqHz = 24000;
  382. cfg.filtCoeff = -0.7;
  383. cfg.medFiltWndMs = 50;
  384. cfg.filterId = kMedianFiltId;
  385. cfg.preDelayMs = 20;
  386. if((rc = cmOnsetInitialize(c,&h)) != kOkOnRC )
  387. goto errLabel;
  388. if((rc = cmOnsetProc(h,&cfg,inAudioFn)) == kOkOnRC )
  389. cmOnsetWrite(h,outAudioFn,outTextFn);
  390. errLabel:
  391. cmOnsetFinalize(&h);
  392. return rc;
  393. }