libcm is a C development framework with an emphasis on audio signal processing applications.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

cmOnset.c 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. #include "cmGlobal.h"
  2. #include "cmFloatTypes.h"
  3. #include "cmComplexTypes.h"
  4. #include "cmRpt.h"
  5. #include "cmErr.h"
  6. #include "cmCtx.h"
  7. #include "cmMem.h"
  8. #include "cmMallocDebug.h"
  9. #include "cmLinkedHeap.h"
  10. #include "cmSymTbl.h"
  11. #include "cmAudioFile.h"
  12. #include "cmMidi.h"
  13. #include "cmFile.h"
  14. #include "cmMath.h"
  15. #include "cmProcObj.h"
  16. #include "cmProcTemplateMain.h"
  17. #include "cmProc.h"
  18. #include "cmProc2.h"
  19. #include "cmVectOps.h"
  20. #include "cmOnset.h"
  21. typedef struct
  22. {
  23. cmErr_t err;
  24. cmOnsetCfg_t cfg;
  25. cmCtx* ctxPtr;
  26. cmAudioFileRd* afRdPtr;
  27. cmPvAnl* pvocPtr;
  28. cmAudioFileH_t afH; // output audio file
  29. cmFileH_t txH; // output text file
  30. unsigned frmCnt; // spectral frame count
  31. cmReal_t* sfV; // sfV[frmCnt] spectral flux vector
  32. cmReal_t* dfV; // dfV[frmCnt] onset function vector
  33. cmAudioFileInfo_t afInfo;
  34. unsigned fftSmpCnt;
  35. unsigned hopSmpCnt;
  36. unsigned binCnt;
  37. } _cmOn_t;
  38. cmOnH_t cmOnsetNullHandle = cmSTATIC_NULL_HANDLE;
  39. _cmOn_t* _cmOnsetHandleToPtr( cmOnH_t h )
  40. {
  41. _cmOn_t* p = (_cmOn_t*)h.h;
  42. assert(p!=NULL);
  43. return p;
  44. }
  45. cmOnRC_t _cmOnsetFinalize( _cmOn_t* p )
  46. {
  47. cmOnRC_t rc = kOkOnRC;
  48. if( cmPvAnlFree(&p->pvocPtr) != cmOkRC )
  49. {
  50. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Phase voocoder free failed.");
  51. goto errLabel;
  52. }
  53. if( cmAudioFileRdFree(&p->afRdPtr) != cmOkRC )
  54. {
  55. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Audio file reader failed.");
  56. goto errLabel;
  57. }
  58. if( cmCtxFree(&p->ctxPtr) != cmOkRC )
  59. {
  60. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"Context proc failed.");
  61. goto errLabel;
  62. }
  63. cmMemPtrFree(&p->sfV);
  64. cmMemPtrFree(&p->dfV);
  65. cmMemPtrFree(&p);
  66. errLabel:
  67. return rc;
  68. }
  69. cmOnRC_t cmOnsetInitialize( cmCtx_t* c, cmOnH_t* hp )
  70. {
  71. cmOnRC_t rc;
  72. if((rc = cmOnsetFinalize(hp)) != kOkOnRC )
  73. return rc;
  74. _cmOn_t* p = cmMemAllocZ(_cmOn_t,1);
  75. cmErrSetup(&p->err,&c->rpt,"Onset");
  76. // create the proc context object
  77. if((p->ctxPtr = cmCtxAlloc(NULL,&c->rpt,cmLHeapNullHandle,cmSymTblNullHandle)) == NULL )
  78. {
  79. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The ctx compoenent allocation failed.");
  80. goto errLabel;
  81. }
  82. // create the audio file reader
  83. if((p->afRdPtr = cmAudioFileRdAlloc( p->ctxPtr, NULL, 0, NULL, cmInvalidIdx, 0, cmInvalidIdx )) == NULL )
  84. {
  85. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader allocation failed.");
  86. goto errLabel;
  87. }
  88. // create the phase vocoder
  89. if((p->pvocPtr = cmPvAnlAlloc( p->ctxPtr, NULL, 0, 0, 0, 0, 0 )) == NULL )
  90. {
  91. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The phase vocoder allocation failed.");
  92. goto errLabel;
  93. }
  94. hp->h = p;
  95. errLabel:
  96. if( rc != kOkOnRC )
  97. _cmOnsetFinalize(p);
  98. return rc;
  99. }
  100. cmOnRC_t cmOnsetFinalize( cmOnH_t* hp )
  101. {
  102. cmOnRC_t rc = kOkOnRC;
  103. if( hp==NULL || cmOnsetIsValid(*hp)==false )
  104. return kOkOnRC;
  105. _cmOn_t* p = _cmOnsetHandleToPtr(*hp);
  106. rc = _cmOnsetFinalize(p);
  107. return rc;
  108. }
  109. bool cmOnsetIsValid( cmOnH_t h )
  110. { return h.h!=NULL; }
  111. cmOnRC_t _cmOnsetExec( _cmOn_t* p, unsigned chCnt )
  112. {
  113. cmOnRC_t rc = kOkOnRC;
  114. int fi = 0;
  115. unsigned binCnt = p->binCnt; //p->pvocPtr->binCnt;
  116. cmReal_t mag0V[ binCnt ];
  117. cmSample_t out0V[ p->hopSmpCnt ];
  118. cmSample_t out1V[ p->hopSmpCnt ];
  119. cmSample_t* aoutV[chCnt];
  120. double prog = 0.1;
  121. cmReal_t b0 = 1;
  122. cmReal_t b[] = {1 };
  123. cmReal_t a[] = {p->cfg.filtCoeff};
  124. cmReal_t d[] = {0};
  125. cmReal_t maxVal = 0;
  126. if( chCnt > 0 )
  127. aoutV[0] = out0V;
  128. if( chCnt > 1 )
  129. aoutV[1] = out1V;
  130. cmVOR_Zero(mag0V,binCnt);
  131. // for each frame - read the next block of audio
  132. for(; fi<p->frmCnt && cmAudioFileRdRead(p->afRdPtr) != cmEofRC; ++fi )
  133. {
  134. // calc the spectrum
  135. while( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ) )
  136. {
  137. unsigned i;
  138. // calc the spectral flux into sfV[fi].
  139. cmReal_t sf = 0;
  140. for(i=0; i<binCnt; ++i)
  141. {
  142. cmReal_t m1 = p->pvocPtr->magV[i] * 2.0;
  143. if( m1 > maxVal )
  144. maxVal = m1;
  145. cmReal_t dif = m1 - mag0V[i]; // calc. spectral flux
  146. if( dif > 0 )
  147. sf += dif; // accum. flux
  148. mag0V[i] = m1; // store magn. for next frame
  149. }
  150. p->sfV[fi] = sf;
  151. // filter the spectral flux
  152. cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
  153. if( fi >= prog*p->frmCnt )
  154. {
  155. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  156. prog += 0.1;
  157. }
  158. }
  159. }
  160. p->frmCnt = fi;
  161. // normalize the spectral flux vector
  162. cmReal_t mean = cmVOR_Mean(p->sfV,p->frmCnt);
  163. cmReal_t stdDev = sqrt(cmVOR_Variance(p->sfV, p->frmCnt, &mean ));
  164. cmVOR_SubVS(p->sfV,p->frmCnt,mean);
  165. cmVOR_DivVS(p->sfV,p->frmCnt,stdDev);
  166. cmReal_t maxSf = cmVOR_Max(p->sfV,p->frmCnt,1);
  167. prog = 0.1;
  168. printf("max:%f ",maxVal);
  169. printf("mean:%f max:%f sd:%f\n",mean,maxSf,stdDev);
  170. // Pick peaks from the onset detection function using a subset
  171. // of the rules from Dixon, 2006, Onset Detection Revisited.
  172. // locate the onsets and store them in dfV[]
  173. for(fi=0; fi<p->frmCnt; ++fi)
  174. {
  175. int bi = cmMax(0, fi - p->cfg.wndFrmCnt); // begin wnd index
  176. int ei = cmMin(p->frmCnt, fi + p->cfg.wndFrmCnt); // end wnd index
  177. int nn = ei - bi; // wnd frm cnt
  178. int wi = fi < p->cfg.wndFrmCnt ? fi : p->cfg.wndFrmCnt; // cur wnd index
  179. // initialize the out
  180. cmVOS_Fill(out1V,p->hopSmpCnt,p->sfV[fi]/maxSf);
  181. cmVOS_Zero(out0V,p->hopSmpCnt);
  182. p->dfV[fi] = 0;
  183. // if cur index is a peak in the window
  184. if( cmVOR_MaxIndex(p->sfV + bi, nn, 1 ) == wi )
  185. {
  186. // calc an extended window going backwards in time
  187. bi = cmMax(0, fi - p->cfg.wndFrmCnt * p->cfg.preWndMult );
  188. nn = ei - bi;
  189. // if the cur value is greater than the mean of the extended window plus a threshold
  190. if( p->sfV[fi] > cmVOR_Mean(p->sfV + bi, nn ) + p->cfg.threshold )
  191. {
  192. p->dfV[fi] = p->sfV[fi];
  193. out0V[ p->hopSmpCnt/2 ] = p->sfV[fi]/maxSf;
  194. unsigned smpIdx = fi * p->hopSmpCnt + p->hopSmpCnt/2;
  195. // write the output text file
  196. if( cmFilePrintf(p->txH, "[ %i, %f ]\n", smpIdx, p->sfV[fi] ) != kOkFileRC )
  197. {
  198. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"Text output write to '%s' failed.", cmFileName(p->txH));
  199. goto errLabel;
  200. }
  201. }
  202. }
  203. // write the output audio file
  204. if( cmAudioFileWriteFloat(p->afH, p->hopSmpCnt, chCnt, aoutV ) != kOkAfRC )
  205. {
  206. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"Audio file write to '%s' failed.",cmAudioFileName(p->afH));
  207. goto errLabel;
  208. }
  209. if( fi >= prog*p->frmCnt )
  210. {
  211. cmRptPrintf(p->err.rpt,"%i ",lround(prog*10));
  212. prog += 0.1;
  213. }
  214. }
  215. errLabel:
  216. return rc;
  217. }
  218. cmOnRC_t cmOnsetExec( cmOnH_t h, const cmOnsetCfg_t* cfg, const cmChar_t* inAudioFn, const cmChar_t* outAudioFn, const cmChar_t* outTextFn )
  219. {
  220. cmOnRC_t rc = kOkOnRC;
  221. _cmOn_t* p = _cmOnsetHandleToPtr(h);
  222. unsigned audioOutChCnt = 2;
  223. p->cfg = *cfg;
  224. // get the audio file header information
  225. if( cmAudioFileGetInfo(inAudioFn, &p->afInfo, p->err.rpt ) != kOkAfRC )
  226. {
  227. rc = cmErrMsg(&p->err,kDspProcFailOnRC,"The audio file open failed on '%s'.",cmStringNullGuard(inAudioFn));
  228. goto errLabel;
  229. }
  230. p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
  231. p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
  232. p->binCnt = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
  233. p->frmCnt = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
  234. p->sfV = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
  235. p->dfV = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
  236. // initialize the audio file reader
  237. if( cmAudioFileRdOpen( p->afRdPtr, p->hopSmpCnt, inAudioFn, p->cfg.audioChIdx, 0, cmInvalidIdx ) != cmOkRC )
  238. {
  239. rc = cmErrMsg(&p->err,kDspProcFailOnRC, "The audio file reader open failed.");
  240. goto errLabel;
  241. }
  242. // initialize the phase vocoder
  243. if( cmPvAnlInit( p->pvocPtr, p->hopSmpCnt, p->afInfo.srate, p->fftSmpCnt, p->hopSmpCnt, kNoCalcHzPvaFl ) != cmOkRC )
  244. {
  245. rc = cmErrMsg(&p->err,kDspProcFailOnRC," The phase vocoder initialization failed.");
  246. goto errLabel;
  247. }
  248. // initalize the audio output file
  249. if( outAudioFn != NULL )
  250. if( cmAudioFileIsValid( p->afH = cmAudioFileNewCreate( outAudioFn, p->afInfo.srate, p->afInfo.bits, audioOutChCnt, NULL, p->err.rpt)) == false )
  251. {
  252. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC, "The audio output file '%s' could not be opened.", outAudioFn);
  253. goto errLabel;
  254. }
  255. // open the text output file
  256. if( outTextFn != NULL )
  257. {
  258. if( cmFileOpen( &p->txH, outTextFn, kWriteFileFl, p->err.rpt ) != kOkFileRC )
  259. {
  260. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC, "The text output file '%s' could not be opened.",outTextFn);
  261. goto errLabel;
  262. }
  263. cmFilePrint(p->txH,"{\n onsetArray : \n[\n");
  264. }
  265. rc = _cmOnsetExec(p,audioOutChCnt);
  266. errLabel:
  267. // close the output audio file
  268. if( cmAudioFileDelete(&p->afH) != kOkAfRC )
  269. rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"The audio file close failed.");
  270. // close the text file
  271. if( cmFileIsValid(p->txH) )
  272. {
  273. cmFilePrint(p->txH,"]\n}\n");
  274. if( cmFileClose(&p->txH) != kOkFileRC )
  275. rc = cmErrMsg(&p->err,kDspTextFileFailOnRC,"The text file close failed.");
  276. }
  277. return rc;
  278. }
  279. cmOnRC_t cmOnsetTest( cmCtx_t* c )
  280. {
  281. cmOnsetCfg_t cfg;
  282. cmOnH_t h = cmOnsetNullHandle;
  283. cmOnRC_t rc = kOkOnRC;
  284. const cmChar_t* inAudioFn = "/home/kevin/temp/onset0.wav";
  285. const cmChar_t* outAudioFn = "/home/kevin/temp/mas/mas0.aif";
  286. const cmChar_t* outTextFn = "/home/kevin/temp/mas/mas0.txt";
  287. cfg.wndMs = 42;
  288. cfg.hopFact = 4;
  289. cfg.audioChIdx = 0;
  290. cfg.wndFrmCnt = 3;
  291. cfg.preWndMult = 3;
  292. cfg.threshold = 0.6;
  293. cfg.maxFrqHz = 24000;
  294. cfg.filtCoeff = -0.7;
  295. if((rc = cmOnsetInitialize(c,&h)) != kOkOnRC )
  296. goto errLabel;
  297. rc = cmOnsetExec(h,&cfg,inAudioFn,outAudioFn,outTextFn);
  298. errLabel:
  299. cmOnsetFinalize(&h);
  300. return rc;
  301. }