|
@@ -42,7 +42,8 @@ typedef struct
|
42
|
42
|
unsigned fftSmpCnt;
|
43
|
43
|
unsigned hopSmpCnt;
|
44
|
44
|
unsigned binCnt;
|
45
|
|
-
|
|
45
|
+ unsigned medFiltFrmCnt;
|
|
46
|
+ unsigned preDelaySmpCnt;
|
46
|
47
|
} _cmOn_t;
|
47
|
48
|
|
48
|
49
|
cmOnH_t cmOnsetNullHandle = cmSTATIC_NULL_HANDLE;
|
|
@@ -160,7 +161,7 @@ cmOnRC_t _cmOnsetExec( _cmOn_t* p )
|
160
|
161
|
for(; fi<p->frmCnt && cmAudioFileRdRead(p->afRdPtr) != cmEofRC; ++fi )
|
161
|
162
|
{
|
162
|
163
|
// calc the spectrum
|
163
|
|
- while( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ) )
|
|
164
|
+ if( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ))
|
164
|
165
|
{
|
165
|
166
|
unsigned i;
|
166
|
167
|
|
|
@@ -182,7 +183,23 @@ cmOnRC_t _cmOnsetExec( _cmOn_t* p )
|
182
|
183
|
p->sfV[fi] = sf;
|
183
|
184
|
|
184
|
185
|
// filter the spectral flux
|
185
|
|
- cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
|
|
186
|
+ switch( p->cfg.filterId)
|
|
187
|
+ {
|
|
188
|
+ case kNoneFiltId:
|
|
189
|
+ break;
|
|
190
|
+ case kSmoothFiltId:
|
|
191
|
+ cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
|
|
192
|
+ break;
|
|
193
|
+ case kMedianFiltId:
|
|
194
|
+ {
|
|
195
|
+ cmReal_t* mfb = p->sfV + cmMax(0,fi-17);
|
|
196
|
+ if( mfb < p->sfV-3 )
|
|
197
|
+ p->sfV[fi] = cmVOR_Median(mfb,p->sfV-mfb);
|
|
198
|
+ }
|
|
199
|
+ break;
|
|
200
|
+ default:
|
|
201
|
+ { assert(0); }
|
|
202
|
+ }
|
186
|
203
|
|
187
|
204
|
if( fi >= prog*p->frmCnt )
|
188
|
205
|
{
|
|
@@ -228,8 +245,7 @@ cmOnRC_t _cmOnsetExec( _cmOn_t* p )
|
228
|
245
|
// if the cur value is greater than the mean of the extended window plus a threshold
|
229
|
246
|
if( p->sfV[fi] > cmVOR_Mean(p->sfV + bi, nn ) + p->cfg.threshold )
|
230
|
247
|
{
|
231
|
|
- p->dfV[fi] = p->sfV[fi];
|
232
|
|
-
|
|
248
|
+ p->dfV[fi] = p->sfV[fi];
|
233
|
249
|
}
|
234
|
250
|
}
|
235
|
251
|
|
|
@@ -258,12 +274,18 @@ cmOnRC_t cmOnsetProc( cmOnH_t h, const cmOnsetCfg_t* cfg, const cmChar_t* inAudi
|
258
|
274
|
goto errLabel;
|
259
|
275
|
}
|
260
|
276
|
|
261
|
|
- p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
|
262
|
|
- p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
|
263
|
|
- p->binCnt = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
|
264
|
|
- p->frmCnt = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
|
265
|
|
- p->sfV = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
|
266
|
|
- p->dfV = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
|
|
277
|
+ p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
|
|
278
|
+ p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
|
|
279
|
+ p->binCnt = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
|
|
280
|
+ p->frmCnt = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
|
|
281
|
+ p->sfV = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
|
|
282
|
+ p->dfV = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
|
|
283
|
+ p->medFiltFrmCnt = cmMax(3,floor(cfg->medFiltWndMs * p->afInfo.srate / (1000.0 * p->hopSmpCnt)));
|
|
284
|
+ p->preDelaySmpCnt= floor(cfg->preDelayMs * p->afInfo.srate / 1000.0);
|
|
285
|
+
|
|
286
|
+ cmRptPrintf(p->err.rpt,"Analysis Hop Duration: %8.2f ms %i smp\n",(double)p->hopSmpCnt*1000/p->afInfo.srate,p->hopSmpCnt);
|
|
287
|
+ cmRptPrintf(p->err.rpt,"Median Filter Window: %8.2f ms %i frames\n",cfg->medFiltWndMs,p->medFiltFrmCnt);
|
|
288
|
+ cmRptPrintf(p->err.rpt,"Detection Pre-delay: %8.2f ms %i smp\n",cfg->preDelayMs, p->preDelaySmpCnt);
|
267
|
289
|
|
268
|
290
|
// initialize the audio file reader
|
269
|
291
|
if( cmAudioFileRdOpen( p->afRdPtr, p->hopSmpCnt, inAudioFn, p->cfg.audioChIdx, 0, cmInvalidIdx ) != cmOkRC )
|
|
@@ -295,6 +317,7 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
|
295
|
317
|
cmSample_t out0V[ p->hopSmpCnt ];
|
296
|
318
|
cmSample_t out1V[ p->hopSmpCnt ];
|
297
|
319
|
cmSample_t* aoutV[kChCnt];
|
|
320
|
+ unsigned pdn = 0;
|
298
|
321
|
|
299
|
322
|
aoutV[0] = out0V;
|
300
|
323
|
aoutV[1] = out1V;
|
|
@@ -319,12 +342,12 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
|
319
|
342
|
cmFilePrint(p->txH,"{\n onsetArray : \n[\n");
|
320
|
343
|
}
|
321
|
344
|
|
322
|
|
- // rewind the audio file
|
323
|
|
- cmAudioFileRdSeek(p->afRdPtr,0);
|
324
|
|
-
|
325
|
345
|
unsigned fi;
|
326
|
346
|
for(fi=0; fi<p->frmCnt; ++fi)
|
327
|
347
|
{
|
|
348
|
+ // count of samples to write to the audio output file
|
|
349
|
+ unsigned osn = p->hopSmpCnt;
|
|
350
|
+
|
328
|
351
|
// audio channel 1 is filled with the spectral flux
|
329
|
352
|
// initialize the out
|
330
|
353
|
cmVOS_Fill(out1V,p->hopSmpCnt,p->sfV[fi]/p->maxSf);
|
|
@@ -337,6 +360,18 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
|
337
|
360
|
out0V[ p->hopSmpCnt/2 ] = p->sfV[fi]/p->maxSf;
|
338
|
361
|
|
339
|
362
|
|
|
363
|
+ // if the pre-delay is still active
|
|
364
|
+ if( pdn < p->preDelaySmpCnt )
|
|
365
|
+ {
|
|
366
|
+ osn = 0;
|
|
367
|
+
|
|
368
|
+ pdn += p->hopSmpCnt;
|
|
369
|
+
|
|
370
|
+ if( pdn > p->preDelaySmpCnt )
|
|
371
|
+ osn = pdn - p->preDelaySmpCnt;
|
|
372
|
+ }
|
|
373
|
+
|
|
374
|
+
|
340
|
375
|
// write the output text file
|
341
|
376
|
if( cmFileIsValid(p->txH) )
|
342
|
377
|
if( cmFilePrintf(p->txH, "[ %i, %f ]\n", smpIdx, p->sfV[fi] ) != kOkFileRC )
|
|
@@ -347,11 +382,9 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
|
347
|
382
|
}
|
348
|
383
|
|
349
|
384
|
// write the output audio file
|
350
|
|
- if( cmAudioFileIsValid(p->afH) && cmAudioFileRdRead(p->afRdPtr) == cmOkRC )
|
|
385
|
+ if( osn > 0 && cmAudioFileIsValid(p->afH) )
|
351
|
386
|
{
|
352
|
|
- aoutV[0] = p->afRdPtr->outV;
|
353
|
|
-
|
354
|
|
- if( cmAudioFileWriteFloat(p->afH, p->hopSmpCnt, kChCnt, aoutV ) != kOkAfRC )
|
|
387
|
+ if( cmAudioFileWriteFloat(p->afH, osn, kChCnt, aoutV ) != kOkAfRC )
|
355
|
388
|
{
|
356
|
389
|
rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"Audio file write to '%s' failed.",cmAudioFileName(p->afH));
|
357
|
390
|
goto errLabel;
|
|
@@ -383,9 +416,9 @@ cmOnRC_t cmOnsetTest( cmCtx_t* c )
|
383
|
416
|
cmOnsetCfg_t cfg;
|
384
|
417
|
cmOnH_t h = cmOnsetNullHandle;
|
385
|
418
|
cmOnRC_t rc = kOkOnRC;
|
386
|
|
- const cmChar_t* inAudioFn = "/home/kevin/temp/onset0.wav";
|
387
|
|
- const cmChar_t* outAudioFn = "/home/kevin/temp/mas/mas0.aif";
|
388
|
|
- const cmChar_t* outTextFn = "/home/kevin/temp/mas/mas0.txt";
|
|
419
|
+ const cmChar_t* inAudioFn = "/home/kevin/media/audio/20110723-Kriesberg/Audio Files/Piano 3_15.wav";
|
|
420
|
+ const cmChar_t* outAudioFn = "/home/kevin/temp/ons/ons0.aif";
|
|
421
|
+ const cmChar_t* outTextFn = "/home/kevin/temp/ons/ons0.txt";
|
389
|
422
|
|
390
|
423
|
cfg.wndMs = 42;
|
391
|
424
|
cfg.hopFact = 4;
|
|
@@ -395,6 +428,9 @@ cmOnRC_t cmOnsetTest( cmCtx_t* c )
|
395
|
428
|
cfg.threshold = 0.6;
|
396
|
429
|
cfg.maxFrqHz = 24000;
|
397
|
430
|
cfg.filtCoeff = -0.7;
|
|
431
|
+ cfg.medFiltWndMs = 50;
|
|
432
|
+ cfg.filterId = kMedianFiltId;
|
|
433
|
+ cfg.preDelayMs = 20;
|
398
|
434
|
|
399
|
435
|
if((rc = cmOnsetInitialize(c,&h)) != kOkOnRC )
|
400
|
436
|
goto errLabel;
|