Procházet zdrojové kódy

cmOnset.h/c : Added median filter and pre-delay to audio output.

master
kpl před 11 roky
rodič
revize
797e73e958
2 změnil soubory, kde provedl 69 přidání a 26 odebrání
  1. 57
    21
      app/cmOnset.c
  2. 12
    5
      app/cmOnset.h

+ 57
- 21
app/cmOnset.c Zobrazit soubor

@@ -42,7 +42,8 @@ typedef struct
42 42
   unsigned          fftSmpCnt;
43 43
   unsigned          hopSmpCnt;
44 44
   unsigned          binCnt;
45
-
45
+  unsigned          medFiltFrmCnt;
46
+  unsigned          preDelaySmpCnt;
46 47
 } _cmOn_t;
47 48
 
48 49
 cmOnH_t cmOnsetNullHandle = cmSTATIC_NULL_HANDLE;
@@ -160,7 +161,7 @@ cmOnRC_t _cmOnsetExec( _cmOn_t* p )
160 161
   for(; fi<p->frmCnt && cmAudioFileRdRead(p->afRdPtr) != cmEofRC; ++fi )
161 162
   {
162 163
     // calc the spectrum 
163
-    while( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ) )
164
+    if( cmPvAnlExec(p->pvocPtr, p->afRdPtr->outV, p->afRdPtr->outN ))
164 165
     {
165 166
       unsigned i;
166 167
 
@@ -182,7 +183,23 @@ cmOnRC_t _cmOnsetExec( _cmOn_t* p )
182 183
       p->sfV[fi] = sf;
183 184
 
184 185
       // filter the spectral flux 
185
-      cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
186
+      switch( p->cfg.filterId)
187
+      {
188
+        case kNoneFiltId:
189
+          break;
190
+        case kSmoothFiltId:
191
+          cmVOR_Filter( p->sfV + fi, 1, &sf, 1, b0, b, a, d, 1 );
192
+          break;
193
+        case kMedianFiltId:
194
+          {
195
+            cmReal_t* mfb = p->sfV + cmMax(0,fi-17);
196
+            if( mfb < p->sfV-3 )
197
+              p->sfV[fi] = cmVOR_Median(mfb,p->sfV-mfb);
198
+          }
199
+          break;
200
+        default:
201
+          { assert(0); }
202
+      }
186 203
 
187 204
       if( fi >= prog*p->frmCnt )
188 205
       {
@@ -228,8 +245,7 @@ cmOnRC_t _cmOnsetExec( _cmOn_t* p )
228 245
       // if the cur value is greater than the mean of the extended window plus a threshold
229 246
       if( p->sfV[fi] > cmVOR_Mean(p->sfV + bi, nn ) + p->cfg.threshold )
230 247
       {
231
-        p->dfV[fi]              = p->sfV[fi];
232
-
248
+        p->dfV[fi]  = p->sfV[fi];
233 249
       }
234 250
     }
235 251
 
@@ -258,12 +274,18 @@ cmOnRC_t cmOnsetProc( cmOnH_t h, const cmOnsetCfg_t* cfg, const cmChar_t* inAudi
258 274
     goto errLabel;
259 275
   }
260 276
 
261
-  p->fftSmpCnt = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
262
-  p->hopSmpCnt = p->fftSmpCnt / p->cfg.hopFact;
263
-  p->binCnt    = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
264
-  p->frmCnt    = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
265
-  p->sfV       = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
266
-  p->dfV       = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
277
+  p->fftSmpCnt     = cmNearPowerOfTwo( (unsigned)floor( p->cfg.wndMs * p->afInfo.srate / 1000.0 ) );
278
+  p->hopSmpCnt     = p->fftSmpCnt / p->cfg.hopFact;
279
+  p->binCnt        = cmMin(p->fftSmpCnt/2 + 1, floor(p->cfg.maxFrqHz / (p->afInfo.srate / p->fftSmpCnt)));
280
+  p->frmCnt        = (p->afInfo.frameCnt - p->fftSmpCnt) / p->hopSmpCnt;
281
+  p->sfV           = cmMemResizeZ(cmReal_t,p->sfV,p->frmCnt);
282
+  p->dfV           = cmMemResizeZ(cmReal_t,p->dfV,p->frmCnt);
283
+  p->medFiltFrmCnt = cmMax(3,floor(cfg->medFiltWndMs * p->afInfo.srate / (1000.0 * p->hopSmpCnt)));
284
+  p->preDelaySmpCnt= floor(cfg->preDelayMs * p->afInfo.srate / 1000.0);
285
+
286
+  cmRptPrintf(p->err.rpt,"Analysis Hop Duration: %8.2f ms %i smp\n",(double)p->hopSmpCnt*1000/p->afInfo.srate,p->hopSmpCnt);  
287
+  cmRptPrintf(p->err.rpt,"Median Filter Window:  %8.2f ms %i frames\n",cfg->medFiltWndMs,p->medFiltFrmCnt);
288
+  cmRptPrintf(p->err.rpt,"Detection Pre-delay:   %8.2f ms %i smp\n",cfg->preDelayMs, p->preDelaySmpCnt);
267 289
 
268 290
   // initialize the audio file reader
269 291
   if( cmAudioFileRdOpen( p->afRdPtr, p->hopSmpCnt, inAudioFn, p->cfg.audioChIdx, 0, cmInvalidIdx ) != cmOkRC )
@@ -295,6 +317,7 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
295 317
   cmSample_t  out0V[ p->hopSmpCnt ];  
296 318
   cmSample_t  out1V[ p->hopSmpCnt ];
297 319
   cmSample_t* aoutV[kChCnt];
320
+  unsigned    pdn = 0;
298 321
 
299 322
   aoutV[0] = out0V;
300 323
   aoutV[1] = out1V;
@@ -319,12 +342,12 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
319 342
     cmFilePrint(p->txH,"{\n onsetArray : \n[\n");
320 343
   }
321 344
 
322
-  // rewind the audio file
323
-  cmAudioFileRdSeek(p->afRdPtr,0);
324
-
325 345
   unsigned fi;
326 346
   for(fi=0; fi<p->frmCnt; ++fi)
327 347
   {
348
+    // count of samples to write to the audio output file
349
+    unsigned osn = p->hopSmpCnt;
350
+
328 351
     // audio channel 1 is filled with the spectral flux
329 352
     // initialize the out
330 353
     cmVOS_Fill(out1V,p->hopSmpCnt,p->sfV[fi]/p->maxSf);
@@ -337,6 +360,18 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
337 360
       out0V[ p->hopSmpCnt/2 ] = p->sfV[fi]/p->maxSf;
338 361
 
339 362
 
363
+      // if the pre-delay is still active
364
+      if( pdn < p->preDelaySmpCnt )
365
+      {
366
+        osn = 0;
367
+
368
+        pdn += p->hopSmpCnt;
369
+
370
+        if( pdn > p->preDelaySmpCnt )
371
+          osn = pdn - p->preDelaySmpCnt;
372
+      }
373
+
374
+
340 375
       // write the output text file
341 376
       if( cmFileIsValid(p->txH) )
342 377
         if( cmFilePrintf(p->txH, "[ %i, %f ]\n", smpIdx, p->sfV[fi] ) != kOkFileRC )
@@ -347,11 +382,9 @@ cmOnRC_t cmOnsetWrite( cmOnH_t h, const cmChar_t* outAudioFn, const cmChar_t* ou
347 382
     }
348 383
 
349 384
     // write the output audio file
350
-    if( cmAudioFileIsValid(p->afH) && cmAudioFileRdRead(p->afRdPtr) == cmOkRC )
385
+    if( osn > 0 && cmAudioFileIsValid(p->afH) )
351 386
     {
352
-      aoutV[0] = p->afRdPtr->outV;
353
-
354
-      if( cmAudioFileWriteFloat(p->afH, p->hopSmpCnt, kChCnt, aoutV ) != kOkAfRC )
387
+      if( cmAudioFileWriteFloat(p->afH, osn, kChCnt, aoutV ) != kOkAfRC )
355 388
       {
356 389
         rc = cmErrMsg(&p->err,kDspAudioFileFailOnRC,"Audio file write to '%s' failed.",cmAudioFileName(p->afH));
357 390
         goto errLabel;
@@ -383,9 +416,9 @@ cmOnRC_t cmOnsetTest( cmCtx_t* c )
383 416
   cmOnsetCfg_t    cfg;
384 417
   cmOnH_t         h          = cmOnsetNullHandle;
385 418
   cmOnRC_t        rc         = kOkOnRC;
386
-  const cmChar_t* inAudioFn  = "/home/kevin/temp/onset0.wav";
387
-  const cmChar_t* outAudioFn = "/home/kevin/temp/mas/mas0.aif";
388
-  const cmChar_t* outTextFn  = "/home/kevin/temp/mas/mas0.txt";
419
+  const cmChar_t* inAudioFn  = "/home/kevin/media/audio/20110723-Kriesberg/Audio Files/Piano 3_15.wav";
420
+  const cmChar_t* outAudioFn = "/home/kevin/temp/ons/ons0.aif";
421
+  const cmChar_t* outTextFn  = "/home/kevin/temp/ons/ons0.txt";
389 422
 
390 423
   cfg.wndMs      = 42;
391 424
   cfg.hopFact    = 4;
@@ -395,6 +428,9 @@ cmOnRC_t cmOnsetTest( cmCtx_t* c )
395 428
   cfg.threshold  = 0.6;
396 429
   cfg.maxFrqHz   = 24000;
397 430
   cfg.filtCoeff  = -0.7;
431
+  cfg.medFiltWndMs = 50;
432
+  cfg.filterId     = kMedianFiltId;
433
+  cfg.preDelayMs   = 20;
398 434
 
399 435
   if((rc = cmOnsetInitialize(c,&h)) != kOkOnRC )
400 436
     goto errLabel;

+ 12
- 5
app/cmOnset.h Zobrazit soubor

@@ -16,17 +16,24 @@ extern "C" {
16 16
   typedef cmRC_t     cmOnRC_t;
17 17
   typedef cmHandle_t cmOnH_t;
18 18
 
19
+  enum { kNoneFiltId, kSmoothFiltId, kMedianFiltId };
20
+
19 21
   typedef struct
20 22
   {
21 23
     double   wndMs;
22 24
     unsigned hopFact;
23 25
     unsigned audioChIdx;
24 26
 
25
-    unsigned wndFrmCnt;   // 
26
-    double   preWndMult;  //
27
-    double   threshold;   //
28
-    double   maxFrqHz;    //
29
-    double   filtCoeff;   //
27
+    unsigned wndFrmCnt;   // Detection window length
28
+    double   preWndMult;  // Detection window stretch factor prior to current location.
29
+    double   threshold;   // Spectral flux detection threshold
30
+    double   maxFrqHz;    // Ignore frequencies above maxFrqHz during processing.
31
+    double   filtCoeff;   // smoothing filter coeff (-.7)
32
+    double   medFiltWndMs;// median filter window in milliseconds
33
+    unsigned filterId;    // kNoneFiltId, kSmoothFiltId or kMedianFiltId
34
+    double   preDelayMs;  // move each detection preDelayMs backwards in time 
35
+                          // on the audio output. (compensates for detection delay due to filtering)
36
+
30 37
     
31 38
   } cmOnsetCfg_t;
32 39
 

Načítá se…
Zrušit
Uložit