Kaynağa Gözat

First working version.

master
kpl 4 yıl önce
ebeveyn
işleme
f69c71c498
7 değiştirilmiş dosya ile 659 ekleme ve 226 silme
  1. 8
    0
      AudioDevice.py
  2. 17
    0
      common.py
  3. 48
    27
      convert.py
  4. 76
    41
      p_ac.py
  5. 21
    3
      p_ac.yml
  6. 321
    155
      plot_seq.py
  7. 168
    0
      rms_analysis.py

+ 8
- 0
AudioDevice.py Dosyayı Görüntüle

@@ -181,6 +181,14 @@ class AudioDevice(object):
181 181
 
182 182
         return Result(smpN)
183 183
 
184
+    def buffer_sample_ms( self ):
185
+        r = self.buffer_sample_count()
186
+
187
+        if r:
188
+            r.value = int(r.value * 1000.0 / self.srate)
189
+
190
+        return r
191
+    
184 192
     def linear_buffer( self ):
185 193
 
186 194
         smpN = self.buffer_sample_count()

+ 17
- 0
common.py Dosyayı Görüntüle

@@ -0,0 +1,17 @@
1
+import yaml,types
2
+
3
def parse_yaml_cfg( fn ):
    """Parse the YAML configuration file 'fn'.

    Returns the mapping stored under the top level 'p_ac' key as a
    types.SimpleNamespace so that each entry can be read as an attribute.
    Raises KeyError if the file has no 'p_ac' entry.
    """

    with open(fn,"r") as f:
        # NOTE(review): yaml.FullLoader is not intended for untrusted input;
        # only load configuration files from a trusted source.
        docD = yaml.load(f, Loader=yaml.FullLoader)

        return types.SimpleNamespace(**docD['p_ac'])
14
+
15
+
16
+    
17
+

+ 48
- 27
convert.py Dosyayı Görüntüle

@@ -4,7 +4,7 @@ from shutil import copyfile
4 4
 def event_times( eventTimeFn ):
5 5
 
6 6
     eventL = []
7
-    
7
+    velL   = []
8 8
     with open(eventTimeFn,"r") as f:
9 9
 
10 10
         rdr = csv.reader(f)
@@ -14,21 +14,25 @@ def event_times( eventTimeFn ):
14 14
                 beginMs = int(row[1])
15 15
             elif row[0] == 'key_down':
16 16
                 key_downMs = int(row[1]) - beginMs
17
+                vel        = int(row[3])
17 18
             elif row[0] == 'key_up':
18 19
                 key_upMs = row[1]
19 20
 
20 21
                 eventL.append( [ key_downMs, key_downMs+1000 ] )
22
+                velL.append( vel )
21 23
 
22
-    return eventL
24
+    return eventL,velL
23 25
 
24
def pulse_lengths( pulseLenFn, velL ):
    """Return the pulse length selected by each velocity in velL.

    pulseLenFn names a pickle file holding a dict whose 'msL' entry is a
    table: the first element is a multiplier and every following element,
    once scaled by that multiplier, is one pulse length.  Each velocity is a
    1-based index into the scaled table.

    Returns a list with one pulse length per element of velL.
    Raises IndexError if a velocity is outside 1..len(scaled table).
    """

    # NOTE(review): pickle.load() can execute arbitrary code; only read
    # tables that were produced by a trusted source.
    with open(pulseLenFn,'rb') as f:
        d = pickle.load(f)

    rawL = d['msL']

    # note: first posn in table is a multiplier
    tableL = [ x*rawL[0] for x in rawL[1:] ]

    usL = []
    for vel in velL:
        # Without this check a velocity of 0 would index tableL[-1] and
        # silently select the last table entry.
        if vel < 1 or vel > len(tableL):
            raise IndexError("velocity %i is outside the pulse table range 1..%i" % (vel,len(tableL)))

        usL.append( tableL[vel-1] )

    return usL
32 36
 
33 37
 def convert( inDir, outDir ):
34 38
 
@@ -41,40 +45,57 @@ def convert( inDir, outDir ):
41 45
         
42 46
         if os.path.isdir(idir):
43 47
 
44
-            eventTimeFn = os.path.join( idir, "labels_0.csv" )
48
+            id = 0
49
+            while True:
50
+
51
+                eventTimeFn = os.path.join( idir, "labels_%i.csv" % (id) )
52
+
53
+                if not os.path.isfile( eventTimeFn ):
54
+                    break
55
+                
56
+                eventTimeL,velL = event_times(eventTimeFn)
57
+
58
+                pulseTimeFn = os.path.join( idir, "table_%i.pickle" % (id))
59
+
60
+                pulseUsL = pulse_lengths( pulseTimeFn, velL )
45 61
 
46
-            eventTimeL = event_times(eventTimeFn)
47
-            
48
-            pulseTimeFn = os.path.join( idir, "table_0.pickle")
62
+                pitch = idir.split("/")[-1]
49 63
 
50
-            pulseUsL = pulse_lengths( pulseTimeFn )
64
+                if not pitch.isdigit():
65
+                    break
51 66
 
52
-            pitch = idir.split("/")[-1]
67
+                d = {
68
+                    "pulseUsL":pulseUsL,
69
+                    "pitchL":[ pitch ],
70
+                    "noteDurMs":1000,
71
+                    "pauseDurMs":0,
72
+                    "holdDutyPct":50,
73
+                    "eventTimeL":eventTimeL,
74
+                    "beginMs":0
75
+                }
53 76
 
54
-            
55
-            d = {
56
-                "pulseUsL":pulseUsL,
57
-                "pitchL":[ pitch ],
58
-                "noteDurMs":1000,
59
-                "pauseDurMs":0,
60
-                "holdDutyPct":50,
61
-                "eventTimeL":eventTimeL,
62
-                "beginMs":0
63
-            }
77
+                odir = os.path.join( outDir, pitch )
78
+                if not os.path.isdir(odir):
79
+                    os.mkdir(odir)
64 80
 
65
-            odir = os.path.join( outDir, pitch )
66
-            if not os.path.isdir(odir):
67
-                os.mkdir(odir)
81
+                odir = os.path.join( odir, "%i" % (id) )
82
+                if not os.path.isdir(odir):
83
+                    os.mkdir(odir)
68 84
 
69
-            with open(os.path.join( odir, "seq.json" ),"w") as f:
70
-                f.write(json.dumps( d ))
85
+                with open(os.path.join( odir, "seq.json" ),"w") as f:
86
+                    f.write(json.dumps( d ))
71 87
 
72
-            copyfile( os.path.join(idir,"audio_0.wav"), os.path.join(odir,"audio.wav"))
88
+                copyfile( os.path.join(idir,"audio_%i.wav" % (id)), os.path.join(odir,"audio.wav"))
89
+
90
+                id += 1
73 91
                 
74 92
 
75 93
 if __name__ == "__main__":
76 94
     inDir = "/home/kevin/temp/picadae_ac_2/full_map"
77
-    outDir = "/home/kevin/temp/p_ac_3_cvt"
95
+    outDir = "/home/kevin/temp/p_ac_3_cvt/full_map"
96
+
97
+    #inDir = "/home/kevin/temp/picadae_ac_2/week_0"
98
+    #outDir = "/home/kevin/temp/p_ac_3_cvt/week_0"
78 99
 
79 100
     convert( inDir, outDir )
80 101
     

+ 76
- 41
p_ac.py Dosyayı Görüntüle

@@ -1,4 +1,4 @@
1
-import sys,os,argparse,yaml,types,logging,select,time,json
1
+import sys,os,argparse,types,logging,select,time,json
2 2
 from datetime import datetime
3 3
 
4 4
 import multiprocessing
@@ -7,6 +7,8 @@ from multiprocessing import Process, Pipe
7 7
 from picadae_api  import Picadae
8 8
 from AudioDevice  import AudioDevice
9 9
 from result       import Result
10
+from common       import parse_yaml_cfg
11
+from plot_seq     import form_resample_pulse_time_list
10 12
 
11 13
 class AttackPulseSeq:
12 14
     """ Sequence a fixed chord over a list of attack pulse lengths."""
@@ -26,8 +28,9 @@ class AttackPulseSeq:
26 28
         self.next_ms              = 0     # Time of next event (note-on or note_off)
27 29
         self.eventTimeL           = []    # Onset/offset time of each note [ [onset_ms,offset_ms] ]
28 30
         self.beginMs              = 0
31
+        self.playOnlyFl           = False
29 32
 
30
-    def start( self, ms, outDir, pitchL, pulseUsL ):
33
+    def start( self, ms, outDir, pitchL, pulseUsL, playOnlyFl=False ):
31 34
         self.outDir     = outDir         # directory to write audio file and results
32 35
         self.pitchL     = pitchL         # chord to play
33 36
         self.pulseUsL   = pulseUsL       # one onset pulse length in microseconds per sequence element
@@ -35,16 +38,27 @@ class AttackPulseSeq:
35 38
         self.pulse_idx  = 0
36 39
         self.state      = 'note_on'
37 40
         self.next_ms    = ms + 500       # wait for 500ms to play the first note (this will guarantee that there is some empty space in the audio file before the first note)
38
-        self.eventTimeL = [[0,0]] * len(pulseUsL) # initialize the event time         
41
+        self.eventTimeL = [[0,0]  for _ in range(len(pulseUsL))] # initialize the event time         
39 42
         self.beginMs    = ms
40
-        self.audio.record_enable(True)   # start recording audio
43
+        self.playOnlyFl = playOnlyFl
44
+
45
+        for pitch in pitchL:
46
+            self.api.set_pwm( pitch, self.holdDutyPct )
47
+
48
+        if not playOnlyFl:
49
+            self.audio.record_enable(True)   # start recording audio
41 50
         self.tick(ms)                    # play the first note
42 51
         
43 52
     def stop(self, ms):
44 53
         self._send_note_off() # be sure that all notes are actually turn-off
45
-        self.audio.record_enable(False)  # stop recording audio
54
+        
55
+        if not self.playOnlyFl:
56
+            self.audio.record_enable(False)  # stop recording audio
57
+            
46 58
         self._disable()          # disable this sequencer
47
-        self._write()            # write the results
59
+        
60
+        if not self.playOnlyFl:
61
+            self._write()            # write the results
48 62
 
49 63
     def is_enabled(self):
50 64
         return self.state is not None
@@ -75,7 +89,8 @@ class AttackPulseSeq:
75 89
         
76 90
     def _note_on( self, ms ):
77 91
 
78
-        self.eventTimeL[ self.pulse_idx ][0] = ms - self.beginMs
92
+        #self.eventTimeL[ self.pulse_idx ][0] = ms - self.beginMs
93
+        self.eventTimeL[ self.pulse_idx ][0] = self.audio.buffer_sample_ms().value
79 94
         self.next_ms = ms + self.noteDurMs
80 95
         self.state = 'note_off'
81 96
 
@@ -84,7 +99,8 @@ class AttackPulseSeq:
84 99
             print("note-on:",pitch,self.pulse_idx)
85 100
 
86 101
     def _note_off( self, ms ):
87
-        self.eventTimeL[ self.pulse_idx ][1] = ms - self.beginMs
102
+        #self.eventTimeL[ self.pulse_idx ][1] = ms - self.beginMs
103
+        self.eventTimeL[ self.pulse_idx ][1] = self.audio.buffer_sample_ms().value
88 104
         self.next_ms = ms + self.pauseDurMs
89 105
         self.state   = 'note_on'
90 106
         
@@ -130,19 +146,17 @@ class CalibrateKeys:
130 146
         self.cfg      = cfg
131 147
         self.seq      = AttackPulseSeq(  audioDev, api, noteDurMs=1000, pauseDurMs=1000, holdDutyPct=50 )
132 148
         
133
-        self.label     = None
134 149
         self.pulseUsL  = None
135 150
         self.chordL   = None
136 151
         self.pitch_idx = -1
137 152
 
138 153
         
139
-    def start( self, ms, label, chordL, pulseUsL ):
154
+    def start( self, ms, chordL, pulseUsL, playOnlyFl=False ):
140 155
         if len(chordL) > 0:
141
-            self.label     = label
142 156
             self.pulseUsL  = pulseUsL
143 157
             self.chordL   = chordL
144 158
             self.pitch_idx = -1
145
-            self._start_next_chord( ms )
159
+            self._start_next_chord( ms, playOnlyFl )
146 160
         
147 161
         
148 162
     def stop( self, ms ):
@@ -158,12 +172,13 @@ class CalibrateKeys:
158 172
 
159 173
             # if the sequencer is done playing 
160 174
             if not self.seq.is_enabled():
161
-                self._start_next_chord( ms ) # ... else start the next sequence
175
+                self._start_next_chord( ms, self.seq.playOnlyFl ) # ... else start the next sequence
162 176
 
163 177
         return None
164 178
 
165
-    def _start_next_chord( self, ms ):
179
+    def _start_next_chord( self, ms, playOnlyFl ):
166 180
 
181
+        
167 182
         self.pitch_idx += 1
168 183
 
169 184
         # if the last chord in chordL has been played ...
@@ -179,15 +194,44 @@ class CalibrateKeys:
179 194
                 os.mkdir( outDir )
180 195
 
181 196
             # form the output directory as "<pitch0>_<pitch1> ... "
182
-            dirStr = self.label + "_" + "_".join([ str(pitch) for pitch in pitchL ])
197
+            dirStr = "_".join([ str(pitch) for pitch in pitchL ])
183 198
 
184 199
             outDir = os.path.join(outDir, dirStr )
185 200
 
201
+            print(outDir)
202
+            if not os.path.isdir(outDir):
203
+                os.mkdir(outDir)
204
+            
205
+
206
+            # get the next available output directory id
207
+            outDir_id = self._calc_next_out_dir_id( outDir )
208
+
209
+            # if this is not the first time this note has been sampled then get the resample locations
210
+            if outDir_id != 0:
211
+                self.pulseUsL,_,_ = form_resample_pulse_time_list( outDir, self.cfg.analysisArgs )
212
+
213
+            if playOnlyFl:
214
+                self.pulseUsL,_ = form_final_pulse_list( outDir,  pitchL[0],  self.cfg.analysisArgs, take_id=None )
215
+
216
+                
217
+            outDir = os.path.join( outDir, str(outDir_id) )
218
+
219
+            if not os.path.isdir(outDir):
220
+                os.mkdir(outDir)
221
+
186 222
             # start the sequencer
187
-            self.seq.start( ms, outDir, pitchL, self.pulseUsL )
223
+            self.seq.start( ms, outDir, pitchL, self.pulseUsL, playOnlyFl )
188 224
         
189
-    
190 225
 
226
+    def _calc_next_out_dir_id( self, outDir ):
227
+
228
+        id = 0
229
+        while os.path.isdir( os.path.join(outDir,"%i" % id)):
230
+            id += 1
231
+
232
+        return id
233
+                    
234
+        
191 235
 # This is the main application API it is running in a child process.
192 236
 class App:
193 237
     def __init__(self ):
@@ -208,7 +252,9 @@ class App:
208 252
         
209 253
         res = self.audioDev.setup(**cfg.audio)
210 254
 
211
-        if res:
255
+        if not res:
256
+            self.audio_dev_list(0)
257
+        else:
212 258
             self.api = Picadae( key_mapL=cfg.key_mapL)
213 259
 
214 260
             # wait for the letter 'a' to come back from the serial port
@@ -237,7 +283,11 @@ class App:
237 283
 
238 284
     def calibrate_keys_start( self, ms, pitchRangeL ):
239 285
         chordL = [ [pitch]  for pitch in range(pitchRangeL[0], pitchRangeL[1]+1)]
240
-        self.calibrate.start(  ms, "full", chordL, cfg.full_pulseL )
286
+        self.calibrate.start(  ms, chordL, cfg.full_pulseL )
287
+
288
+    def play_keys_start( self, ms, pitchRangeL ):
289
+        chordL = [ [pitch]  for pitch in range(pitchRangeL[0], pitchRangeL[1]+1)]
290
+        self.calibrate.start(  ms, chordL, cfg.full_pulseL, playOnlyFl=True )
241 291
 
242 292
     def calibrate_keys_stop( self, ms ):
243 293
         self.calibrate.stop(ms)
@@ -299,7 +349,7 @@ def app_event_loop_func( pipe, cfg ):
299 349
                 func = getattr(app,msg.type)
300 350
 
301 351
                 ms  = int(round( (datetime.now() - dt0).total_seconds() * 1000.0) )
302
-                
352
+
303 353
                 # call the command handler
304 354
                 if msg.value:
305 355
                     res = func( ms, msg.value )
@@ -320,7 +370,7 @@ def app_event_loop_func( pipe, cfg ):
320 370
         
321 371
         # calc the tick() time stamp
322 372
         ms  = int(round( (datetime.now() - dt0).total_seconds() * 1000.0) )
323
-
373
+        
324 374
         # tick the app
325 375
         app.tick( ms )
326 376
         
@@ -362,8 +412,9 @@ class Shell:
362 412
             'q':{ "func":'quit',                  "minN":0,  "maxN":0, "help":"quit"},
363 413
             '?':{ "func":"_help",                 "minN":0,  "maxN":0, "help":"Print usage text."},
364 414
             'a':{ "func":"audio_dev_list",        "minN":0,  "maxN":0, "help":"List the audio devices."},
365
-            'c':{ "func":"calibrate_keys_start",  "minN":1,  "maxN":2, "help":"Calibrate a range of keys."},
366
-            's':{ "func":"calibrate_keys_stop",   "minN":0,  "maxN":0, "help":"Stop key calibration"}
415
+            'c':{ "func":"calibrate_keys_start",  "minN":1,  "maxN":2, "help":"Calibrate a range of keys. "},
416
+            's':{ "func":"calibrate_keys_stop",   "minN":0,  "maxN":0, "help":"Stop key calibration"},
417
+            'p':{ "func":"play_keys_start",       "minN":1,  "maxN":2, "help":"Play current calibration"}
367 418
             }
368 419
 
369 420
     def _help( self, _=None ):
@@ -405,7 +456,6 @@ class Shell:
405 456
         if  d['minN'] != -1 and (d['minN'] > len(argL) or len(argL) > d['maxN']):                
406 457
             return self._syntaxError("Argument count mismatch. {} is out of range:{} to {}".format(len(argL),d['minN'],d['maxN']))
407 458
 
408
-
409 459
         # call the command function
410 460
         if func:
411 461
             result = func(*argL)
@@ -457,8 +507,8 @@ class Shell:
457 507
         while not self.appProc.isdone():
458 508
             self.appProc.recv()  # drain the AppProc() as it shutdown
459 509
             time.sleep(0.1)
460
-        
461
-
510
+       
511
+ 
462 512
     def _handle_app_msgs( self, msgL ):
463 513
         quitAppFl = False
464 514
         if msgL:
@@ -489,21 +539,6 @@ def parse_args():
489 539
     return ap.parse_args()
490 540
     
491 541
             
492
-def parse_yaml_cfg( fn ):
493
-    """Parse the YAML configuration file."""
494
-    
495
-    cfg  = None
496
-    
497
-    with open(fn,"r") as f:
498
-        cfgD = yaml.load(f, Loader=yaml.FullLoader)
499
-
500
-        cfg = types.SimpleNamespace(**cfgD['p_ac'])
501
-
502
-    return cfg
503
-
504
-
505
-    
506
-
507 542
     
508 543
 if __name__ == "__main__":
509 544
     

+ 21
- 3
p_ac.yml Dosyayı Görüntüle

@@ -18,13 +18,31 @@
18 18
 
19 19
 
20 20
     # MeasureSeq args
21
-    outDir: "~/temp/p_ac3",
21
+    outDir: "~/temp/p_ac_3",
22 22
     noteDurMs: 1000,
23 23
     pauseDurMs: 1000,
24 24
     holdDutyPct: 50,
25 25
 
26
-    #full_pulseL: [ 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 8000, 9000, 10000, 12000, 14000, 18000, 22000, 26000, 30000, 34000, 40000],
27
-    full_pulseL: [ 18000, 22000, 26000, 30000, 34000, 40000],
26
+    full_pulse0L: [ 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 8000, 9000, 10000, 12000, 14000, 18000, 22000, 26000, 30000, 34000, 40000],
27
+    full_pulse1L: [  10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 20000, 22000, 24000, 26000, 30000, 32000, 34000, 36000, 40000],
28
+    full_pulseL: [  10000, 10500, 11000, 11500, 12000, 12500, 13000, 13500, 14000, 14500, 15000, 15500, 16000, 16500, 17000, 17500, 18000, 18500, 20000, 22000, 24000, 26000, 30000, 32000, 34000, 36000, 40000],
29
+
30
+    # RMS analysis args
31
+    analysisArgs: {
32
+      rmsAnalysisArgs: {
33
+        rmsWndMs: 300,    # length of the RMS measurement window
34
+        rmsHopMs: 30,     # RMS measurement inter window distance
35
+        dbRefWndMs: 500,  # length of initial portion of signal to use to calculate the dB reference level
36
+        harmCandN: 5,     # count of harmonic candidates to locate during harmonic based RMS analysis
37
+        harmN: 3,         # count of harmonics to use to calculate harmonic based RMS analysis
38
+      },
39
+      
40
+      minAttkDb: 5.0,   # threshold of silence level 
41
+      maxDbOffset: 0.5, # travel down from the max. note level by at most this amount to locate the max. peak
42
+      maxDeltaDb: 2.0,  # maximum db change between volume samples (changes greater than this will trigger resampling)
43
+      samplesPerDb: 4,   # count of samples per dB to resample ranges whose range is less than maxDeltaDb
44
+      minSampleDistUs: 500 # minimum distance between sample points in microseconds
45
+      },
28 46
     
29 47
      key_mapL: [
30 48
 

+ 321
- 155
plot_seq.py Dosyayı Görüntüle

@@ -1,8 +1,8 @@
1
-import os, sys, json
2
-from scipy.io import wavfile
3
-from scipy.signal import stft
1
+import os, sys
4 2
 import matplotlib.pyplot as plt
5 3
 import numpy as np
4
+from common import parse_yaml_cfg
5
+from rms_analysis import rms_analysis_main
6 6
 
7 7
 def is_nanV( xV ):
8 8
     
@@ -12,173 +12,325 @@ def is_nanV( xV ):
12 12
         
13 13
     return False
14 14
 
15
-def find_min_max_peak_index( rmsV, pkIdxL, minDb, maxDbOffs=0.5 ):
15
+def _find_max_take_id( inDir ):
16 16
 
17
-    # select only the peaks from rmsV[] to work with
18
-    yV = rmsV[ pkIdxL ]
17
+    id = 0
18
+    while os.path.isdir( os.path.join(inDir, "%i" % id) ):
19
+        id += 1
19 20
 
20
-    # get the max volume note
21
-    max_i = np.argmax( yV )
22
-    maxDb = yV[ max_i ]
21
+    if id > 0:
22
+        id -= 1
23
+        
24
+    return id
25
+        
23 26
 
24
-    min_i = max_i
27
def form_final_pulse_list( inDir, midi_pitch, analysisArgsD, take_id=None ):
    """Form a 128 entry velocity table from every take stored under inDir.

    The peaks (db,us) located by rms_analysis_main() in each numbered take
    folder are pooled, sorted on increasing attack pulse microseconds and
    points closer than analysisArgsD['minSampleDistUs'] are merged.  The dB
    range between the min and max peak (see find_min_max_peak_index()) is
    then divided into 128 equally spaced target levels and the pulse time of
    each target is linearly interpolated between the sampled points.

    take_id is accepted for backward compatibility but is not used; all
    takes are always analyzed.

    Returns (pulseUsL,pulseDbL), each holding 128 values.
    """

    pkL = []

    # for each take in this directory
    for idir in os.listdir(inDir):

        # skip entries that are not numbered take folders
        if not idir.isdigit():
            continue

        # analyze this takes audio and locate the note peaks
        r = rms_analysis_main( os.path.join(inDir,idir), midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )

        # store the peaks in pkL[ (db,us) ]
        pkL.extend( zip(r.pkDbL,r.pkUsL) )

    # sort the peaks on increasing attack pulse microseconds
    pkL = sorted( pkL, key= lambda x: x[1] )

    # merge sample points that are separated by less than 'minSampleDistUs' microseconds
    pkL = merge_close_sample_points( pkL, analysisArgsD['minSampleDistUs'] )

    # split pkL
    pkDbL,pkUsL = tuple(zip(*pkL))

    # locate the first and last note
    min_pk_idx, max_pk_idx = find_min_max_peak_index( pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )

    db1 = pkDbL[ max_pk_idx ]
    db0 = pkDbL[ min_pk_idx ]

    pulseUsL = []
    pulseDbL = []
    multValL = []
    for out_idx in range(128):

        # calc the target volume
        db = db0 + (out_idx * (db1-db0)/127.0)

        foundFl           = False
        multi_value_count = 0

        # look for the target between each of the sampled points
        for i in range(1,len(pkDbL)):

            # if the target volume is between these two sample points
            if pkDbL[i-1] <= db and db < pkDbL[i]:

                if not foundFl:
                    # interpolate the pulse time from between the sampled points
                    frac = (db - pkDbL[i-1]) / (pkDbL[i] - pkDbL[i-1])
                    pulseUsL.append( pkUsL[i-1] + frac * (pkUsL[i] - pkUsL[i-1]) )
                    pulseDbL.append( pkDbL[i-1] + frac * (pkDbL[i] - pkDbL[i-1]) )
                    foundFl = True

                else:
                    # this target db value was found between multiple sampled points
                    # therefore the sampled volume function is not monotonic
                    multi_value_count += 1

        if not foundFl:
            # No rising segment brackets the target.  This happens when the
            # target equals the loudest sampled level, so fall back to the
            # max peak in order to keep the table at 128 entries.
            pulseUsL.append( pkUsL[max_pk_idx] )
            pulseDbL.append( db1 )

        if multi_value_count > 0:
            multValL.append((out_idx,multi_value_count))

    if len(multValL) > 0:
        print("Multi-value pulse locations were found during velocity table formation: ",multValL)

    return pulseUsL,pulseDbL
64 126
     
65
-    fundHz     = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))
66
-    fund_l_binL   = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)]
67
-    fund_m_binL   = [int(round(fundHz *         i/binHz)) for i in range(1,harmN+1)]
68
-    fund_u_binL   = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)]
69 127
 
70
-    for i in range(len(fund_m_binL)):
71
-        if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
72
-            fund_l_binL[i] = fund_m_binL[i] - 1
73 128
 
74
-        if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1:
75
-            fund_u_binL[i] = fund_m_binL[i] + 1
129
def merge_close_sample_points( pkDbUsL, minSampleDistanceUs ):
    """Merge neighbouring sample points that are closer than minSampleDistanceUs.

    pkDbUsL is a list of (db,us) pairs sorted on increasing 'us'.  A point
    whose 'us' is less than minSampleDistanceUs past the previously kept
    point is averaged into that point (both db and us).  Because the input is
    sorted a merge can only move a kept point further from its predecessor,
    so a single pass is sufficient.

    When pkDbUsL is a list it is updated in place and returned.  A summary of
    the merge is printed; the spacing statistics are skipped when fewer than
    two points remain.
    """

    n0     = len(pkDbUsL)
    avg0Us = np.mean(np.diff([ x[1] for x in pkDbUsL ])) if n0 > 1 else 0.0

    mergedL = []
    for db,us in pkDbUsL:
        if mergedL and us - mergedL[-1][1] < minSampleDistanceUs:
            # too close to the previously kept point - replace it with the average
            db0,us0     = mergedL[-1]
            mergedL[-1] = ((db0 + db)/2, (us0 + us)/2)
        else:
            mergedL.append( (db,us) )

    # keep the historical behaviour of updating the caller's list in place
    if isinstance(pkDbUsL,list):
        pkDbUsL[:] = mergedL
    else:
        pkDbUsL = mergedL

    print("%i sample points deleted by merging close points." % (n0 - len(pkDbUsL)))

    if len(pkDbUsL) > 1:
        gapV = np.diff([ x[1] for x in pkDbUsL ])
        print("Mean time between samples - before:%f after:%f " % (avg0Us,np.mean(gapV)))
        print("Min time between samples: %i " % (np.min(gapV)))

    return pkDbUsL
83 159
 
84
-    return rmsDbV
85 160
 
86
-def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs  ):
87 161
 
88
-    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
89
-    hopSmpN = int(round( hopMs    * srate / 1000.0))
162
def calc_resample_ranges( pkDbL, pkUsL, min_pk_idx, max_pk_idx, maxDeltaDb, samplePerDb ):
    """Return the set of pulse times at which the note should be resampled.

    Each peak from min_pk_idx through max_pk_idx is compared to the peak
    before it.  The interval leading up to the peak, and the interval
    following it when one exists, is resampled if the peak rose by more than
    maxDeltaDb, did not rise at all, or is below the loudest peak seen so far.
    A resampled interval is filled with evenly spaced pulse times, at least
    samplePerDb of them, beginning at the start of the interval.

    Returns None (after printing a message) when min_pk_idx is 0.
    """

    def _points_between( a, b ):
        # evenly spaced pulse times beginning at peak 'a' and stopping short of peak 'b'
        deltaDb = pkDbL[b] - pkDbL[a]
        cnt     = max(int(round(abs(deltaDb) * samplePerDb)),samplePerDb)
        stepUs  = int(round((pkUsL[b] - pkUsL[a])/cnt))
        return [ pkUsL[a] + stepUs*j  for j in range(cnt) ]

    if min_pk_idx == 0:
        print("No silent notes were generated.  Decrease the minimum peak level or the hold voltage.")
        return None

    resampleUsSet = set()
    loudestDb     = pkDbL[min_pk_idx]

    for i in range( min_pk_idx, max_pk_idx+1 ):

        stepDb = pkDbL[i] - pkDbL[i-1]

        # resample around this peak if it jumped too far, failed to rise,
        # or fell below the loudest peak seen so far
        if stepDb > maxDeltaDb or stepDb <= 0 or pkDbL[i] < loudestDb:

            resampleUsSet.update( _points_between(i-1,i) )

            if i + 1 < len(pkDbL):
                resampleUsSet.update( _points_between(i,i+1) )

        # track the loudest peak seen so far
        loudestDb = max( loudestDb, pkDbL[i] )

    return resampleUsSet
201
+            
202
+
203
+
204
def form_resample_pulse_time_list( inDir, analysisArgsD ):
    """Merge all available data from previous takes to form a new list of
    pulse times to sample.

    inDir is the folder holding the numbered take folders of one note; its
    last path component must be the midi pitch of that note.

    Returns (resampleUsL,pkDbL,pkUsL) where resampleUsL is the sorted list of
    pulse times to resample and pkDbL/pkUsL are the merged peaks of all takes
    ordered on increasing pulse time.  resampleUsL is empty when
    calc_resample_ranges() could not form a resample set.
    """

    # the last folder is always the midi pitch of the note under analysis
    # (normpath() makes this tolerant of a trailing path separator)
    midi_pitch = int( os.path.basename( os.path.normpath(inDir) ) )

    pkL = []

    # for each take in this directory
    for idir in os.listdir(inDir):

        # skip entries that are not numbered take folders
        if not idir.isdigit():
            continue

        # analyze this takes audio and locate the note peaks
        r = rms_analysis_main( os.path.join(inDir,idir), midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )

        # store the peaks in pkL[ (db,us) ]
        pkL.extend( zip(r.pkDbL,r.pkUsL) )

    # sort the peaks on increasing attack pulse microseconds
    pkL = sorted( pkL, key= lambda x: x[1] )

    # merge sample points that are separated by less than 'minSampleDistUs' microseconds
    pkL = merge_close_sample_points( pkL, analysisArgsD['minSampleDistUs'] )

    # split pkL
    pkDbL,pkUsL = tuple(zip(*pkL))

    # locate the first and last note
    min_pk_idx, max_pk_idx = find_min_max_peak_index( pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )

    # estimate the microsecond locations to resample
    resampleUsSet = calc_resample_ranges( pkDbL, pkUsL, min_pk_idx, max_pk_idx, analysisArgsD['maxDeltaDb'], analysisArgsD['samplesPerDb'] )

    # calc_resample_ranges() returns None when min_pk_idx is 0
    resampleUsL = sorted( resampleUsSet ) if resampleUsSet is not None else []

    return resampleUsL, pkDbL, pkUsL
138 247
 
139
-def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN  ):
140 248
 
141
-    wndSmpN   = int(round( rmsWndMs * srate / 1000.0))
142
-    hopSmpN   = int(round( hopMs    * srate / 1000.0))
143 249
 
144
-    binHz   = srate / wndSmpN
250
def plot_resample_pulse_times( inDir, analysisArgsD ):
    """Plot the measured peak levels together with the resample and final pulse lists.

    inDir          directory whose last '/'-separated component is the MIDI pitch number
    analysisArgsD  analysis argument dictionary forwarded unchanged to the helpers

    Opens a matplotlib window and blocks until it is closed; returns nothing.
    """
    resampleUsL, pkDbL, pkUsL = form_resample_pulse_time_list( inDir, analysisArgsD )

    # the pitch is taken from the final component of the directory path
    pitch = int( inDir.split("/")[-1] )
    tblUsL, tblDbL = form_final_pulse_list( inDir, pitch, analysisArgsD, take_id=None )

    _, axes = plt.subplots()

    # measured peak level (db) as a function of pulse time (us)
    axes.plot( pkUsL, pkDbL )

    # one vertical line for every pulse time that should be resampled
    for pulse_us in resampleUsL:
        axes.axvline( x = pulse_us )

    # values returned by form_final_pulse_list() drawn as unconnected dots
    axes.plot( tblUsL, tblDbL, marker='.', linestyle='None' )

    plt.show()
267
+        
268
def find_min_max_peak_index( pkDbL, minDb, maxDbOffs ):
    """
    Find the min db and max db peak.

    pkDbL      sequence of peak levels (db)
    minDb      the backward scan stops at the first peak below this level
    maxDbOffs  an earlier peak that is within this many db of the loudest peak
               replaces it as the maximum

    Returns (min_i, max_i), both indexes into pkDbL.
    Raises AssertionError if the selected minimum is not before the maximum.
    """
    yV = pkDbL

    # get the max volume note
    max_i = np.argmax( yV )
    maxDb = yV[ max_i ]

    min_i = max_i

    # Starting from the max volume peak go backwards.  The scan must include
    # index 0, otherwise the first peak can never be selected as the minimum
    # and the 'no silent notes' warning below can never be issued.
    for i in range( max_i, -1, -1 ):

        # if this peak is within maxDbOffs of the loudest then choose this one instead
        if maxDb - yV[i] < maxDbOffs:
            max_i = i

        # if this peak is less than minDb then the previous note is the min note
        if yV[i] < minDb:
            break

        min_i = i

    assert min_i < max_i, "the minimum peak must precede the maximum peak"

    if min_i == 0:
        print("No silent notes were generated.  Decrease the minimum peak level or the hold voltage.")

    return min_i, max_i
300
+
301
def find_skip_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx ):
    """ Find peaks associated with longer attack pulses that are lower than peaks with a shorter attack pulse.
    These peaks indicate degenerate portions of the pulse/db curve which must be skipped during velocity table formation.

    rmsV        array of levels which is indexed with pkIdxL
    pkIdxL      indexes into rmsV[] of the peaks
    min_pk_idx  index (into pkIdxL) of the reference peak where the scan starts
    max_pk_idx  index (into pkIdxL) of the last peak examined (inclusive)

    Returns the list of indexes (into pkIdxL) whose level does not exceed the
    highest level seen so far.
    """
    pkDbV       = rmsV[pkIdxL]
    running_max = pkDbV[min_pk_idx]
    skipped     = []

    for idx in range( min_pk_idx+1, max_pk_idx+1 ):
        level = pkDbV[idx]

        # a new high: it becomes the reference for all following peaks
        if level > running_max:
            running_max = level
            continue

        skipped.append(idx)

    return skipped
317
+
318
def find_out_of_range_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx, maxDeltaDb ):
    """ Locate peaks whose level step from the previous peak is out of range.
    A peak is reported when it is more than maxDeltaDb above the previous peak
    or when it is below the previous peak.

    rmsV        array of levels which is indexed with pkIdxL
    pkIdxL      indexes into rmsV[] of the peaks
    min_pk_idx  first peak index (into pkIdxL) to examine
    max_pk_idx  last peak index (into pkIdxL) to examine (inclusive)
    maxDeltaDb  largest allowed increase between consecutive peaks

    Returns the list of indexes (into pkIdxL) of the out-of-range peaks.
    """
    pkDbV = rmsV[pkIdxL]

    # peak 0 has no predecessor and is therefore never examined
    first = max( min_pk_idx, 1 )

    steps = ( (i, pkDbV[i] - pkDbV[i-1]) for i in range( first, max_pk_idx+1 ) )

    return [ i for i, delta in steps if delta > maxDeltaDb or delta < 0 ]
180 333
 
181
-    return pkIdxL
182 334
 
183 335
 
184 336
 def plot_spectrum( ax, srate, binHz, specV, midiPitch, harmN ):
@@ -249,68 +401,82 @@ def plot_spectral_ranges( inDir, pitchL, rmsWndMs=300, rmsHopMs=30, harmN=5, dbR
249 401
 
250 402
         
251 403
         
252
-def do_td_plot( inDir ):
253
-    rmsWndMs = 300
254
-    rmsHopMs = 30
255
-    dbRefWndMs = 500
256
-    harmCandN = 5
257
-    harmN     = 3
258
-    minAttkDb = 5.0
259
-    
260
-    seqFn = os.path.join( inDir, "seq.json")
261
-    audioFn = os.path.join( inDir, "audio.wav")
262
-    midiPitch = int(inDir.split("/")[-1])
263
-    
404
def td_plot( ax, inDir, midi_pitch, id, analysisArgsD ):
    """Draw the RMS curves and the note event boundaries of one take on 'ax'.

    ax             matplotlib axes to draw on
    inDir          take directory passed to rms_analysis_main()
    midi_pitch     MIDI pitch passed to rms_analysis_main()
    id             take id (not used by this function)
    analysisArgsD  dictionary with the keys 'rmsAnalysisArgs', 'minAttkDb',
                   'maxDbOffset' and 'maxDeltaDb'
    """
    res = rms_analysis_main( inDir, midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )

    min_pk_idx, max_pk_idx = find_min_max_peak_index( res.pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )

    skipPkIdxL = find_skip_peaks( res.rmsDbV, res.pkIdxL, min_pk_idx, max_pk_idx )

    jmpPkIdxL  = find_out_of_range_peaks( res.rmsDbV, res.pkIdxL, min_pk_idx, max_pk_idx, analysisArgsD['maxDeltaDb'] )

    # rmsDbV curve in the default color, tdRmsDbV curve in black
    ax.plot( np.arange(0,len(res.rmsDbV)) / res.rms_srate, res.rmsDbV )
    ax.plot( np.arange(0,len(res.tdRmsDbV)) / res.rms_srate, res.tdRmsDbV, color="black" )

    # print beg/end boundaries
    for evt_i, evt in enumerate(res.eventTimeL):
        begMs, endMs = evt
        beg_sec = begMs/1000.0
        ax.axvline( x=beg_sec, color="green")
        ax.axvline( x=endMs/1000.0, color="red")
        ax.text(beg_sec, 20.0, str(evt_i) )

    # NOTE(review): this early return disables the peak marker plotting below.
    return

    # plot peak markers
    for i,pki in enumerate(res.pkIdxL):
        is_limit = i==min_pk_idx or i==max_pk_idx
        marker   = 4 if is_limit else 5
        color    = "red" if i in skipPkIdxL else "black"
        ax.plot( [pki / res.rms_srate], [ res.rmsDbV[pki] ], marker=marker, color=color)

        if i in jmpPkIdxL:
            ax.plot( [pki / res.rms_srate], [ res.rmsDbV[pki] ], marker=6, color="blue")
 
303
-    plt.show()
304 436
 
305 437
 
306 438
     
439
def do_td_plot( inDir, analysisArgs ):
    """Show the time domain plot of a single take.

    inDir         take directory; its last two '/'-separated components are
                  read as <midi_pitch>/<take id>
    analysisArgs  analysis argument dictionary forwarded to td_plot()
    """
    fig,ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5, forward=True)

    # split the path once: the last part is the take id, the one before it the pitch
    partL      = inDir.split("/")
    take_id    = int(partL[-1])
    midi_pitch = int(partL[-2])

    td_plot(ax, inDir, midi_pitch, take_id, analysisArgs)

    plt.show()
451
+
452
def do_td_multi_plot( inDir, analysisArgs ):
    """Show the time domain plots of all takes of one pitch, one subplot row per take.

    inDir         pitch directory; its last '/'-separated component is the MIDI
                  pitch and every entry inside it must be named with a take number
    analysisArgs  analysis argument dictionary forwarded to td_plot()

    Raises ValueError if a directory entry name is not an integer.
    """
    midi_pitch = int(inDir.split("/")[-1])

    # Visit the takes in numeric order and use the real entry names, rather
    # than assuming that the entries are named exactly 0..N-1.
    dirL = sorted( os.listdir(inDir), key=int )

    # squeeze=False always yields a 2-D array of axes; without it a single
    # take returns one bare Axes object which cannot be iterated.
    fig,axM = plt.subplots(len(dirL),1,squeeze=False)

    for idir,ax in zip(dirL,axM[:,0]):

        td_plot(ax, os.path.join(inDir,idir), midi_pitch, int(idir), analysisArgs )

    plt.show()
466
+
467
+    
310 468
 if __name__ == "__main__":
311 469
 
312 470
     inDir = sys.argv[1]
471
+    cfgFn = sys.argv[2]
313 472
 
314
-    do_td_plot(inDir)
473
+    cfg = parse_yaml_cfg( cfgFn )
474
+    
475
+    #do_td_plot(inDir,cfg.analysisArgs)
476
+
477
+    #o_td_multi_plot(inDir,cfg.analysisArgs)
315 478
 
316 479
     #plot_spectral_ranges( inDir, [ 24, 36, 48, 60, 72, 84, 96, 104] )
480
+
481
+    plot_resample_pulse_times( inDir, cfg.analysisArgs )
482
+

+ 168
- 0
rms_analysis.py Dosyayı Görüntüle

@@ -0,0 +1,168 @@
1
+import os,types,json
2
+from scipy.io import wavfile
3
+from scipy.signal import stft
4
+import numpy as np
5
+
6
+
7
def calc_harm_bins( srate, binHz, midiPitch, harmN ):
    """Return the spectrum bin ranges which surround the first harmN harmonics of a pitch.

    srate      audio sample rate (not used by the calculation)
    binHz      width of one spectrum bin in Hz
    midiPitch  MIDI pitch number of the fundamental
    harmN      count of harmonics (the fundamental is harmonic 1)

    Returns (lowerBinL, centerBinL, upperBinL), three lists of length harmN.
    upperBinL[i] is always greater than centerBinL[i], so the half-open range
    [ lowerBinL[i] : upperBinL[i] ] always contains the center bin.
    """
    band_width_st = 3.0/48  # 3/8 tone

    # frequency of MIDI pitch 0 scaled up by midiPitch semitones
    fundHz      = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))
    harmL       = range(1,harmN+1)
    fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in harmL]
    fund_m_binL = [int(round(fundHz *                           i/binHz)) for i in harmL]
    fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in harmL]

    for i, mid_bin in enumerate(fund_m_binL):

        # keep the lower bin below the center bin (but never negative)
        if fund_l_binL[i] >= mid_bin and fund_l_binL[i] > 0:
            fund_l_binL[i] = mid_bin - 1

        # The upper bin is used as an exclusive slice bound, so it must be
        # above the center bin or the center bin is left out of the band.
        if fund_u_binL[i] <= mid_bin:
            fund_u_binL[i] = mid_bin + 1

    return fund_l_binL, fund_m_binL, fund_u_binL
27
+    
28
def rms_to_db( xV, rms_srate, refWndMs ):
    """Convert a vector of linear levels to decibels relative to its own beginning.

    xV         numpy vector of linear levels
    rms_srate  sample rate of xV in Hz
    refWndMs   length in milliseconds of the window at the start of xV whose
               mean is used as the 0 db reference

    Returns a vector with the same length as xV holding 20*log10( xV / reference ).
    """
    # Use at least one sample: a window which rounds to zero samples would
    # make the reference the mean of an empty slice (NaN).
    dbWndN = max( 1, int(round(refWndMs * rms_srate / 1000.0)) )
    dbRef  = np.mean(xV[0:dbWndN])

    return 20.0 * np.log10( xV / dbRef )
34
+
35
def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs  ):
    """Measure the windowed RMS of an audio signal and convert it to db with rms_to_db().

    srate     audio sample rate in Hz
    xV        audio signal
    rmsWndMs  RMS window length in milliseconds
    hopMs     distance between successive windows in milliseconds
    refWndMs  reference window length (ms) passed to rms_to_db()

    Returns (rmsDbV, rms_srate) where rms_srate = srate / hop length in samples.
    """
    wnd_len = int(round( rmsWndMs * srate / 1000.0))
    hop_len = int(round( hopMs    * srate / 1000.0))

    sig_len   = xV.shape[0]
    frame_cnt = int(((sig_len - wnd_len) / hop_len) + 1)
    assert( frame_cnt > 0)
    rmsV = np.zeros( (frame_cnt, ) )

    assert( wnd_len > 1 )

    for j in range(frame_cnt):

        # each window ends (exclusive) at sample index 'end'
        end = j * hop_len
        if end >= sig_len:
            break

        if end == 0:
            # nothing precedes sample 0: use the magnitude of the first sample
            rmsV[j] = np.sqrt(xV[0]*xV[0])
        else:
            # windows near the start are shortened to the samples that exist
            frameV  = xV[max(0, end-wnd_len):end]
            rmsV[j] = np.sqrt( np.mean( frameV * frameV ) )

    rms_srate = srate / hop_len
    return rms_to_db( rmsV, rms_srate, refWndMs ), rms_srate
63
+
64
+
65
def audio_stft_rms( srate, xV, rmsWndMs, hopMs, refWndMs, spectrumIdx ):
    """Measure the level of an audio signal from the per-frame maximum of its STFT.

    srate        audio sample rate in Hz
    xV           audio signal
    rmsWndMs     STFT window length in milliseconds
    hopMs        distance between successive STFT frames in milliseconds
    refWndMs     reference window length (ms) passed to rms_to_db()
    spectrumIdx  index of the STFT frame whose spectrum is returned (rounded to an integer)

    Returns (mV, rms_srate, specV, specHopIdx, binHz):
      mV          per-frame maximum of sqrt(|STFT|), converted with rms_to_db()
      rms_srate   srate / hop length in samples
      specV       sqrt(|STFT|) of frame specHopIdx
      specHopIdx  the rounded spectrumIdx
      binHz       srate / window length in samples
    """
    wnd_len = int(round( rmsWndMs * srate / 1000.0))
    hop_len = int(round( hopMs    * srate / 1000.0))
    binHz   = srate / wnd_len

    _, _, stftM = stft( xV, fs=srate, window="hann", nperseg=wnd_len, noverlap=wnd_len-hop_len, return_onesided=True )

    # the spectrum of the single requested frame
    specHopIdx = int(round( spectrumIdx ))
    specV      = np.sqrt(np.abs(stftM[:, specHopIdx ]))

    # largest value in every frame (column), stored as double precision
    peakV = np.max( np.sqrt(np.abs(stftM)), axis=0 ).astype(float)

    rms_srate = srate / hop_len
    levelDbV  = rms_to_db( peakV, rms_srate, refWndMs )

    return levelDbV, rms_srate, specV, specHopIdx, binHz
86
+
87
+
88
def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN  ):
    """Measure the level of an audio signal from the bands around the harmonics of a pitch.

    srate       audio sample rate in Hz
    xV          audio signal
    rmsWndMs    STFT window length in milliseconds
    hopMs       distance between successive STFT frames in milliseconds
    dbRefWndMs  reference window length (ms) passed to rms_to_db()
    midiPitch   MIDI pitch whose harmonic bands are located with calc_harm_bins()
    harmCandN   count of candidate harmonic bands to measure in every frame
    harmN       count of the strongest candidate bands averaged in every frame

    Returns (rmsDbV, rms_srate, binHz).
    """
    wnd_len = int(round( rmsWndMs * srate / 1000.0))
    hop_len = int(round( hopMs    * srate / 1000.0))

    binHz   = srate / wnd_len

    _, _, stftM = stft( xV, fs=srate, window="hann", nperseg=wnd_len, noverlap=wnd_len-hop_len, return_onesided=True )

    # only the lower and upper band edges are needed here
    loBinL, _, hiBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )
    bandL             = list(zip( loBinL, hiBinL ))

    magM    = np.sqrt(np.abs(stftM))
    frame_n = stftM.shape[1]
    levelV  = np.zeros((frame_n,))

    for frame_i in range(frame_n):
        frameV = magM[:,frame_i]

        # the largest value inside each candidate band
        bandPkV = np.array( [ np.max(frameV[lo:hi]) for lo,hi in bandL ], dtype=float )

        # average the harmN largest band peaks
        levelV[frame_i] = np.mean( sorted(bandPkV)[-harmN:] )

    rms_srate = srate / hop_len
    levelV    = rms_to_db( levelV, rms_srate, dbRefWndMs )
    return levelV, rms_srate, binHz
117
+    
118
+    
119
+               
120
def locate_peak_indexes( xV, xV_srate, eventMsL ):
    """Locate the index of the maximum value of xV[] inside each event window.

    xV        numpy vector to search
    xV_srate  sample rate of xV in Hz
    eventMsL  list of (begMs, endMs) pairs giving each window in milliseconds

    Returns a list with one index into xV[] per event.
    Raises ValueError if an event begins outside of xV[].
    """
    pkIdxL = []
    xN     = len(xV)

    for begMs, endMs in eventMsL:

        begSmpIdx = int(begMs * xV_srate / 1000.0)
        endSmpIdx = int(endMs * xV_srate / 1000.0)

        # a negative index would silently wrap around to the end of xV[]
        if not 0 <= begSmpIdx < xN:
            raise ValueError("The event beginning at %s ms is outside of the signal." % (begMs,))

        # an event shorter than one sample period still covers one sample
        endSmpIdx = max( endSmpIdx, begSmpIdx + 1 )

        pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )

    return pkIdxL
131
+
132
+
133
+
134
+
135
+
136
def rms_analysis_main( inDir, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3 ):
    """Analyze the audio of one take and locate the level peak of each note event.

    inDir       directory holding 'seq.json' and 'audio.wav'
    midi_pitch  MIDI pitch passed to audio_harm_rms()
    rmsWndMs, rmsHopMs, dbRefWndMs, harmCandN, harmN
                analysis parameters forwarded to audio_rms() and audio_harm_rms()

    Returns a SimpleNamespace with the fields:
      audio_srate  sample rate of the audio file
      tdRmsDbV     result vector of audio_rms()
      binHz        bin width returned by audio_harm_rms()
      rmsDbV       result vector of audio_harm_rms()
      rms_srate    sample rate of rmsDbV
      pkIdxL       indexes into rmsDbV[] of the peaks, one per event
      eventTimeL   'eventTimeL' from seq.json
      pkDbL        values of rmsDbV[] at pkIdxL
      pkUsL        'pulseUsL' from seq.json
    """
    with open( os.path.join( inDir, "seq.json"), "rb") as f:
        seqD = json.load(f)

    srate, signalM = wavfile.read( os.path.join( inDir, "audio.wav") )

    # NOTE(review): scaling by 1/0x7fff presumably assumes 16 bit samples - confirm
    sigV = signalM / float(0x7fff)

    # the sample rate returned by audio_rms() is not used
    tdRmsDbV, _ = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )

    rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midi_pitch, harmCandN, harmN  )

    pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, seqD['eventTimeL'] )

    return types.SimpleNamespace(
        audio_srate = srate,
        tdRmsDbV    = tdRmsDbV,
        binHz       = binHz,
        rmsDbV      = rmsDbV,
        rms_srate   = rms_srate,
        pkIdxL      = pkIdxL,            # pkIdxL[ len(pulsUsL) ] - indexes into rmsDbV[] of peaks
        eventTimeL  = seqD['eventTimeL'],
        pkDbL       = [ rmsDbV[ i ] for i in pkIdxL ],
        pkUsL       = seqD['pulseUsL'] )

Loading…
İptal
Kaydet