First working version.

This commit is contained in:
kpl 2019-09-01 10:54:09 -04:00
parent ac9061a72d
commit f69c71c498
7 changed files with 672 additions and 239 deletions

View File

@ -181,6 +181,14 @@ class AudioDevice(object):
return Result(smpN)
def buffer_sample_ms( self ):
    """Return the result of buffer_sample_count() with its value converted to milliseconds.

    The sample count is scaled by 1000/self.srate and truncated to an int.
    A falsy result from buffer_sample_count() is returned unchanged.
    """
    res = self.buffer_sample_count()

    if not res:
        return res

    # samples -> milliseconds (the result object is updated in place)
    res.value = int(res.value * 1000.0 / self.srate)
    return res
def linear_buffer( self ):
smpN = self.buffer_sample_count()

17
common.py Normal file
View File

@ -0,0 +1,17 @@
import yaml,types
def parse_yaml_cfg( fn ):
    """Load the YAML configuration file 'fn' and return its 'p_ac' section.

    The top-level 'p_ac' mapping is returned as a types.SimpleNamespace so
    that its entries can be read as attributes.
    """
    with open(fn,"r") as fh:
        # NOTE(review): yaml.FullLoader is meant for trusted files only;
        # confirm that the configuration never comes from an untrusted source.
        docD = yaml.load(fh, Loader=yaml.FullLoader)

    return types.SimpleNamespace(**docD['p_ac'])

View File

@ -4,7 +4,7 @@ from shutil import copyfile
def event_times( eventTimeFn ):
eventL = []
velL = []
with open(eventTimeFn,"r") as f:
rdr = csv.reader(f)
@ -14,21 +14,25 @@ def event_times( eventTimeFn ):
beginMs = int(row[1])
elif row[0] == 'key_down':
key_downMs = int(row[1]) - beginMs
vel = int(row[3])
elif row[0] == 'key_up':
key_upMs = row[1]
eventL.append( [ key_downMs, key_downMs+1000 ] )
velL.append( vel )
return eventL
return eventL,velL
def pulse_lengths( pulseLenFn ):
def pulse_lengths( pulseLenFn, velL ):
with open(pulseLenFn,'rb') as f:
d = pickle.load(f)
msL = d['msL']
# note: first posn in table is a multiplier
return [ msL[i]*msL[0] for i in range(1,len(msL))]
msL = [ msL[i]*msL[0] for i in range(1,len(msL))]
usL = [ msL[vel-1] for vel in velL ]
return usL
def convert( inDir, outDir ):
@ -41,40 +45,57 @@ def convert( inDir, outDir ):
if os.path.isdir(idir):
eventTimeFn = os.path.join( idir, "labels_0.csv" )
id = 0
while True:
eventTimeL = event_times(eventTimeFn)
pulseTimeFn = os.path.join( idir, "table_0.pickle")
eventTimeFn = os.path.join( idir, "labels_%i.csv" % (id) )
pulseUsL = pulse_lengths( pulseTimeFn )
if not os.path.isfile( eventTimeFn ):
break
eventTimeL,velL = event_times(eventTimeFn)
pitch = idir.split("/")[-1]
pulseTimeFn = os.path.join( idir, "table_%i.pickle" % (id))
d = {
"pulseUsL":pulseUsL,
"pitchL":[ pitch ],
"noteDurMs":1000,
"pauseDurMs":0,
"holdDutyPct":50,
"eventTimeL":eventTimeL,
"beginMs":0
}
pulseUsL = pulse_lengths( pulseTimeFn, velL )
odir = os.path.join( outDir, pitch )
if not os.path.isdir(odir):
os.mkdir(odir)
pitch = idir.split("/")[-1]
with open(os.path.join( odir, "seq.json" ),"w") as f:
f.write(json.dumps( d ))
if not pitch.isdigit():
break
copyfile( os.path.join(idir,"audio_0.wav"), os.path.join(odir,"audio.wav"))
d = {
"pulseUsL":pulseUsL,
"pitchL":[ pitch ],
"noteDurMs":1000,
"pauseDurMs":0,
"holdDutyPct":50,
"eventTimeL":eventTimeL,
"beginMs":0
}
odir = os.path.join( outDir, pitch )
if not os.path.isdir(odir):
os.mkdir(odir)
odir = os.path.join( odir, "%i" % (id) )
if not os.path.isdir(odir):
os.mkdir(odir)
with open(os.path.join( odir, "seq.json" ),"w") as f:
f.write(json.dumps( d ))
copyfile( os.path.join(idir,"audio_%i.wav" % (id)), os.path.join(odir,"audio.wav"))
id += 1
if __name__ == "__main__":
inDir = "/home/kevin/temp/picadae_ac_2/full_map"
outDir = "/home/kevin/temp/p_ac_3_cvt"
outDir = "/home/kevin/temp/p_ac_3_cvt/full_map"
#inDir = "/home/kevin/temp/picadae_ac_2/week_0"
#outDir = "/home/kevin/temp/p_ac_3_cvt/week_0"
convert( inDir, outDir )

121
p_ac.py
View File

@ -1,4 +1,4 @@
import sys,os,argparse,yaml,types,logging,select,time,json
import sys,os,argparse,types,logging,select,time,json
from datetime import datetime
import multiprocessing
@ -7,6 +7,8 @@ from multiprocessing import Process, Pipe
from picadae_api import Picadae
from AudioDevice import AudioDevice
from result import Result
from common import parse_yaml_cfg
from plot_seq import form_resample_pulse_time_list
from plot_seq import form_final_pulse_list
class AttackPulseSeq:
""" Sequence a fixed chord over a list of attack pulse lengths."""
@ -26,8 +28,9 @@ class AttackPulseSeq:
self.next_ms = 0 # Time of next event (note-on or note_off)
self.eventTimeL = [] # Onset/offset time of each note [ [onset_ms,offset_ms] ]
self.beginMs = 0
self.playOnlyFl = False
def start( self, ms, outDir, pitchL, pulseUsL ):
def start( self, ms, outDir, pitchL, pulseUsL, playOnlyFl=False ):
self.outDir = outDir # directory to write audio file and results
self.pitchL = pitchL # chord to play
self.pulseUsL = pulseUsL # one onset pulse length in microseconds per sequence element
@ -35,16 +38,27 @@ class AttackPulseSeq:
self.pulse_idx = 0
self.state = 'note_on'
self.next_ms = ms + 500 # wait for 500ms to play the first note (this will guarantee that there is some empty space in the audio file before the first note)
self.eventTimeL = [[0,0]] * len(pulseUsL) # initialize the event time
self.eventTimeL = [[0,0] for _ in range(len(pulseUsL))] # initialize the event time
self.beginMs = ms
self.audio.record_enable(True) # start recording audio
self.playOnlyFl = playOnlyFl
for pitch in pitchL:
self.api.set_pwm( pitch, self.holdDutyPct )
if not playOnlyFl:
self.audio.record_enable(True) # start recording audio
self.tick(ms) # play the first note
def stop(self, ms):
self._send_note_off() # be sure that all notes are actually turn-off
self.audio.record_enable(False) # stop recording audio
if not self.playOnlyFl:
self.audio.record_enable(False) # stop recording audio
self._disable() # disable this sequencer
self._write() # write the results
if not self.playOnlyFl:
self._write() # write the results
def is_enabled(self):
return self.state is not None
@ -75,7 +89,8 @@ class AttackPulseSeq:
def _note_on( self, ms ):
self.eventTimeL[ self.pulse_idx ][0] = ms - self.beginMs
#self.eventTimeL[ self.pulse_idx ][0] = ms - self.beginMs
self.eventTimeL[ self.pulse_idx ][0] = self.audio.buffer_sample_ms().value
self.next_ms = ms + self.noteDurMs
self.state = 'note_off'
@ -84,7 +99,8 @@ class AttackPulseSeq:
print("note-on:",pitch,self.pulse_idx)
def _note_off( self, ms ):
self.eventTimeL[ self.pulse_idx ][1] = ms - self.beginMs
#self.eventTimeL[ self.pulse_idx ][1] = ms - self.beginMs
self.eventTimeL[ self.pulse_idx ][1] = self.audio.buffer_sample_ms().value
self.next_ms = ms + self.pauseDurMs
self.state = 'note_on'
@ -130,19 +146,17 @@ class CalibrateKeys:
self.cfg = cfg
self.seq = AttackPulseSeq( audioDev, api, noteDurMs=1000, pauseDurMs=1000, holdDutyPct=50 )
self.label = None
self.pulseUsL = None
self.chordL = None
self.pitch_idx = -1
def start( self, ms, label, chordL, pulseUsL ):
def start( self, ms, chordL, pulseUsL, playOnlyFl=False ):
if len(chordL) > 0:
self.label = label
self.pulseUsL = pulseUsL
self.chordL = chordL
self.pitch_idx = -1
self._start_next_chord( ms )
self._start_next_chord( ms, playOnlyFl )
def stop( self, ms ):
@ -158,12 +172,13 @@ class CalibrateKeys:
# if the sequencer is done playing
if not self.seq.is_enabled():
self._start_next_chord( ms ) # ... else start the next sequence
self._start_next_chord( ms, self.seq.playOnlyFl ) # ... else start the next sequence
return None
def _start_next_chord( self, ms ):
def _start_next_chord( self, ms, playOnlyFl ):
self.pitch_idx += 1
# if the last chord in chordL has been played ...
@ -179,15 +194,44 @@ class CalibrateKeys:
os.mkdir( outDir )
# form the output directory as "<label>_<pitch0>_<pitch1> ... "
dirStr = self.label + "_" + "_".join([ str(pitch) for pitch in pitchL ])
dirStr = "_".join([ str(pitch) for pitch in pitchL ])
outDir = os.path.join(outDir, dirStr )
# start the sequencer
self.seq.start( ms, outDir, pitchL, self.pulseUsL )
print(outDir)
if not os.path.isdir(outDir):
os.mkdir(outDir)
# get the next available output directory id
outDir_id = self._calc_next_out_dir_id( outDir )
# if this is not the first time this note has been sampled then get the resample locations
if outDir_id != 0:
self.pulseUsL,_,_ = form_resample_pulse_time_list( outDir, self.cfg.analysisArgs )
if playOnlyFl:
self.pulseUsL,_ = form_final_pulse_list( outDir, pitchL[0], self.cfg.analysisArgs, take_id=None )
outDir = os.path.join( outDir, str(outDir_id) )
if not os.path.isdir(outDir):
os.mkdir(outDir)
# start the sequencer
self.seq.start( ms, outDir, pitchL, self.pulseUsL, playOnlyFl )
def _calc_next_out_dir_id( self, outDir ):
id = 0
while os.path.isdir( os.path.join(outDir,"%i" % id)):
id += 1
return id
# This is the main application API it is running in a child process.
class App:
def __init__(self ):
@ -208,7 +252,9 @@ class App:
res = self.audioDev.setup(**cfg.audio)
if res:
if not res:
self.audio_dev_list(0)
else:
self.api = Picadae( key_mapL=cfg.key_mapL)
# wait for the letter 'a' to come back from the serial port
@ -237,7 +283,11 @@ class App:
def calibrate_keys_start( self, ms, pitchRangeL ):
chordL = [ [pitch] for pitch in range(pitchRangeL[0], pitchRangeL[1]+1)]
self.calibrate.start( ms, "full", chordL, cfg.full_pulseL )
self.calibrate.start( ms, chordL, cfg.full_pulseL )
def play_keys_start( self, ms, pitchRangeL ):
    """Start play-only sequencing (playOnlyFl=True) of every pitch in the
    inclusive range pitchRangeL[0] .. pitchRangeL[1]."""
    firstPitch = pitchRangeL[0]
    lastPitch  = pitchRangeL[1]

    # one single-note chord per pitch
    chordL = [ [p] for p in range(firstPitch, lastPitch+1) ]

    self.calibrate.start( ms, chordL, cfg.full_pulseL, playOnlyFl=True )
def calibrate_keys_stop( self, ms ):
self.calibrate.stop(ms)
@ -299,7 +349,7 @@ def app_event_loop_func( pipe, cfg ):
func = getattr(app,msg.type)
ms = int(round( (datetime.now() - dt0).total_seconds() * 1000.0) )
# call the command handler
if msg.value:
res = func( ms, msg.value )
@ -320,7 +370,7 @@ def app_event_loop_func( pipe, cfg ):
# calc the tick() time stamp
ms = int(round( (datetime.now() - dt0).total_seconds() * 1000.0) )
# tick the app
app.tick( ms )
@ -362,8 +412,9 @@ class Shell:
'q':{ "func":'quit', "minN":0, "maxN":0, "help":"quit"},
'?':{ "func":"_help", "minN":0, "maxN":0, "help":"Print usage text."},
'a':{ "func":"audio_dev_list", "minN":0, "maxN":0, "help":"List the audio devices."},
'c':{ "func":"calibrate_keys_start", "minN":1, "maxN":2, "help":"Calibrate a range of keys."},
's':{ "func":"calibrate_keys_stop", "minN":0, "maxN":0, "help":"Stop key calibration"}
'c':{ "func":"calibrate_keys_start", "minN":1, "maxN":2, "help":"Calibrate a range of keys. "},
's':{ "func":"calibrate_keys_stop", "minN":0, "maxN":0, "help":"Stop key calibration"},
'p':{ "func":"play_keys_start", "minN":1, "maxN":2, "help":"Play current calibration"}
}
def _help( self, _=None ):
@ -405,7 +456,6 @@ class Shell:
if d['minN'] != -1 and (d['minN'] > len(argL) or len(argL) > d['maxN']):
return self._syntaxError("Argument count mismatch. {} is out of range:{} to {}".format(len(argL),d['minN'],d['maxN']))
# call the command function
if func:
result = func(*argL)
@ -457,8 +507,8 @@ class Shell:
while not self.appProc.isdone():
self.appProc.recv() # drain the AppProc() as it shutdown
time.sleep(0.1)
def _handle_app_msgs( self, msgL ):
quitAppFl = False
if msgL:
@ -489,21 +539,6 @@ def parse_args():
return ap.parse_args()
def parse_yaml_cfg( fn ):
"""Parse the YAML configuration file."""
cfg = None
with open(fn,"r") as f:
cfgD = yaml.load(f, Loader=yaml.FullLoader)
cfg = types.SimpleNamespace(**cfgD['p_ac'])
return cfg
if __name__ == "__main__":

View File

@ -18,13 +18,31 @@
# MeasureSeq args
outDir: "~/temp/p_ac3",
outDir: "~/temp/p_ac_3",
noteDurMs: 1000,
pauseDurMs: 1000,
holdDutyPct: 50,
#full_pulseL: [ 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 8000, 9000, 10000, 12000, 14000, 18000, 22000, 26000, 30000, 34000, 40000],
full_pulseL: [ 18000, 22000, 26000, 30000, 34000, 40000],
full_pulse0L: [ 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 8000, 9000, 10000, 12000, 14000, 18000, 22000, 26000, 30000, 34000, 40000],
full_pulse1L: [ 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 20000, 22000, 24000, 26000, 30000, 32000, 34000, 36000, 40000],
full_pulseL: [ 10000, 10500, 11000, 11500, 12000, 12500, 13000, 13500, 14000, 14500, 15000, 15500, 16000, 16500, 17000, 17500, 18000, 18500, 20000, 22000, 24000, 26000, 30000, 32000, 34000, 36000, 40000],
# RMS analysis args
analysisArgs: {
rmsAnalysisArgs: {
rmsWndMs: 300, # length of the RMS measurement window
rmsHopMs: 30, # RMS measurement inter window distance
dbRefWndMs: 500, # length of initial portion of signal to use to calculate the dB reference level
harmCandN: 5, # count of harmonic candidates to locate during harmonic based RMS analysis
harmN: 3, # count of harmonics to use to calculate harmonic based RMS analysis
},
minAttkDb: 5.0, # threshold of silence level
maxDbOffset: 0.5, # travel down from the max. note level by at most this amount to locate the max. peak
maxDeltaDb: 2.0, # maximum db change between volume samples (changes greater than this will trigger resampling)
samplesPerDb: 4, # count of samples per dB to resample ranges whose range is less than maxDeltaDb
minSampleDistUs: 500 # minimum distance between sample points in microseconds
},
key_mapL: [

View File

@ -1,8 +1,8 @@
import os, sys, json
from scipy.io import wavfile
from scipy.signal import stft
import os, sys
import matplotlib.pyplot as plt
import numpy as np
from common import parse_yaml_cfg
from rms_analysis import rms_analysis_main
def is_nanV( xV ):
@ -12,10 +12,265 @@ def is_nanV( xV ):
return False
def find_min_max_peak_index( rmsV, pkIdxL, minDb, maxDbOffs=0.5 ):
def _find_max_take_id( inDir ):
id = 0
while os.path.isdir( os.path.join(inDir, "%i" % id) ):
id += 1
if id > 0:
id -= 1
return id
def form_final_pulse_list( inDir, midi_pitch, analysisArgsD, take_id=None ):
    """Form a 128 entry velocity table from all of the takes recorded for one note.

    inDir         -- directory holding one sub-directory per take ("0", "1", ...)
    midi_pitch    -- MIDI pitch passed through to rms_analysis_main()
    analysisArgsD -- analysis arguments; 'rmsAnalysisArgs', 'minSampleDistUs',
                     'minAttkDb' and 'maxDbOffset' are read here
    take_id       -- unused (the single-take code path was disabled); kept so
                     that existing callers keep working

    Returns (pulseUsL, pulseDbL): for each of the 128 velocity steps the
    attack pulse length in microseconds and the dB level associated with it.
    """
    pkL = []

    # for each take in this directory
    for idir in os.listdir(inDir):

        # take directories are named with their integer take number;
        # any other directory entry raises ValueError here
        int(idir)

        # analyze this take's audio and locate the note peaks
        r = rms_analysis_main( os.path.join(inDir,idir), midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )

        # store the peaks in pkL[ (db,us) ]
        pkL.extend( zip(r.pkDbL, r.pkUsL) )

    # sort the peaks on increasing attack pulse microseconds
    pkL = sorted( pkL, key=lambda x: x[1] )

    # merge sample points that are separated by less than 'minSampleDistUs' microseconds
    pkL = merge_close_sample_points( pkL, analysisArgsD['minSampleDistUs'] )

    # split pkL
    pkDbL,pkUsL = tuple(zip(*pkL))

    # locate the first and last note
    min_pk_idx, max_pk_idx = find_min_max_peak_index( pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )

    db1 = pkDbL[ max_pk_idx ]
    db0 = pkDbL[ min_pk_idx ]

    pulseUsL = []
    pulseDbL = []
    multValL = []

    for out_idx in range(128):

        # calc the target volume
        targetDb = db0 + (out_idx * (db1-db0)/127.0)

        foundFl = False
        multi_value_count = 0

        # look for the target between each of the sampled points
        for i in range(1,len(pkDbL)):

            # if the target volume is between these two sample points
            if pkDbL[i-1] <= targetDb and targetDb < pkDbL[i]:

                if not foundFl:
                    # interpolate the pulse time from between the sampled points
                    frac = (targetDb - pkDbL[i-1]) / (pkDbL[i] - pkDbL[i-1])
                    pulseUsL.append( pkUsL[i-1] + frac * (pkUsL[i] - pkUsL[i-1]) )
                    pulseDbL.append( pkDbL[i-1] + frac * (pkDbL[i] - pkDbL[i-1]) )
                    foundFl = True
                else:
                    # this target db value was found between multiple sampled points
                    # therefore the sampled volume function is not monotonic
                    multi_value_count += 1

        if not foundFl:
            # No pair of samples brackets the target.  This happens for the
            # loudest target when it equals the highest sampled peak (the
            # upper comparison above is strict).  Use the nearest sampled
            # peak so that the table always has one entry per velocity step
            # and later entries are not mistaken for multi-value hits.
            near_i = min( range(len(pkDbL)), key=lambda k: abs(pkDbL[k] - targetDb) )
            pulseUsL.append( pkUsL[near_i] )
            pulseDbL.append( pkDbL[near_i] )

        if multi_value_count > 0:
            multValL.append((out_idx,multi_value_count))

    if len(multValL) > 0:
        print("Multi-value pulse locations were found during velocity table formation: ",multValL)

    return pulseUsL,pulseDbL
def merge_close_sample_points( pkDbUsL, minSampleDistanceUs ):
    """Merge neighbouring sample points that are closer than minSampleDistanceUs.

    pkDbUsL             -- list of (db,us) tuples sorted on increasing 'us'
    minSampleDistanceUs -- minimum distance, in microseconds, between two points

    A point that is closer than minSampleDistanceUs to the point before it is
    replaced, together with that point, by the average of the two.  The list
    is modified in place and is also returned.
    """
    n0 = len(pkDbUsL)

    # with fewer than two points there is nothing to merge and no spacing to report
    if n0 < 2:
        return pkDbUsL

    avg0Us = np.mean(np.diff([ x[1] for x in pkDbUsL ]))

    # Single pass: compare each point to the last point kept.  Averaging only
    # moves the kept point later in time, so the distance to the points
    # before it never shrinks and they need not be re-examined.
    mergedL = [ pkDbUsL[0] ]
    for db,us in pkDbUsL[1:]:
        db0,us0 = mergedL[-1]
        if us - us0 < minSampleDistanceUs:
            mergedL[-1] = ( (db0 + db)/2, (us0 + us)/2 )
        else:
            mergedL.append( (db,us) )

    pkDbUsL[:] = mergedL

    print("%i sample points deleted by merging close points." % (n0 - len(pkDbUsL)))

    # the spacing statistics are only defined for two or more points
    if len(pkDbUsL) > 1:
        usDiffV = np.diff([ x[1] for x in pkDbUsL ])
        print("Mean time between samples - before:%f after:%f " % (avg0Us,np.mean(usDiffV)))
        print("Min time between samples: %i " % (np.min(usDiffV)))

    return pkDbUsL
def calc_resample_ranges( pkDbL, pkUsL, min_pk_idx, max_pk_idx, maxDeltaDb, samplePerDb ):
    """Select the pulse times, in microseconds, at which a note should be re-sampled.

    pkDbL, pkUsL -- peak level (dB) and pulse length (us) of each sampled note
    min_pk_idx   -- index of the first peak to examine (must not be 0)
    max_pk_idx   -- index of the last peak to examine
    maxDeltaDb   -- largest acceptable rise in dB between successive peaks
    samplePerDb  -- count of new sample points per dB (also the minimum count per range)

    Returns a set of pulse times, or None if min_pk_idx is 0.
    """
    if min_pk_idx == 0:
        print("No silent notes were generated. Decrease the minimum peak level or the hold voltage.")
        return None

    def _span_us( i0, i1 ):
        # evenly spaced pulse times starting at pkUsL[i0] and stopping short of pkUsL[i1]
        cnt    = max(int(round(abs(pkDbL[i1] - pkDbL[i0]) * samplePerDb)),samplePerDb)
        stepUs = int(round((pkUsL[i1] - pkUsL[i0])/cnt))
        return [ pkUsL[i0] + stepUs*k for k in range(cnt) ]

    usSet   = set()
    refPkDb = pkDbL[min_pk_idx]   # highest peak level seen so far

    for i in range( min_pk_idx, max_pk_idx+1 ):

        deltaDb = pkDbL[i] - pkDbL[i-1]

        # if this peak is more than maxDeltaDb above the previous peak, is not
        # above the previous peak, or is below the highest peak seen so far ...
        if deltaDb > maxDeltaDb or deltaDb <= 0 or pkDbL[i] < refPkDb:

            # ... then resample the range leading up to this peak ...
            usSet.update( _span_us(i-1, i) )

            # ... and the range following it
            if i + 1 < len(pkDbL):
                usSet.update( _span_us(i, i+1) )

        if pkDbL[i] > refPkDb:
            refPkDb = pkDbL[i]

    return usSet
def form_resample_pulse_time_list( inDir, analysisArgsD ):
    """Merge the data from all previous takes of a note to form a new list of pulse times to sample.

    inDir         -- note directory; its last path component is the MIDI pitch
                     and it holds one sub-directory per take ("0", "1", ...)
    analysisArgsD -- analysis arguments; 'rmsAnalysisArgs', 'minSampleDistUs',
                     'minAttkDb', 'maxDbOffset', 'maxDeltaDb' and 'samplesPerDb'
                     are read here

    Returns (resampleUsL, pkDbL, pkUsL): the sorted pulse times to sample
    next and the merged peak levels / pulse times of all takes.  resampleUsL
    is empty when calc_resample_ranges() could not calculate any locations.
    """

    # the last folder is always the midi pitch of the note under analysis
    # (normpath drops a trailing path separator before the name is taken)
    midi_pitch = int( os.path.basename( os.path.normpath(inDir) ) )

    pkL = []

    # for each take in this directory
    for idir in os.listdir(inDir):

        # take directories are named with their integer take number;
        # any other directory entry raises ValueError here
        int(idir)

        # analyze this take's audio and locate the note peaks
        r = rms_analysis_main( os.path.join(inDir,idir), midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )

        # store the peaks in pkL[ (db,us) ]
        pkL.extend( zip(r.pkDbL, r.pkUsL) )

    # sort the peaks on increasing attack pulse microseconds
    pkL = sorted( pkL, key=lambda x: x[1] )

    # merge sample points that are separated by less than 'minSampleDistUs' microseconds
    pkL = merge_close_sample_points( pkL, analysisArgsD['minSampleDistUs'] )

    # split pkL
    pkDbL,pkUsL = tuple(zip(*pkL))

    # locate the first and last note
    min_pk_idx, max_pk_idx = find_min_max_peak_index( pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )

    # estimate the microsecond locations to resample
    resampleUsSet = calc_resample_ranges( pkDbL, pkUsL, min_pk_idx, max_pk_idx, analysisArgsD['maxDeltaDb'], analysisArgsD['samplesPerDb'] )

    # calc_resample_ranges() returns None when no silent note was found;
    # report that as "nothing to resample" rather than failing on sorted(None)
    resampleUsL = sorted( resampleUsSet ) if resampleUsSet is not None else []

    return resampleUsL, pkDbL, pkUsL
def plot_resample_pulse_times( inDir, analysisArgsD ):
    """Plot the merged peak curve of a note, the proposed resample pulse times
    (vertical lines) and the velocity table points formed from the same data."""

    resampleUsL, pkDbL, pkUsL = form_resample_pulse_time_list( inDir, analysisArgsD )

    # the last folder of inDir is the midi pitch
    pitch = int( inDir.split("/")[-1] )

    tblUsL,tblDbL = form_final_pulse_list( inDir, pitch, analysisArgsD, take_id=None )

    fig,ax = plt.subplots()

    # sampled peak level as a function of pulse length
    ax.plot(pkUsL,pkDbL )

    # one vertical line per proposed resample location
    for resampleUs in resampleUsL:
        ax.axvline( x = resampleUs )

    # velocity table entries
    ax.plot(tblUsL,tblDbL,marker='.',linestyle='None')

    plt.show()
def find_min_max_peak_index( pkDbL, minDb, maxDbOffs ):
"""
Find the min db and max db peak.
"""
# select only the peaks from rmsV[] to work with
yV = rmsV[ pkIdxL ]
yV = pkDbL
# get the max volume note
max_i = np.argmax( yV )
@ -37,10 +292,16 @@ def find_min_max_peak_index( rmsV, pkIdxL, minDb, maxDbOffs=0.5 ):
min_i = i
assert( min_i < max_i )
if min_i == 0:
print("No silent notes were generated. Decrease the minimum peak level or the hold voltage.")
return min_i, max_i
def find_skip_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx ):
""" Fine peaks associated with longer attacks pulses that are lower than peaks with a shorter attack pulse.
These peaks indicate degenerate portions of the pulse/db curve which must be skipped during velocity table formation
"""
skipPkIdxL = []
yV = rmsV[pkIdxL]
refPkDb = yV[min_pk_idx]
@ -54,131 +315,22 @@ def find_skip_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx ):
return skipPkIdxL
def calc_harm_bins( srate, binHz, midiPitch, harmN ):
def find_out_of_range_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx, maxDeltaDb ):
""" Locate peaks which are more than maxDeltaDb from the previous peak.
If two peaks are separated by more than maxDeltaDb then the range must be resampled
"""
semi_tone = 1.0/12
quarter_tone = 1.0/24
eigth_tone = 1.0/48
band_width_st = 3.0/48 # 3/8 tone
fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))
fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)]
fund_m_binL = [int(round(fundHz * i/binHz)) for i in range(1,harmN+1)]
fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)]
oorPkIdxL = []
yV = rmsV[pkIdxL]
for i in range(len(fund_m_binL)):
if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
fund_l_binL[i] = fund_m_binL[i] - 1
for i in range( min_pk_idx, max_pk_idx+1 ):
if i > 0:
d = yV[i] - yV[i-1]
if d > maxDeltaDb or d < 0:
oorPkIdxL.append(i)
if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1:
fund_u_binL[i] = fund_m_binL[i] + 1
return fund_l_binL, fund_m_binL, fund_u_binL
def rms_to_db( xV, rms_srate, refWndMs ):
dbWndN = int(round(refWndMs * rms_srate / 1000.0))
dbRef = ref = np.mean(xV[0:dbWndN])
rmsDbV = 20.0 * np.log10( xV / dbRef )
return oorPkIdxL
return rmsDbV
def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs ):
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
hopSmpN = int(round( hopMs * srate / 1000.0))
xN = xV.shape[0]
yN = int(((xN - wndSmpN) / hopSmpN) + 1)
assert( yN > 0)
yV = np.zeros( (yN, ) )
assert( wndSmpN > 1 )
i = 0
j = 0
while i < xN and j < yN:
if i == 0:
yV[j] = np.sqrt(xV[0]*xV[0])
elif i < wndSmpN:
yV[j] = np.sqrt( np.mean( xV[0:i] * xV[0:i] ) )
else:
yV[j] = np.sqrt( np.mean( xV[i-wndSmpN:i] * xV[i-wndSmpN:i] ) )
i += hopSmpN
j += 1
rms_srate = srate / hopSmpN
return rms_to_db( yV, rms_srate, refWndMs ), rms_srate
def audio_stft_rms( srate, xV, rmsWndMs, hopMs, refWndMs, spectrumIdx ):
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
hopSmpN = int(round( hopMs * srate / 1000.0))
binHz = srate / wndSmpN
f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
specHopIdx = int(round( spectrumIdx ))
specV = np.sqrt(np.abs(xM[:, specHopIdx ]))
mV = np.zeros((xM.shape[1]))
for i in range(xM.shape[1]):
mV[i] = np.max(np.sqrt(np.abs(xM[:,i])))
rms_srate = srate / hopSmpN
mV = rms_to_db( mV, rms_srate, refWndMs )
return mV, rms_srate, specV, specHopIdx, binHz
def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ):
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
hopSmpN = int(round( hopMs * srate / 1000.0))
binHz = srate / wndSmpN
f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )
rmsV = np.zeros((xM.shape[1],))
for i in range(xM.shape[1]):
mV = np.sqrt(np.abs(xM[:,i]))
pV = np.zeros((len(harmLBinL,)))
for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )):
pV[j] = np.max(mV[b0i:b1i])
rmsV[i] = np.mean( sorted(pV)[-harmN:] )
rms_srate = srate / hopSmpN
rmsV = rms_to_db( rmsV, rms_srate, dbRefWndMs )
return rmsV, rms_srate, binHz
def locate_peak_indexes( xV, xV_srate, eventMsL ):
pkIdxL = []
for begMs, endMs in eventMsL:
begSmpIdx = int(begMs * xV_srate / 1000.0)
endSmpIdx = int(endMs * xV_srate / 1000.0)
pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )
return pkIdxL
def plot_spectrum( ax, srate, binHz, specV, midiPitch, harmN ):
@ -249,68 +401,82 @@ def plot_spectral_ranges( inDir, pitchL, rmsWndMs=300, rmsHopMs=30, harmN=5, dbR
def do_td_plot( inDir ):
rmsWndMs = 300
rmsHopMs = 30
dbRefWndMs = 500
harmCandN = 5
harmN = 3
minAttkDb = 5.0
seqFn = os.path.join( inDir, "seq.json")
audioFn = os.path.join( inDir, "audio.wav")
midiPitch = int(inDir.split("/")[-1])
def td_plot( ax, inDir, midi_pitch, id, analysisArgsD ):
with open( seqFn, "rb") as f:
r = json.load(f)
r = rms_analysis_main( inDir, midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )
min_pk_idx, max_pk_idx = find_min_max_peak_index( r.pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )
skipPkIdxL = find_skip_peaks( r.rmsDbV, r.pkIdxL, min_pk_idx, max_pk_idx )
jmpPkIdxL = find_out_of_range_peaks( r.rmsDbV, r.pkIdxL, min_pk_idx, max_pk_idx, analysisArgsD['maxDeltaDb'] )
secV = np.arange(0,len(r.rmsDbV)) / r.rms_srate
srate, signalM = wavfile.read(audioFn)
sigV = signalM / float(0x7fff)
rms0DbV, rms0_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )
ax.plot( secV, r.rmsDbV )
ax.plot( np.arange(0,len(r.tdRmsDbV)) / r.rms_srate, r.tdRmsDbV, color="black" )
rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midiPitch, harmCandN, harmN )
pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, r['eventTimeL'] )
min_pk_idx, max_pk_idx = find_min_max_peak_index( rmsDbV, pkIdxL, minAttkDb )
skipPkIdxL = find_skip_peaks( rmsDbV, pkIdxL, min_pk_idx, max_pk_idx )
fig,ax = plt.subplots()
fig.set_size_inches(18.5, 10.5, forward=True)
secV = np.arange(0,len(rmsDbV)) / rms_srate
ax.plot( secV, rmsDbV )
ax.plot( np.arange(0,len(rms0DbV)) / rms0_srate, rms0DbV, color="black" )
# print beg/end boundaries
for i,(begMs, endMs) in enumerate(r['eventTimeL']):
for i,(begMs, endMs) in enumerate(r.eventTimeL):
ax.axvline( x=begMs/1000.0, color="green")
ax.axvline( x=endMs/1000.0, color="red")
ax.text(begMs/1000.0, 20.0, str(i) )
return
# plot peak markers
for i,pki in enumerate(pkIdxL):
marker = "o" if i==min_pk_idx or i==max_pk_idx else "."
for i,pki in enumerate(r.pkIdxL):
marker = 4 if i==min_pk_idx or i==max_pk_idx else 5
color = "red" if i in skipPkIdxL else "black"
ax.plot( [pki / rms_srate], [ rmsDbV[pki] ], marker=marker, color=color)
ax.plot( [pki / r.rms_srate], [ r.rmsDbV[pki] ], marker=marker, color=color)
if i in jmpPkIdxL:
ax.plot( [pki / r.rms_srate], [ r.rmsDbV[pki] ], marker=6, color="blue")
plt.show()
def do_td_plot( inDir, analysisArgs ):
    """Plot a single take.  inDir must end in '<midi_pitch>/<take id>'."""

    fig,ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5, forward=True)

    # the last two folders of inDir are the midi pitch and the take id
    pathL      = inDir.split("/")
    take_id    = int(pathL[-1])
    midi_pitch = int(pathL[-2])

    td_plot(ax,inDir,midi_pitch,take_id,analysisArgs)

    plt.show()
def do_td_multi_plot( inDir, analysisArgs ):
    """Plot every take of a note, one subplot per take.

    inDir        -- note directory; its last path component is the MIDI pitch
                    and its takes are expected in the sub-directories
                    "0", "1", ... (one per directory entry)
    analysisArgs -- analysis arguments passed through to td_plot()
    """
    midi_pitch = int(inDir.split("/")[-1])

    dirL = os.listdir(inDir)

    if not dirL:
        print("No takes were found in:",inDir)
        return

    # squeeze=False always returns a 2D array of axes; without it a single
    # take yields a bare Axes object which cannot be iterated
    fig,axM = plt.subplots(len(dirL),1,squeeze=False)

    for take_id,ax in enumerate(axM[:,0]):
        td_plot(ax, os.path.join(inDir,str(take_id)), midi_pitch, take_id, analysisArgs )

    plt.show()
if __name__ == "__main__":
inDir = sys.argv[1]
cfgFn = sys.argv[2]
do_td_plot(inDir)
cfg = parse_yaml_cfg( cfgFn )
#do_td_plot(inDir,cfg.analysisArgs)
#o_td_multi_plot(inDir,cfg.analysisArgs)
#plot_spectral_ranges( inDir, [ 24, 36, 48, 60, 72, 84, 96, 104] )
plot_resample_pulse_times( inDir, cfg.analysisArgs )

168
rms_analysis.py Normal file
View File

@ -0,0 +1,168 @@
import os,types,json
from scipy.io import wavfile
from scipy.signal import stft
import numpy as np
def calc_harm_bins( srate, binHz, midiPitch, harmN ):
    """Return the spectral bin ranges of the first harmN harmonics of a MIDI pitch.

    srate     -- audio sample rate in Hz
    binHz     -- width of one spectral bin in Hz
    midiPitch -- MIDI pitch number of the fundamental
    harmN     -- count of harmonics (the fundamental is harmonic 1)

    Returns (fund_l_binL, fund_m_binL, fund_u_binL): the lower, center and
    upper bin index of each harmonic.  The lower/upper bins lie a fixed
    fraction of an octave below/above the harmonic.  When rounding collapses
    a band onto its center bin the band is widened by one bin on that side so
    that the half open range [lower:upper) always contains the center bin.
    """
    band_width_st = 3.0/48  # 3/8 tone

    # 13.75 Hz is MIDI pitch 9, so this is the frequency of MIDI pitch 0
    # scaled up by midiPitch semitones
    fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))

    # index of the highest bin of a one-sided spectrum
    nyquist_bin = int(srate / (2.0*binHz))

    fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)]
    fund_m_binL = [int(round(fundHz * i/binHz)) for i in range(1,harmN+1)]
    fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)]

    for i in range(len(fund_m_binL)):

        if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
            fund_l_binL[i] = fund_m_binL[i] - 1

        # the limit is the spectrum size, not the count of harmonics
        if fund_u_binL[i] <= fund_m_binL[i] and fund_m_binL[i] <= nyquist_bin:
            fund_u_binL[i] = fund_m_binL[i] + 1

    return fund_l_binL, fund_m_binL, fund_u_binL
def rms_to_db( xV, rms_srate, refWndMs ):
    """Convert the values of xV to decibels relative to the mean of the
    first refWndMs milliseconds of xV (rms_srate is the sample rate of xV)."""

    # count of leading samples used to form the reference level
    refSmpN = int(round(refWndMs * rms_srate / 1000.0))

    refLevel = np.mean(xV[:refSmpN])

    return 20.0 * np.log10( xV / refLevel )
def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs ):
    """Measure the RMS level of xV over a sliding window and convert it to dB.

    srate    -- sample rate of xV
    xV       -- audio signal vector
    rmsWndMs -- length of the RMS window in milliseconds
    hopMs    -- distance between successive windows in milliseconds
    refWndMs -- length of the leading portion of the RMS signal used as the dB reference

    Returns (rmsDbV, rms_srate).
    """
    wndN = int(round( rmsWndMs * srate / 1000.0))
    hopN = int(round( hopMs    * srate / 1000.0))
    smpN = xV.shape[0]
    outN = int(((smpN - wndN) / hopN) + 1)

    assert( outN > 0)

    outV = np.zeros( (outN, ) )

    assert( wndN > 1 )

    for j in range(outN):

        # index of the sample just past the end of window j
        endIdx = j * hopN

        if endIdx >= smpN:
            break

        if endIdx == 0:
            # no preceding samples: use the first sample alone
            outV[j] = np.sqrt(xV[0]*xV[0])
        else:
            # the window is shorter than wndN until wndN samples have gone by
            segV    = xV[ max(0,endIdx-wndN) : endIdx ]
            outV[j] = np.sqrt( np.mean( segV * segV ) )

    rms_srate = srate / hopN

    return rms_to_db( outV, rms_srate, refWndMs ), rms_srate
def audio_stft_rms( srate, xV, rmsWndMs, hopMs, refWndMs, spectrumIdx ):
    """Track the level of the strongest STFT bin of xV over time, in dB.

    Returns (mV, rms_srate, specV, specHopIdx, binHz) where mV holds the dB
    level of each STFT frame, specV is the (square rooted) magnitude spectrum
    of frame specHopIdx and binHz is the width of one spectral bin.
    """
    wndN  = int(round( rmsWndMs * srate / 1000.0))
    hopN  = int(round( hopMs    * srate / 1000.0))
    binHz = srate / wndN

    _,_,specM = stft( xV, fs=srate, window="hann", nperseg=wndN, noverlap=wndN-hopN, return_onesided=True )

    # square root of the magnitude of every bin of every frame
    magM = np.sqrt(np.abs(specM))

    specHopIdx = int(round( spectrumIdx ))
    specV      = magM[:, specHopIdx ]

    # strongest bin of each frame
    peakV = magM.max(axis=0).astype(float)

    rms_srate = srate / hopN

    return rms_to_db( peakV, rms_srate, refWndMs ), rms_srate, specV, specHopIdx, binHz
def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ):
    """Measure the level of xV over time from the strongest harmonics of midiPitch.

    For every STFT frame the peak level inside the band of each of the first
    harmCandN harmonics is located and the harmN largest of these peaks are
    averaged.  The result is converted to dB.

    Returns (rmsV, rms_srate, binHz).
    """
    wndN  = int(round( rmsWndMs * srate / 1000.0))
    hopN  = int(round( hopMs    * srate / 1000.0))
    binHz = srate / wndN

    _,_,specM = stft( xV, fs=srate, window="hann", nperseg=wndN, noverlap=wndN-hopN, return_onesided=True )

    # lower and upper bin of the band around each candidate harmonic
    loBinL,_,hiBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )

    frameN = specM.shape[1]
    rmsV   = np.zeros((frameN,))

    for frame_idx in range(frameN):

        magV = np.sqrt(np.abs(specM[:,frame_idx]))

        # peak level inside each harmonic band
        candV = np.array([ np.max(magV[lo:hi]) for lo,hi in zip(loBinL,hiBinL) ], dtype=float)

        # average the harmN strongest candidates
        rmsV[frame_idx] = np.mean( sorted(candV)[-harmN:] )

    rms_srate = srate / hopN

    return rms_to_db( rmsV, rms_srate, dbRefWndMs ), rms_srate, binHz
def locate_peak_indexes( xV, xV_srate, eventMsL ):
    """Return the index into xV of the maximum value inside each event.

    xV       -- signal vector
    xV_srate -- sample rate of xV
    eventMsL -- list of [begin,end] event times in milliseconds
    """
    def _ms_to_idx( ms ):
        return int(ms * xV_srate / 1000.0)

    def _peak_idx( begMs, endMs ):
        begIdx = _ms_to_idx(begMs)
        endIdx = _ms_to_idx(endMs)
        return begIdx + np.argmax( xV[begIdx:endIdx] )

    return [ _peak_idx(begMs,endMs) for begMs,endMs in eventMsL ]
def rms_analysis_main( inDir, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3 ):
    """Analyze the audio of one take and locate the peak level of each note.

    inDir must contain 'seq.json' (from which 'eventTimeL' and 'pulseUsL'
    are read) and 'audio.wav'.  The remaining arguments are passed through
    to audio_rms() and audio_harm_rms().

    Returns a types.SimpleNamespace holding the analysis results.
    """
    with open( os.path.join( inDir, "seq.json"), "rb") as f:
        seqD = json.load(f)

    srate, signalM = wavfile.read( os.path.join( inDir, "audio.wav") )

    # scale the samples by the largest 16 bit sample value
    sigV = signalM / float(0x7fff)

    # time domain RMS
    tdRmsDbV, _ = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )

    # harmonic based RMS
    rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midi_pitch, harmCandN, harmN )

    # pkIdxL[ len(pulseUsL) ] - indexes into rmsDbV[] of peaks
    pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, seqD['eventTimeL'] )

    return types.SimpleNamespace(
        audio_srate = srate,
        tdRmsDbV    = tdRmsDbV,
        binHz       = binHz,
        rmsDbV      = rmsDbV,
        rms_srate   = rms_srate,
        pkIdxL      = pkIdxL,
        eventTimeL  = seqD['eventTimeL'],
        pkDbL       = [ rmsDbV[ i ] for i in pkIdxL ],
        pkUsL       = seqD['pulseUsL'] )