First working version.

This commit is contained in:
kpl 2019-09-01 10:54:09 -04:00
parent ac9061a72d
commit f69c71c498
7 changed files with 672 additions and 239 deletions

View File

@ -181,6 +181,14 @@ class AudioDevice(object):
return Result(smpN) return Result(smpN)
def buffer_sample_ms( self ):
    """Return the result of buffer_sample_count() with its value converted to milliseconds.

    When the result object tests true its 'value' field is rewritten in place
    from a sample count to whole (truncated) milliseconds using self.srate.
    A result that tests false is handed back unchanged.
    """
    res = self.buffer_sample_count()

    if not res:
        return res

    # samples -> milliseconds
    res.value = int(res.value * 1000.0 / self.srate)
    return res
def linear_buffer( self ): def linear_buffer( self ):
smpN = self.buffer_sample_count() smpN = self.buffer_sample_count()

17
common.py Normal file
View File

@ -0,0 +1,17 @@
import yaml,types
def parse_yaml_cfg( fn ):
    """Load the YAML file 'fn' and return its 'p_ac' section as a namespace.

    The mapping stored under the top-level 'p_ac' key is expanded into a
    types.SimpleNamespace, so each setting becomes an attribute of the
    returned object.
    """
    with open(fn,"r") as f:
        docD = yaml.load(f, Loader=yaml.FullLoader)

        # expose the 'p_ac' settings as attributes
        return types.SimpleNamespace(**docD['p_ac'])

View File

@ -4,7 +4,7 @@ from shutil import copyfile
def event_times( eventTimeFn ): def event_times( eventTimeFn ):
eventL = [] eventL = []
velL = []
with open(eventTimeFn,"r") as f: with open(eventTimeFn,"r") as f:
rdr = csv.reader(f) rdr = csv.reader(f)
@ -14,21 +14,25 @@ def event_times( eventTimeFn ):
beginMs = int(row[1]) beginMs = int(row[1])
elif row[0] == 'key_down': elif row[0] == 'key_down':
key_downMs = int(row[1]) - beginMs key_downMs = int(row[1]) - beginMs
vel = int(row[3])
elif row[0] == 'key_up': elif row[0] == 'key_up':
key_upMs = row[1] key_upMs = row[1]
eventL.append( [ key_downMs, key_downMs+1000 ] ) eventL.append( [ key_downMs, key_downMs+1000 ] )
velL.append( vel )
return eventL return eventL,velL
def pulse_lengths( pulseLenFn ): def pulse_lengths( pulseLenFn, velL ):
with open(pulseLenFn,'rb') as f: with open(pulseLenFn,'rb') as f:
d = pickle.load(f) d = pickle.load(f)
msL = d['msL'] msL = d['msL']
# note: first posn in table is a multiplier # note: first posn in table is a multiplier
return [ msL[i]*msL[0] for i in range(1,len(msL))] msL = [ msL[i]*msL[0] for i in range(1,len(msL))]
usL = [ msL[vel-1] for vel in velL ]
return usL
def convert( inDir, outDir ): def convert( inDir, outDir ):
@ -41,16 +45,24 @@ def convert( inDir, outDir ):
if os.path.isdir(idir): if os.path.isdir(idir):
eventTimeFn = os.path.join( idir, "labels_0.csv" ) id = 0
while True:
eventTimeL = event_times(eventTimeFn) eventTimeFn = os.path.join( idir, "labels_%i.csv" % (id) )
pulseTimeFn = os.path.join( idir, "table_0.pickle") if not os.path.isfile( eventTimeFn ):
break
pulseUsL = pulse_lengths( pulseTimeFn ) eventTimeL,velL = event_times(eventTimeFn)
pulseTimeFn = os.path.join( idir, "table_%i.pickle" % (id))
pulseUsL = pulse_lengths( pulseTimeFn, velL )
pitch = idir.split("/")[-1] pitch = idir.split("/")[-1]
if not pitch.isdigit():
break
d = { d = {
"pulseUsL":pulseUsL, "pulseUsL":pulseUsL,
@ -66,15 +78,24 @@ def convert( inDir, outDir ):
if not os.path.isdir(odir): if not os.path.isdir(odir):
os.mkdir(odir) os.mkdir(odir)
odir = os.path.join( odir, "%i" % (id) )
if not os.path.isdir(odir):
os.mkdir(odir)
with open(os.path.join( odir, "seq.json" ),"w") as f: with open(os.path.join( odir, "seq.json" ),"w") as f:
f.write(json.dumps( d )) f.write(json.dumps( d ))
copyfile( os.path.join(idir,"audio_0.wav"), os.path.join(odir,"audio.wav")) copyfile( os.path.join(idir,"audio_%i.wav" % (id)), os.path.join(odir,"audio.wav"))
id += 1
if __name__ == "__main__": if __name__ == "__main__":
inDir = "/home/kevin/temp/picadae_ac_2/full_map" inDir = "/home/kevin/temp/picadae_ac_2/full_map"
outDir = "/home/kevin/temp/p_ac_3_cvt" outDir = "/home/kevin/temp/p_ac_3_cvt/full_map"
#inDir = "/home/kevin/temp/picadae_ac_2/week_0"
#outDir = "/home/kevin/temp/p_ac_3_cvt/week_0"
convert( inDir, outDir ) convert( inDir, outDir )

101
p_ac.py
View File

@ -1,4 +1,4 @@
import sys,os,argparse,yaml,types,logging,select,time,json import sys,os,argparse,types,logging,select,time,json
from datetime import datetime from datetime import datetime
import multiprocessing import multiprocessing
@ -7,6 +7,8 @@ from multiprocessing import Process, Pipe
from picadae_api import Picadae from picadae_api import Picadae
from AudioDevice import AudioDevice from AudioDevice import AudioDevice
from result import Result from result import Result
from common import parse_yaml_cfg
from plot_seq import form_resample_pulse_time_list
class AttackPulseSeq: class AttackPulseSeq:
""" Sequence a fixed chord over a list of attack pulse lengths.""" """ Sequence a fixed chord over a list of attack pulse lengths."""
@ -26,8 +28,9 @@ class AttackPulseSeq:
self.next_ms = 0 # Time of next event (note-on or note_off) self.next_ms = 0 # Time of next event (note-on or note_off)
self.eventTimeL = [] # Onset/offset time of each note [ [onset_ms,offset_ms] ] self.eventTimeL = [] # Onset/offset time of each note [ [onset_ms,offset_ms] ]
self.beginMs = 0 self.beginMs = 0
self.playOnlyFl = False
def start( self, ms, outDir, pitchL, pulseUsL ): def start( self, ms, outDir, pitchL, pulseUsL, playOnlyFl=False ):
self.outDir = outDir # directory to write audio file and results self.outDir = outDir # directory to write audio file and results
self.pitchL = pitchL # chord to play self.pitchL = pitchL # chord to play
self.pulseUsL = pulseUsL # one onset pulse length in microseconds per sequence element self.pulseUsL = pulseUsL # one onset pulse length in microseconds per sequence element
@ -35,15 +38,26 @@ class AttackPulseSeq:
self.pulse_idx = 0 self.pulse_idx = 0
self.state = 'note_on' self.state = 'note_on'
self.next_ms = ms + 500 # wait for 500ms to play the first note (this will guarantee that there is some empty space in the audio file before the first note) self.next_ms = ms + 500 # wait for 500ms to play the first note (this will guarantee that there is some empty space in the audio file before the first note)
self.eventTimeL = [[0,0]] * len(pulseUsL) # initialize the event time self.eventTimeL = [[0,0] for _ in range(len(pulseUsL))] # initialize the event time
self.beginMs = ms self.beginMs = ms
self.playOnlyFl = playOnlyFl
for pitch in pitchL:
self.api.set_pwm( pitch, self.holdDutyPct )
if not playOnlyFl:
self.audio.record_enable(True) # start recording audio self.audio.record_enable(True) # start recording audio
self.tick(ms) # play the first note self.tick(ms) # play the first note
def stop(self, ms): def stop(self, ms):
self._send_note_off() # be sure that all notes are actually turn-off self._send_note_off() # be sure that all notes are actually turn-off
if not self.playOnlyFl:
self.audio.record_enable(False) # stop recording audio self.audio.record_enable(False) # stop recording audio
self._disable() # disable this sequencer self._disable() # disable this sequencer
if not self.playOnlyFl:
self._write() # write the results self._write() # write the results
def is_enabled(self): def is_enabled(self):
@ -75,7 +89,8 @@ class AttackPulseSeq:
def _note_on( self, ms ): def _note_on( self, ms ):
self.eventTimeL[ self.pulse_idx ][0] = ms - self.beginMs #self.eventTimeL[ self.pulse_idx ][0] = ms - self.beginMs
self.eventTimeL[ self.pulse_idx ][0] = self.audio.buffer_sample_ms().value
self.next_ms = ms + self.noteDurMs self.next_ms = ms + self.noteDurMs
self.state = 'note_off' self.state = 'note_off'
@ -84,7 +99,8 @@ class AttackPulseSeq:
print("note-on:",pitch,self.pulse_idx) print("note-on:",pitch,self.pulse_idx)
def _note_off( self, ms ): def _note_off( self, ms ):
self.eventTimeL[ self.pulse_idx ][1] = ms - self.beginMs #self.eventTimeL[ self.pulse_idx ][1] = ms - self.beginMs
self.eventTimeL[ self.pulse_idx ][1] = self.audio.buffer_sample_ms().value
self.next_ms = ms + self.pauseDurMs self.next_ms = ms + self.pauseDurMs
self.state = 'note_on' self.state = 'note_on'
@ -130,19 +146,17 @@ class CalibrateKeys:
self.cfg = cfg self.cfg = cfg
self.seq = AttackPulseSeq( audioDev, api, noteDurMs=1000, pauseDurMs=1000, holdDutyPct=50 ) self.seq = AttackPulseSeq( audioDev, api, noteDurMs=1000, pauseDurMs=1000, holdDutyPct=50 )
self.label = None
self.pulseUsL = None self.pulseUsL = None
self.chordL = None self.chordL = None
self.pitch_idx = -1 self.pitch_idx = -1
def start( self, ms, label, chordL, pulseUsL ): def start( self, ms, chordL, pulseUsL, playOnlyFl=False ):
if len(chordL) > 0: if len(chordL) > 0:
self.label = label
self.pulseUsL = pulseUsL self.pulseUsL = pulseUsL
self.chordL = chordL self.chordL = chordL
self.pitch_idx = -1 self.pitch_idx = -1
self._start_next_chord( ms ) self._start_next_chord( ms, playOnlyFl )
def stop( self, ms ): def stop( self, ms ):
@ -158,11 +172,12 @@ class CalibrateKeys:
# if the sequencer is done playing # if the sequencer is done playing
if not self.seq.is_enabled(): if not self.seq.is_enabled():
self._start_next_chord( ms ) # ... else start the next sequence self._start_next_chord( ms, self.seq.playOnlyFl ) # ... else start the next sequence
return None return None
def _start_next_chord( self, ms ): def _start_next_chord( self, ms, playOnlyFl ):
self.pitch_idx += 1 self.pitch_idx += 1
@ -179,14 +194,43 @@ class CalibrateKeys:
os.mkdir( outDir ) os.mkdir( outDir )
# form the output directory as "<label>_<pitch0>_<pitch1> ... " # form the output directory as "<label>_<pitch0>_<pitch1> ... "
dirStr = self.label + "_" + "_".join([ str(pitch) for pitch in pitchL ]) dirStr = "_".join([ str(pitch) for pitch in pitchL ])
outDir = os.path.join(outDir, dirStr ) outDir = os.path.join(outDir, dirStr )
# start the sequencer print(outDir)
self.seq.start( ms, outDir, pitchL, self.pulseUsL ) if not os.path.isdir(outDir):
os.mkdir(outDir)
# get the next available output directory id
outDir_id = self._calc_next_out_dir_id( outDir )
# if this is not the first time this note has been sampled then get the resample locations
if outDir_id != 0:
self.pulseUsL,_,_ = form_resample_pulse_time_list( outDir, self.cfg.analysisArgs )
if playOnlyFl:
self.pulseUsL,_ = form_final_pulse_list( outDir, pitchL[0], self.cfg.analysisArgs, take_id=None )
outDir = os.path.join( outDir, str(outDir_id) )
if not os.path.isdir(outDir):
os.mkdir(outDir)
# start the sequencer
self.seq.start( ms, outDir, pitchL, self.pulseUsL, playOnlyFl )
def _calc_next_out_dir_id( self, outDir ):
    """Return the lowest integer N >= 0 for which '<outDir>/N' is not an existing directory."""
    next_id = 0
    while True:
        candidate = os.path.join(outDir, "%i" % next_id)
        if not os.path.isdir( candidate ):
            return next_id
        next_id += 1
# This is the main application API it is running in a child process. # This is the main application API it is running in a child process.
class App: class App:
@ -208,7 +252,9 @@ class App:
res = self.audioDev.setup(**cfg.audio) res = self.audioDev.setup(**cfg.audio)
if res: if not res:
self.audio_dev_list(0)
else:
self.api = Picadae( key_mapL=cfg.key_mapL) self.api = Picadae( key_mapL=cfg.key_mapL)
# wait for the letter 'a' to come back from the serial port # wait for the letter 'a' to come back from the serial port
@ -237,7 +283,11 @@ class App:
def calibrate_keys_start( self, ms, pitchRangeL ): def calibrate_keys_start( self, ms, pitchRangeL ):
chordL = [ [pitch] for pitch in range(pitchRangeL[0], pitchRangeL[1]+1)] chordL = [ [pitch] for pitch in range(pitchRangeL[0], pitchRangeL[1]+1)]
self.calibrate.start( ms, "full", chordL, cfg.full_pulseL ) self.calibrate.start( ms, chordL, cfg.full_pulseL )
def play_keys_start( self, ms, pitchRangeL ):
    """Start the calibration sequencer in play-only mode over a range of pitches.

    ms          -- current time in milliseconds, passed through to the sequencer
    pitchRangeL -- [first_pitch, last_pitch]; both ends are included

    Each pitch becomes its own single-note chord.  The pulse list is read
    from the module-level 'cfg' object (cfg.full_pulseL).
    """
    first_pitch = pitchRangeL[0]
    last_pitch  = pitchRangeL[1]

    chordL = []
    for pitch in range(first_pitch, last_pitch+1):
        chordL.append( [pitch] )

    self.calibrate.start( ms, chordL, cfg.full_pulseL, playOnlyFl=True )
def calibrate_keys_stop( self, ms ): def calibrate_keys_stop( self, ms ):
self.calibrate.stop(ms) self.calibrate.stop(ms)
@ -363,7 +413,8 @@ class Shell:
'?':{ "func":"_help", "minN":0, "maxN":0, "help":"Print usage text."}, '?':{ "func":"_help", "minN":0, "maxN":0, "help":"Print usage text."},
'a':{ "func":"audio_dev_list", "minN":0, "maxN":0, "help":"List the audio devices."}, 'a':{ "func":"audio_dev_list", "minN":0, "maxN":0, "help":"List the audio devices."},
'c':{ "func":"calibrate_keys_start", "minN":1, "maxN":2, "help":"Calibrate a range of keys. "}, 'c':{ "func":"calibrate_keys_start", "minN":1, "maxN":2, "help":"Calibrate a range of keys. "},
's':{ "func":"calibrate_keys_stop", "minN":0, "maxN":0, "help":"Stop key calibration"} 's':{ "func":"calibrate_keys_stop", "minN":0, "maxN":0, "help":"Stop key calibration"},
'p':{ "func":"play_keys_start", "minN":1, "maxN":2, "help":"Play current calibration"}
} }
def _help( self, _=None ): def _help( self, _=None ):
@ -405,7 +456,6 @@ class Shell:
if d['minN'] != -1 and (d['minN'] > len(argL) or len(argL) > d['maxN']): if d['minN'] != -1 and (d['minN'] > len(argL) or len(argL) > d['maxN']):
return self._syntaxError("Argument count mismatch. {} is out of range:{} to {}".format(len(argL),d['minN'],d['maxN'])) return self._syntaxError("Argument count mismatch. {} is out of range:{} to {}".format(len(argL),d['minN'],d['maxN']))
# call the command function # call the command function
if func: if func:
result = func(*argL) result = func(*argL)
@ -489,21 +539,6 @@ def parse_args():
return ap.parse_args() return ap.parse_args()
def parse_yaml_cfg( fn ):
"""Parse the YAML configuration file."""
cfg = None
with open(fn,"r") as f:
cfgD = yaml.load(f, Loader=yaml.FullLoader)
cfg = types.SimpleNamespace(**cfgD['p_ac'])
return cfg
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -18,13 +18,31 @@
# MeasureSeq args # MeasureSeq args
outDir: "~/temp/p_ac3", outDir: "~/temp/p_ac_3",
noteDurMs: 1000, noteDurMs: 1000,
pauseDurMs: 1000, pauseDurMs: 1000,
holdDutyPct: 50, holdDutyPct: 50,
#full_pulseL: [ 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 8000, 9000, 10000, 12000, 14000, 18000, 22000, 26000, 30000, 34000, 40000], full_pulse0L: [ 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 8000, 9000, 10000, 12000, 14000, 18000, 22000, 26000, 30000, 34000, 40000],
full_pulseL: [ 18000, 22000, 26000, 30000, 34000, 40000], full_pulse1L: [ 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 20000, 22000, 24000, 26000, 30000, 32000, 34000, 36000, 40000],
full_pulseL: [ 10000, 10500, 11000, 11500, 12000, 12500, 13000, 13500, 14000, 14500, 15000, 15500, 16000, 16500, 17000, 17500, 18000, 18500, 20000, 22000, 24000, 26000, 30000, 32000, 34000, 36000, 40000],
# RMS analysis args
analysisArgs: {
rmsAnalysisArgs: {
rmsWndMs: 300, # length of the RMS measurement window
rmsHopMs: 30, # RMS measurement inter window distance
dbRefWndMs: 500, # length of initial portion of signal to use to calculate the dB reference level
harmCandN: 5, # count of harmonic candidates to locate during harmonic based RMS analysis
harmN: 3, # count of harmonics to use to calculate harmonic based RMS analysis
},
minAttkDb: 5.0, # threshold of silence level
maxDbOffset: 0.5, # travel down from the max. note level by at most this amount to locate the max. peak
maxDeltaDb: 2.0, # maximum db change between volume samples (changes greater than this will trigger resampling)
samplesPerDb: 4, # count of samples per dB to resample ranges whose range is less than maxDeltaDb
minSampleDistUs: 500 # minimum distance between sample points in microseconds
},
key_mapL: [ key_mapL: [

View File

@ -1,8 +1,8 @@
import os, sys, json import os, sys
from scipy.io import wavfile
from scipy.signal import stft
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
from common import parse_yaml_cfg
from rms_analysis import rms_analysis_main
def is_nanV( xV ): def is_nanV( xV ):
@ -12,10 +12,265 @@ def is_nanV( xV ):
return False return False
def _find_max_take_id( inDir ):
    """Return the id of the last consecutively numbered take folder in 'inDir'.

    Take folders are probed as '<inDir>/0', '<inDir>/1', ... until one is
    missing.  The id of the last folder found is returned; 0 is returned
    when no take folder exists at all.
    """
    take_cnt = 0
    while os.path.isdir( os.path.join(inDir, "%i" % take_cnt) ):
        take_cnt += 1

    return take_cnt - 1 if take_cnt > 0 else take_cnt
def _interpolate_velocity_table( pkDbL, pkUsL, db0, db1 ):
    """Interpolate a pulse time for each of 128 evenly spaced dB targets between db0 and db1.

    Returns (pulseUsL, pulseDbL, multValL, missingL):
      pulseUsL,pulseDbL -- interpolated pulse time / level of every located target
      multValL          -- (table_index, extra_match_count) for targets bracketed by more than one interval
      missingL          -- table indexes whose target was not bracketed by any interval
    """
    pulseUsL = []
    pulseDbL = []
    multValL = []
    missingL = []

    for out_idx in range(128):

        # calc the target volume
        target_db = db0 + (out_idx * (db1-db0)/127.0)

        found_fl        = False
        extra_match_cnt = 0

        # look for the target between each pair of neighbouring sample points
        for i in range(1,len(pkDbL)):

            lo_db = pkDbL[i-1]
            hi_db = pkDbL[i]

            if lo_db <= target_db and target_db < hi_db:

                if not found_fl:
                    # the first bracketing interval supplies the table entry
                    frac = (target_db - lo_db) / (hi_db - lo_db)
                    pulseUsL.append( pkUsL[i-1] + frac * (pkUsL[i] - pkUsL[i-1]) )
                    pulseDbL.append( lo_db + frac * (hi_db - lo_db) )
                    found_fl = True
                else:
                    # this target db value was found between multiple sampled points
                    # therefore the sampled volume function is not monotonic
                    extra_match_cnt += 1

        if extra_match_cnt > 0:
            multValL.append((out_idx,extra_match_cnt))

        if not found_fl:
            missingL.append(out_idx)

    return pulseUsL, pulseDbL, multValL, missingL


def form_final_pulse_list( inDir, midi_pitch, analysisArgsD, take_id=None ):
    """Form the velocity table for one note from the peaks measured in all of its takes.

    inDir         -- note directory; every entry in it must be a take folder named by an integer
    midi_pitch    -- MIDI pitch of the note, passed to rms_analysis_main()
    analysisArgsD -- analysis arguments; the keys 'rmsAnalysisArgs', 'minSampleDistUs',
                     'minAttkDb' and 'maxDbOffset' are read
    take_id       -- accepted for compatibility; it is not used (all takes are always merged)

    Returns (pulseUsL, pulseDbL): the interpolated pulse times and dB levels of the
    located table entries, in increasing table order.

    A table target is now tracked with an explicit 'found' flag.  Previously the
    length of the output list was used for that purpose, so a single target which
    could not be located caused every later target to be miscounted as a
    multi-value match and dropped from the table.
    """

    pkL = []

    # for each take in this directory
    for idir in os.listdir(inDir):

        # take folders are named by their take number; anything else raises ValueError here
        int(idir)

        # analyze this take's audio and locate the note peaks
        r = rms_analysis_main( os.path.join(inDir,idir), midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )

        # store the peaks in pkL[ (db,us) ]
        pkL.extend( zip(r.pkDbL,r.pkUsL) )

    # sort the peaks on increasing attack pulse microseconds
    pkL = sorted( pkL, key= lambda x: x[1] )

    # merge sample points that are separated by less than 'minSampleDistUs' microseconds
    pkL = merge_close_sample_points( pkL, analysisArgsD['minSampleDistUs'] )

    # split pkL
    pkDbL,pkUsL = tuple(zip(*pkL))

    # locate the first and last note
    min_pk_idx, max_pk_idx = find_min_max_peak_index( pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )

    db1 = pkDbL[ max_pk_idx ]
    db0 = pkDbL[ min_pk_idx ]

    pulseUsL, pulseDbL, multValL, missingL = _interpolate_velocity_table( pkDbL, pkUsL, db0, db1 )

    if len(multValL) > 0:
        print("Multi-value pulse locations were found during velocity table formation: ",multValL)

    if len(missingL) > 0:
        print("No pulse location was found for these velocity table entries: ",missingL)

    return pulseUsL,pulseDbL
def merge_close_sample_points( pkDbUsL, minSampleDistanceUs ):
    """Merge neighbouring sample points whose pulse times are closer than 'minSampleDistanceUs'.

    pkDbUsL             -- list of (db,us) tuples; the callers pass it sorted on increasing 'us'.
                           The list is updated in place and also returned.
    minSampleDistanceUs -- two neighbouring points closer than this are replaced by
                           their midpoint (mean db, mean us)

    Points are folded left to right: each point is compared to the last point kept
    so far (which may itself be the result of a merge).  This keeps merging until no
    close neighbours remain.  The previous loop stopped early whenever a merge
    happened at the second-to-last position, and failed on lists with fewer than two
    points.
    """
    n0     = len(pkDbUsL)
    avg0Us = np.mean(np.diff([ x[1] for x in pkDbUsL ])) if n0 > 1 else None

    mergedL = []
    for db,us in pkDbUsL:
        if mergedL and us - mergedL[-1][1] < minSampleDistanceUs:
            db0,us0 = mergedL[-1]
            mergedL[-1] = ( (db0 + db)/2, (us0 + us)/2 )
        else:
            mergedL.append( (db,us) )

    # keep the in-place update that callers of the original version could observe
    pkDbUsL[:] = mergedL

    print("%i sample points deleted by merging close points." % (n0 - len(pkDbUsL)))

    # spacing statistics only exist when at least two points remain
    if len(pkDbUsL) > 1:
        diffV = np.diff([ x[1] for x in pkDbUsL ])
        print("Mean time between samples - before:%f after:%f " % (avg0Us,np.mean(diffV)))
        print("Min time between samples: %i " % (np.min(diffV)))

    return pkDbUsL
# Estimate the pulse times at which a note should be sampled again.
#
#   pkDbL, pkUsL           -- level (dB) and pulse time of each sample point (parallel sequences)
#   min_pk_idx, max_pk_idx -- first and last index of pkDbL[] that is examined
#   maxDeltaDb             -- a rise of more than this many dB from the previous point triggers resampling
#   samplePerDb            -- count of new sample times per dB of level change (also the minimum count per interval)
#
# Returns a set of pulse times, or None (after printing a message) when min_pk_idx is 0.
#
# NOTE(review): the indentation of this function is not visible in this view, so it cannot be
# determined here whether the 'if i + 1 < len(pkDbL)' section is nested inside the preceding
# 'if d > maxDeltaDb ...' test or is a sibling of it -- confirm against the original file.
def calc_resample_ranges( pkDbL, pkUsL, min_pk_idx, max_pk_idx, maxDeltaDb, samplePerDb ):
# the loop below reads pkDbL[i-1], so the first examined index must have a predecessor
if min_pk_idx == 0:
print("No silent notes were generated. Decrease the minimum peak level or the hold voltage.")
return None
resampleUsSet = set()
# refPkDb tracks the highest peak level seen so far
refPkDb = pkDbL[min_pk_idx]
for i in range( min_pk_idx, max_pk_idx+1 ):
# level change from the previous sample point to this one
d = pkDbL[i] - pkDbL[i-1]
usL = []
# if this peak is more than maxDeltaDb above the previous peak, or
# it is not above the previous peak, or
# it is below the highest peak seen so far
if d > maxDeltaDb or d <= 0 or pkDbL[i] < refPkDb:
# evenly spaced sample times starting at the previous point (the end point itself is not generated)
sampleCnt = max(int(round(abs(d) * samplePerDb)),samplePerDb)
dUs = int(round((pkUsL[i] - pkUsL[i-1])/sampleCnt))
usL = [ pkUsL[i-1] + dUs*j for j in range(sampleCnt)]
# same construction for the interval from this point to the next one, when a next point exists
if i + 1 < len(pkDbL):
d = pkDbL[i+1] - pkDbL[i]
sampleCnt = max(int(round(abs(d) * samplePerDb)),samplePerDb)
dUs = int(round((pkUsL[i+1] - pkUsL[i])/sampleCnt))
usL += [ pkUsL[i] + dUs*j for j in range(sampleCnt)]
if pkDbL[i] > refPkDb:
refPkDb = pkDbL[i]
# the set removes sample times that were generated more than once
if usL:
resampleUsSet = resampleUsSet.union( usL )
return resampleUsSet
def form_resample_pulse_time_list( inDir, analysisArgsD ):
    """Merge all available data from previous takes to form a new list of pulse times to sample.

    inDir         -- note directory whose last path component is the MIDI pitch;
                     every entry in it must be a take folder named by an integer
    analysisArgsD -- analysis arguments; the keys 'rmsAnalysisArgs', 'minSampleDistUs',
                     'minAttkDb', 'maxDbOffset', 'maxDeltaDb' and 'samplesPerDb' are read

    Returns (resampleUsL, pkDbL, pkUsL): the sorted list of pulse times to sample next
    and the merged peak levels / pulse times it was derived from.  resampleUsL is
    empty when calc_resample_ranges() reports that no resample ranges could be
    formed (it returns None in that case).
    """

    # the last folder is always the midi pitch of the note under analysis
    # (normpath drops a trailing separator so that '.../60/' also works)
    midi_pitch = int( os.path.basename( os.path.normpath(inDir) ) )

    pkL = []

    # for each take in this directory
    for idir in os.listdir(inDir):

        # take folders are named by their take number; anything else raises ValueError here
        int(idir)

        # analyze this take's audio and locate the note peaks
        r = rms_analysis_main( os.path.join(inDir,idir), midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )

        # store the peaks in pkL[ (db,us) ]
        pkL.extend( zip(r.pkDbL,r.pkUsL) )

    # sort the peaks on increasing attack pulse microseconds
    pkL = sorted( pkL, key= lambda x: x[1] )

    # merge sample points that are separated by less than 'minSampleDistUs' microseconds
    pkL = merge_close_sample_points( pkL, analysisArgsD['minSampleDistUs'] )

    # split pkL
    pkDbL,pkUsL = tuple(zip(*pkL))

    # locate the first and last note
    min_pk_idx, max_pk_idx = find_min_max_peak_index( pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )

    # estimate the microsecond locations to resample
    resampleUsSet = calc_resample_ranges( pkDbL, pkUsL, min_pk_idx, max_pk_idx, analysisArgsD['maxDeltaDb'], analysisArgsD['samplesPerDb'] )

    # calc_resample_ranges() prints a diagnostic and returns None when min_pk_idx is 0
    if resampleUsSet is None:
        resampleUsL = []
    else:
        resampleUsL = sorted( resampleUsSet )

    return resampleUsL, pkDbL, pkUsL
def plot_resample_pulse_times( inDir, analysisArgsD ):
    """Plot the merged peak curve of a note together with its resample times and velocity table.

    The merged (pulse time, dB) curve is drawn as a line, every proposed resample
    time as a vertical line and the velocity table entries as unconnected dots.
    The last path component of 'inDir' must be the MIDI pitch.
    """
    resampleUsL, mergedDbL, mergedUsL = form_resample_pulse_time_list( inDir, analysisArgsD )

    pitch = int( inDir.split("/")[-1] )

    tblUsL,tblDbL = form_final_pulse_list( inDir, pitch, analysisArgsD, take_id=None )

    fig,ax = plt.subplots()

    # merged peak curve
    ax.plot(mergedUsL,mergedDbL )

    # one vertical line per resample time
    for resample_us in resampleUsL:
        ax.axvline( x = resample_us )

    # velocity table entries
    ax.plot(tblUsL,tblDbL,marker='.',linestyle='None')

    plt.show()
def find_min_max_peak_index( pkDbL, minDb, maxDbOffs ):
"""
Find the min db and max db peak.
"""
# select only the peaks from rmsV[] to work with # select only the peaks from rmsV[] to work with
yV = rmsV[ pkIdxL ] yV = pkDbL
# get the max volume note # get the max volume note
max_i = np.argmax( yV ) max_i = np.argmax( yV )
@ -38,9 +293,15 @@ def find_min_max_peak_index( rmsV, pkIdxL, minDb, maxDbOffs=0.5 ):
assert( min_i < max_i ) assert( min_i < max_i )
if min_i == 0:
print("No silent notes were generated. Decrease the minimum peak level or the hold voltage.")
return min_i, max_i return min_i, max_i
def find_skip_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx ): def find_skip_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx ):
""" Fine peaks associated with longer attacks pulses that are lower than peaks with a shorter attack pulse.
These peaks indicate degenerate portions of the pulse/db curve which must be skipped during velocity table formation
"""
skipPkIdxL = [] skipPkIdxL = []
yV = rmsV[pkIdxL] yV = rmsV[pkIdxL]
refPkDb = yV[min_pk_idx] refPkDb = yV[min_pk_idx]
@ -54,131 +315,22 @@ def find_skip_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx ):
return skipPkIdxL return skipPkIdxL
def find_out_of_range_peaks( rmsV, pkIdxL, min_pk_idx, max_pk_idx, maxDeltaDb ):
""" Locate peaks which are more than maxDeltaDb from the previous peak.
If two peaks are separated by more than maxDeltaDb then the range must be resampled
"""
def calc_harm_bins( srate, binHz, midiPitch, harmN ): oorPkIdxL = []
yV = rmsV[pkIdxL]
semi_tone = 1.0/12 for i in range( min_pk_idx, max_pk_idx+1 ):
quarter_tone = 1.0/24 if i > 0:
eigth_tone = 1.0/48 d = yV[i] - yV[i-1]
band_width_st = 3.0/48 # 3/8 tone if d > maxDeltaDb or d < 0:
oorPkIdxL.append(i)
fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12)) return oorPkIdxL
fund_l_binL = [int(round(fundHz * pow(2.0,-band_width_st) * i/binHz)) for i in range(1,harmN+1)]
fund_m_binL = [int(round(fundHz * i/binHz)) for i in range(1,harmN+1)]
fund_u_binL = [int(round(fundHz * pow(2.0, band_width_st) * i/binHz)) for i in range(1,harmN+1)]
for i in range(len(fund_m_binL)):
if fund_l_binL[i] >= fund_m_binL[i] and fund_l_binL[i] > 0:
fund_l_binL[i] = fund_m_binL[i] - 1
if fund_u_binL[i] <= fund_m_binL[i] and fund_u_binL[i] < len(fund_u_binL)-1:
fund_u_binL[i] = fund_m_binL[i] + 1
return fund_l_binL, fund_m_binL, fund_u_binL
def rms_to_db( xV, rms_srate, refWndMs ):
dbWndN = int(round(refWndMs * rms_srate / 1000.0))
dbRef = ref = np.mean(xV[0:dbWndN])
rmsDbV = 20.0 * np.log10( xV / dbRef )
return rmsDbV
def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs ):
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
hopSmpN = int(round( hopMs * srate / 1000.0))
xN = xV.shape[0]
yN = int(((xN - wndSmpN) / hopSmpN) + 1)
assert( yN > 0)
yV = np.zeros( (yN, ) )
assert( wndSmpN > 1 )
i = 0
j = 0
while i < xN and j < yN:
if i == 0:
yV[j] = np.sqrt(xV[0]*xV[0])
elif i < wndSmpN:
yV[j] = np.sqrt( np.mean( xV[0:i] * xV[0:i] ) )
else:
yV[j] = np.sqrt( np.mean( xV[i-wndSmpN:i] * xV[i-wndSmpN:i] ) )
i += hopSmpN
j += 1
rms_srate = srate / hopSmpN
return rms_to_db( yV, rms_srate, refWndMs ), rms_srate
def audio_stft_rms( srate, xV, rmsWndMs, hopMs, refWndMs, spectrumIdx ):
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
hopSmpN = int(round( hopMs * srate / 1000.0))
binHz = srate / wndSmpN
f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
specHopIdx = int(round( spectrumIdx ))
specV = np.sqrt(np.abs(xM[:, specHopIdx ]))
mV = np.zeros((xM.shape[1]))
for i in range(xM.shape[1]):
mV[i] = np.max(np.sqrt(np.abs(xM[:,i])))
rms_srate = srate / hopSmpN
mV = rms_to_db( mV, rms_srate, refWndMs )
return mV, rms_srate, specV, specHopIdx, binHz
def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ):
wndSmpN = int(round( rmsWndMs * srate / 1000.0))
hopSmpN = int(round( hopMs * srate / 1000.0))
binHz = srate / wndSmpN
f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )
harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )
rmsV = np.zeros((xM.shape[1],))
for i in range(xM.shape[1]):
mV = np.sqrt(np.abs(xM[:,i]))
pV = np.zeros((len(harmLBinL,)))
for j,(b0i,b1i) in enumerate(zip( harmLBinL, harmUBinL )):
pV[j] = np.max(mV[b0i:b1i])
rmsV[i] = np.mean( sorted(pV)[-harmN:] )
rms_srate = srate / hopSmpN
rmsV = rms_to_db( rmsV, rms_srate, dbRefWndMs )
return rmsV, rms_srate, binHz
def locate_peak_indexes( xV, xV_srate, eventMsL ):
pkIdxL = []
for begMs, endMs in eventMsL:
begSmpIdx = int(begMs * xV_srate / 1000.0)
endSmpIdx = int(endMs * xV_srate / 1000.0)
pkIdxL.append( begSmpIdx + np.argmax( xV[begSmpIdx:endSmpIdx] ) )
return pkIdxL
def plot_spectrum( ax, srate, binHz, specV, midiPitch, harmN ): def plot_spectrum( ax, srate, binHz, specV, midiPitch, harmN ):
@ -249,68 +401,82 @@ def plot_spectral_ranges( inDir, pitchL, rmsWndMs=300, rmsHopMs=30, harmN=5, dbR
def do_td_plot( inDir ): def td_plot( ax, inDir, midi_pitch, id, analysisArgsD ):
rmsWndMs = 300
rmsHopMs = 30
dbRefWndMs = 500
harmCandN = 5
harmN = 3
minAttkDb = 5.0
seqFn = os.path.join( inDir, "seq.json") r = rms_analysis_main( inDir, midi_pitch, **analysisArgsD['rmsAnalysisArgs'] )
audioFn = os.path.join( inDir, "audio.wav")
midiPitch = int(inDir.split("/")[-1])
min_pk_idx, max_pk_idx = find_min_max_peak_index( r.pkDbL, analysisArgsD['minAttkDb'], analysisArgsD['maxDbOffset'] )
with open( seqFn, "rb") as f: skipPkIdxL = find_skip_peaks( r.rmsDbV, r.pkIdxL, min_pk_idx, max_pk_idx )
r = json.load(f)
jmpPkIdxL = find_out_of_range_peaks( r.rmsDbV, r.pkIdxL, min_pk_idx, max_pk_idx, analysisArgsD['maxDeltaDb'] )
srate, signalM = wavfile.read(audioFn) secV = np.arange(0,len(r.rmsDbV)) / r.rms_srate
sigV = signalM / float(0x7fff)
rms0DbV, rms0_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs ) ax.plot( secV, r.rmsDbV )
ax.plot( np.arange(0,len(r.tdRmsDbV)) / r.rms_srate, r.tdRmsDbV, color="black" )
rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midiPitch, harmCandN, harmN )
pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, r['eventTimeL'] )
min_pk_idx, max_pk_idx = find_min_max_peak_index( rmsDbV, pkIdxL, minAttkDb )
skipPkIdxL = find_skip_peaks( rmsDbV, pkIdxL, min_pk_idx, max_pk_idx )
fig,ax = plt.subplots()
fig.set_size_inches(18.5, 10.5, forward=True)
secV = np.arange(0,len(rmsDbV)) / rms_srate
ax.plot( secV, rmsDbV )
ax.plot( np.arange(0,len(rms0DbV)) / rms0_srate, rms0DbV, color="black" )
# print beg/end boundaries # print beg/end boundaries
for i,(begMs, endMs) in enumerate(r['eventTimeL']): for i,(begMs, endMs) in enumerate(r.eventTimeL):
ax.axvline( x=begMs/1000.0, color="green") ax.axvline( x=begMs/1000.0, color="green")
ax.axvline( x=endMs/1000.0, color="red") ax.axvline( x=endMs/1000.0, color="red")
ax.text(begMs/1000.0, 20.0, str(i) ) ax.text(begMs/1000.0, 20.0, str(i) )
return
# plot peak markers # plot peak markers
for i,pki in enumerate(pkIdxL): for i,pki in enumerate(r.pkIdxL):
marker = "o" if i==min_pk_idx or i==max_pk_idx else "." marker = 4 if i==min_pk_idx or i==max_pk_idx else 5
color = "red" if i in skipPkIdxL else "black" color = "red" if i in skipPkIdxL else "black"
ax.plot( [pki / rms_srate], [ rmsDbV[pki] ], marker=marker, color=color) ax.plot( [pki / r.rms_srate], [ r.rmsDbV[pki] ], marker=marker, color=color)
if i in jmpPkIdxL:
ax.plot( [pki / r.rms_srate], [ r.rmsDbV[pki] ], marker=6, color="blue")
def do_td_plot( inDir, analysisArgs ):
    """Show the time-domain plot of a single take.

    inDir        -- take directory of the form '.../<midi_pitch>/<take_id>'
    analysisArgs -- analysis arguments passed through to td_plot()
    """
    fig,ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5, forward=True)

    # the last two path components give the take id and the midi pitch
    pathPartL  = inDir.split("/")
    take_id    = int(pathPartL[-1])
    midi_pitch = int(pathPartL[-2])

    td_plot(ax,inDir,midi_pitch,take_id,analysisArgs)

    plt.show()
def do_td_multi_plot( inDir, analysisArgs ):
    """Show the time-domain plots of all takes of a note, stacked vertically.

    inDir        -- note directory whose last path component is the MIDI pitch;
                    one subplot is created per directory entry and the takes are
                    read from '<inDir>/0', '<inDir>/1', ...
    analysisArgs -- analysis arguments passed through to td_plot()
    """
    midi_pitch = int(inDir.split("/")[-1])

    dirL = os.listdir(inDir)

    if not dirL:
        print("No takes were found in:",inDir)
        return

    # squeeze=False always returns a 2D array of axes; without it a single
    # take yields a bare Axes object which cannot be iterated
    fig,axM = plt.subplots(len(dirL),1,squeeze=False)

    for take_id,ax in enumerate(axM[:,0]):
        td_plot(ax, os.path.join(inDir,str(take_id)), midi_pitch, take_id, analysisArgs )

    plt.show()
if __name__ == "__main__": if __name__ == "__main__":
inDir = sys.argv[1] inDir = sys.argv[1]
cfgFn = sys.argv[2]
do_td_plot(inDir) cfg = parse_yaml_cfg( cfgFn )
#do_td_plot(inDir,cfg.analysisArgs)
#o_td_multi_plot(inDir,cfg.analysisArgs)
#plot_spectral_ranges( inDir, [ 24, 36, 48, 60, 72, 84, 96, 104] ) #plot_spectral_ranges( inDir, [ 24, 36, 48, 60, 72, 84, 96, 104] )
plot_resample_pulse_times( inDir, cfg.analysisArgs )

168
rms_analysis.py Normal file
View File

@ -0,0 +1,168 @@
import os,types,json
from scipy.io import wavfile
from scipy.signal import stft
import numpy as np
def calc_harm_bins( srate, binHz, midiPitch, harmN ):
    """Return the lower, center and upper spectral bin index of the first 'harmN' harmonics of a pitch.

    srate     -- audio sample rate (not used by this function)
    binHz     -- width of one spectral bin in Hz
    midiPitch -- MIDI pitch of the fundamental
    harmN     -- count of harmonics to locate; harmonic 1 is the fundamental

    Returns (fund_l_binL, fund_m_binL, fund_u_binL), three lists of length harmN.
    """
    # half width of the band around each harmonic, in octaves: 3/48 octave = 3/8 tone
    band_width_oct = 3.0/48

    # fundamental frequency of the MIDI pitch (pitch 69 evaluates to 440 Hz)
    fundHz = (13.75 * pow(2.0,(-9.0/12.0))) * pow(2.0,(midiPitch / 12))

    lo_ratio = pow(2.0,-band_width_oct)
    hi_ratio = pow(2.0, band_width_oct)
    harmL    = range(1,harmN+1)

    fund_l_binL = [int(round(fundHz * lo_ratio * i/binHz)) for i in harmL]
    fund_m_binL = [int(round(fundHz * i/binHz)) for i in harmL]
    fund_u_binL = [int(round(fundHz * hi_ratio * i/binHz)) for i in harmL]

    # widen bands that rounding collapsed onto the center bin
    for i,mid_bin in enumerate(fund_m_binL):

        if fund_l_binL[i] >= mid_bin and fund_l_binL[i] > 0:
            fund_l_binL[i] = mid_bin - 1

        # NOTE(review): the bound below compares a bin index with the harmonic count
        # (len(fund_u_binL)-1), not with the number of spectral bins -- kept as is, confirm intent.
        if fund_u_binL[i] <= mid_bin and fund_u_binL[i] < len(fund_u_binL)-1:
            fund_u_binL[i] = mid_bin + 1

    return fund_l_binL, fund_m_binL, fund_u_binL
def rms_to_db( xV, rms_srate, refWndMs ):
    """Convert the values in xV to decibels relative to the mean of its leading reference window.

    xV        -- vector of values sampled at 'rms_srate'
    rms_srate -- sample rate of xV in Hz
    refWndMs  -- length in milliseconds of the leading portion of xV whose mean is the dB reference
    """
    refSmpN = int(round(refWndMs * rms_srate / 1000.0))

    refVal = np.mean(xV[:refSmpN])

    return 20.0 * np.log10( xV / refVal )
def audio_rms( srate, xV, rmsWndMs, hopMs, refWndMs ):
    """Measure the time-domain RMS of xV on a trailing window and return it in dB.

    srate    -- sample rate of xV in Hz
    xV       -- audio sample vector
    rmsWndMs -- length of the RMS window in milliseconds
    hopMs    -- distance between successive RMS measurements in milliseconds
    refWndMs -- length of the leading portion of the RMS signal used as the dB reference

    Returns (rmsDbV, rms_srate) where rms_srate is the sample rate of rmsDbV.
    """
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs * srate / 1000.0))

    xN = xV.shape[0]
    yN = int(((xN - wndSmpN) / hopSmpN) + 1)

    assert( yN > 0)
    yV = np.zeros( (yN, ) )

    assert( wndSmpN > 1 )

    for j in range(yN):

        # index one past the last sample of this measurement window
        i = j * hopSmpN

        if i >= xN:
            break

        if i == 0:
            # no history yet: use the magnitude of the first sample
            yV[j] = np.sqrt(xV[0]*xV[0])
        else:
            # the window is shortened while fewer than wndSmpN samples precede 'i'
            begIdx = 0 if i < wndSmpN else i - wndSmpN
            segV   = xV[begIdx:i]
            yV[j]  = np.sqrt( np.mean( segV * segV ) )

    rms_srate = srate / hopSmpN

    return rms_to_db( yV, rms_srate, refWndMs ), rms_srate
def audio_stft_rms( srate, xV, rmsWndMs, hopMs, refWndMs, spectrumIdx ):
    """Measure the level of xV from the largest STFT bin of each frame and return it in dB.

    srate       -- sample rate of xV in Hz
    xV          -- audio sample vector
    rmsWndMs    -- STFT window length in milliseconds
    hopMs       -- STFT hop length in milliseconds
    refWndMs    -- length of the leading portion of the level signal used as the dB reference
    spectrumIdx -- frame index (rounded to an integer) of the spectrum to return

    Returns (levelDbV, rms_srate, specV, specHopIdx, binHz).
    """
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs * srate / 1000.0))
    binHz   = srate / wndSmpN

    f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )

    # square root of the bin magnitudes of the requested frame
    specHopIdx = int(round( spectrumIdx ))
    specV      = np.sqrt(np.abs(xM[:, specHopIdx ]))

    # largest sqrt-magnitude bin of every frame
    levelV = np.max( np.sqrt(np.abs(xM)), axis=0 ).astype(np.float64)

    rms_srate = srate / hopSmpN

    levelDbV = rms_to_db( levelV, rms_srate, refWndMs )

    return levelDbV, rms_srate, specV, specHopIdx, binHz
def audio_harm_rms( srate, xV, rmsWndMs, hopMs, dbRefWndMs, midiPitch, harmCandN, harmN ):
    """Measure the level of xV from the strongest harmonic bands of a pitch and return it in dB.

    srate      -- sample rate of xV in Hz
    xV         -- audio sample vector
    rmsWndMs   -- STFT window length in milliseconds
    hopMs      -- STFT hop length in milliseconds
    dbRefWndMs -- length of the leading portion of the level signal used as the dB reference
    midiPitch  -- MIDI pitch whose harmonics are tracked
    harmCandN  -- count of harmonic bands to examine in each frame
    harmN      -- count of the strongest bands averaged to form the frame level

    Returns (rmsDbV, rms_srate, binHz).
    """
    wndSmpN = int(round( rmsWndMs * srate / 1000.0))
    hopSmpN = int(round( hopMs * srate / 1000.0))
    binHz   = srate / wndSmpN

    f,t,xM = stft( xV, fs=srate, window="hann", nperseg=wndSmpN, noverlap=wndSmpN-hopSmpN, return_onesided=True )

    harmLBinL,harmMBinL,harmUBinL = calc_harm_bins( srate, binHz, midiPitch, harmCandN )

    # (lower,upper) bin range of each candidate harmonic band
    bandL = list(zip( harmLBinL, harmUBinL ))

    frameN = xM.shape[1]
    rmsV   = np.zeros((frameN,))

    for i in range(frameN):

        magV = np.sqrt(np.abs(xM[:,i]))

        # peak value inside each candidate band
        bandPkV = np.array([ np.max(magV[b0i:b1i]) for b0i,b1i in bandL ], dtype=float)

        # average the 'harmN' strongest bands
        rmsV[i] = np.mean( sorted(bandPkV)[-harmN:] )

    rms_srate = srate / hopSmpN

    return rms_to_db( rmsV, rms_srate, dbRefWndMs ), rms_srate, binHz
def locate_peak_indexes( xV, xV_srate, eventMsL ):
    """Return the index into xV of the maximum value inside each event window.

    xV       -- vector to search
    xV_srate -- sample rate of xV in Hz
    eventMsL -- list of [begin_ms, end_ms] windows; the end index is not included in the search
    """
    def _ms_to_index( ms ):
        return int(ms * xV_srate / 1000.0)

    pkIdxL = []

    for begMs, endMs in eventMsL:
        begIdx = _ms_to_index(begMs)
        endIdx = _ms_to_index(endMs)

        # argmax is relative to the window, so offset it by the window start
        pkIdxL.append( begIdx + np.argmax( xV[begIdx:endIdx] ) )

    return pkIdxL
def rms_analysis_main( inDir, midi_pitch, rmsWndMs=300, rmsHopMs=30, dbRefWndMs=500, harmCandN=5, harmN=3 ):
    """Analyze the recording of one take and locate the level peak of every note in it.

    inDir      -- take directory holding 'seq.json' and 'audio.wav'
    midi_pitch -- MIDI pitch of the recorded note
    rmsWndMs, rmsHopMs, dbRefWndMs, harmCandN, harmN
               -- analysis settings passed to audio_rms() and audio_harm_rms()

    Returns a types.SimpleNamespace with the fields:
      audio_srate -- sample rate of the audio file
      tdRmsDbV    -- time-domain RMS level in dB
      binHz       -- spectral bin width in Hz reported by audio_harm_rms()
      rmsDbV      -- harmonic based level in dB
      rms_srate   -- sample rate of rmsDbV
      pkIdxL      -- index into rmsDbV[] of the peak of each event in 'eventTimeL'
      eventTimeL  -- 'eventTimeL' as read from seq.json
      pkDbL       -- rmsDbV[] value at each peak index
      pkUsL       -- 'pulseUsL' as read from seq.json
    """
    with open( os.path.join( inDir, "seq.json"), "rb") as f:
        seqD = json.load(f)

    srate, signalM = wavfile.read( os.path.join( inDir, "audio.wav") )

    # scale by 0x7fff -- presumably the audio is 16 bit PCM; confirm against the recorder
    sigV = signalM / float(0x7fff)

    tdRmsDbV, rms0_srate = audio_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs )

    rmsDbV, rms_srate, binHz = audio_harm_rms( srate, sigV, rmsWndMs, rmsHopMs, dbRefWndMs, midi_pitch, harmCandN, harmN )

    eventTimeL = seqD['eventTimeL']

    pkIdxL = locate_peak_indexes( rmsDbV, rms_srate, eventTimeL )

    return types.SimpleNamespace(
        audio_srate = srate,
        tdRmsDbV    = tdRmsDbV,
        binHz       = binHz,
        rmsDbV      = rmsDbV,
        rms_srate   = rms_srate,
        pkIdxL      = pkIdxL,
        eventTimeL  = eventTimeL,
        pkDbL       = [ rmsDbV[ i ] for i in pkIdxL ],
        pkUsL       = seqD['pulseUsL'] )