448 lines
15 KiB
Python
448 lines
15 KiB
Python
|
import os
|
||
|
import math
|
||
|
import json
|
||
|
import types
|
||
|
import wt_util
|
||
|
import calc_sample_atk_dur
|
||
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
import multiproc as mp
|
||
|
|
||
|
from scipy.interpolate import CubicSpline
|
||
|
|
||
|
|
||
|
def upsample( aV, N, interp_degree ):
    """Upsample a 1-D signal by an integer factor.

    Args:
        aV: signal vector (1-D sequence of samples).
        N: upsample factor; converted with int() and must be >= 2.
        interp_degree: "linear" (np.interp) or "cubic" (CubicSpline).

    Returns:
        A 1-D float array of length len(aV)*N in which z[i*N] == aV[i] and the
        positions in between hold interpolated values.  The last N-1 positions
        have no sample to their right, so instead of extrapolating, each one is
        set to the mean of the (up to) N values that precede it.

    Raises:
        ValueError: if N < 2, aV is empty, or interp_degree is not recognized.
    """
    N = int(N)

    if N < 2:
        raise ValueError(f"The upsample factor must be an integer >= 2 (got {N}).")

    aV = np.asarray(aV, dtype=float)
    aN = len(aV)

    if aN == 0:
        raise ValueError("Cannot upsample an empty signal.")

    # z is a copy of aV with zeros in the positions to be interpolated
    z = np.zeros(aN * N)

    # x contains the indexes into z which contain values from aV
    x = np.arange(aN) * N
    z[x] = aV

    # xi contains the indexes into z which have zeros and which lie between
    # two known samples (every index below x[-1] that is not a multiple of N)
    xi = np.arange(x[-1])
    xi = xi[xi % N != 0]

    # calc values for the zeros in z
    if interp_degree == "linear":
        z[xi] = np.interp(xi, x, aV)

    elif interp_degree == "cubic":
        # a single input sample has nothing to interpolate (xi is empty)
        if len(xi) > 0:
            z[xi] = CubicSpline(x, aV)(xi)

    else:
        raise ValueError(f"Unknown interpolation degree: {interp_degree!r}")

    # The last N-1 values are not set because they would require extrapolation
    # (they have no value to their right). Instead we set these values
    # as the mean of the preceding N values.  The window start is clamped at 0
    # so that a one-sample input does not produce a wrapped (negative) slice.
    k = (len(z) - N) + 1
    for i in range(N - 1):
        z[k + i] = np.mean(z[max(0, k + i - N):k + i])

    return z
|
||
|
|
||
|
|
||
|
|
||
|
def estimate_pitch_ac( aV, si, hzL, srate, argsD ):
    """Pick the candidate pitch whose period best matches the signal around aV[si].

    For every candidate frequency the cycle starting at aV[si] (the reference
    cycle) is compared to the cycles located 1..cycle_cnt periods before and
    after it.  The score is the mean sum of squared differences, so the
    candidate with the LOWEST score wins.

    Args:
        aV: audio vector containing a wavetable that starts at aV[si].
        si: index into aV of the reference cycle.
        hzL: list of candidate pitches in Hz.
        srate: sample rate of aV.
        argsD: dict with the keys:
            cycle_cnt        - count of cycles to compare on either side of the
                               reference cycle (1 = the cycles one period before
                               and after, 2 = the cycles one and two periods
                               before and after, ...).
            interp_degree    - "linear" or "cubic"; interpolator used to read the
                               offset cycles at fractional sample positions.
            up_fact          - set to an integer greater than 1 to upsample the
                               signal prior to estimating the pitch.
            up_interp_degree - upsampling interpolator, "linear" or "cubic".

    Returns:
        The element of hzL with the lowest score.

    NOTE(review): no bounds checking is done.  If si is closer than cycle_cnt
    periods to the start of aV the computed indexes become negative and NumPy
    wraps them silently - callers should confirm si leaves enough room.
    """

    def _auto_corr( aV, si, fsmp_per_cyc, cycle_offset_idx, interp_degree ):
        # Sum of squared differences between the reference cycle at aV[si:] and
        # the cycle located 'cycle_offset_idx' (fractional) periods away from it.

        smp_per_cyc = int(math.floor(fsmp_per_cyc))

        # fractional sample positions of the offset cycle
        xi = si + (cycle_offset_idx * fsmp_per_cyc) + np.arange(smp_per_cyc)

        # Integer sample positions bracketing xi.  The range is inclusive of
        # ceil(xi[-1]) so that the last point is interpolated, not extrapolated.
        x_min = int(math.floor(xi[0]))
        x_max = int(math.ceil(xi[-1]))
        x = np.arange(x_min, x_max + 1)
        y = aV[x]

        if interp_degree == "cubic":
            yi = CubicSpline(x, y)(xi)
        elif interp_degree == "linear":
            yi = np.interp(xi, x, y)
        else:
            raise ValueError(f"Unknown interpolation degree: {interp_degree!r}")

        # calc the sum of squared differences between the reference cycle and the 'offset' cycle
        return np.sum(np.power(yi - aV[si:si + smp_per_cyc], 2.0))

    def auto_corr( aV, si, fsmp_per_cyc, cycle_cnt, interp_degree ):
        # Accumulate the score over the cycles on both sides of the reference cycle.
        ac = 0.0
        for i in range(1, cycle_cnt + 1):
            ac += _auto_corr(aV, si, fsmp_per_cyc, i, interp_degree)
            ac += _auto_corr(aV, si, fsmp_per_cyc, -i, interp_degree)

        # return the average sum of squared diff's per cycle
        return ac / (cycle_cnt * 2)

    def ac_upsample( aV, si, fsmp_per_cyc, cycle_cnt, up_fact, up_interp_degree ):
        # Upsample just the segment of aV needed for the comparison and return
        # the upsampled segment together with the index of aV[si] inside it.

        # count of leading/trailing pad positions to allow for interpolation
        if up_interp_degree == "cubic":
            pad = 2
        elif up_interp_degree == "linear":
            pad = 1
        else:
            raise ValueError(f"Unknown upsample interpolation degree: {up_interp_degree!r}")

        # calc the beg/end of the signal segment to upsample
        bi = si - math.ceil(fsmp_per_cyc * cycle_cnt) - pad
        ei = si + math.ceil(fsmp_per_cyc * (cycle_cnt + 1)) + pad

        up_aV = upsample(aV[bi:ei], up_fact, up_interp_degree)

        # calc. index of the center signal value
        u_si = (si - bi) * up_fact

        # the center value should not change after upsampling
        assert aV[si] == up_aV[u_si]

        return up_aV, u_si

    args = types.SimpleNamespace(**argsD)

    # fancy indexing below requires an array
    aV = np.asarray(aV)

    # if upsampling was requested
    if args.up_fact > 1:
        # Select the freq candidate with the longest period, because we want to
        # upsample just enough of the signal to test for all possible candidates.
        max_fsmp_per_cyc = srate / min(hzL)
        aV, si = ac_upsample( aV, si, max_fsmp_per_cyc, args.cycle_cnt, args.up_fact, args.up_interp_degree )
        srate = srate * args.up_fact

    # calc. the score for every possible candidate frequency
    acL = [ auto_corr(aV, si, srate / hz, args.cycle_cnt, args.interp_degree) for hz in hzL ]

    # winning candidate is the one with the lowest AC score
    return hzL[ int(np.argmin(acL)) ]
|
||
|
|
||
|
# Note that pitch tracking runs at a higher rate than wave table generation.
# The wave table interval is therefore an integer multiple
# (wt_interval_down_sample_fact) of the pitch tracking interval
# (pitch_track_interval_secs).
|
||
|
def gen_wave_table_list( audio_fname,
                         mark_tsv_fname, gateL,
                         midi_pitch,
                         pitch_track_interval_secs,
                         wt_interval_down_sample_fact,
                         min_wt_db,
                         dom_ch_idx,
                         est_hz_argD,
                         ac_argD ):
    """Locate the sustain wavetables of every marked note in an audio file.

    Starting at the end of the attack of each marked note, a candidate wavetable
    (two cycles long at the nominal pitch) is located every
    pitch_track_interval_secs.  The pitch is estimated at selected candidates on
    the dominant channel, and every wt_interval_down_sample_fact'th candidate is
    stored as a wavetable record if it is loud enough.

    Args:
        audio_fname: audio file, read with wt_util.parse_audio_file().
        mark_tsv_fname: marker file, read with wt_util.parse_marker_file();
            each marker is unpacked as (beg_sec, end_sec, vel_label).
        gateL: one record per marker; gateL[i][1] is used as the sample index
            of the end of the attack.
        midi_pitch: nominal MIDI pitch of the notes in the file.
        pitch_track_interval_secs: seconds between candidate wavetables.
        wt_interval_down_sample_fact: store every n'th candidate (int() is applied).
        min_wt_db: candidates whose RMS (in dB) is not above this are dropped,
            except that the first two wavetables of a channel are always kept.
        dom_ch_idx: index of the channel used for pitch estimation.
        est_hz_argD: dict with 'min_wt_idx', 'max_wt_idx' and 'min_vel' which
            select the candidates used for pitch estimation.
        ac_argD: argument dict passed through to estimate_pitch_ac().

    Returns:
        A dict describing the pitch: the estimated frequency statistics and, in
        'velL', one record per marker holding a list of wavetable records
        (wtbi, wtei, rms, est_hz) per channel.  When no pitch estimate could be
        made the three 'est_hz_*' statistics are NaN.

    Raises:
        ValueError: if the marker list and gateL differ in length.
    """
    est_hz_args = types.SimpleNamespace(**est_hz_argD)

    aM,srate = wt_util.parse_audio_file(audio_fname)
    markL = wt_util.parse_marker_file(mark_tsv_fname)

    if len(markL) != len(gateL):
        raise ValueError(f"The marker count ({len(markL)}) does not match the gate count ({len(gateL)}).")

    ch_cnt = aM.shape[1]
    pt_interval_smp = int(round(pitch_track_interval_secs*srate))
    wt_interval_fact = int(wt_interval_down_sample_fact)
    cents_per_semi = 100

    # calc. the range of possible pitch estimates
    hz_min = wt_util.midi_pitch_to_hz(midi_pitch-1)
    hz_ctr = wt_util.midi_pitch_to_hz(midi_pitch)
    hz_max = wt_util.midi_pitch_to_hz(midi_pitch+1)

    # each wavetable holds two cycles at the nominal pitch
    smp_per_wt = int(math.floor((srate/hz_ctr) * 2))

    # width in Hz of one candidate step in the semitone below / above the nominal pitch
    lo_hz_per_cent = (hz_ctr-hz_min)/float(cents_per_semi)
    hi_hz_per_cent = (hz_max-hz_ctr)/float(cents_per_semi)

    # hzCandL is a list of candidate pitches with a range of +/- 1 semitone and a resolution of 1 cent
    hzCandL  = [ hz_min + i*lo_hz_per_cent for i in range(cents_per_semi) ]
    hzCandL += [ hz_ctr + i*hi_hz_per_cent for i in range(cents_per_semi) ]

    # setup the return data structure
    pitchD = { "midi_pitch":midi_pitch,
               "srate":srate,
               "est_hz_mean":None,
               "est_hz_err_cents":None,
               "est_hz_std_cents":None,
               "wt_interval_secs":pitch_track_interval_secs * wt_interval_fact,
               "dominant_ch_idx":int(dom_ch_idx),
               "audio_fname":audio_fname,
               "mark_tsv_fname":mark_tsv_fname,
               "velL":[]
              }

    hzL = []
    for (beg_sec,end_sec,vel_label),gate in zip(markL,gateL):
        bsi = int(round(beg_sec*srate))
        esi = int(round(end_sec*srate))
        vel = int(vel_label)
        eai = gate[1]   # end of attack

        velD = { "vel":vel, "bsi":bsi, "chL":[ [] for _ in range(ch_cnt)] }

        for ch_idx in range(ch_cnt):

            # wt_idx counts the candidate wavetables of this note/channel
            wt_idx = 0
            while True:

                wt_smp_idx = eai + wt_idx*pt_interval_smp

                # select the first zero crossing after the end of the attack
                # as the start of the first sustain wavetable
                wtbi = wt_util.find_zero_crossing(aM[:,ch_idx],wt_smp_idx,1)

                # stop when there are no more zero crossings ...
                if wtbi is None:
                    break

                wtei = wtbi + smp_per_wt

                # ... or when the wavetable would extend past the end of the note
                if wtei > esi:
                    break

                # estimate the pitch near wave tables which are: on the 'dominant' channel,
                # above a certain velocity and not too far into the decay
                if ch_idx==dom_ch_idx and est_hz_args.min_wt_idx <= wt_idx <= est_hz_args.max_wt_idx and vel >= est_hz_args.min_vel:
                    hzL.append( estimate_pitch_ac( aM[:,dom_ch_idx],wtbi,hzCandL,srate,ac_argD) )

                if wt_idx % wt_interval_fact == 0:
                    # measure the RMS of the wavetable
                    wt_rms = math.sqrt(float(np.mean(np.power(aM[wtbi:wtei,ch_idx],2.0))))

                    # a digitally silent wavetable has no finite dB value
                    wt_db = 20*math.log10(wt_rms) if wt_rms > 0 else -math.inf

                    # filter out quiet wavetable but guarantee that there are always at least two wt's.
                    if wt_db > min_wt_db or len(velD['chL'][ch_idx]) < 2:

                        # store the location and RMS of the wavetable
                        velD['chL'][ch_idx].append({"wtbi":int(wtbi),"wtei":int(wtei),"rms":float(wt_rms), "est_hz":0})

                wt_idx += 1

        pitchD['velL'].append(velD)

    if hzL:
        est_hz     = np.mean(hzL)
        est_hz_std = np.std(hzL)

        # index of the candidate nearest to the mean estimate
        est_hz_idx = int(np.argmin(np.abs(np.array(hzCandL) - est_hz)))

        # the candidate at index 'cents_per_semi' is the nominal pitch
        est_hz_err_cents = est_hz_idx - cents_per_semi

        # convert the std. dev. to cents using the cent size of the semitone
        # that the estimate falls in
        if est_hz_idx < cents_per_semi:
            est_hz_std_cents = est_hz_std / lo_hz_per_cent
        else:
            est_hz_std_cents = est_hz_std / hi_hz_per_cent
    else:
        # no wavetable qualified for pitch estimation
        print(f"{midi_pitch} warning: no pitch estimates were made.")
        est_hz = est_hz_err_cents = est_hz_std_cents = float("nan")

    print(f"{midi_pitch} est pitch:{est_hz}(hz) err:{est_hz_err_cents}(cents)" )

    pitchD["est_hz_mean"]      = float(est_hz)
    pitchD["est_hz_err_cents"] = float(est_hz_err_cents)
    pitchD["est_hz_std_cents"] = float(est_hz_std_cents)

    return pitchD
|
||
|
|
||
|
def _gen_wave_table_bank( src_dir, midi_pitch, argD ):
    """Build the wavetable description for a single MIDI pitch.

    The audio is read from '<src_dir>/wav/NNN_samples.wav' and the markers from
    '<src_dir>/NNN_marker.txt', where NNN is the zero padded MIDI pitch.  The
    attack gates are calculated first, the channel with the largest average
    RMS is taken as the dominant channel, and the result of
    gen_wave_table_list() is returned.

    Args:
        src_dir: directory holding the sample and marker files.
        midi_pitch: MIDI pitch to process.
        argD: configuration dict (see the '__main__' block for the keys used).
    """
    cfg = types.SimpleNamespace(**argD)

    wav_fname    = os.path.join(src_dir,f"wav/{midi_pitch:03}_samples.wav")
    marker_fname = os.path.join(src_dir,f"{midi_pitch:03}_marker.txt")

    # Two gate detectors are available: the dB-threshold version is the one in
    # use, the percentage version (fixed at 0.1) is kept as an alternative.
    use_db_gate = True

    if use_db_gate:
        gate_info = calc_sample_atk_dur.generate_gate_db( wav_fname,
                                                          marker_fname,
                                                          cfg.rms_wnd_ms,
                                                          cfg.rms_hop_ms,
                                                          cfg.atk_min_dur_ms,
                                                          cfg.atk_end_thresh_db )
    else:
        gate_info = calc_sample_atk_dur.generate_gate_pct( wav_fname,
                                                           marker_fname,
                                                           cfg.rms_wnd_ms,
                                                           cfg.rms_hop_ms,
                                                           cfg.atk_min_dur_ms,
                                                           0.1 )

    gateL,ch_avgRmsL = gate_info

    # the loudest channel (on average) is used for pitch estimation
    loudest_ch_idx = np.argmax(ch_avgRmsL)

    return gen_wave_table_list( wav_fname,
                                marker_fname,
                                gateL,
                                midi_pitch,
                                cfg.pitch_track_interval_secs,
                                cfg.wt_interval_down_sample_fact,
                                cfg.min_wt_db,
                                loudest_ch_idx,
                                cfg.est_hz,
                                cfg.ac )
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
def gen_wave_table_bank_mp( processN, src_dir, midi_pitchL, out_fname, argD ):
    """Generate the wavetable descriptions for a list of pitches and write them to a JSON file.

    Args:
        processN: count of worker processes to use.  It is limited to the count
            of pitches; when the result is 0 (or less) the pitches are processed
            serially in this process.
        src_dir: directory holding the sample and marker files.
        midi_pitchL: list of MIDI pitches to process.
        out_fname: name of the JSON file to write.
        argD: configuration dict passed to _gen_wave_table_bank() and also
            stored in the output file.

    The output file holds {"pitchL": [...], "instr": "piano", "argD": argD}
    with 'pitchL' sorted by MIDI pitch.
    """

    def _multi_proc_func( procId, procArgsD, taskArgsD ):
        # worker entry point: process the one pitch described by taskArgsD
        return _gen_wave_table_bank( procArgsD['src_dir'],
                                     taskArgsD['midi_pitch'],
                                     procArgsD['argD'] )

    # arguments shared by all tasks
    procArgsD = {
        "src_dir":src_dir,
        "argD": argD
    }

    # one task per pitch
    taskArgsL = [ { 'midi_pitch':midi_pitch } for midi_pitch in midi_pitchL ]

    # never start more processes than there are tasks
    processN = min(processN,len(taskArgsL))

    if processN > 0:
        # NOTE(review): presumably returns one result per task - confirm against 'multiproc'
        pitchL = mp.local_distribute_main( processN,_multi_proc_func,procArgsD,taskArgsL )
    else:
        # serial fallback
        pitchL = [ _gen_wave_table_bank( src_dir, r['midi_pitch'], argD ) for r in taskArgsL ]

    pitchL = sorted(pitchL,key=lambda x:x['midi_pitch'])

    with open(out_fname,"w") as f:
        json.dump({"pitchL":pitchL, "instr":"piano", "argD":argD},f)
|
||
|
|
||
|
|
||
|
|
||
|
def plot_rms( wtb_json_fname ):
    """Plot the RMS (in dB) of the stored wavetables, one figure per pitch.

    Every figure holds one line per velocity/channel combination, where each
    point is the level of one wavetable record.

    Args:
        wtb_json_fname: JSON file written by gen_wave_table_bank_mp().
    """
    with open(wtb_json_fname) as f:
        pitchL = json.load(f)['pitchL']

    pitchL = sorted(pitchL,key=lambda x:x['midi_pitch'])

    for pitchD in pitchL:
        _,ax = plt.subplots(1,1)

        # the records are stored as pitchD['velL'][...]['chL'][ch_idx][...]
        for velD in pitchD['velL']:
            for chWtL in velD['chL']:
                # a silent wavetable has no finite dB value; NaN leaves a gap in the line
                rmsL = [ 20*math.log10(wt['rms']) if wt['rms'] > 0 else float("nan") for wt in chWtL ]
                ax.plot(rmsL)

        plt.title(f"{pitchD['midi_pitch']}")
        plt.show()
|
||
|
|
||
|
def plot_atk_dur( wtb_json_fname ):
    """Plot, per pitch, the time from note onset to the first wavetable vs. velocity.

    The time is measured on channel 0 as the distance, in seconds, between the
    start of the note ('bsi') and the start of its first wavetable ('wtbi').

    Args:
        wtb_json_fname: JSON file written by gen_wave_table_bank_mp().
    """
    with open(wtb_json_fname) as f:
        bank_pitchL = json.load(f)['pitchL']

    for pitchD in sorted(bank_pitchL,key=lambda d:d['midi_pitch']):
        _,ax = plt.subplots(1,1)

        # seconds between the note start and the first wavetable of channel 0
        secL = []
        for velD in pitchD['velL']:
            first_wt = velD['chL'][0][0]
            secL.append( (first_wt['wtbi']-velD['bsi'])/pitchD['srate'] )

        velL = [ velD['vel'] for velD in pitchD['velL'] ]

        ax.plot(velL,secL,marker=".")

        plt.title(f"{pitchD['midi_pitch']}")
        plt.show()
|
||
|
|
||
|
|
||
|
def plot_hz( wtb_json_fname ):
    """Plot the pitch estimation results of a wavetable bank in three stacked axes.

    Top: mean estimated frequency vs. MIDI pitch.  Middle: std. deviation (cents)
    vs. mean estimated frequency.  Bottom: error (cents) vs. MIDI pitch with red
    reference lines at 0, 10 and 20.

    Args:
        wtb_json_fname: JSON file written by gen_wave_table_bank_mp().
    """
    with open(wtb_json_fname) as f:
        bank_pitchL = json.load(f)['pitchL']

    bank_pitchL = sorted(bank_pitchL,key=lambda d:d['midi_pitch'])

    _,axL = plt.subplots(3,1)

    def column( key ):
        # gather one field from every pitch record, in pitch order
        return [ d[key] for d in bank_pitchL ]

    midiL  = column('midi_pitch')
    meanL  = column("est_hz_mean")
    stdL   = column("est_hz_std_cents")
    errL   = column("est_hz_err_cents")

    axL[0].plot(midiL,meanL)
    axL[1].plot(meanL,stdL)
    axL[2].hlines([0,10,20],midiL[0],midiL[-1],color="red")
    axL[2].plot(midiL,errL)

    plt.show()
|
||
|
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":

    # MIDI pitches 21 through 108 inclusive (replace with e.g. [60] to process a single pitch)
    midi_pitchL = list(range(21,109))
    out_fname   = "/home/kevin/temp/temp_5.json"
    src_dir     = "/home/kevin/temp/wt6"

    # selects the wavetables which are used for pitch estimation
    est_hz_cfg = dict( min_vel=50,
                       min_wt_idx=2,
                       max_wt_idx=4 )

    # auto-correlation pitch detector settings
    ac_cfg = dict( cycle_cnt=8,             # count of cycles to use for auto-corr. pitch detection
                   interp_degree="cubic",
                   up_fact=2,
                   up_interp_degree="cubic" )

    argD = dict( rms_wnd_ms=50,
                 rms_hop_ms=10,
                 atk_min_dur_ms=1000,
                 atk_end_thresh_db=-43.0,
                 min_wt_db=-80.0,
                 pitch_track_interval_secs=0.25,
                 # wt_interval_secs = pitch_track_interval_secs * wt_interval_down_sample_fact
                 wt_interval_down_sample_fact=8.0,
                 est_hz=est_hz_cfg,
                 ac=ac_cfg )

    gen_wave_table_bank_mp(20, src_dir, midi_pitchL, out_fname, argD )

    # plot_rms(out_fname) and plot_hz(out_fname) give alternative views of the same file
    plot_atk_dur(out_fname)
|
||
|
|