diff --git a/SingingMeansCovars.mat b/SingingMeansCovars.mat new file mode 100644 index 0000000..f604934 Binary files /dev/null and b/SingingMeansCovars.mat differ diff --git a/alignmentVisualiser.m b/alignmentVisualiser.m new file mode 100644 index 0000000..898659a --- /dev/null +++ b/alignmentVisualiser.m @@ -0,0 +1,115 @@ +function alignmentVisualiser(trace,mid,spec,fig) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% alignmentVisualiser(trace,sig,sr,mid,highlight) +% +% Description: +% Plots a gross DTW alignment overlaid with the fine alignment +% resulting from the HMM aligner on the output of YIN. Trace(1,:) +% is the list of states in the HMM, and trace(2,:) is the number of YIN +% frames for which that state is occupied. Highlight is a list of +% notes for which the steady state will be highlighted. +% +% Inputs: +% trace - 3-D matrix of a list of states (trace(1,:)), the times +% they end at (trace(2,:)), and the state indices (trace(3,:)) +% mid - midi file +% spec - spectogram of audio file (from alignmidiwav.m) +% +% Dependencies: +% Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: +% https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials +% /miditoolbox/ +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if ~exist('fig', 'var'), fig=1; end + +% Fix for ending zeros that mess up the plot +if trace(2,end)==0 + trace=trace(:,1:end-1); +end +if trace(2, end-1)==0 + trace(2,end-1)=trace(2,end-2); +end + +% hop size between frames +stftHop = 0.025; + +% read midi file +nmat=readmidi(mid); + +% plot spectogram of audio file +figure(fig) +imagesc(20*log10(spec)); +title(['Spectrogram with Aligned MIDI Notes Overlaid']); +xlabel(['Time (.05s)']); +ylabel(['Midinote']); +axis xy; +caxis(max(caxis)+[-50 0]) +colormap(1-gray) + +% zoom in fundamental frequencies +notes = nmat(:,4)'; +notes = (2.^((notes-105)/12))*440; +notes(end+1) = notes(end); +nlim = length(notes); + +% plot alignment +plotFineAlign(trace(1,:), trace(2,:), notes(1:nlim), stftHop); +if size(trace,1) >= 3 + notenums = trace(3,2:end); +else + nlim = length(notes); + notenums = [reshape(repmat(1:nlim,4,1),1,[]) nlim]; +end + + +function plotFineAlign(stateType, occupancy, notes, stftHop) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% plotFineAlign(stateType, occupancy, notes, stftHop, highlight) +% +% Description: +% Plot the HMM alignment based on the output of YIN. StateType is the +% list of states in the HMM, and occupancy is the number of YIN frames +% for which that state is occupied. Notes is a list of midi note numbers +% that are played, should be one note for each [3] in stateType. If the +% highlight vector is supplied, it should contain indices of the states +% to highlight by plotting an extra line at the bottom of the window. 
+% +% Inputs: +% stateType - vector with a list of states +% occupancy - vector indicating the time (in seconds) at which the states +% in stateType end +% notes - vector of notes from MIDI file +% stftHop - the hop size between frames in the spectrogram +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Plot the 4 states: silence in red, beginning transient in green, +% steady state in blue, ending transient in green. + +styles = {{'r+-', 'LineWidth', 2}, + {'g+-', 'LineWidth', 2}, + {'b+-', 'LineWidth', 2}}; + +cs = occupancy /stftHop; +segments = [cs(1:end-1); cs(2:end)]'; + +hold on + +stateNote = max(1, cumsum(stateType == 3)+1); +for i=1:size(segments,1) + plot(segments(i,:)', repmat(notes(stateNote(i)),2,1), styles{stateType(i+1)}{:}) +end + +hold off diff --git a/alignmentVisualiser.m~ b/alignmentVisualiser.m~ new file mode 100644 index 0000000..cf39138 --- /dev/null +++ b/alignmentVisualiser.m~ @@ -0,0 +1,112 @@ +function alignmentVisualiser(trace,mid,spec,fig) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% alignmentVisualiser(trace,sig,sr,mid,highlight) +% +% Description: +% Plots a gross DTW alignment overlaid with the fine alignment +% resulting from the HMM aligner on the output of YIN. Trace(1,:) +% is the list of states in the HMM, and trace(2,:) is the number of YIN +% frames for which that state is occupied. Highlight is a list of +% notes for which the steady state will be highlighted. +% +% Inputs: +% trace - 3-D matrix of a list of states (trace(1,:)), the times +% they end at (trace(2,:)), and the state indices (trace(3,:)) +% mid - midi file +% spec - spectogram of audio file (from alignmidiwav.m) +% +% Dependencies: +% Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. 
Available from: +% https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials +% /miditoolbox/ +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Fix for ending zeros that mess up the plot +if trace(2,end)==0 + trace=trace(:,1:end-1); +end +if trace(2, end-1)==0 + trace(2,end-1)=trace(2,end-2); +end + +% hop size between frames +stftHop = 0.025; + +% read midi file +nmat=readmidi(mid); + +% plot spectogram of audio file +imagesc(20*log10(spec)); +title(['Spectrogram with Aligned MIDI Notes Overlaid']); +xlabel(['Time (.05s)']); +ylabel(['Midinote']); +axis xy; +caxis(max(caxis)+[-50 0]) +colormap(1-gray) + +% zoom in fundamental frequencies +notes = nmat(:,4)'; +notes = (2.^((notes-105)/12))*440; +notes(end+1) = notes(end); +nlim = length(notes); + +% plot alignment +plotFineAlign(trace(1,:), trace(2,:), notes(1:nlim), stftHop); +if size(trace,1) >= 3 + notenums = trace(3,2:end); +else + nlim = length(notes); + notenums = [reshape(repmat(1:nlim,4,1),1,[]) nlim]; +end + + +function plotFineAlign(stateType, occupancy, notes, stftHop) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% plotFineAlign(stateType, occupancy, notes, stftHop, highlight) +% +% Description: +% Plot the HMM alignment based on the output of YIN. StateType is the +% list of states in the HMM, and occupancy is the number of YIN frames +% for which that state is occupied. Notes is a list of midi note numbers +% that are played, should be one note for each [3] in stateType. If the +% highlight vector is supplied, it should contain indices of the states +% to highlight by plotting an extra line at the bottom of the window. 
+% +% Inputs: +% stateType - vector with a list of states +% occupancy - vector indicating the time (in seconds) at which the states +% in stateType end +% notes - vector of notes from MIDI file +% stftHop - the hop size between frames in the spectrogram +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Plot the 4 states: silence in red, beginning transient in green, +% steady state in blue, ending transient in green. + +styles = {{'r+-', 'LineWidth', 2}, + {'g+-', 'LineWidth', 2}, + {'b+-', 'LineWidth', 2}}; + +cs = occupancy /stftHop; +segments = [cs(1:end-1); cs(2:end)]'; + +hold on + +stateNote = max(1, cumsum(stateType == 3)+1); +for i=1:size(segments,1) + plot(segments(i,:)', repmat(notes(stateNote(i)),2,1), styles{stateType(i+1)}{:}) +end + +hold off diff --git a/example.mid b/example.mid new file mode 100644 index 0000000..5855fd7 Binary files /dev/null and b/example.mid differ diff --git a/example.wav b/example.wav new file mode 100644 index 0000000..6e8e8de Binary files /dev/null and b/example.wav differ diff --git a/exampleScript.m b/exampleScript.m new file mode 100644 index 0000000..c857f7d --- /dev/null +++ b/exampleScript.m @@ -0,0 +1,72 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% exampleScript.m +% +% Description: +% Example of how to use the HMM alignment algorithm +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% You will need to have the following toolkits installed and in your path +% de Cheveigné, A. 2002. YIN MATLAB implementation Available from: +% http://audition.ens.fr/adc/sw/yin.zip +% Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available +% from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ +% Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available +% from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ +% Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. +% Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html +% Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: +% https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials +% /miditoolbox/ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% audio file to be aligned +audiofile=('example.wav'); + +% MIDI file to be aligned +midifile=('example.mid'); + +% number of notes to align +numNotes=6; + +% vector of order of states (according to lyrics) in stateOrd and +% corresponding note numbers in noteNum +% 1 indicates a rest at the beginning of ending of the note +% 2 indicates a transient at the beginning or ending of the note +% 3 indicates a steady state section +% the following encoding is for six syllables "A-ve Ma-ri-(i)-a" +% syllable A-ve Ma-ri-(i)-a +% state type 13 23 23 23 3 31 +% note number 11 22 33 44 5 66 +stateOrd = [1 3 2 3 2 3 2 3 3 3 1]; +noteNum = [1 1 2 2 3 3 4 4 5 6 6]; + +% load singing means and covariances for the HMM alignment +load SingingMeansCovars.mat +means=sqrtmeans; +covars=sqrtcovars; + +% specify that the means and covariances in the HMM won't be learned +learnparams=0; + +% run the alignment +[allstate selectstate,spec,yinres]=runAlignment(audiofile, midifile, numNotes, stateOrd, noteNum, means, 
covars, learnparams); + +% visualise the alignment +alignmentVisualiser(selectstate,midifile,spec,1); + +% get onset and offset times +times=getOnsOffs(selectstate); + +% write the onset and offset times to an audacity-readable file +dlmwrite('example.txt',[times.ons' times.offs'], 'delimiter', '\t'); + +% map timing information to the quantized MIDI file +nmatNew=getTimingData(midifile, times); + +% calculate intervals size, perceived pitch, vibrato rate, vibrato depth, and loudness +[vibratoDepth, vibratoRate, noteDynamics, intervalSize, pp,nmatNew]=getPitchVibratoDynamicsData(times,yinres,nmatNew); \ No newline at end of file diff --git a/exampleScript.m~ b/exampleScript.m~ new file mode 100644 index 0000000..d2166eb --- /dev/null +++ b/exampleScript.m~ @@ -0,0 +1,77 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% exampleScript.m +% +% Description: +% Example of how to use the HMM alignment algorithm +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% you will need to have the following toolkits installed and in your path +% de Cheveigné, A. 2002. YIN MATLAB implementation Available from: +% http://audition.ens.fr/adc/sw/yin.zip +% Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available +% from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ +% Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available +% from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ +% Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. +% Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html +% Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. 
Available from: +% https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials +% /miditoolbox/ + +% audio file to be aligned +audiofile=('example.wav'); + +% MIDI file to be aligned +midifile=('example.mid'); + +% number of notes to align +numNotes=6; + +% vector of order of states (according to lyrics) in stateOrd and +% corresponding note numbers in noteNum +% 1 indicates a rest at the beginning of ending of the note +% 2 indicates a transient at the beginning or ending of the note +% 3 indicates a steady state section +% the following encoding is for six syllables "A-ve Ma-ri-(i)-a" +% syllable A-ve Ma-ri-(i)-a +% state type 13 23 23 23 3 31 +% note number 11 22 33 44 5 66 +stateOrd = [1 3 2 3 2 3 2 3 3 3 1]; +noteNum = [1 1 2 2 3 3 4 4 5 6 6]; + +% load singing means and covariances for the HMM alignment +load SingingMeansCovars.mat +means=sqrtmeans; +covars=sqrtcovars; + +% specify that the means and covariances in the HMM won't be learned +learnparams=0; + +% run the alignment +[allstate selectstate,spec,yinres]=runAlignment(audiofile, midifile, numNotes, stateOrd, noteNum, means, covars, learnparams); + +% visualise the alignment +alignmentVisualiser(selectstate,midifile,spec); + +% get onset and offset times +times=getOnsOffs(selectstate); + +% map timing information to the quantized MIDI file +nmatNew=getTimingData(midifile, times) +% visualise the between the quantized version and actual performance using the pianoroll +% function from the MIDI Toolbox +figure(1) +subplot(211) +pianoroll(nmat,'b','vel') +figure(3) +pianoroll(nmatOld,'b','vel') + +% calculate intervals size, perceived pitch, vibrato rate, vibrato depth, and loudness +[vibratoDepth, vibratoRate, noteDynamics, intervalSize, pp]=getPitchVibratoDynamicsData(times,yinres) + + + diff --git a/fillpriormat_gauss.m b/fillpriormat_gauss.m new file mode 100644 index 0000000..7c08811 --- /dev/null +++ b/fillpriormat_gauss.m @@ -0,0 +1,170 @@ +function prior = 
fillpriormat_gauss(Nobs,ons,offs,Nstates) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% prior = fillpriormat_gauss(Nobs,ons,offs,Nstates) +% +% Description: +% Creates a prior matrix based on the DTW alignment (supplied by the input +% variables ons and offs. A rectangular window with half a Gaussian on +% each side over the onsets and offsets estimated by the DTW alignment. +% +% Inputs: +% Nobs - number of observations +% ons - vector of onset times predicted by DTW alignment +% offs - vector of offset times predicted by DTW alignment +% Nstates - number of states in the hidden Markov model +% +% Outputs: +% prior - prior matrix based on DTW alignment +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org - Johanna Devaney, 2011 +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if ~exist('Nstates', 'var'), Nstates = 5; end + +Nnotes = length(ons); +prior = sparse(Nnotes*(Nstates-1)+1,Nobs); +frames = 1:Nobs; + +for i=1:Nnotes + row = (i-1)*(Nstates-1); + insert = Nstates-5; + + % Silence + prior(row+1,:) = flatTopGaussian(frames, gh(ons,i-1,offs,i-1,frames,.5), ... + g(offs,i-1,frames), g(ons,i,frames), gh(ons,i,offs,i,frames,.5)); + + prior(row+2:row+2+insert-1,:) = repmat(prior(row+1,:),insert,1); + + % Transient, steady state, transient + prior(row+2+insert,:) = ... + flatTopGaussian(frames, g(offs,i-1,frames), ... + gh(offs,i-1,ons,i,frames,.75), gh(ons,i,offs,i,frames,.25), g(offs,i,frames)); + prior(row+3+insert,:) = ... + flatTopGaussian(frames, g(offs,i-1,frames), ... + g(ons,i,frames), g(offs,i,frames), g(ons,i+1,frames)); + prior(row+4+insert,:) = ... + flatTopGaussian(frames, g(ons,i,frames), ... 
+ gh(ons,i,offs,i,frames,.75), gh(offs,i,ons,i+1,frames,.25), g(ons,i+1,frames)); + +end + +% The last silence (reuses row and insert from the final loop iteration) +i = i+1; +prior(row+5+insert,:) = flatTopGaussIdx(frames, ons,i-1, offs,i-1, ... + offs,i, ons,i+1); + +function x = gh(v1, i1, v2, i2, domain, frac) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% x = gh(v1, i1, v2, i2, domain, frac) +% +% Description: +% Get the point floor(frac*v1(i1) + (1-frac)*v2(i2)), fetching both +% elements with the bounds checking of g. Frac of 1 returns v1(i1), +% frac of 0 returns v2(i2), frac of 1/2 (the default) returns half way +% between them. NOTE(review): confirm callers expect frac to weight v1. +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if ~exist('frac', 'var'), frac = 0.5; end + +x1 = g(v1, i1, domain); +x2 = g(v2, i2, domain); +x = floor(frac*x1 + (1-frac)*x2); + +function w = flatTopGaussIdx(x, b1,bi1, t1,ti1, t2,ti2, b2,bi2) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% flatTopGaussIdx(x, b1,bi1, t1,ti1, t2,ti2, b2,bi2) +% +% Description: +% Create a window function that is zeros, going up to 1s with the left +% half of a gaussian, then ones, then going back down to zeros with +% the right half of another gaussian. b1(bi1) is the x coordinate 2 +% stddevs out from the mean, which is at t1(ti1). t2(ti2) is the x +% coordinate of the mean of the second gaussian and b2(bi2) is 2 +% stddevs out from that. The points should be in that order. Vectors +% are indexed intelligently, so you don't have to worry about +% overflows or underflows. X is the set of points over which this is +% to be calculated. 
+% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +b1 = g(b1, bi1, x); +t1 = g(t1, ti1, x); +t2 = g(t2, ti2, x); +b2 = g(b2, bi2, x); +w = flatTopGaussian(x, b1, t1, t2, b2); + + + +function x = g(vec, idx, domain) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% x = g(vec, idx, domain) +% +% Description: +% Get an element from vec, checking bounds. Domain is the set of points +% that vec is a subset of. +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if idx < 1 + x = 1; +elseif idx > length(vec) + x = domain(end); +else + x = vec(idx); +end + + + +function w = flatTopGaussian(x, b1, t1, t2, b2) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% flatTopGaussian(x, b1, t1, t2, b2) +% +% Description: +% Create a window function that is zeros, going up to 1s with the left +% half of a gaussian, then ones, then going back down to zeros with the +% right half of another gaussian. b1 is the x coordinate 2 stddevs out +% from the mean, which is at t1. t2 is the x coordinate of the mean of +% the second gaussian and b2 is 2 stddevs out from that. The points +% should be in that order. X is the set of points over which this is +% to be calculated. +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if any([b1 t1 t2] > [t1 t2 b2]) + warning('Endpoints are not in order: %f %f %f %f', b1, t1, t2, b2) +end + +left = normpdf(x, t1, (t1-b1)/2+1); +middle = ones(1,t2-t1-1); +right = normpdf(x, t2, (b2-t2)/2+1); + +left = left ./ max(left); +right = right ./ max(right); + +takeOneOut = (t1 == t2); +w = [left(1:t1) middle right(t2+takeOneOut:end)]; + \ No newline at end of file diff --git a/filltransmat.m b/filltransmat.m new file mode 100755 index 0000000..e7a1fc4 --- /dev/null +++ b/filltransmat.m @@ -0,0 +1,41 @@ +function trans = filltransmat(transseed, notes) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% trans = filltransmat (transseed, notes) +% +% Description: +% Makes a transition matrix from a seed transition matrix. The seed +% matrix is composed of the states: steady state, transient, silence, +% transient, steady state, but the full transition matrix starts and +% ends with silence, so the seed with be chopped up on the ends. +% Notes is the number of times to repeat the seed. Transseed's first +% and last states should be equivalent, as they will be overlapped +% with each other. +% +% Inputs: +% transseed - transition matrix seed +% notes - number of notes being aligned +% +% Outputs: +% trans - transition matrix +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Set up transition matrix +N = size(transseed,1); +trans = zeros(notes*(N-1)+1,notes*(N-1)+1); +Non2 = ceil(N/2); + +% Fill in first and last parts of the big matrix with the +% appropriate fragments of the seed +trans(1:Non2, 1:Non2) = transseed(Non2:end, Non2:end); +trans(end-Non2+1:end, end-Non2+1:end) = transseed(1:Non2, 1:Non2); + +% Fill in the middle parts of the big matrix with the whole seed +for i = Non2 : N-1 : (notes-1)*(N-1)+1 - Non2+1 + trans(i+(1:N)-1,i+(1:N)-1) = transseed; +end diff --git a/getOnsOffs.m b/getOnsOffs.m new file mode 100644 index 0000000..5e6e52b --- /dev/null +++ b/getOnsOffs.m @@ -0,0 +1,30 @@ +function res=getOnsOffs(onsoffs) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% res=getOnsOffs(onsoffs) +% +% Description: Extracts lists of onset and offset times from a +% 3*N matrix of states and corresponding ending times +% from AMPACT's HMM-based alignment algorithm +% +% Inputs: +% onsoffs - a 3*N alignment matrix, the first row is a list of N states +% the second row is the time which the state ends, and the +% third row is the state index +% +% Outputs: +% res.ons - row of onset times (end time of the state before each state 3) +% res.offs - row of offset times (end time of each state 3); empty if none +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +stopping=find(onsoffs(1,:)==3); +starting=stopping-1; % a steady state starts when the preceding state ends + +% index the time row directly so that res is always assigned (with empty +% fields when no state 3 is present) instead of being grown in a loop +res.ons=onsoffs(2,starting); +res.offs=onsoffs(2,stopping); \ No newline at end of file diff --git a/getOnsOffs.m~ b/getOnsOffs.m~ new file mode 100644 index 0000000..a2ba89e --- /dev/null +++ b/getOnsOffs.m~ @@ -0,0 +1,32 @@ +function res=getOnsOffs(onsoffs) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% res=getOnsOffs(onsoffs) +% +% 
Description: Extracts a list of onset and offset from an inputted +% #-N matrix of states and corresponding ending times +% +% +% +% Inputs: +% onsoffs - a 3*N alignment matrix, the first row is a list of N states +% the second row is the time which the state ends, and the +% third row is the state index +% +% Outputs: +% res.ons - list of onset times +% res.offs - list of offset times +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +stopping=find(onsoffs(1,:)==3); +starting=stopping-1; + +for i = 1 : length(starting) + res.ons(i)=onsoffs(2,starting(i)); + res.offs(i)=onsoffs(2,stopping(i)); +end \ No newline at end of file diff --git a/getPitchVibratoDynamicsData.m b/getPitchVibratoDynamicsData.m new file mode 100644 index 0000000..54f5d35 --- /dev/null +++ b/getPitchVibratoDynamicsData.m @@ -0,0 +1,50 @@ +function [vibratoDepth, vibratoRate, noteDynamic, intervalSize, pp, nmat,cents]=getPitchVibratoDynamicsData(times,yinres,nmat) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% [vibratoDepth, vibratoRate, noteDynamic, intervalSize, pp, nmat, cents] +% =getPitchVibratoDynamicsData(times,yinres,nmat) +% +% Description: +% Computes per-note pitch, vibrato and dynamics descriptors from the +% YIN frames that lie between each note's onset and offset. +% +% Inputs: +% times - struct with fields ons and offs (one entry per note); each is +% turned into a YIN frame index as round(t/32*yinres.sr) +% yinres - struct with fields f0, pwr and sr (presumably the output of +% YIN run with a 32-sample hop - confirm against the caller) +% nmat - note matrix, one row per note; its fifth column is overwritten +% +% Outputs: +% vibratoDepth - magnitude of the largest FFT bin of each note's f0 +% track (DC and the upper half of the spectrum zeroed) +% vibratoRate - index of that bin scaled by (yinres.sr/32)/FFT length +% noteDynamic - mean of 10*log10(pwr) over each note +% intervalSize - pp(i+1)*1200-pp(i)*1200 for consecutive notes +% pp - perceivedPitch of each note's f0 track +% nmat - the input nmat with column 5 set to noteDynamic+100 +% cents - cell array holding each note's f0 track +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +for i = 1 : length(times.ons) + cents{i}=yinres.f0(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr)); + pp(i)=perceivedPitch(cents{i}, 1/yinres.sr*32, 100000); + vibrato{i}=fft(cents{i}); + vibrato{i}(1)=0; + vibrato{i}(round(end/2):end) 
= 0; + [vibratoDepth(i) noteVibratOpos(i)] = max(abs(vibrato{i})); + % frame rate is yinres.sr/32, the same hop used for the indexing above, rather than a fixed 44100/32 + vibratoRate(i) = noteVibratOpos(i) * (yinres.sr/32) / length(vibrato{i}); + pwrs{i}=yinres.pwr(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr)); + dynamicsVals{i}=10*log10(pwrs{i}); + noteDynamic(i)=mean(dynamicsVals{i}); +end + +nmat(:,5)=(noteDynamic+100)'; + +% vectorised so that intervalSize is assigned (empty) even for a single note +intervalSize = diff(pp*1200); \ No newline at end of file diff --git a/getPitchVibratoDynamicsData.m~ b/getPitchVibratoDynamicsData.m~ new file mode 100644 index 0000000..9a82064 --- /dev/null +++ b/getPitchVibratoDynamicsData.m~ @@ -0,0 +1,42 @@ +function [vibratoDepth, vibratoRate, noteDynamic, intervalSize, pp, nmat,cents]=getPitchVibratoDynamicsData(times,yinres,nmat) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% [vibratoDepth, vibratoRate, noteDynamics, intervals] +% =getPitchVibratoDynamicsData(times,yinres) +% +% Description: +% +% Inputs: +% times - +% yinres - +% +% Outputs: +% vibratoDepth - +% vibratoRate - +% noteDynamics - +% intervalSize - +% pp +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +for i = 1 : length(times.ons) + cents{i}=yinres.f0(times.ons(i)/32*yinres.sr:times.offs(i)/32*yinres.sr); + pp(i)=perceivedPitch(cents{i}, 32*yinres.sr, 1000000); + vibrato{i}=fft(cents{i}); + vibrato{i}(1)=0; + vibrato{i}(end/2:end) = 0; + [vibratoDepth(i) noteVibratOpos(i)] = max(abs(vibrato{i})); + vibratoRate(i) = noteVibratOpos(i) * (44100/32) / length(vibrato{i}); + pwrs{i}=yinres.pwr(times.ons(i)/32*yinres.sr:times.offs(i)/32*yinres.sr); + dynamicsVals{i}=10*log10(pwrs{i}); + noteDynamic(i)=mean(dynamicsVals{i}); +end + +nmat(:,5)=(noteDynamic+100)'; + +for i=1 : length(pp)-1 + intervalSize(i) = hzcents(pp(i),pp(i+1)); +end \ No newline at end of file diff --git 
a/getTimingData.m b/getTimingData.m new file mode 100644 index 0000000..29b76df --- /dev/null +++ b/getTimingData.m @@ -0,0 +1,27 @@ +function nmatNew=getTimingData(midifile, times) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% nmatNew=getTimingData(midifile, times) +% +% Description: Reads a MIDI file into a note matrix and replaces its timing +% columns with the aligned onsets and durations given in times. +% Inputs: +% midifile - MIDI file name, passed to readmidi +% times - struct with fields ons and offs, one entry per note (e.g. from getOnsOffs) +% +% Outputs: +% nmatNew - note matrix with columns 1:2 and 6:7 set to the aligned onsets (shifted to start at the first halved MIDI onset) and durations +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +nmatOld=readmidi(midifile); +nmatOld(:,[1,2])=nmatOld(:,[1,2])/2; + +nmatNew=nmatOld; +nmatNew(:,6:7)=[times.ons',times.offs'-times.ons']; +offset=nmatNew(1,6)-nmatOld(1,1); +nmatNew(:,6)=nmatNew(:,6)-offset; +nmatNew(:,[1,2])=nmatNew(:,[6,7]); \ No newline at end of file diff --git a/getVals.m b/getVals.m new file mode 100644 index 0000000..73d08a8 --- /dev/null +++ b/getVals.m @@ -0,0 +1,51 @@ +function [res, yinres, spec]=getVals(filename, midifile, audiofile, sr, hop) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% [res, yinres]=getVals(filename, midifile, audiofile, sr, hop) +% +% Description: +% Gets values for DTW alignment and YIN analysis of specified audio +% signal and MIDI file +% +% Inputs: +% filename +% midifile, audiofile, sr, hop +% +% Outputs: +% res +% res.on list of DTW predicted onset times in seconds +% res.off list of DTW predicted offset times in seconds +% yinres (below are the two elements that are used) +% yinres.ap aperiodicty estimates for each frame +% yinres.pwr power estimates for each frame +% +% Dependencies: +% de Cheveigné, A. 2002. YIN MATLAB implementation Available from: +% http://audition.ens.fr/adc/sw/yin.zip +% Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: +% https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials... 
+% /miditoolbox/ +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% run the dynamic time warping alignment +[res,spec] = runDTWAlignment(filename, midifile, 0.025); + +% normalize audio file +audiofile=audiofile/sqrt(mean(audiofile.^2)); + +% read MIDI file +nmat=readmidi(midifile); + +% define parameters for YIN analysis +P.thresh = 0.01; +P.sr = sr; +P.hop = hop; +P.maxf0 = max(midi2hz(nmat(:,4)+2)); +P.minf0 = min(midi2hz(nmat(:,4)-1)); + +% run YIN on audiofile +yinres=yin(audiofile,P); \ No newline at end of file diff --git a/hzcents.m b/hzcents.m new file mode 100644 index 0000000..832b34d --- /dev/null +++ b/hzcents.m @@ -0,0 +1,30 @@ +function cents = hzcents(x1, x2) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% cents = hzcents(x1, x2) +% +% Description: Calculates the difference in cents between the frequencies +% supplied in x1 and x2 using the formula: +% cents = 1200 * log(x2/x1) / log 2 +% if x2 is higher than x1 the value in cents will be positive +% if x2 is lower than x1 the value in cents will be negative +% +% Inputs: +% x1 - frequency one in hertz +% x2 - frequency two in hertz +% +% Outputs: +% cents - size of the interval in cents from x1 to x2 (0 if either is 0) +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if x1 == 0 % a zero frequency has no defined interval; report 0 cents + cents = 0; +elseif x2 == 0 + cents = 0; +else + cents = 1200 * log(x2 ./ x1) ./ log(2); +end \ No newline at end of file diff --git a/hzcents.m~ b/hzcents.m~ new file mode 100644 index 0000000..6356343 --- /dev/null +++ b/hzcents.m~ @@ -0,0 +1,29 @@ +function cents = hzcents(x1, x2) + 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% y = hzcents(x1, x2) +% +% Description: Calculates the difference in cents between the frequencies +% supplied in x1 and x2 using the formula: +% cents = 1200 * log(x1/x2) / log 2 +% if x1 is lower than +% +% Inputs: +% x1 - frequency one in hertz +% x2 - frequency two in hertz +% +% Outputs: +% cents - size of the interval in cents between x1 and x2 +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if x1 == 0 + cents = 0 +elseif x2 == 0 + cents = 0 +else + cents = 1200 * log(x2 ./ x1) ./ log(2); +end \ No newline at end of file diff --git a/perceivedPitch.m b/perceivedPitch.m new file mode 100644 index 0000000..a283874 --- /dev/null +++ b/perceivedPitch.m @@ -0,0 +1,41 @@ +function pp = perceivedPitch(f0s, sr, gamma) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% pp = perceivedPitch(f0s, sr, gamma) +% +% Description: Calculate the perceived pitch of a note based on +% Gockel, H., B.J.C. Moore,and R.P. Carlyon. 2001. +% Influence of rate of change of frequency on the overall +% pitch of frequency-modulated Tones. Journal of the +% Acoustical Society of America. 109(2):701?12. +% +% Inputs: +% f0s - vector of fundamental frequency estimates +% sr - 1/sample rate of the f0 estimates (e.g. the hop rate in Hz of yin) +% gamma - sets the relative weighting of quickly changing vs slowly +% changing portions of notes. - a high gamma (e.g., 1000000) +% gives more weight to slowly changing portions. 
+% +% Outputs: +% pp - perceived pitch: mean of the f0 estimates, each weighted by +% exp(-gamma * abs(rate of change of f0)) +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% default weighting when gamma is not supplied +if ~exist('gamma', 'var'), gamma = 100000; end + +% remove all NaNs and force a row vector (a column input broke the concatenation below) +f0s = reshape(f0s(~isnan(f0s)), 1, []); + +% rate of change of f0 between consecutive frames; the final frame has no successor and gets a placeholder slope of -100 +deriv = [diff(f0s)*sr -100]; + +% frames where f0 changes quickly receive exponentially smaller weights +weights = exp(-gamma * abs(deriv)); + +% calculate the perceived pitch as the weighted mean of the f0 estimates +pp = f0s(:)' * weights(:) / sum(weights); \ No newline at end of file diff --git a/perceivedPitch.m~ b/perceivedPitch.m~ new file mode 100644 index 0000000..8349215 --- /dev/null +++ b/perceivedPitch.m~ @@ -0,0 +1,35 @@ +function pp = perceivedPitch(f0s, sr, gamma) + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% pp = perceivedPitch(f0s, sr, gamma) +% +% Description: Calculate the perceived pitch of a note based on +% Gockel, H., B.J.C. Moore,and R.P. Carlyon. 2001. +% Influence of rate of change of frequency on the overall +% pitch of frequency-modulated Tones." Journal of the Acoustical Society of America 109, no. 2 (2001): 701?12. +% Inputs: +% onsoffs - a 3*N alignment matrix, the first row is a list of N states +% the second row is the time which the state ends, and the +% third row is the state index +% +% Outputs: +% res.ons - list of onset times +% res.offs - list of offset times +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% +% +% The note is provided as a vector of f0s and the sample rate at which they +% are measured (e.g. the hop rate in Hz of yin). The gamma parameter sets +% the relative weighting of quickly changing vs slowly changing portions of +% notes. 
A high gamma gives more weight to slowly changing portions. + +f0s(isnan(f0s))=[]; +deriv = [diff(f0s)*sr -100]; +weights = exp(-gamma * abs(deriv)); +pp = f0s(:)' * weights(:) / sum(weights); \ No newline at end of file diff --git a/plotFineAlign.m b/plotFineAlign.m new file mode 100644 index 0000000..a319080 --- /dev/null +++ b/plotFineAlign.m @@ -0,0 +1,44 @@ +function plotFineAlign(stateType, occupancy, notes, stftHop) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% plotFineAlign(stateType, occupancy, notes, stftHop) +% +% Description: +% Plot the HMM alignment based on the output of YIN. StateType is the +% list of states in the HMM, and occupancy holds the times at which +% those states end. Notes is a list of midi note numbers +% that are played, should be one note for each [3] in stateType. +% NOTE(review): earlier documentation described a highlight vector, but +% this function takes no highlight argument. +% +% Inputs: +% stateType - vector with a list of states +% occupancy - vector indicating the time (in seconds) at which the states +% in stateType end +% notes - vector of notes from MIDI file +% stftHop - the hop size between frames in the spectrogram +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Plot the 4 states: silence in red, beginning transient in green, +% steady state in blue, ending transient in green. 
+ +styles = {{'r+-', 'LineWidth', 2}, + {'g+-', 'LineWidth', 2}, + {'b+-', 'LineWidth', 2}}; +% convert the state end times into spectrogram frame positions +cs = occupancy /stftHop; +segments = [cs(1:end-1); cs(2:end)]'; + +hold on +% note index for each state: one more than the number of steady states (type 3) seen so far +stateNote = max(1, cumsum(stateType == 3)+1); +for i=1:size(segments,1) + plot(segments(i,:)', repmat(notes(stateNote(i)),2,1), styles{stateType(i+1)}{:}) +end + +hold off \ No newline at end of file diff --git a/runAlignment.m b/runAlignment.m new file mode 100644 index 0000000..66655b1 --- /dev/null +++ b/runAlignment.m @@ -0,0 +1,73 @@ +function [allstate,selectstate,spec,yinres]=runAlignment(filename, midiname, numNotes, stateOrd2, noteNum, means, covars, learnparams) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% [allstate selectstate spec yinres]=runAlignment(filename,midiname,... +% numNotes, stateOrd2, noteNum, means, covars,learnparams) +% +% Description: +% Calls the DTW alignment function and refines the results with the HMM +% alignment algorithm, with both a basic and modified state spaces (based +% on the lyrics). This function returns the results of both the state +% spaces as well as the YIN analysis of the specified audio file. 
+% +% Inputs: +% filename - name of audio file +% midiname - name of MIDI file +% numNotes - number of notes in the MIDI file to be aligned +% stateOrd2 - vector of state sequence +% noteNum - vector of note numbers corresponding to state sequence +% means - mean values for each state +% covars - covariance values for each state +% learnparams - flag as to whether to learn means and covars in the HMM +% +% Outputs: +% allstate - states of the full sequence (row 1) and their ending times (row 2) +% selectstate - refined states (row 1), ending times (row 2), note numbers (row 3) +% spec - spectrogram of the audio file +% yinres - structure of results of running the YIN algorithm on the +% audio signal indicated by the input variable filename +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if ~exist('learnparams', 'var'), learnparams = 0; end + +% keep only the entries of stateOrd2 and noteNum that belong to the +% first numNotes notes +numStates = max(find(noteNum <= numNotes)); +stateOrd2=stateOrd2(1:numStates); +noteNum=noteNum(1:numStates); + +% read audio file (NOTE: wavread was removed in MATLAB R2015b; newer releases need audioread) +hop = 32; +[audiofile, sr] = wavread(filename); + +% normalize audio file +audiofile=audiofile/sqrt(mean(audiofile.^2))*.6; + +% run the DTW alignment and YIN analysis +[align, yinres, spec] = getVals(filename, midiname, audiofile, sr, hop); +clear audiofile + +% run HMM alignment with the full state sequence +[vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] = runHMMAlignment(numNotes, means, covars, align, yinres, sr, learnparams); + +% tally of the number of frames in each state +histvals = hist(vpath, 1:max(vpath)); + +% ending time of each state in seconds +cumsumvals = cumsum(histvals*hop/sr); + +% run HMM alignment with the state sequence refined, based on the lyrics 
+cumsumvals2=selectStates(startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd2,noteNum,sr); + +% create matrices of the alignments, where the first row is the +% current states and the second row is the time at which each state ends; +% selectstate has a third row holding the note number of each state +allstate=stateOrd; +allstate(2,1:length(cumsumvals))=cumsumvals; +selectstate=stateOrd2; +selectstate(2,1:length(cumsumvals2))=cumsumvals2; +selectstate(3,:) = noteNum; \ No newline at end of file diff --git a/runDTWAlignment.m b/runDTWAlignment.m new file mode 100644 index 0000000..7627ce3 --- /dev/null +++ b/runDTWAlignment.m @@ -0,0 +1,55 @@ +function [align,spec] = runDTWAlignment(audiofile, midorig, tres) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% [align,spec] = runDTWAlignment(audiofile, midorig, tres) +% +% Description: +% Performs a dynamic time warping alignment between specified audio and +% MIDI files and returns a structure with the aligned onset and offset times +% (with corresponding MIDI note numbers) and a spectrogram of the audio +% +% Inputs: +% audiofile - audio file +% midorig - midi file +% tres - time resolution for MIDI to spectrum information conversion +% +% Outputs: +% align - dynamic time warping MIDI-audio alignment structure +% align.on - onset times +% align.off - offset times +% align.midiNote - MIDI note numbers +% spec - spectrogram +% +% Dependencies: +% Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available +% from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ +% Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available +% from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ +% Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. 
Available from: +% https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials +% /miditoolbox/ +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% use the default time resolution only when the caller did not supply one +if nargin < 3 || isempty(tres) + tres = 0.025; +end + +mid = midorig; + +% run alignment using peak structure distance as a feature +[dtw.M,dtw.MA,dtw.RA,dtw.S,spec,dtw.notemask] = alignmidiwav(mid,... + audiofile,tres,1); + +% read midi file, turn column 7 into an end time (column 6 + column 7) and map the times in the midi file to the audio +nmat = readmidi(mid); +nmat(:,7) = nmat(:,6) + nmat(:,7); +nmat(:,1:2) = maptimes(nmat(:,6:7),(dtw.MA-1)*tres,(dtw.RA-1)*tres); + +% create output alignment +align.on = nmat(:,1); +align.off = nmat(:,2); +align.midiNote = nmat(:,4); diff --git a/runHMMAlignment.m b/runHMMAlignment.m new file mode 100644 index 0000000..0149891 --- /dev/null +++ b/runHMMAlignment.m @@ -0,0 +1,125 @@ +function [vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] = runHMMAlignment(notenum, means, covars, align, yinres, sr, learnparams) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%[vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] +% = runHMMAlignment(notenum, means, covars, align, yinres, sr, learnparams) +% +% Description: +% Refines DTW alignment values with a three-state HMM, identifying +% silence,transient, and steady state parts of the signal. The HMM +% uses the DTW alignment as a prior. 
+% +% Inputs: +% notenum - number of notes to be aligned +% means - 2x3 matrix of mean aperiodicity and power values HMM states +% column: silence, trans, steady state +% rows: aperiodicity, power +% covars - array of covariances for the aperiodicity and power +% values, one page per state (as per means) +% align - structure containing initial DTW alignment +% yinres - structure containing yin analysis of the signal +% sr - sampling rate of the signal +% learnparams - optional flag (default 0): learn the HMM parameters with mhmm_em +% Outputs: +% vpath - Viterbi path +% startingState - starting state for the HMM +% prior - prior matrix from DTW alignment +% trans - transition matrix +% meansFull - means matrix +% covarsFull - covariance matrix +% mixmat - matrix of priors for GMM for each state +% obs - three row matrix of observations (aperiodicity, power and pitch) +% stateOrd - modified state order sequence +% +% Dependencies: +% Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. +% Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org - Johanna Devaney, 2011 +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% default: do not learn the HMM parameters +if ~exist('learnparams', 'var'), learnparams = 0; end + +% create vectors of onset and offset times from DTW alignment, in frames (assumes a hop of 32 samples) +ons=floor(align.on*sr/32); +offs=floor(align.off*sr/32); + +% create observation matrix +obs(1,:)=sqrt(yinres.ap(1:offs(notenum)+50)); +obs(2,:)=sqrt(yinres.pwr(1:offs(notenum)+50)); +obs(3,:)=69+12*yinres.f0(1:offs(notenum)+50); % convert octave to midi note + +% replace any NaNs in the observation matrix with zeros +obs(isnan(obs))=0; + +% refine the list of onsets and offsets according to the number of notes +% specified in the input arg 'notenum' +prior_ons=ons(1:notenum); +prior_offs=offs(1:notenum); +notes = length(prior_ons); + +% states: silence, trans, steady state +% rows: aperiodicity, power +stateOrdSeed = [1 2 3 2 1]; +stateOrd = [repmat(stateOrdSeed(1:end-1),1,notes) stateOrdSeed(end)]; + +% use stateOrd to expand means and covars to each appearance +midiNotes = repmat(align.midiNote(1:notenum)', length(stateOrdSeed)-1, 1); +midiNotes = [midiNotes(:)' midiNotes(end)]; +meansFull = [means(:,stateOrd); midiNotes]; +% variance of the pitch (third) dimension for states 1, 2 and 3 +covars(3,3,1) = 100; +covars(3,3,2) = 5; +covars(3,3,3) = 1; +covarsFull = covars(:,:,stateOrd); + +mixmat = ones(length(stateOrd),1); + +% transition matrix seed +% seed states, presumably in stateOrdSeed order {silence, transient, steady state, transient, silence} (TODO confirm) +transseed=zeros(5,5); +transseed(1,1)=.99; +transseed(2,2)=.98; +transseed(3,3)=.98; +transseed(4,4)=.98; +transseed(5,5)=.99; +transseed(1,2)=.0018; +transseed(1,3)=.0007; +transseed(1,4)=.0042; +transseed(1,5)=.0033; +transseed(2,3)=0.0018; +transseed(2,4)=0.0102; +transseed(2,5)=0.0080; +transseed(3,4)=0.0112; +transseed(3,5)=0.0088; +transseed(4,5)=0.02; + +% call filltransmat to expand the transition matrix to the appropriate size +trans = filltransmat(transseed,notes); + +% create starting state space matrix +startingState = [1; zeros(4*notes,1)]; + +% call fillpriormat_gauss to create a prior matrix 
+prior=fillpriormat_gauss(size(obs,2),prior_ons,prior_offs,5); + +if learnparams + % use the mhmm_em function from Kevin Murphy's HMM toolkit to + % learn the HMM parameters + save orig_hmm_params + [LL, startingState, trans, meansFull, covarsFull, mixmat1] = ... + mhmm_em(obs, startingState, trans, meansFull, covarsFull, mixmat, 'max_iter', 1, 'adj_prior', 0, 'adj_trans', 0, 'adj_mix', 0, 'cov_type', 'diag'); + save new_hmm_params +end + +% create a likelihood matrix with the mixgauss_prob function from Kevin +% Murphy's HMM toolkit +like = mixgauss_prob(obs, meansFull, covarsFull, mixmat,1); + +% use the Viterbi path function from Kevin Murphy's HMM toolkit to find the +% most likely path +prlike=prior.*like; +clear like +vpath=viterbi_path(startingState, trans, prlike); diff --git a/selectStates.m b/selectStates.m new file mode 100755 index 0000000..bf2d229 --- /dev/null +++ b/selectStates.m @@ -0,0 +1,55 @@ +function cumsumvals2=selectStates(startingState,prior,... + trans,meansFull,covarsFull,mixmat,obs,stateO,noteNum,sr) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% cumsumvals2=selectStates(startingState,prior,trans, +% meansFull,covarsFull,mixmat,obs,stateO,noteNum,sr) +% +% Description: +% Refines the HMM parameters according to the modified state +% sequence vector (stateO) passed into the function. +% +% Inputs: +% startingState - starting state for the HMM +% prior - prior matrix from DTW alignment +% trans - transition matrix +% meansFull - means matrix +% covarsFull - covariance matrix +% mixmat - matrix of priors for GMM for each state +% obs - matrix of observations, one row per feature +% stateO - modified state order sequence +% noteNum - vector of note numbers corresponding to stateO +% sr - sampling rate +% +% Outputs: +% vpath2 - viterbi path (internal, not returned) +% histvals2 - tally of the number of frames in each state (internal, not returned) +% cumsumvals2 - ending time of each state in seconds +% +% Dependencies: +% Murphy, K. 1998. 
Hidden Markov Model (HMM) Toolbox for Matlab. +% Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% create new versions of the input variables based on the state sequence +% stateO (row/column index = state type + 4 * (note number - 1)) +vec = (stateO + (noteNum - 1)*4); +startingState2 = startingState(vec, :); +prior2 = prior(vec, :); +trans2 = trans(vec, vec); +trans2 = diag(1./sum(trans2,2))*trans2; +meansFull2 = meansFull(:,vec); +covarsFull2 = covarsFull(:,:,vec); +mixmat2 = mixmat(vec,:); + +% calculate the likelihood and Viterbi path with the new variables +like2 = mixgauss_prob(obs, meansFull2, covarsFull2, mixmat2); +vpath2=viterbi_path(startingState2, trans2, prior2.*like2); + +% create a vector of the modified alignment times (frame counts * 32 / sr; 32 is presumably the hop in samples - TODO confirm) +histvals2 = hist(vpath2, 1:max(vpath2)); +cumsumvals2 = cumsum(histvals2*32/sr); diff --git a/visualiser.m b/visualiser.m new file mode 100644 index 0000000..19ead09 --- /dev/null +++ b/visualiser.m @@ -0,0 +1,69 @@ +function visualiser(trace,mid,spec) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% visualiser(trace,mid,spec) +% +% Description: +% Plots a gross DTW alignment overlaid with the fine alignment +% resulting from the HMM aligner on the output of YIN. Trace(1,:) +% is the list of states in the hmm (currently ignored, assumed to +% be 1,2,3,2,1,2,3,2...), and trace(2,:) is the number of YIN +% frames for which that state is occupied. Highlight is a list of +% notes for which the steady state will be highlighted. +% +% Inputs: +% trace - 3-row matrix of a list of states (trace(1,:)), the times +% they end at (trace(2,:)), and the state indices (trace(3,:)) +% mid - midi file +% spec - spectrogram of audio file (from alignmidiwav.m) +% +% Dependencies: +% Toiviainen, P. 
and T. Eerola. 2006. MIDI Toolbox. Available from: +% https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials +% /miditoolbox/ +% +% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) +% http://www.ampact.org +% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel +% (mim@mr-pc.org), all rights reserved. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Fix for ending zeros that mess up the plot +if trace(2,end)==0 + trace=trace(:,1:end-1); +end +if trace(2, end-1)==0 + trace(2,end-1)=trace(2,end-2); +end + +% hop size between frames +stftHop = 0.025; + +% read midi file +nmat=readmidi(mid); + +% plot spectrogram of audio file (NOTE(review): the x label says .05s but stftHop is 0.025 - confirm) +imagesc(20*log10(spec)); +title(['Spectrogram with Aligned MIDI Notes Overlaid']); +xlabel(['Time (.05s)']); +ylabel(['Midinote']); +axis xy; +caxis(max(caxis)+[-50 0]) +colormap(1-gray) + +% scale the MIDI note numbers as 440*2^((n-105)/12) and repeat the last note once +notes = nmat(:,4)'; +notes = (2.^((notes-105)/12))*440; +notes(end+1) = notes(end); +nlim = length(notes); + +% plot alignment (NOTE(review): notenums computed below is never used) +plotFineAlign(trace(1,:), trace(2,:), notes(1:nlim), stftHop); +if size(trace,1) >= 3 + notenums = trace(3,2:end); +else + nlim = length(notes); + notenums = [reshape(repmat(1:nlim,4,1),1,[]) nlim]; +end + +