You are on page 1 of 12

Detect using the time-domain Autocorrelation algorithm (findautomin is required):
fastauto.m
function [P, W] = fastauto(s, winsize, jump, corrthresh, fs)
% [P, W] = fastauto(s, winsize, jump, corrthresh)
% [P, W] = fastauto(s, winsize, jump, corrthresh, fs)
%
% Implements a pitch detector using a fast-autocorrelation algorithm.
% Returns P, a vector of frequencies corresponding to each point in time
% and W, a vector of frequencies corresponding to each window.
% Takes s, the monaural signal; winsize, the length of the window; jump, the
% distance between the left edge of a window and the left edge of the next
% window; and corrthresh the correlation detection threshold.
% fs (optional) is the sampling rate in Hz used to turn a lag in samples
% into a frequency; it defaults to 44100.
% Advisory: let winsize = 1000 and jump = 500, corrthresh should be no
% greater than 0.075 for optimal results.
% Subordinate functions: <findautomin>
%
% s is indexed by row, so it is expected to be a column vector.
% A window whose lag is rejected reports 0 in W and leaves P untouched.
% P is longer than s: it covers s rounded up to a whole number of windows,
% plus one extra window, plus 1200 samples.

if nargin < 5 || isempty(fs)
    fs = 44100;
end

% Lags are expressed in samples.
SLACK            = 1200; % findautomin reads s(i : i + 1200)
TRACK_MIN_LAG    = 40;   % previous lag must exceed this to be tracked
TRACK_HALF_WIDTH = 20;   % tracking searches previous lag +/- this much
SEARCH_MIN_LAG   = 10;   % full-search range when there is no previous lag
SEARCH_MAX_LAG   = 800;
REPORT_MIN_LAG   = 70;   % lags at or below this are not reported
% NOTE(review): lags in (40, 70] are tracked but never reported; confirm
% that the two different thresholds are intentional.

numSamples = size(s, 1);
paddedLen  = ceil(numSamples / winsize) * winsize;

P = zeros(paddedLen + winsize + SLACK, 1);

% Zero-pad the signal so that every window can be analysed. The padding
% starts AFTER the last real sample; starting at numSamples would overwrite
% that sample with zero.
% NOTE(review): findautomin also reads s(i + 599 + lag); with a small jump
% or a large tracked lag this can still run past the padding -- confirm.
s(numSamples + 1 : paddedLen + SLACK, 1) = 0;

starts     = 1 : jump : paddedLen;
numWindows = numel(starts);
W          = zeros(max(numWindows, 1), 1);

fast  = 0;   % number of windows resolved by the narrow tracking search
Tprev = -1;  % lag found in the previous window (-1 means none)

for windownumber = 1 : numWindows
    i = starts(windownumber);

    if Tprev > TRACK_MIN_LAG
        % This executes if we know the pitch from the previous frame and
        % assumes that we can find the new pitch reasonably close to the
        % old one.
        shift = findautomin(s, i, Tprev - TRACK_HALF_WIDTH, ...
                            Tprev + TRACK_HALF_WIDTH, corrthresh);
        fast = fast + 1;
    else
        % If we don't have an "old pitch", find one for this frame.
        shift = findautomin(s, i, SEARCH_MIN_LAG, SEARCH_MAX_LAG, corrthresh);
    end
    Tprev = shift;

    if shift > REPORT_MIN_LAG
        frequency = fs / shift;
        W(windownumber, 1) = frequency;

        % Samples not yet written take this window's frequency; samples
        % already written by an overlapping window are averaged with it.
        span      = i : i + winsize;
        segment   = P(span);
        untouched = (segment == 0);
        segment(untouched)  = frequency;
        segment(~untouched) = (segment(~untouched) + frequency) / 2;
        P(span) = segment;
    end
end

fprintf('FAST-Autocorrelated windows: %d\n', fast);
% Divide by the number of windows actually processed.
fprintf('Fraction of total: %g\n', fast / max(numWindows, 1));
findautomin.m
function shift = findautomin(s, i, start, stop, corrthresh)
% shift = findautomin(s, i, start, stop, corrthresh)
%
% Searches lags start, start+1, ... (below stop) for the first local
% minimum of the mean absolute difference between the 600 samples of s
% starting at row i and the same 600 samples delayed by the lag. The
% minimum is accepted only if its value is below corrthresh.
%
% s          - signal, indexed by row (first column is used)
% i          - row index of the first sample of the frame
% start/stop - first lag to try and exclusive upper bound on the lag
% corrthresh - largest mean absolute difference accepted as a match
%
% Returns the lag in samples of the accepted minimum, or -1 when
%   * the frame is too quiet (no sample in s(i : i+1200) exceeds 0.1),
%   * the search reaches stop without accepting a minimum, or
%   * the frame or a lagged segment would run past the end of s.

GATE_LEN   = 1200; % samples inspected by the loudness gate
GATE_LEVEL = 0.1;  % gate threshold on the raw (signed) sample value
CMP_LEN    = 600;  % samples compared at each lag

shift = -1;
numSamples = size(s, 1);

if i < 1 || i > numSamples
    return;
end

% Loudness gate, clamped to the samples that exist.
gateEnd = min(i + GATE_LEN, numSamples);
if ~(max(s(i : gateEnd, 1)) > GATE_LEVEL)
    return;
end

lag       = start;
prevDelta = 0;  % change in the difference measure one step ago
delta     = 0;  % most recent change in the difference measure
prevDiff  = 0;  % difference measure one step ago
curDiff   = 1;  % most recent difference measure

% Keep stepping until the measure went down and then up (a local minimum)
% AND the value at that minimum is below the threshold.
while (~(prevDelta < 0 && delta > 0) || ~(prevDiff < corrthresh)) && lag < stop
    lastIdx = i + CMP_LEN - 1 + lag;
    if lastIdx > numSamples
        return; % lagged segment is not available: report "no pitch"
    end
    prevDiff  = curDiff;
    prevDelta = delta;
    curDiff   = sum(abs(s(i : i + CMP_LEN - 1, 1) - s(i + lag : lastIdx, 1))) / CMP_LEN;
    lag       = lag + 1;
    delta     = curDiff - prevDiff;
end

if lag == stop
    shift = -1;
else
    % The loop has stepped one lag past the rise that follows the minimum.
    shift = lag - 2;
end

Detect using the frequency-domain HPS algorithm:


hps.m
%Frequency Domain Pitch Detection
%f_y = pitch_detec(x, window, hop, xformlength)
%f_y = pitch_detec(x, window, hop, xformlength, fs)
function f_y = pitch_detec(x, window, hop, xformlength, fs)
%Estimates one pitch per analysis window with a harmonic product spectrum.
%
%Input arguments:
%x           - signal, indexed by row (expected to be a column vector)
%window      - number of samples per Hanning window
%hop         - hop size between windows
%xformlength - fft length (each window is zero padded to this length)
%fs          - optional sampling frequency in Hz, default 44100
%
%Output:
%f_y - column vector with one frequency in Hz per window. Windows whose
%      estimate is above 600 Hz report 0.

if nargin < 5 || isempty(fs)
    fs = 44100;
end
MAX_PITCH_HZ = 600; %estimates above this are discarded (post-processing LPF)

numSamples = size(x, 1);
numwinds   = max(0, ceil((numSamples - window) / hop) + 1);
f_y        = zeros(numwinds, 1);
taper      = hanning(window);

windstart = 1;
for windnum = 1:numwinds
    %Fetch the samples to be used in the current window, zeropadding
    %if necessary for the last window.
    lastSample = min(windstart + window - 1, numSamples);
    windx = x(windstart:lastSample);
    windx(size(windx, 1) + 1:window, 1) = 0;

    %Apply the Hanning window function and take the magnitude of the
    %lower half of the zero-padded fft.
    f_x = abs(fft(windx .* taper, xformlength));
    f_x = f_x(1:floor(size(f_x, 1) / 2));

    %HPS, PartI: downsample the spectrum by two. Entry i is the mean of
    %bins 2i and 2i+1; entries with no source bins stay at 1.
    %(The original also built factor-3 and factor-4 versions but left them
    %out of the product, so they are not computed here.)
    nbins   = size(f_x, 1);
    nhalf   = floor((nbins - 1) / 2);
    k       = (1:nhalf)';
    f_x2    = ones(nbins, 1);
    f_x2(k) = (f_x(2*k) + f_x(2*k + 1)) / 2;

    %HPS, PartII: calculate product
    f_ym = f_x .* f_x2;

    %HPS, PartIII: find max. On ties the LAST bin wins; for an all-zero
    %frame that is the top bin, which the cutoff below maps to 0.
    index = find(f_ym == max(f_ym), 1, 'last');

    if isempty(index)
        %No usable maximum (e.g. the frame contains NaN).
        pitch = 0;
    else
        %Convert that to a frequency.
        %NOTE(review): the 1-based index is used as the bin number here;
        %confirm whether (index - 1) was intended.
        pitch = (index / xformlength) * fs;
        %Do a post-processing LPF
        if pitch > MAX_PITCH_HZ
            pitch = 0;
        end
    end
    f_y(windnum) = pitch;

    %Don't forget to increment the windstart pointer.
    windstart = windstart + hop;
end
Pitch Correction
Correct pitch using the Time Shifting algorithm:
psolarev.m
function y = PSOLA(x, pdetect, ptarget, window, hop, fs)
%y = PSOLA(x, pdetect, ptarget, window, hop, fs)
%
%Pitch-shifts x window by window: pitch-period-long grains are cut from
%each voiced window around pitch markers and overlap-added again at the
%target period spacing. Windows are Hanning weighted and overlap-added
%into y.
%
%Input arguments: (all vectors are assumed to be column vectors)
%x - signal to be pitch shifted
%pdetect - vector of detected fund. frequencies in Hz (one for each window);
%          0 marks a window as unvoiced
%ptarget - vector of target fund. frequencies in Hz (one for each window)
%window - number of samples per Hanning window **must be an even number**
%hop - hop size between windows
%fs - sampling frequency in Hz
%
%Output:
%y - column vector the same length as x. Samples after the last complete
%    window are left at zero.
%
%A window is copied unchanged (as if unvoiced) when the detected period is
%shorter than one sample or too long for two periods to fit in the window,
%or when the target period is shorter than one sample.
%
%Subordinate functions: pathfinder

numSamples = size(x, 1);
numwinds   = floor((numSamples - window) / hop) + 1;
y          = zeros(numSamples, 1);
frameTaper = hanning(window);

windstart = 1;
for windnum = 1:numwinds
    %Fetch the samples for this window.
    span  = windstart : windstart + window - 1;
    windx = x(span);

    %Unvoiced (or unusable) windows are copied as is.
    windy = windx;

    if pdetect(windnum) ~= 0
        %Compute the detected and target period in samples.
        Tsampdetect = floor((1 / pdetect(windnum)) * fs);
        Tsamptarget = floor((1 / ptarget(windnum)) * fs);

        %Calculate the number of periods that will appear in this window.
        numperiods = floor(window / Tsampdetect) - 1;

        if Tsampdetect >= 1 && Tsamptarget >= 1 && numperiods >= 1
            T = Tsampdetect;
            grainTaper = hanning(2 * T);

            %Generate matrix B with each column corresponding to a period
            %in the input window and containing the samples around the
            %start of that period (initial markers sit every T samples,
            %starting at 1). The first column is a special case: nothing
            %precedes sample 1, so its top half stays zero.
            B = zeros(2 * T, numperiods);
            B(T + 1:2 * T, 1) = windx(1:T);
            for p = 2:numperiods
                centre  = 1 + (p - 1) * T;
                B(:, p) = windx(centre - T : centre + T - 1);
            end
            for p = 1:numperiods
                B(:, p) = B(:, p) .* grainTaper;
            end

            %Find the optimal path through the flipped matrix.
            bestRow = pathfinder(flipud(B), 4);

            %Turn the path vector into pitch markers (window sample index).
            oldmarker = T - bestRow(:) + ((1:numperiods)' - 1) * T + 1;

            %Start the output with the Hanning-weighted first period.
            windy = zeros(window, 1);
            windy(1:T) = windx(1:T) .* hanning(T);

            %Calculate new pitch markers, spaced by the target period.
            numnewperiods = floor((window - T - oldmarker(1)) / Tsamptarget);

            %Only markers 2..numperiods are candidates for reuse.
            candidates = oldmarker(2:numperiods);
            if isempty(candidates)
                numnewperiods = 0;
            end

            for q = 2:numnewperiods
                newmark = oldmarker(1) + (q - 1) * Tsamptarget;

                %Find the old marker to use for this new marker (the
                %nearest one; the earliest wins a tie).
                [nearestDist, rel] = min(abs(newmark - candidates)); %#ok<ASGLU>
                source = candidates(rel);

                %Cut two periods around the old marker; samples that fall
                %outside the window stay zero.
                idx   = (1:2 * T)' - T + source;
                valid = (idx >= 1) & (idx <= window);
                thisperiod = zeros(2 * T, 1);
                thisperiod(valid) = windx(idx(valid));
                thisperiod = thisperiod .* grainTaper;

                %Overlap-add the grain around the new marker.
                lo = newmark - T;
                if lo >= 1
                    dst = lo : newmark + T - 1;
                    windy(dst) = windy(dst) + thisperiod;
                end
            end
        end
    end

    %Place the (modified) window into the output vector.
    y(span) = y(span) + windy .* frameTaper;

    %Increment the window start pointer
    windstart = windstart + hop;
end
function path = pathfinder(MAT,N)
%
% path = pathfinder(MAT,N)
%
% This function traces a path from the first to the last columns
% of MAT, one that does not exceed slope == N (N integer >0) when
% assuming that successive rows are separated by one unit, and that
% successive columns are separated by one unit) and has the maximum
% possible cumulative MAT values along the path. The output
% path adheres to the sample points of MAT.
%
% MAT  - matrix of scores (rows x columns)
% N    - largest row change allowed between neighbouring columns
% path - 1 x columns vector; path(c) is the row of MAT chosen in column c
%
% Each step also receives a bonus of mean(MAT(:)) / sqrt(1 + d^2), where d
% is the row change of that step, so smaller row changes are favoured.

% calculate best-path cumulative scores:
[mrows,mcols] = size(MAT);
sf = mean(mean(MAT));

% Pad N rows of -Inf above and below so that steps leaving the matrix can
% never win the maximisation.
MAT = [-Inf*ones(N,mcols); MAT; -Inf*ones(N,mcols)];
best_paths = zeros(size(MAT));
range = N + (1:mrows);   % rows of the padded matrix that hold real data

% B(k,:) is the step bonus for row offset k-N-1; R lists, offset by offset,
% the padded rows of the previous column that each real row can come from.
B = zeros(1+2*N,mrows);
R = zeros(1,(1+2*N)*mrows);
for offset = -N:N
    B(offset+N+1,:) = ones(1,mrows) * sf/sqrt(1+offset*offset);
    R(mrows*(offset+N)+(1:mrows)) = range + offset;
end

for col = 2:mcols
    % Row k of T holds the previous column's cumulative score seen through
    % offset k-N-1, one entry per real row.
    T = reshape(MAT(R,col-1),mrows,1+2*N)';
    [bestscore,bestoffset] = max(T+B);
    MAT(range,col) = MAT(range,col) + bestscore';
    best_paths(range,col) = bestoffset';
end

% trace the optimal path backwards through the cumulative score matrix:
best_paths = best_paths - N - 1;   % convert stored indices to row offsets
path = zeros(1,mcols);
[bestfinal,row] = max(MAT(:,mcols)); %#ok<ASGLU>
path(mcols) = row;
for col = mcols:-1:2
    row = row + best_paths(row,col);
    path(col-1) = row;
end

% Remove the padding offset so rows refer to the original MAT.
path = path - N;
return
Correct pitch using the Modified Phase Vocoder:
modpvshift.m
function y = modpvshift(x, pdetect, ptarget, window, hop, fs, sens)
%y = modpvshift(x, pdetect, ptarget, window, hop, fs)
%y = modpvshift(x, pdetect, ptarget, window, hop, fs, sens)
%
%Pitch-shifts x frame by frame: the spectral peaks of each voiced frame,
%together with the bins around them, are moved by the ratio
%ptarget ./ pdetect and the frames are overlap-added into y.
%
%Input arguments: (all vectors are assumed to be column vectors)
%x - signal to be pitch shifted
%pdetect - vector of detected fund. frequencies in Hz (one for each window);
%          0 marks a window as unvoiced
%ptarget - vector of target fund. frequencies in Hz (one for each window)
%window - number of samples per Hanning window **must be an even number**
%hop - hop size between windows
%fs - sampling frequency in Hz. Accepted for interface compatibility; the
%     shift is derived from the ratio of the pitches, so it is not used.
%sens - accepted for interface compatibility; not used.
%
%Output:
%y - shifted signal. It can be longer than x because the last window is
%    zero padded to full length.
%
%Note that for the algorithm to perform properly, the windows must have at
%least 75% overlap (i.e. hop <= 25% of window size). Also note that the
%number of samples per window must be an even number and preferably a power
%of two, though this is not a necessity.

%Compute the frequency scaling factor for each window.
binbeta = ptarget ./ pdetect;

%Compute the number of windows required and set up a pointer to the start
%of the first window.
numSamples = size(x, 1);
numwinds   = ceil((numSamples - window)/hop) + 1;
windstart  = 1;

%Initialize variables.
halfwind = (window / 2) + 1;   %number of non-negative frequency bins
taper    = hanning(window);
y        = zeros(size(x));
lastframeinfl      = ones(halfwind, 1); %bin -> peak number, previous frame
lastframepeakphase = 1;                 %accumulated phasor per peak

for windnum = 1:numwinds
    %Fetch the samples to be used in the current window, zeropadding
    %if necessary for the last window (and growing y to match).
    lastIdx = windstart + window - 1;
    windx = x(windstart:min(lastIdx, numSamples));
    windx(size(windx, 1) + 1:window, 1) = 0;
    if lastIdx > size(y, 1)
        y(size(y, 1) + 1:lastIdx, 1) = 0;
    end
    span = windstart:lastIdx;

    %Apply the Hanning window function to the samples.
    windx = windx .* taper;

    %If the window is unvoiced (or has no spectral peak), it is added to
    %the output unchanged.
    frameOut = windx;

    if pdetect(windnum) ~= 0
        %Since our signal is purely real, the negative frequencies are
        %conjugate symmetric with the positive ones, so we only work with
        %the first halfwind coefficients and rebuild the rest at the end.
        windX = fft(windx);
        windX = windX(1:halfwind);
        mag   = abs(windX);

        %Find the peaks in the spectrum by bin number. A peak is a bin
        %whose magnitude is greater than its two nearest neighbors on
        %either side.
        cand   = (3:(halfwind - 2))';
        isPeak = mag(cand) > mag(cand - 1) & mag(cand) > mag(cand - 2) & ...
                 mag(cand) > mag(cand + 1) & mag(cand) > mag(cand + 2);
        peak     = cand(isPeak);
        numpeaks = size(peak, 1);

        if numpeaks > 0
            %Divide the bins into regions of influence. influence(b) = k
            %means bin b belongs to the peak at peak(k). The first and last
            %regions extend to the ends of the half spectrum; between two
            %peaks the boundary is the midpoint.
            influence = zeros(halfwind, 1);
            influence(1:peak(1)) = 1;
            influence(peak(numpeaks):halfwind) = numpeaks;
            for index = 2:numpeaks
                gap = peak(index) - peak(index - 1);
                len = ceil((gap + 1) / 2);
                influence(peak(index - 1):peak(index - 1) + len) = index - 1;
                influence(peak(index - 1) + len + 1:peak(index)) = index;
            end

            %Locate each peak more precisely by fitting a parabola to the
            %log magnitude of the peak bin and its two neighbors; the
            %vertex is the peak position in (1-based) bins.
            shift = zeros(numpeaks, 1);
            for peaknum = 1:numpeaks
                bins = peak(peaknum) + (-1:1)';
                vals = 10*log(mag(bins));
                a = [bins.^2, bins, ones(3, 1)];
                p = a \ vals;
                vertex = -p(2) / (2*p(1));
                if ~isfinite(vertex)
                    %Degenerate fit (e.g. a neighbor of exactly zero).
                    vertex = peak(peaknum);
                end
                %Bin 1 is DC, so the frequency is proportional to
                %(vertex - 1); scaling it by binbeta moves the peak by:
                shift(peaknum) = (vertex - 1) * (binbeta(windnum) - 1);
            end

            %Shift every region. fftdelta collects the change to apply to
            %each bin: the region's values are removed from their old bins
            %and re-inserted, linearly interpolated, at the shifted bins.
            %Bins that spill past either end of the half spectrum are
            %reflected back and conjugated, since the signal is real.
            %phasor holds the phase rotation for each destination bin.
            fftdelta  = zeros(halfwind, 1);
            phasor    = ones(halfwind, 1);
            peakphase = zeros(numpeaks, 1);
            for peaknum = 1:numpeaks
                regionBins = find(influence == peaknum);
                regionVals = windX(regionBins);

                %Locate this peak in the last frame and accumulate the
                %phasor from the last frame to this one.
                peakphase(peaknum) = ...
                    lastframepeakphase(lastframeinfl(peak(peaknum))) * ...
                    exp(1i * shift(peaknum) * 2*pi / window * hop);

                %Remove the region from its old position.
                fftdelta(regionBins) = fftdelta(regionBins) - regionVals;

                %Destination bins and interpolated values. The fractional
                %part of the shift is the same for every bin of the region.
                shifted = regionBins + shift(peaknum);
                frac = shifted(1) - floor(shifted(1));
                destBins = [floor(shifted); ceil(shifted(end))];
                destVals = [(1 - frac) * regionVals(1); ...
                            frac * regionVals(1:end - 1) + ...
                                (1 - frac) * regionVals(2:end); ...
                            frac * regionVals(end)];

                %Reflect spill-over and conjugate only the reflected
                %contribution (not what is already accumulated).
                over  = destBins > halfwind;
                under = destBins <= 0;
                destBins(over)  = 2 * halfwind - destBins(over);
                destBins(under) = abs(destBins(under)) + 2;
                destVals(over | under) = conj(destVals(over | under));

                %Drop anything still outside the half spectrum.
                ok = (destBins >= 1) & (destBins <= halfwind);
                destBins = destBins(ok);
                destVals = destVals(ok);

                %Accumulate one at a time: reflected bins can coincide.
                for k = 1:size(destBins, 1)
                    fftdelta(destBins(k)) = fftdelta(destBins(k)) + destVals(k);
                end
                phasor(destBins) = peakphase(peaknum);
            end

            %Apply the changes, then rotate the phases to keep inter-frame
            %phase continuity.
            windX = (windX + fftdelta) .* phasor;

            %Re-create the negative frequencies from the conjugate symmetry
            %and return to the time domain.
            fullX = [windX; conj(windX(halfwind - 1:-1:2))];
            frameOut = real(ifft(fullX));

            %Save the influence and phase information for the next frame.
            lastframeinfl      = influence;
            lastframepeakphase = peakphase;
        end
    end

    %Reinsert this window into the output signal.
    y(span) = y(span) + frameOut;

    %Increment the window start pointer by the hop size.
    windstart = windstart + hop;
end

You might also like