Professional Documents
Culture Documents
noiseFloorLevel)
% segment into chunks -> do the crazy
% jeff lieberman, 2005
% this function opens a wave file [handel for now]
% and applies segmentation based loosely on jehan's segmentation algorithm,
% with changes made to specifically highlight rhythmic activity.
% bring in test sound:
[y,fs,nbits] = wavread(filename);
% y     = sample data, one column per channel
% fs    = sample rate in Hz
% nbits = bits per sample (not a bitrate)

% Time axis in seconds, one entry per sample. It is built from the file's
% own sample rate (not a fixed 44100) so that it stays on the same scale
% as the spectrogram time vector, which is also computed from fs.
t = (1:size(y,1))/fs;

% convert to mono:
if size(y,2) >= 2
    % stereo (or more): average the first two channels
    soundLeft = y(:,1);
    soundRight = y(:,2);
    z = 0.5*(soundLeft+soundRight);
else
    % mono file: there is no second column, so reuse the only channel
    % instead of failing on y(:,2)
    soundLeft = y(:,1);
    soundRight = y(:,1);
    z = y(:,1);
end
% Short-time Fourier transform of the mono signal.
% Names are kept as-is because later code in this file reads them.
N = 2048;           % FFT length passed to specgram
R = 512;            % analysis window length in samples
L = 2000;           % not used in the visible part of this file
win = hamming(R);   % Hamming analysis window
overlap = R - 128;  % samples shared by consecutive windows (512-128 = 384)

% spectrogram: B = STFT matrix, f = frequency axis, tspec = frame times
[B,f,tspec] = specgram(z,N,fs,win,overlap);

% whole number of audio samples per spectrogram frame
timeRatio = floor(numel(t)/numel(tspec));
% generate the Bark scale, for separation of the spectrogram:
% each frequency bin in f is mapped to an integer band number.
% NOTE(review): a bin at f == 0 maps to band 0 and is therefore left out
% of bands 1..25 - confirm this is intended.
barkIndeces = ceil(13*atan(0.00076*f)+3.5*atan((f/7500).^2));

numBarkBands = 25;

% barkEnergy(i): despite the name, this holds the index of the highest
% frequency bin that belongs to band i (0 when the band has no bins).
barkEnergy = zeros(1,numBarkBands);

% Bnew: the Bark spectrogram, one row per band, one column per frame.
% Each row is the mean of the spectrogram rows belonging to that band.
% NOTE(review): B is averaged as returned by specgram (complex values),
% not as magnitudes - confirm against the code that consumes Bnew.
Bnew = zeros(numBarkBands,size(B,2));

for i = 1:numBarkBands,
    currIndeces = find(barkIndeces == i); % find current bark members
    if isempty(currIndeces)
        % No bin falls into this band (happens at low sample rates).
        % Leave the zero defaults instead of erroring on max([]) and
        % dividing by zero.
        continue;
    end
    barkEnergy(i) = max(currIndeces);
    % Sum explicitly along dimension 1: without the dimension argument a
    % band with exactly one bin would be summed across time into a scalar.
    Bnew(i,:) = sum(B(currIndeces,:),1)/length(currIndeces);
end
% smooth the Bark Spectrogram by convolving every band with a half window
% (the second half of a Hamming window).
% NOTE(review): the original comment described a 200ms half Hanning
% [raised cosine] window built from a .4s full window; the code below uses
% windowSecs = 0.05 and a Hamming window - confirm which one is intended.
%%%%%%%%%%%%%%%%%%
% future version: when detecting grain size, change this blending size
%%%%%%%%%%%%%%%%%%
windowSecs = 0.05 ; %0.2 - 0.4 sec, with 0.003 sec per frame

% Full window length in spectrogram frames. fs/timeRatio is the number of
% frames per second, since timeRatio is audio samples per frame. The
% file's own sample rate is used in place of a fixed 44100.
N = floor(windowSecs*fs/timeRatio);

raisedWindowFull = window(@hamming,N);
% keep the second half; the start index is clamped so that N < 2 does not
% produce an index of 0
raisedWindow = raisedWindowFull(max(1,floor(N/2)):N);

% now that we have the window, convolve:
for i = 1:25,
    % NOTE(review): the loop body is empty in this copy of the file; the
    % per-band convolution with raisedWindow appears to be missing and
    % must be restored from the original source.
end
% FUTURE REVISION:
% make this cutoff frequency by looking at sound data and variance within,
% so that it auto-calculates the amount of noise present to get rid of the
% floor
% mark each approximate onset time with a red 'x' at y = 0 and an error
% bar of height 0.5 (call rejoined onto one line: it had been split in the
% middle of the identifier approxIndeces, which is a syntax error)
errorbar(t(approxIndeces),zeros(size(approxIndeces)),zeros(size(approxIndeces))+0.5,'rx');
% now search the frames just before each loudness maximum to find the
% minimum of loudness.
% tspecMaxFrames are the frames to start from; minFrames is how many
% frames back to look.
% NOTE(review): the original comment said 0.03 seconds, but the code uses
% 0.01 seconds - confirm which is intended.
% fs/timeRatio is spectrogram frames per second (timeRatio is audio
% samples per frame); the file's own sample rate replaces a fixed 44100.
minFrames = ceil(0.01*(fs/timeRatio)); % how many frames back to check

minimumLoudnessIndeces = zeros(1,length(maxTimes));
for i = 1:length(maxTimes), % for each time of the maximum smoothed loudness
    % first frame of the search window, clamped to the start of the data
    windowStart = max(tspecMaxFrames(i)-minFrames,1);

    % position of the minimum inside the window (1-based, window-relative)
    [trash, localMinIndex] = min( loudness( windowStart : tspecMaxFrames(i) ) );

    % Convert to an index into loudness using the actual window start.
    % Using tspecMaxFrames(i)-minFrames here would be wrong whenever the
    % window start was clamped to 1.
    minimumLoudnessIndeces(i) = windowStart + localMinIndex - 1;
end
% and -finally-... search for the nearest place where the sample has a zero
% crossing and is heading positive, to that minimum in loudness. this way
% we can cleanly cut up the samples into constituent parts.
% search forward:
%finalTimes:
% For every loudness minimum, walk forward through the mono signal z to
% the next negative-to-positive zero crossing and record that sample index.
possibleTimes = tspec(minimumLoudnessIndeces); % ones to test
numSamples = length(z);
finalTimeIndex = zeros(1,length(possibleTimes));
for i=1:length(possibleTimes),
    % first sample whose time is later than the candidate time
    % (can be optimized later)
    startIndex = find(t > possibleTimes(i), 1);
    if isempty(startIndex)
        % candidate lies at or beyond the last sample: use the last sample
        startIndex = numSamples;
    end
    % now search for the nearest negative to positive zero crossing;
    % both loops stop at the last sample so they never index past z
    while(startIndex < numSamples && z(startIndex) > 0) % wait to go negative
        startIndex = startIndex + 1;
    end
    while(startIndex < numSamples && z(startIndex) < 0) % wait to go positive
        startIndex = startIndex + 1;
    end
    % now we just crossed from negative to positive (or reached the end)
    finalTimeIndex(i) = startIndex;
end
% output is finalTimeIndex; the last sample index is appended as the final
% boundary
finalTimeIndex = [finalTimeIndex length(t)];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% plotting:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Use figure 1 and clear whatever it currently shows.
figure(1);
clf;

% Panel 1 of 8: the mono waveform against time.
subplot(8,1,1);
plot(t,z);
axis('tight');
title('WAV File Data, temporary mono conversion');

% Panel 2 of 8: log-magnitude spectrogram, low frequencies at the bottom.
subplot(8,1,2);
specLogMagnitude = log10(abs(B));
imagesc(tspec,f,specLogMagnitude);
axis('xy');
colormap('jet');
title('Spectrogram');