Seg File

function [finalTimeIndex, weights, soundLeft, soundRight] = nasty(filename,
noiseFloorLevel)
% segment into chunks -> do the crazy
% jeff lieberman, 2005
% this function opens a wave file [handel for now]
% and applies segmentation based loosely on jehan's segmentation algorithm,
% with changes made to specifically highlight rhythmic activity.
% bring in test sound:
[y,fs,nbits] = wavread(filename);
%y = sound file
% fs = sample rate
% bits = bitrate
t = (1:length(y))/44100;
% convert to mono:
z = 0.5*(y(:,1)+y(:,2));
soundLeft = y(:,1);
soundRight = y(:,2);
% STFT on the data
N = 2048;
R = 512;
win = hamming(R);
L = 2000;
overlap = 512-128;
% length of the fft window - ~46ms, power of two.

% 12ms window [44100*0.012 = ~512 samples]
% window shape
% shift each time
% compute every 3ms
% spectrogram:
[B,f,tspec] = specgram(z,N,fs,win,overlap);
timeRatio = floor(length(t)/length(tspec));
% generate the Bark scale, for separation of the spectrogram:
barkIndeces = ceil(13*atan(0.00076*f)+3.5*atan((f/7500).^2));
% generate the energies for each bark thing, by taking the maximal
% frequency within that bark range:
for i=1:25,
barkEnergy(i) = max(find(barkIndeces == i));
end
% generate the new Bark spectrogram, with 25 divisions
for i = 1:25,
currIndeces = find(barkIndeces == i); % find current bark members
Bnew(i,:) = sum(B(currIndeces,:))/length(find(barkIndeces == i));
end
% smooth the Bark Spectrogram by convoluting every band with a 200ms half
% hanning [raised cosine] window:
%%%%%%%%%%%%%%%%%%
% future version: when detecting grain size, change this blending size
%%%%%%%%%%%%%%%%%%
windowSecs = 0.05 ; %0.2 - 0.4 sec, with 0.003 sec per frame
N = floor(windowSecs*44100/timeRatio); % 200ms half-window so we make a .4s
window first
raisedWindowFull = window(@hamming,N);
raisedWindow = raisedWindowFull(floor(N/2):N);
% now that we have the window, convolve:
for i = 1:25,
end
BnewSmooth(i,:) = conv( abs(Bnew(i,:)), raisedWindow );
windowWidth = N*0.5*timeRatio/44100; % time of window = 0.2

tspecStretched = tspec*(tspec(end)+windowWidth)/tspec(end);
% compute the loudness function on the raw data:
% for each time t, the loudness is the sum of energies in each band
loudnessReg = log10(sum(barkEnergy*abs(Bnew)+1,1)); % +1 allows zero volume
activity to not hit -inf
pad = ones(1,length(raisedWindow)-1);
% compute the loudness function on the smooth data:
% for each time t, the loudness is the sum of energies in each band
loudness = log10(sum(barkEnergy*BnewSmooth+1,1));
pad = ones(1,length(raisedWindow)-1);
% compute the event detection function [consecutive differences]
eventRaw = diff(loudness);
% smooth this function by convolution w/ Hamming window
newWind = 0.1;
convWindow = hamming(floor(newWind*44100/timeRatio));
% 150ms hamming
window
pad = [pad ones(1,length(convWindow)-1)];
smootherEvents = conv(convWindow, eventRaw);
windowWidth = 0.5*newWind; % half window time
windowWidthFrames = floor(windowWidth*44100/timeRatio); % number of frames
for the window
% find local maxima for this smooth function for onset transients:
[min maxes] = localmaxmin(smootherEvents);
% find those that aren't at the way end:
newMax = maxes(find(maxes < length(tspec)));
% fix those that are too low amplitude:
%
%
%
%
FUTURE REVISION:
make this cutoff frequency by looking at sound data and variance within,
so that is auto-calculates the amount of noise present to get rid of the
floor
newMax = newMax(find( (smootherEvents(newMax)/max(smootherEvents)) >

noiseFloorLevel ));
% for each time of max, get the weight as the actual value of the max event
% loudness:
weights = smootherEvents(newMax);
% max signifies the indeces of events. so, look in the previous 20ms [how
% many indeces?] for a minimum in the loudness function, and that is the
% trigger index:
maxTimes = tspec(newMax) - windowWidth;
tspecMaxFrames = newMax - windowWidthFrames; % approx frames for maxes in
tspec
% find indeces of original times:
approxIndeces = floor(newMax*timeRatio-44100*windowWidth); % second half is
number of frames for windowWidth
% subplot(8,1,1);
% hold on;
%
errorbar(t(approxIndeces),zeros(size(approxIndeces)),zeros(size(approxIndec
es))+0.5,'rx');
% now search the previous 0.03 seconds to try to find the minimum of
% loudness:
% tspecMaxFrames are where to start, and 0.03 seconds is how many frames:
minFrames = ceil(0.01*(44100/timeRatio)); % how many frames back to check:
for i = 1:length(maxTimes), % for each time of the maximum smoothed
loudness. search for nearby minima in the loudness function
[trash minimumLoudnessIndeces(i)] =
min( loudness( max(tspecMaxFrames(i)-minFrames,1) : tspecMaxFrames(i) ) );
minimumLoudnessIndeces(i) = max(1,minimumLoudnessIndeces(i) +
tspecMaxFrames(i) - 1 - minFrames);
end
%
%
%
%
and -finally-... search for the nearest place where the sample has a zero
crossing and is heading positive, to that minimum in loudness. this way
we can cleanly cut up the samples into constituent parts.
search forward:
%finalTimes:
possibleTimes = tspec(minimumLoudnessIndeces); % ones to test
for i=1:length(possibleTimes),
% find nearest time index to start with: can be optimized later:
real = find(t > possibleTimes(i));
startIndex = real(1); % first time_index that occurs bigger than the
other spec;
% now search for the nearest negative to positive zero crossing:
while(z(startIndex) > 0) % wait to go negative
startIndex = startIndex + 1;
end
while(z(startIndex) < 0) % wait to go positive
startIndex = startIndex + 1;
end
% now we just crossed from negative to positive
finalTimeIndex(i) = startIndex;
end
% output is finalTimeIndex
finalTimeIndex = [finalTimeIndex length(t)];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% plotting:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
figure(1), clf
subplot(8,1,1);
plot(t,z);
axis tight;
title('WAV File Data, temporary mono conversion');
% plot it:
subplot(8,1,2);
imagesc(tspec,f,log10(abs(B)));
axis xy;
colormap('jet');
title('Spectrogram');
% plot improved bark version:

subplot(8,1,3);
imagesc(tspec,barkIndeces,log10(abs(Bnew)));
axis xy;
colormap('jet');
title('Improved Bark Spectrogram');
cc = axis;
% plot:
subplot(8,1,4);
imagesc(tspecStretched,barkIndeces,log10(abs(BnewSmooth)));
axis xy;
colormap('jet')
title('Smoothed Bark Spectrogram');
axis(cc); % match previous axis
subplot(8,1,5);
plot(tspec,loudnessReg/max(loudnessReg));
title('Loudness from Regular Bark Scale');
axis tight;
subplot(8,1,6);
plot(tspec,loudness(1:length(tspec))/max(loudness));
title('Loudness from Smoothed Bark Scale');
axis tight;
subplot(8,1,7);
plot(tspec,eventRaw(1:length(tspec))/max(eventRaw));
title('Event Detection');
axis tight;
cc = axis;
% plot normalized: subtract out the added stuff at the end
subplot(8,1,8);
plot(tspec-windowWidth,
smootherEvents(1:length(tspec))/max(smootherEvents));
title('Smoothed Event Detection Function w/ Labeled Maxima')
axis tight;
hold on;
errorbar(tspec(newMax)windowWidth,zeros(1,length(newMax)),1*ones(1,length(newMax)),'k');
axis(cc);
% add bar markers
subplot(8,1,6);
hold on;
errorbar(tspec(minimumLoudnessIndeces),zeros(size(minimumLoudnessIndeces)),
...
ones(size(minimumLoudnessIndeces)));
% plot these on the original plot:
subplot(8,1,1);
hold on;
errorbar(tspec(minimumLoudnessIndeces),0*ones(size(minimumLoudnessIndeces))
,...
ones(size(minimumLoudnessIndeces)),'r');

Seg File

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Seg File

Uploaded by

Copyright:

Available Formats

function [finalTimeIndex, weights, soundLeft, soundRight] = nasty(filename,

% length of the fft window - ~46ms, power of two.

BnewSmooth(i,:) = conv( abs(Bnew(i,:)), raisedWindow );

windowWidth = N0.5timeRatio/44100; % time of window = 0.2

newMax = newMax(find( (smootherEvents(newMax)/max(smootherEvents)) >

% plot improved bark version:

You might also like