Professional Documents
Culture Documents
CODE 1
function [norm_voice,h] = Voice_Rec(sample_freq)
%VOICE_REC Optionally record a voice sample, then load and preprocess it.
%   [norm_voice,h] = Voice_Rec(sample_freq) asks for a base file name,
%   optionally records one second of audio at sample_freq Hz and saves it
%   as <name>.wav, then reads <name>.wav back and passes it through the
%   helper functions normalize and downsmpl (defined outside this block).
%
%   Input:
%     sample_freq - sampling frequency in Hertz, used for recording and
%                   playback and forwarded to downsmpl.
%   Outputs:
%     norm_voice  - downsmpl(normalize(voice_read), sample_freq).
%     h           - daubcqf(32,'min').
%
%   If the user does not answer exactly 'y' at the record prompt, nothing
%   is recorded and <name>.wav must already exist for the read to succeed.

record_len = 1;                          % Record time length in seconds
sample_time = sample_freq * record_len;  % Number of samples to record

disp('Get ready to record your voice');
name = input('Enter the file name you want to save the file with: ','s');
file_name = sprintf('%s.wav',name);

option_rec = input('Press y to record: ','s');
% strcmp is used instead of == so that empty or multi-character answers
% are compared as whole strings rather than element by element.
if strcmp(option_rec,'y')
    option = 'n';
    % Keep re-recording for as long as the user answers 'n'.
    while strcmp(option,'n')
        input('Press enter when ready to record--> ');
        % Record the input through the sound card into the variable,
        % using the specified sampling frequency.
        record = wavrecord(sample_time, sample_freq);
        input('Press enter to listen the recorded voice--> ');
        sound(record, sample_freq);
        option = input('Press y to save or n to record again: ','s');
    end
    % Save the recorded data to a file with the specified file name
    % in .wav format.
    wavwrite(record, sample_freq, file_name);
end

voice_read = wavread(file_name);
norm_voice = normalize(voice_read);
norm_voice = downsmpl(norm_voice, sample_freq);

le = 32;                 % length argument passed to daubcqf
h = daubcqf(le,'min');
% NOTE(review): the statements below read `freq` and `voice`, neither of
% which is assigned anywhere in the visible code. Presumably this is the
% body of the downsampling helper and its `function` line is missing from
% this listing -- confirm against the original source.
% z is the read index into voice, a is the write index into sampled.
z=1;
a=1;
sampled=0;
% Step through voice two elements at a time; each output element is the
% square root of the absolute value of the product of one adjacent pair.
while z<freq,
sampled(a) = sqrt(abs(voice(z)*voice(z+1)));
a=a+1;
z = z+2;
end
% Transpose the accumulated result.
sampled = sampled';
% NOTE(review): K, N, TYPE and the starting values of a, p, q and h_0 are
% not set anywhere in the visible code. Presumably this is the body of the
% daubcqf helper that Voice_Rec calls, with its `function` line and
% initialisation missing from this listing -- confirm against the original.
% Iteratively grow h_0 and the polynomial q over K-1 passes; a is a
% running scale factor and p is extended twice per pass.
for j = 1:K-1,
a = -a * 0.25 * (j + K - 1)/j;
h_0 = [0 h_0] + [h_0 0];
p = [0 -p] + [p 0];
p = [0 -p] + [p 0];
q = [0 q 0] + a*p;
end;
% Sort the roots of q and, by default, keep the first K-1 of them.
q = sort(roots(q));
qt = q(1:K-1);
% For TYPE 'mid' a different subset of the sorted roots is selected,
% with separate index patterns for odd and even K.
if TYPE=='mid',
if rem(K,2)==1,
qt = q([1:4:N-2 2:4:N-2]);
else
qt = q([1 4:4:K-1 5:4:K-1 N-3:-4:K N-4:-4:K]);
end;
end;
% Convolve h_0 with the (real part of the) polynomial whose roots are qt.
h_0 = conv(h_0,real(poly(qt)));
h_0 = sqrt(2)*h_0/sum(h_0); %Scale so that the coefficients sum to sqrt(2)
% For TYPE 'max' the coefficient order is reversed.
if(TYPE=='max'),
h_0 = fliplr(h_0);
end;
% Abort when the sum of squared coefficients exceeds 1 by more than 1e-4.
if(abs(sum(h_0 .^ 2))-1 > 1e-4)
error('Numerically unstable for this value of "N".');
end;
% h_1 is h_0 rotated by 180 degrees, with the sign of every odd-indexed
% element (1, 3, ..., up to N) flipped.
h_1 = rot90(h_0,2);
h_1(1:2:N)=-h_1(1:2:N);
CODE 2
%
%
%
%
%
%
%
%
%
%
%
%
%
%
%
%
% NOTE(review): this is a fragment. totalFilters, fftSize, lower, center,
% upper, samplingRate, windowSize, frameRate, cepstralCoefficients and
% input are used below but are not defined in the visible lines, and the
% enclosing `function` line is not shown. The final `for start=...` loop
% also has no matching `end` in the visible lines.
%
% We now want to combine FFT bins so that each filter has unit
% weight, assuming a triangular weighting function. First figure
% out the height of the triangle, then we can figure out each
% frequency's contribution.
mfccFilterWeights = zeros(totalFilters,fftSize);
triangleHeight = 2./(upper-lower);
fftFreqs = (0:fftSize-1)/fftSize*samplingRate;
% One row of weights per filter: a rising ramp on (lower, center] plus a
% falling ramp on (center, upper); zero elsewhere.
for chan=1:totalFilters
mfccFilterWeights(chan,:) = ...
(fftFreqs > lower(chan) & fftFreqs <= center(chan)).* ...
triangleHeight(chan).*(fftFreqs-lower(chan))/(center(chan)-lower(chan)) + ...
(fftFreqs > center(chan) & fftFreqs < upper(chan)).* ...
triangleHeight(chan).*(upper(chan)-fftFreqs)/(upper(chan)-center(chan));
end
%semilogx(fftFreqs,mfccFilterWeights')
%axis([lower(1) upper(totalFilters) 0 max(max(mfccFilterWeights))])
% Window of windowSize points in the 0.54 - 0.46*cos(...) form.
hamWindow = 0.54 - 0.46*cos(2*pi*(0:windowSize-1)/windowSize);
% Disabled alternative window: the condition is the constant 0, so this
% branch never runs.
if 0 % Window it like ComplexSpectrum
windowStep = samplingRate/frameRate;
a = .54;
b = -.46;
wr = sqrt(windowStep/windowSize);
phi = pi/windowSize;
hamWindow = 2*wr/sqrt(4*a*a+2*b*b)* ...
(a + b*cos(2*pi*(0:windowSize-1)/windowSize + phi));
end
% Figure out Discrete Cosine Transform. We want a matrix
% dct(i,j) which is totalFilters x cepstralCoefficients in size.
% The i,j component is given by
% cos( i * (j+0.5)/totalFilters pi )
% where we have assumed that i and j start at 0.
mfccDCTMatrix = 1/sqrt(totalFilters/2)*cos((0:(cepstralCoefficients-1))' * ...
(2*(0:(totalFilters-1))+1) * pi/2/totalFilters);
% The first row gets an extra factor of sqrt(2)/2.
mfccDCTMatrix(1,:) = mfccDCTMatrix(1,:) * sqrt(2)/2;
%imagesc(mfccDCTMatrix);
% Filter the input with the preemphasis filter. Also figure how
% many columns of data we will end up with.
% Here `input` is used as a data variable (it is filtered and measured
% with length), so it shadows the built-in input function in this scope.
% The condition is the constant 1, so the else branch never runs.
if 1
preEmphasized = filter([1 -.97], 1, input);
else
preEmphasized = input;
end
windowStep = samplingRate/frameRate;
cols = fix((length(input)-windowSize)/windowStep);
% Allocate all the space we need for the output arrays.
% freqresp and fb are only allocated when nargout is large enough --
% presumably they are optional outputs of the enclosing function.
ceps = zeros(cepstralCoefficients, cols);
if (nargout > 1) freqresp = zeros(fftSize/2, cols); end;
if (nargout > 2) fb = zeros(totalFilters, cols); end;
% Invert the filter bank center frequencies. For each FFT bin
% we want to know the exact position in the filter bank to find
% the original frequency response. The next block of code finds the
% integer and fractional sampling positions.
if (nargout > 4)
fr = (0:(fftSize/2-1))'/(fftSize/2)*samplingRate/2;
j = 1;
% j tracks the filter whose center lies at or below the current bin
% frequency; it is capped at totalFilters-1 so center(j+1) stays valid.
for i=1:(fftSize/2)
if fr(i) > center(j+1)
j = j + 1;
end
if j > totalFilters-1
j = totalFilters-1;
end
fr(i) = min(totalFilters-.0001, ...
max(1,j + (fr(i)-center(j))/(center(j+1)-center(j))));
end
% Split each position into its integer part and fractional remainder.
fri = fix(fr);
frac = fr - fri;
freqrecon = zeros(fftSize/2, cols);
end
% Ok, now let's do the processing. For each chunk of data:
% * Window the data with a hamming window,
% * Shift it into FFT order,
% * Find the magnitude of the fft,
% * Convert the fft data into filter bank outputs,
% * Find the log base 10,
% * Find the cosine transform to reduce dimensionality.
for start=0:cols-1
% first/last are the 1-based sample bounds of the current frame.
first = floor(start*windowStep) + 1;
last = first + windowSize-1;
% Zero-pad the windowed frame out to fftSize before taking the FFT.
fftData = zeros(1,fftSize);
fftData(1:windowSize) = preEmphasized(first:last).*hamWindow;
fftMag = abs(fft(fftData));
CODE 3
http://archive.kaskus.co.id/thread/9698184/0#1
function y = cut(a)
%CUT Remove low-amplitude frames from both ends of a recorded signal.
%   y = cut(a) passes a through CutFront and then through CutBack and
%   returns the result.
%
%   Input:
%     a - the recorded sound.
%   Output:
%     y - a with its front and back trimmed by CutFront and CutBack.
y1 = CutFront(a); % cut the front of a (a is the sound that was recorded)
y = CutBack(y1);  % cut the back of y1 (a with its front already removed)
function z = CutFront(a)
%CUTFRONT Remove low-amplitude frames from the start of a signal.
%   z = CutFront(a) scans a in consecutive frames of 500 rows, starting at
%   row 1, and returns a from the first row of the first frame whose mean
%   absolute amplitude is not below 0.00840 through to the last row.
%
%   Input:
%     a - signal with samples along its rows (one column per channel).
%         With several columns, a frame counts as quiet only when the
%         mean of every column is below the threshold.
%   Output:
%     z - the trailing part of a starting at the first non-quiet frame.
%         When no frame reaches the threshold (including empty input),
%         z is empty with the same number of columns as a.
%
%   The final frame may be shorter than 500 rows; it is still examined so
%   that short inputs and trailing partial frames yield a defined result.
frame_len = 500;      % rows per analysis frame
threshold = 0.00840;  % mean absolute amplitude below this is "quiet"

m1 = size(a,1);
z = a([],:);          % default: nothing above the threshold was found
i1 = 1;
while i1 <= m1
    j1 = min(i1 + frame_len - 1, m1);
    % Average along rows explicitly so a one-row frame is handled the
    % same way as a full frame.
    rata1 = mean(abs(a(i1:j1,:)), 1);
    if all(rata1 < threshold)
        i1 = i1 + frame_len;   % quiet frame: skip it
    else
        z = a(i1:m1,:);        % keep everything from this frame onward
        break;
    end
end
function y = CutBack(b)
%CUTBACK Remove low-amplitude frames from the end of a signal.
%   y = CutBack(b) scans b backwards from its last row in frames that
%   span rows i-500 .. i (501 rows), stepping i back by 500 each time,
%   and returns rows 1 .. i for the first frame whose mean absolute
%   amplitude is not below 0.00840.
%
%   Input:
%     b - signal with samples along its rows (one column per channel).
%         With several columns, a frame counts as quiet only when the
%         mean of every column is below the threshold.
%   Output:
%     y - the leading part of b ending at the last non-quiet frame.
%         When no frame reaches the threshold (including empty input),
%         y is empty with the same number of columns as b.
%
%   The frame nearest the start is clamped to begin at row 1; it is still
%   examined so that short inputs yield a defined result.
frame_span = 500;     % step size; each frame covers frame_span+1 rows
threshold = 0.00840;  % mean absolute amplitude below this is "quiet"

m = size(b,1);
y = b([],:);          % default: nothing above the threshold was found
i = m;
while i >= 1
    j = max(i - frame_span, 1);
    % Average along rows explicitly so a one-row frame is handled the
    % same way as a full frame.
    rata = mean(abs(b(j:i,:)), 1);
    if all(rata < threshold)
        i = i - frame_span;    % quiet frame: move towards the start
    else
        y = b(1:i,:);          % keep everything up to this frame's end
        break;
    end
end
CODE 4
http://sourceforge.net/projects/hmm-asr-matlab/files/
CODE 5
http://www.ece.northwestern.edu/~ismail/courses/c92/speech_examples/index.html
% An example showing how to obtain a speech signal from microphone
% and compute its Fourier Transform (FFT)
Fs = 10000;    % Sampling Frequency (Hz)
Nseconds = 1;  % Length of speech signal (seconds)

fprintf('say a word immediately after hitting enter: ');
input('');

% Get time-domain speech signal from microphone
y = wavrecord(Nseconds*Fs, Fs, 'double');

% Plot time-domain signal; t holds one time stamp (in seconds) per sample
subplot(2,1,1);
t = (0:(Nseconds*Fs)-1)/Fs;
plot(t,y);
xlabel('time');

% Compute FFT
x = fft(y);

% Get response until Fs/2 (for frequency from Fs/2 to Fs, response is
% repeated)
x = x(1:floor(Nseconds*Fs/2));

% Plot magnitude vs. frequency
subplot(2,1,2);
m = abs(x);
f = (0:length(x)-1)*(Fs/2)/length(x);
plot(f,m);
xlabel('Frequency (Hz)');
ylabel('Magnitude');
LANJUTAN
function bof_training
% This code is for creating the library of Bank of Filters (BOF) features.
% get a word from microphone
Fs = 10000;    % Sampling Frequency (Hz)
n = 100;       % Number of filters (frequency bands)
Nseconds = 1;  % Length of speech signal
% List of words (you can add more words to this list but make sure
% each word has five characters (if less then pad it with spaces)
% Every row of a char matrix must have the same length, so 'up' is
% left-padded with three spaces to match the five-character rows.
words = ['   up';
         ' down';
         ' left';
         'right'];
% Matrix to store features for each word (rows correspond to words)
fw = zeros(size(words,1),n);
fprintf('You will get one second to say each word.\n\n');
% For each word, get the word from microphone and compute its features
% NOTE(review): the body of this loop and its closing `end` are missing
% from this listing; restore them from the original source.
for i=1:size(words,1)
LANJUTAN
function bof_recognition
%BOF_RECOGNITION Speech recognition using Bank of Filters (BOF) features.
%   Loads words_bof.mat, waits for Enter, records from the microphone,
%   computes the features of the recording with bof_feature, and prints
%   the row of words whose stored feature row in fw is closest to it in
%   Euclidean distance.
%
%   Fs, n, words and fw are not assigned in this function; presumably
%   they are supplied by words_bof.mat -- verify against the file that
%   the training step saves.

% Bring the training data into the workspace.
load words_bof.mat;

% Prompt the user and block until Enter is pressed.
fprintf('You will get one second to say your word.\n\n');
fprintf('Hit enter and say your word immediately:');
enter_key = input('');

% Capture the spoken word from the microphone.
rec_seconds = 1;
spoken = wavrecord(rec_seconds*Fs, Fs, 'double');

% Features of the spoken word.
spoken_feat = bof_feature(spoken,n,Fs);

% Euclidean distance from the spoken word to every library word.
num_words = size(words,1);
dist = zeros(1,num_words);
for k = 1:num_words
    delta = spoken_feat - fw(k,:);
    dist(k) = sqrt(sum(delta.^2));
end

% The library word at minimum feature distance is reported as spoken.
[nearest_dist,nearest_row] = min(dist);
fprintf('You said: %s\n',words(nearest_row,:));
CODE 6
% Record a reference sample of a spoken name, record a second sample,
% plot both and report whether the two rectified recordings are identical.
% Each recording is 88200 samples at 44100 Hz (two seconds).
disp('Get ready to record your name');
for i = 1:1
    file = sprintf('%s%d.wav','rec',i);
    input('Press enter when ready to record your name');
    y = wavrecord(88200,44100);
    soundsc(y,44100);
    wavwrite(y,44100,file);
end

name = input('Enter the name that must be recognized:','s');
for j = 1:1
    % Read back the file written above. The name is rebuilt the same way
    % it was built when saving, with no extra prefix, so the read targets
    % the file that actually exists.
    file = sprintf('%s%d.wav','rec',j);
    t = wavread(file);
    % Rectify the reference recording (no cropping is performed here).
    s = abs(t);
end

% Now the comparison algorithm.
input('Press enter when ready');
usertemp = wavrecord(88200,44100);
sound(usertemp,44100);
rec = input('Press button 1 to record again or press enter to proceed:');
% Pressing Enter makes input return [], which ends the loop.
while isequal(rec,1)
    input('Press enter when you ready');
    usertemp = wavrecord(88200,44100);
    soundsc(usertemp,44100);
    rec = input('Press 1 again to record again : ');
end

% Rectify the test recording (no cropping is performed here).
s1 = abs(usertemp);
%s1=usertemp;

% Titles are set after plotting because plot resets the axes title.
subplot(2,1,1);
plot(s);
title('Your Original voice');
subplot(2,1,2);
plot(s1);
title('Tested voice');

% NOTE(review): this accepts the speaker only when both recordings are
% sample-for-sample identical, which two separate microphone recordings
% will practically never be. A feature-distance comparison is needed for
% a usable decision; the original test is kept here unchanged in meaning.
if isequal(s,s1)
    fprintf('You are %s',name);
else
    fprintf('You are not %s',name);
end