You are on page 1 of 11

LISTING CODE VOICE RECOGNITION MATLAB

CODE 1
function [norm_voice,h] = Voice_Rec(sample_freq)
% VOICE_REC  Optionally record a one-second utterance, then load and condition it.
%
%   [norm_voice,h] = Voice_Rec(sample_freq)
%
%   Input:
%     sample_freq : sampling frequency in Hertz used for recording and
%                   playback; it is also handed to downsmpl as its second
%                   argument
%
%   Output:
%     norm_voice  : the signal read from '<name>.wav' after it has been
%                   passed through normalize and then downsmpl
%     h           : first output of daubcqf(32,'min')
%
%   The user is asked for a file name and whether to record.  When the
%   answer is 'y' the recording is repeated until the user answers anything
%   other than 'n', and the last take is written to '<name>.wav'.  In every
%   case '<name>.wav' is then read back, so the file must already exist
%   when recording is skipped.

record_len = 1;                           % Record time length in seconds
sample_time = sample_freq * record_len;   % Number of samples to record

disp('Get ready to record your voice');
name = input('Enter the file name you want to save the file with: ','s');
file_name = sprintf('%s.wav',name);

option_rec = input('Press y to record: ','s');
if strcmp(option_rec,'y')
    option = 'n';
    while strcmp(option,'n')
        input('Press enter when ready to record--> ');
        % Records the input through the sound card to the variable with
        % the specified sampling frequency
        record = wavrecord(sample_time, sample_freq);
        input('Press enter to listen the recorded voice--> ');
        sound(record, sample_freq);
        option = input('Press y to save or n to record again: ','s');
    end
    % Save the recorded data to a file with the specified file name in
    % .wav format
    wavwrite(record, sample_freq, file_name);
end

voice_read = wavread(file_name);
norm_voice = normalize(voice_read);
norm_voice = downsmpl(norm_voice, sample_freq);

le = 32;                  % filter length requested from daubcqf
h = daubcqf(le,'min');

function vec = normalize(vec)
% NORMALIZE  Remove the mean of a vector and scale it to unit Euclidean length.
%   vec = normalize(vec) subtracts mean(vec) from every element and then
%   multiplies by the reciprocal of the square root of the sum of squares
%   of the centred values.
centred = vec - mean(vec);
energy  = sum(centred .* centred);    % sum of squared deviations
scale   = 1 / sqrt(energy);
vec     = scale * centred;
function sampled = downsmpl(voice, freq)
% DOWNSMPL  Halve the number of samples by combining adjacent pairs.
%
%   sampled = downsmpl(voice, freq)
%
%   Input:
%     voice : signal vector
%     freq  : upper bound on the sample index; pairs (z, z+1) are used for
%             z = 1, 3, 5, ... while z < freq
%
%   Output:
%     sampled : column vector whose k-th element is
%               sqrt(abs(voice(2k-1) * voice(2k))).  When no pair is
%               available the scalar 0 is returned.
%
%   The number of pairs is limited to what voice actually contains, so a
%   freq larger than the signal length does not index past its end.

% Number of odd indices z with z < freq, capped by the pairs in the signal.
num_pairs = max(0, ceil((freq - 1) / 2));
num_pairs = min(num_pairs, floor(numel(voice) / 2));

if num_pairs == 0
    sampled = 0;
    return;
end

first_of_pair  = voice(1:2:2*num_pairs - 1);
second_of_pair = voice(2:2:2*num_pairs);
sampled = sqrt(abs(first_of_pair(:) .* second_of_pair(:)));

function [h_0,h_1] = daubcqf(N,TYPE)
% [h_0,h_1] = daubcqf(N,TYPE);
%
% Function computes the Daubechies' scaling and wavelet filters
% (normalized to sqrt(2)).
%
% Input:
%    N    : Length of filter (must be even)
%    TYPE : Optional parameter that distinguishes the minimum phase,
%           maximum phase and mid-phase solutions ('min', 'max', or
%           'mid'). If no argument is specified, the minimum phase
%           solution is used.  Any other value is treated like 'min'.
%
% Output:
%    h_0 : Daubechies' scaling filter (phase selected by TYPE)
%    h_1 : Daubechies' wavelet filter, derived from h_0 by reversing it
%          and negating every other coefficient
%
% Errors:
%    Raised when N is odd, or when the sum of squares of h_0 differs from
%    1 by more than 1e-4 (numerical instability).
%
% Example:
%    N = 4;
%    TYPE = 'min';
%    [h_0,h_1] = daubcqf(N,TYPE)
%    h_0 = 0.4830 0.8365 0.2241 -0.1294
%    h_1 = 0.1294 0.2241 -0.8365 0.4830
%
if(nargin < 2),
    TYPE = 'min';
end;
if(rem(N,2) ~= 0),
    error('No Daubechies filter exists for ODD length');
end;
K = N/2;
a = 1;
p = 1;
q = 1;
h_0 = [1 1];

% Build the polynomial factors iteratively: h_0 accumulates powers of
% [1 1], p is convolved twice with [1 -1] per step, and q accumulates the
% weighted sum a*p.
for j = 1:K-1,
    a = -a * 0.25 * (j + K - 1)/j;
    h_0 = [0 h_0] + [h_0 0];
    p = [0 -p] + [p 0];
    p = [0 -p] + [p 0];
    q = [0 q 0] + a*p;
end;
q = sort(roots(q));
qt = q(1:K-1);
% strcmp is used so that a TYPE of any length is compared safely.
if strcmp(TYPE,'mid'),
    if rem(K,2)==1,
        qt = q([1:4:N-2 2:4:N-2]);
    else
        qt = q([1 4:4:K-1 5:4:K-1 N-3:-4:K N-4:-4:K]);
    end;
end;
h_0 = conv(h_0,real(poly(qt)));
h_0 = sqrt(2)*h_0/sum(h_0);   % Normalize to sqrt(2);
if strcmp(TYPE,'max'),
    h_0 = fliplr(h_0);
end;
% The deviation from unit energy is checked in both directions.
if(abs(sum(h_0 .^ 2) - 1) > 1e-4)
    error('Numerically unstable for this value of "N".');
end;
h_1 = rot90(h_0,2);
h_1(1:2:N)=-h_1(1:2:N);

CODE 2
% mfcc - Mel frequency cepstrum coefficient analysis.
%   [ceps,freqresp,fb,fbrecon,freqrecon] = ...
%           mfcc(input, samplingRate, [frameRate])
% Find the cepstral coefficients (ceps) corresponding to the
% input.  Four other quantities are optionally returned that
% represent:
%   the detailed fft magnitude (freqresp) used in MFCC calculation,
%   the mel-scale filter bank output (fb)
%   the filter bank output by inverting the cepstrals with a cosine
%       transform (fbrecon),
%   the smooth frequency response by interpolating the fb reconstruction
%       (freqrecon)
% -- Malcolm Slaney, August 1993
% Modified a bit to make testing an algorithm easier... 4/15/94
% Fixed Cosine Transform (indices of cos() were swapped) - 5/26/95
% Added optional frameRate argument - 6/8/95

% Added proper filterbank reconstruction using inverse DCT - 10/27/95


% Added filterbank inversion to reconstruct spectrum - 11/1/95
% (c) 1998 Interval Research Corporation
function [ceps,freqresp,fb,fbrecon,freqrecon] = ...
        MFCC(input, samplingRate, frameRate)
% MFCC  Mel frequency cepstrum coefficient analysis.
%
%   [ceps,freqresp,fb,fbrecon,freqrecon] = MFCC(input, samplingRate, frameRate)
%
%   Input:
%     input        : signal vector; a column vector is transposed to a row
%     samplingRate : sampling rate of input (default 16000)
%     frameRate    : number of analysis frames per second (default 100)
%
%   Output (cols = fix((length(input)-windowSize)/windowStep)):
%     ceps      : cepstralCoefficients x cols cepstral coefficients
%     freqresp  : fftSize/2 x cols FFT magnitude of each windowed frame
%     fb        : totalFilters x cols log10 filter bank output
%     fbrecon   : totalFilters x cols filter bank output rebuilt from ceps
%                 with the transposed DCT matrix
%     freqrecon : fftSize/2 x cols frequency response interpolated from
%                 fbrecon
%
%   Side effect: the globals mfccDCTMatrix and mfccFilterWeights are
%   overwritten on every call.

global mfccDCTMatrix mfccFilterWeights

[r c] = size(input);
if (r > c)
    input=input';
end

% Filter bank parameters
lowestFrequency = 133.3333;
linearFilters = 13;
linearSpacing = 66.66666666;
logFilters = 27;
logSpacing = 1.0711703;
fftSize = 512;
cepstralCoefficients = 13;
windowSize = 256;       % Standard says 400, but 256 makes more sense
                        % Really should be a function of the sample
                        % rate (and the lowestFrequency) and the
                        % frame rate.
if (nargin < 2) samplingRate = 16000; end;
if (nargin < 3) frameRate = 100; end;

% Keep this around for later....
totalFilters = linearFilters + logFilters;

% Now figure the band edges.  Interesting frequencies are spaced
% by linearSpacing for a while, then go logarithmic.  First figure
% all the interesting frequencies.  Lower, center, and upper band
% edges are all consecutive interesting frequencies.
freqs = lowestFrequency + (0:linearFilters-1)*linearSpacing;
freqs(linearFilters+1:totalFilters+2) = ...
    freqs(linearFilters) * logSpacing.^(1:logFilters+2);

lower = freqs(1:totalFilters);
center = freqs(2:totalFilters+1);
upper = freqs(3:totalFilters+2);

% We now want to combine FFT bins so that each filter has unit
% weight, assuming a triangular weighting function.  First figure
% out the height of the triangle, then we can figure out each
% frequencies contribution
mfccFilterWeights = zeros(totalFilters,fftSize);
triangleHeight = 2./(upper-lower);
fftFreqs = (0:fftSize-1)/fftSize*samplingRate;

for chan=1:totalFilters
    mfccFilterWeights(chan,:) = ...
        (fftFreqs > lower(chan) & fftFreqs <= center(chan)).* ...
        triangleHeight(chan).*(fftFreqs-lower(chan))/(center(chan)-lower(chan)) + ...
        (fftFreqs > center(chan) & fftFreqs < upper(chan)).* ...
        triangleHeight(chan).*(upper(chan)-fftFreqs)/(upper(chan)-center(chan));
end
%semilogx(fftFreqs,mfccFilterWeights')
%axis([lower(1) upper(totalFilters) 0 max(max(mfccFilterWeights))])

hamWindow = 0.54 - 0.46*cos(2*pi*(0:windowSize-1)/windowSize);

if 0                    % Window it like ComplexSpectrum
    windowStep = samplingRate/frameRate;
    a = .54;
    b = -.46;
    wr = sqrt(windowStep/windowSize);
    phi = pi/windowSize;
    hamWindow = 2*wr/sqrt(4*a*a+2*b*b)* ...
        (a + b*cos(2*pi*(0:windowSize-1)/windowSize + phi));
end

% Figure out Discrete Cosine Transform.  We want a matrix
% dct(i,j) which is totalFilters x cepstralCoefficients in size.
% The i,j component is given by
%                cos( i * (j+0.5)/totalFilters pi )
% where we have assumed that i and j start at 0.
mfccDCTMatrix = 1/sqrt(totalFilters/2)*cos((0:(cepstralCoefficients-1))' * ...
    (2*(0:(totalFilters-1))+1) * pi/2/totalFilters);
mfccDCTMatrix(1,:) = mfccDCTMatrix(1,:) * sqrt(2)/2;
%imagesc(mfccDCTMatrix);

% Filter the input with the preemphasis filter.  Also figure how
% many columns of data we will end up with.
if 1
    preEmphasized = filter([1 -.97], 1, input);
else
    preEmphasized = input;
end
windowStep = samplingRate/frameRate;
cols = fix((length(input)-windowSize)/windowStep);

% Allocate all the space we need for the output arrays.
ceps = zeros(cepstralCoefficients, cols);
if (nargout > 1) freqresp = zeros(fftSize/2, cols); end;
if (nargout > 2) fb = zeros(totalFilters, cols); end;
if (nargout > 3) fbrecon = zeros(totalFilters, cols); end;

% Invert the filter bank center frequencies.  For each FFT bin
% we want to know the exact position in the filter bank to find
% the original frequency response.  The next block of code finds the
% integer and fractional sampling positions.
if (nargout > 4)
    fr = (0:(fftSize/2-1))'/(fftSize/2)*samplingRate/2;
    j = 1;
    for i=1:(fftSize/2)
        if fr(i) > center(j+1)
            j = j + 1;
        end
        if j > totalFilters-1
            j = totalFilters-1;
        end
        fr(i) = min(totalFilters-.0001, ...
            max(1,j + (fr(i)-center(j))/(center(j+1)-center(j))));
    end
    fri = fix(fr);
    frac = fr - fri;

    freqrecon = zeros(fftSize/2, cols);
end

% Ok, now let's do the processing.  For each chunk of data:
%    * Window the data with a hamming window,
%    * Shift it into FFT order,
%    * Find the magnitude of the fft,
%    * Convert the fft data into filter bank outputs,
%    * Find the log base 10,
%    * Find the cosine transform to reduce dimensionality.
for start=0:cols-1
    first = floor(start*windowStep) + 1;
    last = first + windowSize-1;
    fftData = zeros(1,fftSize);
    fftData(1:windowSize) = preEmphasized(first:last).*hamWindow;
    fftMag = abs(fft(fftData));
    earMag = log10(mfccFilterWeights * fftMag');

    ceps(:,start+1) = mfccDCTMatrix * earMag;
    if (nargout > 1) freqresp(:,start+1) = fftMag(1:fftSize/2)'; end;
    if (nargout > 2) fb(:,start+1) = earMag; end
    if (nargout > 3)
        fbrecon(:,start+1) = ...
            mfccDCTMatrix(1:cepstralCoefficients,:)' * ...
            ceps(:,start+1);
    end
    if (nargout > 4)
        f10 = 10.^fbrecon(:,start+1);
        freqrecon(:,start+1) = samplingRate/fftSize * ...
            (f10(fri).*(1-frac) + f10(fri+1).*frac);
    end
end

% OK, just to check things, let's also reconstruct the original FB
% output.  We do this by multiplying the cepstral data by the transpose
% of the original DCT matrix.  This all works because we were careful to
% scale the DCT matrix so it was orthonormal.
if 1 && (nargout > 3)
    fbrecon = mfccDCTMatrix(1:cepstralCoefficients,:)' * ceps;
    % imagesc(mt(:,1:cepstralCoefficients)*mfccDCTMatrix );
end;

CODE 3
http://archive.kaskus.co.id/thread/9698184/0#1

function y = cut(a)
% CUT  Trim a recorded signal at the front and then at the back.
%   y = cut(a) passes a (the recorded voice) through CutFront and hands
%   the result to CutBack; the output of CutBack is returned.
front_trimmed = CutFront(a);    % cut the front of a, the voice that was recorded
y = CutBack(front_trimmed);     % cut the back of the signal whose front is already cut
function z = CutFront(a)
% CUTFRONT  Drop leading frames whose mean absolute value is below a threshold.
%
%   z = CutFront(a)
%
%   Input:
%     a : signal with samples along the rows (a column vector, or one
%         column per channel)
%
%   Output:
%     z : a from the start of the first 500-sample frame whose mean
%         absolute value is not below 0.00840 (in every column) through
%         the last sample.  When no such frame exists, z is empty with the
%         same number of columns as a.
%
%   The final frame may be shorter than 500 samples; it is examined too,
%   so short or entirely quiet inputs still produce an output.
frame_len = 500;
threshold = 0.00840;
total = size(a, 1);

z = a([], :);                       % result when every frame is quiet
for first = 1:frame_len:total
    last = min(first + frame_len - 1, total);
    level = mean(abs(a(first:last, :)), 1);
    if ~all(level < threshold)
        z = a(first:total, :);
        return;
    end
end

function y = CutBack(b)
% CUTBACK  Drop trailing frames whose mean absolute value is below a threshold.
%
%   y = CutBack(b)
%
%   Input:
%     b : signal with samples along the rows (a column vector, or one
%         column per channel)
%
%   Output:
%     y : b from the first sample through the end of the last frame whose
%         mean absolute value is not below 0.00840 (in every column).
%         When no such frame exists, y is empty with the same number of
%         columns as b.
%
%   Frames are examined from the end, stepping back 500 samples at a time;
%   each frame spans rows i-500 .. i (clamped at row 1), so the frame at
%   the head of the signal is examined as well.
step = 500;
threshold = 0.00840;

y = b([], :);                       % result when every frame is quiet
i = size(b, 1);
while i >= 1
    j = max(1, i - step);
    level = mean(abs(b(j:i, :)), 1);
    if all(level < threshold)
        i = i - step;
    else
        y = b(1:i, :);
        return;
    end
end

CODE 4
http://sourceforge.net/projects/hmm-asr-matlab/files/

CODE 5
http://www.ece.northwestern.edu/~ismail/courses/c92/speech_examples/index.html
% An example showing how to obtain a speech signal from microphone
% and compute its Fourier Transform (FFT)

Fs = 10000;       % Sampling Frequency (Hz)
Nseconds = 1;     % Length of speech signal

fprintf('say a word immediately after hitting enter: ');
input('');

% Get time-domain speech signal from microphone
y = wavrecord(Nseconds*Fs, Fs, 'double');

% Plot time-domain signal
subplot(2,1,1);
t = (0:(Nseconds*Fs)-1)/Fs;     % time of each sample in seconds
plot(t,y);
xlabel('time');

% Compute FFT
x = fft(y);

% Get response until Fs/2 (for frequency from Fs/2 to Fs, response is
% repeated)
x = x(1:floor(Nseconds*Fs/2));

% Plot magnitude vs. frequency
subplot(2,1,2);
m = abs(x);
f = (0:length(x)-1)*(Fs/2)/length(x);
plot(f,m);
xlabel('Frequency (Hz)');
ylabel('Magnitude');

LANJUTAN
function bof_training
% BOF_TRAINING  Create the library of Bank of Filters (BOF) features.
%   For each word in the list the user is prompted, one second of speech
%   is recorded from the microphone, bof_feature is applied to it, and the
%   result is stored as one row of fw.  Fs, n, words and fw are then saved
%   to words_bof.mat.

Fs = 10000;       % Sampling Frequency (Hz)
n = 100;          % Number of filters (frequency bands)
Nseconds = 1;     % Length of speech signal

% List of words (you can add more words to this list but make sure
% each word has five characters (if less then pad it with spaces)
words = ['   up';
         ' down';
         ' left';
         'right'];

% Matrix to store features for each word (rows correspond to words)
fw = zeros(size(words,1),n);

fprintf('You will get one second to say each word.\n\n');

% For each word, get the word from microphone and compute its features
for i=1:size(words,1)
    fprintf('Hit enter and say immediately ''%s'':',words(i,:));

    % pause for enter key
    junk=input('');

    % get a word from microphone
    y = wavrecord(Nseconds*Fs, Fs, 'double');

    % Calculate the features of the word
    f = bof_feature(y,n,Fs);

    % Save them in the features matrix
    fw(i,:) = f;

    % abs(f)
    % plot(1:Fs,fft(y));
end

% Save features matrix to a file (this file will be loaded into Matlab
% during speech recognition)
save words_bof.mat Fs n words fw;

LANJUTAN
function bof_recognition
% BOF_RECOGNITION  Speech recognition using Bank of Filters (BOF).
%   Loads the variables stored in words_bof.mat, records one second of
%   speech, applies bof_feature to it, and prints the row of words whose
%   stored feature row in fw has the smallest Euclidean distance to the
%   features of the recording.

% Bring the training data (Fs, n, words, fw) into the workspace.
load('words_bof.mat');

% Prompt the user, then wait for the enter key.
fprintf('You will get one second to say your word.\n\n');
fprintf('Hit enter and say your word immediately:');
input('');

% Record from the microphone and compute the features of the recording.
y = wavrecord(1*Fs, Fs, 'double');
f = bof_feature(y,n,Fs);

% Distance from the spoken word to every word in the library.
num_words = size(words,1);
d = zeros(1,num_words);
for k = 1:num_words
    delta = f - fw(k,:);
    d(k) = sqrt(sum(delta.^2));
end

% The library word with the smallest distance is reported as spoken.
[~, best] = min(d);
fprintf('You said: %s\n',words(best,:));

CODE 6
% Get ready to record your name
for i = 1:1
    file = sprintf('%s%d.wav','rec',i);
    input('Press enter when ready to record your name');
    y = wavrecord(88200,44100);
    soundsc(y,44100);
    wavwrite(y,44100,file);
end

name = input('Enter the name that must be recognized:','s');

for j = 1:1
    file = sprintf('%s%d.wav','rec',j);
    % NOTE(review): the directory prefix that was concatenated here is not
    % recoverable from the listing.  An empty prefix reads the file from
    % the current folder, which is where wavwrite stored it above --
    % confirm against the intended layout.
    rec_dir = '';
    file = cat(2,rec_dir,file);
    t = wavread(file);
    % Crop recording to a window that just contains the speech
    s = abs(t);
end

% now the comparison algo
input('Press enter when ready')
usertemp = wavrecord(88200,44100);
sound(usertemp,44100);
rec = input('Press button 1 to record again or press enter to proceed:');
while rec == 1
    rec = 0;
    input('Press enter when you ready')
    usertemp = wavrecord(88200,44100);
    soundsc(usertemp,44100);
    rec = input('Press 1 again to record again : ');
end

% Crop recording to a window that just contains the speech
s1 = abs(usertemp);
%s1=usertemp;

subplot(2,1,1);
plot(s);
title('Your Original voice');
subplot(2,1,2);
plot(s1);
title('Tested voice');      % set after plot so the plot call does not reset it

% NOTE(review): this is an exact sample-by-sample equality test; a
% distance or threshold comparison is presumably what is intended --
% confirm.  isequal also copes with recordings of different sizes.
if isequal(s,s1)
    fprintf('You are %s',name);
else
    fprintf('You are not %s',name);
end

You might also like