第六次上机作业:
gaussianKernel.m
dataset3Params.m
processEmail.m
emailFeatures.m
gaussianKernel.m
高斯核函数
% Gaussian (RBF) kernel value between x1 and x2 with bandwidth sigma.
% Both inputs are flattened to columns first so that a row/column
% mismatch cannot broadcast the difference into a matrix; sim is then
% always a scalar.
delta = x1(:) - x2(:);
sim = exp(-sum(delta .^ 2) / (2 * sigma * sigma));
dataset3Params.m
求最优的C和sigma
function [C, sigma] = dataset3Params(X, y, Xval, yval)
%DATASET3PARAMS Choose the SVM parameters C and sigma by grid search.
%   [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) trains one model per
%   (C, sigma) pair with svmTrain on (X, y) using gaussianKernel, predicts
%   on Xval with svmPredict, and returns the pair whose predictions
%   disagree with yval least often. On ties the earliest pair tried wins.

% Values tried for both C and sigma.
candidates = [0.01; 0.03; 0.1; 0.3; 1; 3; 10; 30];

% Fallback result, kept only if no pair produces a comparable error
% (e.g. the error rate is NaN for every pair).
best_C = 1;
best_sigma = 0.3;
% Seeded with Inf so the first evaluated pair always becomes the
% incumbent. Named best_err because "error" is a builtin function.
best_err = Inf;

for i = 1:numel(candidates)
    for j = 1:numel(candidates)
        C = candidates(i);
        sigma = candidates(j);
        % Train with the svmTrain function supplied by the exercise package.
        model = svmTrain(X, y , C, @(x1, x2) gaussianKernel(x1, x2, sigma));
        % Cross-validation: fraction of validation examples misclassified.
        predictions = svmPredict(model, Xval);
        val_err = mean(double(predictions ~= yval));
        % Keep the C and sigma with the smallest validation error so far.
        if val_err < best_err
            best_err = val_err;
            best_C = C;
            best_sigma = sigma;
        end
    end
end

C = best_C;
sigma = best_sigma;
end
processEmail.m
将切分后邮件中的每个词在词汇表里查找,找到则把它的索引号追加到列向量 word_indices 中
% Look up str in vocabList and, when it is present, append its index
% to the column vector word_indices. strcmp compares str against every
% cell at once and find(..., 1) keeps only the first match, so the
% lookup does not depend on whether vocabList is a row or a column.
match_idx = find(strcmp(vocabList, str), 1);
if ~isempty(match_idx)
    word_indices = [word_indices; match_idx];
end
emailFeatures.m
将存在的词记为1,否则为0
% Binary feature vector: x(k) is 1 when index k occurs in word_indices
% and 0 otherwise. n is the total number of words.
x = zeros(n, 1);
% One indexed assignment marks every listed position; repeated indices
% simply set the same entry to 1 again, and an empty word_indices
% leaves x all zeros.
x(word_indices) = 1;