Coursera Machine Learning: Exercise 6

第六次上机作业:

gaussianKernel.m
dataset3Params.m
processEmail.m
emailFeatures.m

gaussianKernel.m

高斯核函数

% Gaussian (RBF) kernel similarity between x1 and x2 with bandwidth sigma:
%   sim = exp(-||x1 - x2||^2 / (2 * sigma^2))
% Both inputs are flattened to column vectors first, so a row vector paired
% with a column vector is compared element by element instead of being
% expanded (or rejected) by the subtraction.
delta = x1(:) - x2(:);
sim = exp(-sum(delta .^ 2) / (2 * sigma * sigma));

dataset3Params.m

在交叉验证集上搜索使分类错误率最小的 C 和 sigma

function [C, sigma] = dataset3Params(X, y, Xval, yval)
%DATASET3PARAMS Choose C and sigma for an SVM with a Gaussian kernel.
%   [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) trains one model with
%   svmTrain on (X, y) for every (C, sigma) pair drawn from a fixed grid of
%   candidate values, predicts on Xval with svmPredict, and returns the pair
%   whose predictions disagree with yval least often.
%
%   Ties keep the pair that was tried first (C varies in the outer loop,
%   sigma in the inner loop). If no pair achieves an error rate below 1,
%   the defaults C = 1 and sigma = 0.3 are returned.

% Fallback values, returned only when no candidate beats an error rate of 1.
C = 1;
sigma = 0.3;

% Candidate values, used for both C and sigma.
candidates = [0.01; 0.03; 0.1; 0.3; 1; 3; 10; 30];

% Best result so far. Deliberately not named "error", which would shadow
% the built-in error() function inside this file.
best_err = 1;
best_C = C;
best_sigma = sigma;

for i = 1:numel(candidates)
  for j = 1:numel(candidates)
    trial_C = candidates(i);
    trial_sigma = candidates(j);

    % Train on the training set with the course-provided svmTrain.
    model = svmTrain(X, y, trial_C, ...
                     @(x1, x2) gaussianKernel(x1, x2, trial_sigma));

    % Score on the cross-validation set: fraction of misclassified examples.
    predictions = svmPredict(model, Xval);
    val_err = mean(double(predictions ~= yval));

    % Keep the pair with the smallest validation error seen so far.
    if val_err < best_err
      best_err = val_err;
      best_C = trial_C;
      best_sigma = trial_sigma;
    end
  end
end

C = best_C;
sigma = best_sigma;
end

processEmail.m

将切分后邮件里的每个关键词的索引号放进一个矩阵里

% Look up the current token (str) in the vocabulary and, if it is present,
% append the index of its first occurrence to word_indices.
% strcmp against the whole cell array compares every entry at once and works
% for both row and column cell arrays; the previous "1 : size(vocabList)"
% loop bound used only the first element of the size vector, so a 1-by-n
% vocabulary would have been scanned for a single entry only.
match = find(strcmp(vocabList, str), 1);
if ~isempty(match)
  word_indices = [word_indices; match];
end

emailFeatures.m

将邮件中出现过的词对应的特征记为 1,未出现的记为 0

% Build the binary feature vector: x(k) = 1 when vocabulary index k appears
% in word_indices, and 0 otherwise. n is the total number of words in the
% vocabulary.
x = zeros(n, 1);
% Indexed assignment sets every listed position in one step. Repeated
% indices are harmless, an empty list leaves x all zeros, and (:) makes the
% result independent of whether word_indices is a row or a column (the old
% "1 : size(word_indices)" bound visited only one entry of a row vector).
% NOTE(review): assumes every index is within 1..n; a larger index would
% grow x instead of raising an error - confirm against the caller.
x(word_indices(:)) = 1;