Shark Machine Learning Library
  • About Shark
  • Sourceforge
    • Project Summary
    • Downloads
    • Subversion Repository
  • Getting Started
  • Tutorials
  • FAQ
  • Main Modules
    • ReClaM
    • EALib
    • MOO-EALib
    • Fuzzy
  • Tools
    • Mixture
    • Array
    • Rng
    • LinAlg
    • FileUtil
  • Main Page
  • Related Pages
  • Classes

NoisySvmLikelihood.cpp

Go to the documentation of this file.
00001 //===========================================================================
00041 //===========================================================================
00042 
00043 
00044 #include <Rng/GlobalRng.h>
00045 #include <ReClaM/NoisySvmLikelihood.h>
00046 #include <ReClaM/Svm.h>
00047 #include <ReClaM/SigmoidModel.h>
00048 #include <ReClaM/NegativeLogLikelihood.h>
00049 #include <ReClaM/Rprop.h>
00050 
00051 #include <vector>
00052 #include <algorithm>
00053 
00054 
// Sigmoid variant that is fitted to the SVM outputs.
// Define exactly one of the two macros below:

// exponentially decaying sigmoid (selects SigmoidModel)
#define SIG_E

// polynomially decaying sigmoid (selects SimpleSigmoidModel)
// #define SIG_P

// Fail early with a readable message when the selection is ambiguous or
// missing, instead of an obscure "sigmoid" declaration error further down.
#if defined(SIG_E) && defined(SIG_P)
#error "NoisySvmLikelihood: define only one of SIG_E and SIG_P."
#elif !defined(SIG_E) && !defined(SIG_P)
#error "NoisySvmLikelihood: define one of SIG_E or SIG_P."
#endif
00062 
00063 
00064 
00066 
00067 
00068 NoisySvmLikelihood::NoisySvmLikelihood(double trainFraction)
00069 {
00070     RANGE_CHECK(trainFraction > 0.0 && trainFraction < 1.0);
00071 
00072     this->trainFraction = trainFraction;
00073 }
00074 
00075 NoisySvmLikelihood::~NoisySvmLikelihood()
00076 {
00077 }
00078 
00079 
00080 double NoisySvmLikelihood::error(Model& model, const Array<double>& input, const Array<double>& target)
00081 {
00082     SIZE_CHECK(input.ndim() == 2);
00083     SIZE_CHECK(target.ndim() == 2);
00084 
00085     // check the model type
00086     C_SVM* csvm = dynamic_cast<C_SVM*>(&model);
00087     if (csvm == NULL || csvm->is2norm()) throw SHARKEXCEPTION("[NoisySvmLikelihood::error] model must be a 1-norm C-SVM.");
00088 
00089     // randomly split the data
00090     int i, ic = input.dim(0);
00091     int dim = input.dim(1);
00092     int train_c = (int)(trainFraction * ic);
00093     int test_c = ic - train_c;
00094     SIZE_CHECK(train_c > 0 && test_c > 0);
00095     Array<double> train_d(train_c, dim);
00096     Array<double> train_l(train_c, 1);
00097     Array<double> test_d(test_c, dim);
00098     Array<double> test_l(test_c, 1);
00099     std::vector<int> permutation(ic);
00100     for (i=0; i<ic; i++) permutation[i] = i;
00101     for (i=0; i<ic; i++)
00102     {
00103         int j = Rng::discrete(0, ic-1);
00104         int tmp = permutation[j];
00105         permutation[j] = permutation[i];
00106         permutation[i] = tmp;
00107     }
00108     for (i=0; i<train_c; i++)
00109     {
00110         train_d[i] = input[permutation[i]];
00111         train_l[i] = target[permutation[i]];
00112     }
00113     for (i=0; i<test_c; i++)
00114     {
00115         test_d[i] = input[permutation[train_c + i]];
00116         test_l[i] = target[permutation[train_c + i]];
00117     }
00118 
00119     // train the SVM
00120     SVM* svm = csvm->getSVM();
00121     SVM_Optimizer opt;
00122     opt.init(*csvm);
00123     opt.optimize(*svm, train_d, train_l);
00124 
00125     // predict the validation data
00126     Array<double> z(test_c, 1);
00127     svm->model(test_d, z);
00128 
00129     // train a sigmoid on the validation data
00130 #ifdef SIG_E
00131     SigmoidModel sigmoid;
00132 #endif
00133 #ifdef SIG_P
00134     SimpleSigmoidModel sigmoid;
00135 #endif
00136     NegativeLogLikelihood nll;
00137     IRpropPlus rprop;
00138     rprop.init(sigmoid);
00139     for (i=0; i<100; i++)
00140     {
00141         rprop.optimize(sigmoid, nll, z, test_l);
00142 #ifdef SIG_E
00143         sigmoid.setParameter(1, 0.0);
00144         if (sigmoid.getParameter(0) > 0.0) sigmoid.setParameter(0, 0.0);
00145 #endif
00146 #ifdef SIG_P
00147         if (sigmoid.getParameter(0) < 0.0) sigmoid.setParameter(0, 0.0);
00148 #endif
00149     }
00150 
00151     // return the best negative log likelihood
00152     return nll.error(sigmoid, z, test_l);
00153 }
00154 
00155 double NoisySvmLikelihood::errorDerivative(Model& model, const Array<double>& input, const Array<double>& target, Array<double>& derivative)
00156 {
00157     SIZE_CHECK(input.ndim() == 2);
00158     SIZE_CHECK(target.ndim() == 2);
00159 
00160     // check the model type
00161     C_SVM* csvm = dynamic_cast<C_SVM*>(&model);
00162     if (csvm == NULL || csvm->is2norm()) throw SHARKEXCEPTION("[NoisySvmLikelihood::errorDerivative] model must be a 1-norm C-SVM.");
00163 
00164     // randomly split the data
00165     int i, ic = input.dim(0);
00166     int dim = input.dim(1);
00167     int train_c = (int)(trainFraction * ic);
00168     int test_c = ic - train_c;
00169     SIZE_CHECK(train_c > 0 && test_c > 0);
00170     Array<double> train_d(train_c, dim);
00171     Array<double> train_l(train_c, 1);
00172     Array<double> test_d(test_c, dim);
00173     Array<double> test_l(test_c, 1);
00174     std::vector<int> permutation(ic);
00175     for (i=0; i<ic; i++) permutation[i] = i;
00176     for (i=0; i<ic; i++)
00177     {
00178         int j = Rng::discrete(0, ic-1);
00179         int tmp = permutation[j];
00180         permutation[j] = permutation[i];
00181         permutation[i] = tmp;
00182     }
00183     for (i=0; i<train_c; i++)
00184     {
00185         train_d[i] = input[permutation[i]];
00186         train_l[i] = target[permutation[i]];
00187     }
00188     for (i=0; i<test_c; i++)
00189     {
00190         test_d[i] = input[permutation[train_c + i]];
00191         test_l[i] = target[permutation[train_c + i]];
00192     }
00193 
00194     // train the SVM
00195     SVM* svm = csvm->getSVM();
00196     SVM_Optimizer opt;
00197     opt.init(*csvm);
00198     opt.optimize(*svm, train_d, train_l);
00199 
00200     // predict the validation data
00201     Array<double> z(test_c, 1);
00202     svm->model(test_d, z);
00203 
00204     // train a sigmoid on the validation data
00205 #ifdef SIG_E
00206     SigmoidModel sigmoid;
00207 #endif
00208 #ifdef SIG_P
00209     SimpleSigmoidModel sigmoid;
00210 #endif
00211     NegativeLogLikelihood nll;
00212     IRpropPlus rprop;
00213     rprop.init(sigmoid);
00214     for (i=0; i<100; i++)
00215     {
00216         rprop.optimize(sigmoid, nll, z, test_l);
00217 #ifdef SIG_E
00218         sigmoid.setParameter(1, 0.0);
00219         if (sigmoid.getParameter(0) > 0.0) sigmoid.setParameter(0, 0.0);
00220 #endif
00221 #ifdef SIG_P
00222         if (sigmoid.getParameter(0) < 0.0) sigmoid.setParameter(0, 0.0);
00223 #endif
00224     }
00225 
00226     // compute the derivative
00227     Array<double> p(test_c, 1);
00228     sigmoid.model(z, p);
00229 
00230     int b, bc = csvm->getParameterDimension();
00231     derivative.resize(bc, false);
00232     derivative = 0.0;
00233     Array<double> dz_dtheta;
00234     csvm->PrepareDerivative();
00235     for (i=0; i<test_c; i++)
00236     {
00237         // compute the derivative of the negative log likelihood
00238         double dL_dp;
00239         if (test_l(i, 0) > 0.0) dL_dp = -1.0 / p(i, 0);
00240         else dL_dp = -1.0 / (p(i, 0) - 1.0);
00241 
00242         // compute the derivative of the sigmoid
00243 #ifdef SIG_E
00244         double dp_dz = - sigmoid.getParameter(0) * p(i, 0) * (1.0 - p(i, 0));
00245 #endif
00246 #ifdef SIG_P
00247         double x = sigmoid.getParameter(0) * p(i, 0);
00248         double N = 1.0 + fabs(x);
00249         double dp_dz = sigmoid.getParameter(0) / (N * N);
00250 #endif
00251 
00252         // compute the derivative of the SVM
00253         csvm->modelDerivative(test_d[i], dz_dtheta);
00254 
00255         // total derivative = partial derivative
00256         for (b=0; b<bc; b++) derivative(b) += dL_dp * dp_dz * dz_dtheta(0, b);
00257     }
00258 
00259     // return the best negative log likelihood
00260     return nll.error(sigmoid, z, test_l);
00261 }