• Main Page
  • Related Pages
  • Classes
  • Files
  • Examples
  • File List
  • File Members

Svm.cpp

Go to the documentation of this file.
00001 //===========================================================================
00038 //===========================================================================
00039 
00040 #define _SCL_SECURE_NO_WARNINGS
00041 
00042 #include <math.h>
00043 #include <fstream>
00044 #include <iostream>
00045 #include <LinAlg/LinAlg.h>
00046 #include <LinAlg/VecMat.h>
00047 #include <ReClaM/Svm.h>
00048 #include <ReClaM/QuadraticProgram.h>
00049 
00050 
00051 using namespace std;
00052 
00053 
00055 
00056 
00057 SVM::SVM(KernelFunction* pKernel, bool bSignOutput)
00058 {
00059     kernel = pKernel;
00060     signOutput = bSignOutput;
00061 
00062     x = NULL;
00063     bOwnMemory = false;
00064 
00065     examples = 0;
00066     inputDimension = 0;
00067     outputDimension = 1;
00068 
00069     parameter.resize(1, false);
00070     parameter = 0.0;
00071 }
00072 
00073 SVM::SVM(KernelFunction* pKernel, const Array<double>& input, bool bSignOutput)
00074 {
00075     kernel = pKernel;
00076     signOutput = bSignOutput;
00077 
00078     x = NULL;
00079     bOwnMemory = false;
00080 
00081     examples = 0;
00082     inputDimension = 0;
00083     outputDimension = 1;
00084 
00085     SetTrainingData(input);
00086 }
00087 
00088 SVM::~SVM()
00089 {
00090     if (bOwnMemory) delete x;
00091 }
00092 
00093 
00094 void SVM::SetTrainingData(const Array<double>& input, bool copy)
00095 {
00096     if (bOwnMemory) delete x;
00097 
00098     if (copy)
00099     {
00100         x = new Array<double> (input);
00101         bOwnMemory = true;
00102     }
00103     else
00104     {
00105         x = &input;
00106         bOwnMemory = false;
00107     }
00108 
00109     examples = input.dim(0);
00110     inputDimension = input.dim(1);
00111 
00112     parameter.resize(examples + 1, false);
00113     parameter = 0.0;
00114 }
00115 
00116 void SVM::model(const Array<double>& input, Array<double>& output)
00117 {
00118     unsigned int i;
00119     double a;
00120     double alpha;
00121     if (input.ndim() == 1)
00122     {
00123         output.resize(1, false);
00124         a = parameter(examples);
00125         for (i = 0; i < examples; i++)
00126         {
00127             alpha = parameter(i);
00128             if (alpha != 0.0)
00129             {
00130                 a += alpha * kernel->eval((*x)[i], input);
00131             }
00132         }
00133         if (signOutput) output(0) = (a > 0.0) ? 1.0 : -1.0;
00134         else output(0) = a;
00135     }
00136     else if (input.ndim() == 2)
00137     {
00138         int j, jc = input.dim(0);
00139         output.resize(jc, 1, false);
00140         for (j = 0; j < jc; j++)
00141         {
00142             a = parameter(examples);
00143             const ArrayReference<double> ii = input[j];
00144             for (i = 0; i < examples; i++)
00145             {
00146                 alpha = parameter(i);
00147                 if (alpha != 0.0)
00148                 {
00149                     a += alpha * kernel->eval((*x)[i], ii);
00150                 }
00151             }
00152             if (signOutput) output(j, 0) = (a > 0.0) ? 1.0 : -1.0;
00153             else output(j, 0) = a;
00154         }
00155     }
00156     else throw SHARKEXCEPTION("[SVM::model] invalid dimension");
00157 }
00158 
00159 double SVM::model(const Array<double>& input)
00160 {
00161     unsigned int i;
00162     double a;
00163     double alpha;
00164     if (input.ndim() == 1)
00165     {
00166         a = parameter(examples);
00167         for (i = 0; i < examples; i++)
00168         {
00169             alpha = parameter(i);
00170             if (alpha != 0.0)
00171             {
00172                 a += alpha * kernel->eval((*x)[i], input);
00173             }
00174         }
00175     }
00176     else throw SHARKEXCEPTION("[SVM::model] invalid dimension");
00177     if (signOutput) return(a > 0.0) ? 1.0 : -1.0;
00178     else return a;
00179 }
00180 
00181 void SVM::modelDerivative(const Array<double>& input, Array<double>& derivative)
00182 {
00183     unsigned int i;
00184     double v;
00185     if (input.ndim() == 1)
00186     {
00187         derivative.resize(1, examples + 1, false);
00188         derivative = 0.0;
00189         for (i = 0; i < examples; i++)
00190         {
00191             v = kernel->eval((*x)[i], input);
00192             derivative(0, i) = v;
00193         }
00194         derivative(0, examples) = 1.0;
00195     }
00196     else throw SHARKEXCEPTION("[SVM::modelDerivative] invalid dimension");
00197 }
00198 
00199 void SVM::modelDerivative(const Array<double>& input, Array<double>& output, Array<double>& derivative)
00200 {
00201     unsigned int i;
00202     double a, v;
00203     if (input.ndim() == 1)
00204     {
00205         derivative.resize(1, examples + 1, false);
00206         derivative = 0.0;
00207         output.resize(1, false);
00208         a = parameter(examples);
00209         for (i = 0; i < examples; i++)
00210         {
00211             v = kernel->eval((*x)[i], input);
00212             a += parameter(i) * v;
00213             derivative(0, i) = v;
00214         }
00215         output(0) = a;
00216         derivative(0, examples) = 1.0;
00217     }
00218     else throw SHARKEXCEPTION("[SVM::modelDerivative] invalid dimension");
00219 }
00220 
00221 bool SVM::LoadSVMModel(std::istream& is)
00222 {
00223     char buffer[50];
00224     unsigned int t, d;
00225 
00226     // read the header line
00227     is.read(buffer, 21);
00228 
00229     buffer[21] = 0;
00230     if (strcmp(buffer, "Shark SVM model\r\nSV: ") != 0) return false;
00231     if (! is.good()) return false;
00232 
00233     // read the number of support vectors
00234     is >> examples;
00235 
00236     is.read(buffer, 7); buffer[7] = 0;
00237     if (strcmp(buffer, "\r\nDIM: ") != 0) return false;
00238     if (! is.good()) return false;
00239 
00240     // read the input space dimension
00241     is >> inputDimension;
00242 
00243     is.read(buffer, 10); buffer[10] = 0;
00244     if (strcmp(buffer, "\r\nkernel: ") != 0) return false;
00245     if (! is.good()) return false;
00246 
00247     // read the kernel parameters
00248     is >> (*kernel);
00249 
00250     is.read(buffer, 14); buffer[14] = 0;
00251     if (strcmp(buffer, "coefficients: ") != 0) return false;
00252     if (! is.good()) return false;
00253 
00254     // read alpha and b
00255     parameter.resize(examples + 1, false);
00256     for (t = 0; t < examples; t++)
00257     {
00258         is >> parameter(t);
00259         is.read(buffer, 1);
00260         if (buffer[0] != ' ') return false;
00261     }
00262 
00263     is >> parameter(examples);
00264 
00265     is.read(buffer, 2); buffer[2] = 0;
00266     if (strcmp(buffer, "\r\n") != 0) return false;
00267     if (! is.good()) return false;
00268 
00269     // read the support vectors
00270     Array<double>* sv = new Array<double> (examples, inputDimension);
00271     for (t = 0; t < examples; t++)
00272     {
00273         for (d = 0; d < inputDimension; d++)
00274         {
00275             is >> (*sv)(t, d);
00276             if (d < inputDimension - 1)
00277             {
00278                 is.read(buffer, 1);
00279                 if (buffer[0] != ' ') return false;
00280             }
00281         }
00282 
00283         is.read(buffer, 2); buffer[2] = 0;
00284         if (strcmp(buffer, "\r\n") != 0) return false;
00285 
00286         if (! is.good()) return false;
00287     }
00288 
00289     bOwnMemory = true;
00290     x = sv;
00291 
00292     return true;
00293 }
00294 
00295 bool SVM::SaveSVMModel(std::ostream& os)
00296 {
00297     unsigned int t, d;
00298     unsigned int T = 0;
00299     for (t = 0; t < examples; t++) if (parameter(t) != 0.0) T++;
00300 
00301     // write the header line
00302     os.write("Shark SVM model\r\nSV: ", 21);
00303 
00304     // write the number of support vectors
00305     os << T;
00306 
00307     os.write("\r\nDIM: ", 7);
00308     if (! os.good()) return false;
00309 
00310     // write the input space dimension
00311     os << inputDimension;
00312 
00313     os.write("\r\nkernel: ", 10);
00314     if (! os.good()) return false;
00315 
00316     // write the kernel parameters
00317     os << (*kernel);
00318 
00319     os.write("coefficients: ", 14);
00320     if (! os.good()) return false;
00321 
00322     // write alpha and b
00323     for (t = 0; t < examples; t++) if (parameter(t) != 0.0) os << parameter(t) << " ";
00324     os << parameter(examples) << "\r\n";
00325     if (! os.good()) return false;
00326 
00327     // write the support vectors
00328     for (t = 0; t < examples; t++)
00329     {
00330         if (parameter(t) != 0.0)
00331         {
00332             for (d = 0; d < inputDimension; d++)
00333             {
00334                 os << (*x)(t, d);
00335                 if (d < inputDimension - 1) os << " ";
00336             }
00337             os.write("\r\n", 2);
00338             if (! os.good()) return false;
00339         }
00340     }
00341 
00342     return true;
00343 }
00344 
00345 // static
00346 SVM* SVM::ImportLibsvmModel(std::istream& is)
00347 {
00348     char buffer[256];
00349     int status;
00350     int i;
00351 
00352     LinearKernel* linear = NULL;
00353     PolynomialKernel* poly = NULL;
00354     RBFKernel* rbf = NULL;
00355     KernelFunction* pKernel = NULL;
00356     double b = 0.0;
00357     int examples = -1;
00358     int dimension = -1;
00359     while (true)
00360     {
00361         status = ReadToken(is, buffer, sizeof(buffer), "\n");
00362         if (status > 1000) return NULL;
00363 
00364         if (memcmp(buffer, "svm_type ", 9) == 0)
00365         {
00366             if (strcmp(buffer + 9, "c_svc") != 0) return NULL;
00367         }
00368         else if (memcmp(buffer, "kernel_type ", 12) == 0)
00369         {
00370             if (strcmp(buffer + 12, "linear") == 0)
00371             {
00372                 linear = new LinearKernel();
00373                 pKernel = linear;
00374             }
00375             else if (strcmp(buffer + 12, "rbf") == 0)
00376             {
00377                 rbf = new RBFKernel(1.0);
00378                 pKernel = rbf;
00379             }
00380             else if (strcmp(buffer + 12, "polynomial") == 0)
00381             {
00382                 poly = new PolynomialKernel(1, 1.0);
00383                 pKernel = poly;
00384             }
00385             else return NULL;
00386         }
00387         else if (memcmp(buffer, "degree ", 7) == 0)
00388         {
00389             if (poly != NULL)
00390             {
00391                 poly->setParameter(0, atof(buffer + 7));
00392             }
00393         }
00394         else if (memcmp(buffer, "gamma ", 6) == 0)
00395         {
00396             if (rbf != NULL)
00397             {
00398                 rbf->setParameter(0, atof(buffer + 6));
00399             }
00400         }
00401         else if (memcmp(buffer, "coef0 ", 6) == 0)
00402         {
00403             if (poly != NULL)
00404             {
00405                 poly->setParameter(1, atof(buffer + 6));
00406             }
00407         }
00408         else if (memcmp(buffer, "nr_class ", 9) == 0)
00409         {
00410             if (strcmp(buffer + 9, "2") != 0) return NULL;
00411         }
00412         else if (memcmp(buffer, "total_sv ", 9) == 0)
00413         {
00414             examples = atoi(buffer + 9);
00415         }
00416         else if (memcmp(buffer, "rho ", 4) == 0)
00417         {
00418             b = atof(buffer + 4);
00419         }
00420         else if (memcmp(buffer, "label ", 6) == 0)
00421         {
00422             if (strcmp(buffer + 6, "1 -1") != 0) return NULL;
00423         }
00424         else if (strcmp(buffer, "SV") == 0)
00425         {
00426             break;
00427         }
00428         else
00429         {
00430             // just ignore unknown tags
00431         }
00432     }
00433     if (examples == -1) return NULL;
00434     if (pKernel == NULL) return NULL;
00435 
00436     // first pass: determine the data dimension
00437     int start = is.tellg();
00438     for (i = 0; i < examples; i++)
00439     {
00440         // read the coefficient
00441         status = ReadToken(is, buffer, sizeof(buffer), " ");
00442         if (status != ' ') return NULL;
00443 
00444         while (true)
00445         {
00446             // read the index
00447             status = ReadToken(is, buffer, sizeof(buffer), ":\n");
00448             if (status > 1000) return NULL;
00449             if (status == '\n') break;
00450             int index = atoi(buffer);
00451             if (index >= dimension) dimension = index + 1;
00452 
00453             // read the value
00454             status = ReadToken(is, buffer, sizeof(buffer), " \n");
00455             if (status > 1000) return NULL;
00456             if (status == '\n') break;
00457         }
00458     }
00459     if (dimension == -1) return NULL;
00460 
00461     // create the objects
00462     SVM* pSVM = new SVM(pKernel);
00463     Array<double>* sv = new Array<double> (examples, dimension);
00464     *sv = 0.0;
00465     pSVM->parameter.resize(examples + 1, false);
00466     pSVM->parameter(examples) = b;
00467     pSVM->examples = examples;
00468     pSVM->inputDimension = dimension;
00469 
00470     // second pass: read the data
00471     is.seekg(start, ios::beg);
00472     for (i = 0; i < examples; i++)
00473     {
00474         // read the coefficient
00475         status = ReadToken(is, buffer, sizeof(buffer), " ");
00476         if (status != ' ') return NULL;
00477         pSVM->parameter(i) = atof(buffer);
00478 
00479         while (true)
00480         {
00481             // read the index
00482             status = ReadToken(is, buffer, sizeof(buffer), ":\n");
00483             if (status > 1000) return NULL;
00484             if (status == '\n') break;
00485             int index = atoi(buffer);
00486             if (index >= dimension) dimension = index + 1;
00487 
00488             // read the value
00489             status = ReadToken(is, buffer, sizeof(buffer), " \n");
00490             if (status > 1000) return NULL;
00491             (*sv)(i, index) = atof(buffer);
00492             if (status == '\n') break;
00493         }
00494     }
00495 
00496     pSVM->x = sv;
00497     pSVM->bOwnMemory = true;
00498 
00499     return pSVM;
00500 }
00501 
00502 // static
00503 SVM* SVM::ImportSvmlightModel(std::istream& is)
00504 {
00505     char buffer[256];
00506     int status;
00507     KernelFunction* pKernel;
00508 
00509     // first line is a comment
00510     status = DiscardUntil(is, "\n");
00511     if (status != '\n') return NULL;
00512 
00513     // kernel type
00514     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00515     if (status > 1000) return NULL;
00516     if (status == '#') DiscardUntil(is, "\n");
00517     int kernel = atoi(buffer);
00518 
00519     // degree
00520     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00521     if (status > 1000) return NULL;
00522     if (status == '#') DiscardUntil(is, "\n");
00523     int degree = atoi(buffer);
00524 
00525     // gamma
00526     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00527     if (status > 1000) return NULL;
00528     if (status == '#') DiscardUntil(is, "\n");
00529     double gamma = atof(buffer);
00530 
00531     // poly coeff s
00532     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00533     if (status > 1000) return NULL;
00534     if (status == '#') DiscardUntil(is, "\n");
00535     double s = atof(buffer);
00536 
00537     // poly coeff c/r
00538     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00539     if (status > 1000) return NULL;
00540     if (status == '#') DiscardUntil(is, "\n");
00541     double r = atof(buffer);
00542 
00543     // string parameter u
00544     status = DiscardUntil(is, "\n");
00545     if (status > 1000) return NULL;
00546 
00547     // highest feature index
00548     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00549     if (status > 1000) return NULL;
00550     if (status == '#') DiscardUntil(is, "\n");
00551     int dimension = atoi(buffer) + 1;
00552 
00553     // number of training documents
00554     status = DiscardUntil(is, "\n");
00555     if (status > 1000) return NULL;
00556 
00557     // number of support vectors plus 1
00558     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00559     if (status > 1000) return NULL;
00560     if (status == '#') DiscardUntil(is, "\n");
00561     int examples = atoi(buffer) - 1;
00562 
00563     // threshold b
00564     status = ReadToken(is, buffer, sizeof(buffer), "#\n");
00565     if (status > 1000) return NULL;
00566     if (status == '#') DiscardUntil(is, "\n");
00567     double b = atof(buffer);
00568 
00569     // create the objects
00570     if (kernel == 0) pKernel = new LinearKernel();
00571     else if (kernel == 1)
00572     {
00573         if (s == 0.0) return NULL;
00574         pKernel = new PolynomialKernel(degree, r / s);
00575     }
00576     else if (kernel == 2) pKernel = new RBFKernel(gamma);
00577     else return NULL;
00578     SVM* pSVM = new SVM(pKernel);
00579     Array<double>* sv = new Array<double> (examples, dimension);
00580     *sv = 0.0;
00581     pSVM->parameter.resize(examples + 1, false);
00582     pSVM->parameter(examples) = b;
00583     pSVM->examples = examples;
00584     pSVM->inputDimension = dimension;
00585 
00586     // read the data
00587     int i, index;
00588     for (i = 0; i < examples; i++)
00589     {
00590         // read the coefficient
00591         status = ReadToken(is, buffer, sizeof(buffer), " ");
00592         if (status > 1000) return NULL;
00593         pSVM->parameter(i) = atof(buffer);
00594 
00595         while (true)
00596         {
00597             // read the index
00598             status = ReadToken(is, buffer, sizeof(buffer), ":#\n");
00599             if (status > 1000) return NULL;
00600             if (status == '#')
00601             {
00602                 if (DiscardUntil(is, "\n") > 1000) return NULL;
00603                 break;
00604             }
00605             else if (status == '\n') break;
00606             index = atoi(buffer);
00607 
00608             // read the value
00609             status = ReadToken(is, buffer, sizeof(buffer), " #\n");
00610             if (status > 1000) return NULL;
00611             (*sv)(i, index) = atof(buffer);
00612             if (status == '#')
00613             {
00614                 if (DiscardUntil(is, "\n") > 1000) return NULL;
00615                 break;
00616             }
00617             else if (status == '\n') break;
00618         }
00619     }
00620 
00621     pSVM->x = sv;
00622     pSVM->bOwnMemory = true;
00623 
00624     return pSVM;
00625 }
00626 
00627 void SVM::MakeSparse()
00628 {
00629     if (! bOwnMemory) return;
00630 
00631     // count support vectors and compute a map
00632     int i, ic = getExamples();
00633     std::vector<int> map;
00634     for (i=0; i<ic; i++)
00635     {
00636         if (parameter(i) != 0.0)
00637         {
00638             map.push_back(i);
00639         }
00640     }
00641     int s, sc = map.size();
00642 
00643     // copy relevant coefficients and training data
00644     int dim = x->dim(1);
00645     Array<double> p(sc + 1);
00646     Array<double>* xx = new Array<double>(sc, dim);
00647 
00648     for (s=0; s<sc; s++)
00649     {
00650         i = map[s];
00651         (*xx)[s] = (*x)[i];
00652         p(s) = parameter(i);
00653     }
00654     p(sc) = parameter(ic);
00655 
00656     parameter = p;
00657     examples = sc;
00658     delete x;
00659     x = xx;
00660 }
00661 
00662 // static
00663 int SVM::ReadToken(std::istream& is, char* buffer, int maxlength, const char* separators)
00664 {
00665     int i;
00666     int s, sc = strlen(separators);
00667     char c;
00668     for (i = 0; i < maxlength - 1; i++)
00669     {
00670         if (is.readsome(&c, 1) == 0) return 1001;
00671         if (is.bad()) return 1002;
00672         for (s = 0; s < sc; s++) if (separators[s] == c) break;
00673         if (s < sc)
00674         {
00675             buffer[i] = 0;
00676             return separators[s];
00677         }
00678         buffer[i] = c;
00679     }
00680     buffer[i] = 0;
00681     return 1003;
00682 }
00683 
00684 // static
00685 int SVM::DiscardUntil(std::istream& is, const char* separators)
00686 {
00687     int s, sc = strlen(separators);
00688     char c;
00689     while (true)
00690     {
00691         if (is.readsome(&c, 1) == 0) return 1001;
00692         if (is.bad()) return 1002;
00693         for (s = 0; s < sc; s++) if (separators[s] == c) return c;
00694     }
00695 }
00696 
00697 
00699 
00700 
00701 MultiClassSVM::MultiClassSVM(KernelFunction* pKernel, unsigned int numberOfClasses, bool bOrthogonalVectors, bool bNumberOutput)
00702 {
00703     this->kernel = pKernel;
00704     this->classes = numberOfClasses;
00705     this->examples = 0;
00706     this->bOwnMemory = false;
00707     this->numberOutput = bNumberOutput;
00708 
00709     prototypes.resize(classes, classes, false);
00710     if (bOrthogonalVectors)
00711     {
00712         prototypes = 0.0;
00713         unsigned int i;
00714         for (i=0; i<classes; i++) prototypes(i, i) = 1.0;
00715     }
00716     else
00717     {
00718         throw SHARKEXCEPTION("[MultiClassSVM::MultiClassSVM] simplex vector initialization not implemented yet");
00719     }
00720 
00721     inputDimension = 0;
00722     outputDimension = (numberOutput ? 1 : classes);
00723     x = NULL;
00724 }
00725 
00726 MultiClassSVM::MultiClassSVM(KernelFunction* pKernel, Array<double> prototypes, bool bNumberOutput)
00727 {
00728     SIZE_CHECK(prototypes.ndim() == 2);
00729     SIZE_CHECK(prototypes.dim(0) == prototypes.dim(1));
00730 
00731     this->kernel = pKernel;
00732     this->classes = prototypes.dim(0);
00733     this->examples = 0;
00734     this->bOwnMemory = false;
00735     this->numberOutput = bNumberOutput;
00736     this->prototypes = prototypes;
00737 
00738     inputDimension = 0;
00739     outputDimension = (numberOutput ? 1 : classes);
00740     x = NULL;
00741 }
00742 
00743 MultiClassSVM::~MultiClassSVM()
00744 {
00745     if (bOwnMemory) delete x;
00746 }
00747 
00748 
00749 void MultiClassSVM::SetTrainingData(const Array<double>& input, bool copy)
00750 {
00751     SIZE_CHECK(input.ndim() == 2);
00752     if (bOwnMemory) delete x;
00753 
00754     examples = input.dim(0);
00755     inputDimension = input.dim(1);
00756 
00757     parameter.resize(examples * classes + classes, false);
00758     parameter = 0.0;
00759 
00760     if (copy)
00761     {
00762         x = new Array<double>(input);
00763         bOwnMemory = true;
00764     }
00765     else
00766     {
00767         x = &input;
00768         bOwnMemory = false;
00769     }
00770 }
00771 
00772 void MultiClassSVM::model(const Array<double>& input, Array<double>& output)
00773 {
00774     if (numberOutput)
00775     {
00776         Array<double> tmp(classes);
00777         if (input.ndim() == 1)
00778         {
00779             output.resize(1, false);
00780             Predict(input, tmp);
00781             output(0) = VectorToClass(tmp);
00782         }
00783         else if (input.ndim() == 2)
00784         {
00785             unsigned int i, ic = input.dim(0);
00786             output.resize(ic, 1, false);
00787             for (i=0; i<ic; i++)
00788             {
00789                 Predict(input[i], tmp);
00790                 output(i, 0) = VectorToClass(tmp);
00791             }
00792         }
00793         else throw SHARKEXCEPTION("[MultiClassSVM::model] invalid dimension");
00794     }
00795     else
00796     {
00797         if (input.ndim() == 1)
00798         {
00799             output.resize(classes, false);
00800             Predict(input, output);
00801         }
00802         else if (input.ndim() == 2)
00803         {
00804             unsigned int i, ic = input.dim(0);
00805             output.resize(ic, classes, false);
00806             for (i=0; i<ic; i++)
00807             {
00808                 Predict(input[i], output[i]);
00809             }
00810         }
00811         else throw SHARKEXCEPTION("[MultiClassSVM::model] invalid dimension");
00812     }
00813 }
00814 
00815 unsigned int MultiClassSVM::model(const Array<double>& input)
00816 {
00817     SIZE_CHECK(input.ndim() == 1);
00818     SIZE_CHECK(input.dim(0) == inputDimension);
00819 
00820     Array<double> tmp(classes);
00821     Predict(input, tmp);
00822     return VectorToClass(tmp);
00823 }
00824 
00825 void MultiClassSVM::Normalize()
00826 {
00827     unsigned int c;
00828     unsigned int i, j;
00829     for (c=0; c<classes; c++)
00830     {
00831         // compute the squared norm
00832         double norm2 = 0.0;
00833         for (i=0; i<examples; i++)
00834         {
00835             for (j=0; j<i; j++)
00836             {
00837                 double k = kernel->eval((*x)[i], (*x)[j]);
00838                 norm2 += 2.0 * parameter(classes*i+c) * parameter(classes*j+c) * k;
00839             }
00840             double k = kernel->eval((*x)[i], (*x)[i]);
00841             norm2 += parameter(classes*i+c) * parameter(classes*i+c) * k;
00842         }
00843 
00844         // normalize
00845         double norm = sqrt(norm2);
00846         for (i=0; i<examples; i++) parameter(classes*i+c) /= norm;
00847     }
00848 }
00849 
00850 unsigned int MultiClassSVM::VectorToClass(const Array<double>& v)
00851 {
00852     SIZE_CHECK(v.ndim() == 1);
00853     SIZE_CHECK(v.dim(0) == classes);
00854 
00855     unsigned int c, i, best = 0;
00856     double scp, bestscp = -1e100;
00857     for (c=0; c<classes; c++)
00858     {
00859         scp = 0.0;
00860         for (i=0; i<classes; i++)
00861         {
00862             scp += v(i) * prototypes(c, i);
00863         }
00864         if (scp > bestscp)
00865         {
00866             bestscp = scp;
00867             best = c;
00868         }
00869     }
00870 
00871     return best;
00872 }
00873 
00874 // We assume that the output array has correct size.
00875 void MultiClassSVM::Predict(const Array<double>& input, Array<double>& output)
00876 {
00877     unsigned int c, e, m;
00878     output = 0.0;
00879 
00880     for (e=0; e<examples; e++)
00881     {
00882         double k = kernel->eval((*x)[e], input);
00883         for (c=0; c<classes; c++)
00884         {
00885             for (m=0; m<classes; m++)
00886             {
00887                 output(m) += getAlpha(e, c) * k * prototypes(c, m);
00888             }
00889         }
00890     }
00891 
00892     for (c=0; c<classes; c++)
00893     {
00894         for (m=0; m<classes; m++)
00895         {
00896             output(m) += getOffset(c) * prototypes(c, m);
00897         }
00898     }
00899 }
00900 
00901 // We assume that the output array has correct size.
00902 void MultiClassSVM::Predict(const Array<double>& input, ArrayReference<double> output)
00903 {
00904     unsigned int c, e, m;
00905     output = 0.0;
00906 
00907     for (e=0; e<examples; e++)
00908     {
00909         double k = kernel->eval((*x)[e], input);
00910         for (c=0; c<classes; c++)
00911         {
00912             for (m=0; m<classes; m++)
00913             {
00914                 output(m) += getAlpha(e, c) * k * prototypes(c, m);
00915             }
00916         }
00917     }
00918 
00919     for (c=0; c<classes; c++)
00920     {
00921         for (m=0; m<classes; m++)
00922         {
00923             output(m) += getOffset(c) * prototypes(c, m);
00924         }
00925     }
00926 }
00927 
00928 
00930 
00931 
00932 MetaSVM::MetaSVM(SVM* pSVM, unsigned int numberOfHyperParameters)
00933 {
00934     svm = pSVM;
00935     kernel = pSVM->getKernel();
00936     hyperparameters = numberOfHyperParameters;
00937 
00938     unsigned int k, kc = kernel->getParameterDimension();
00939     parameter.resize(hyperparameters + kc, false);
00940     parameter = 0.0;
00941     for (k = 0; k < kc; k++) parameter(k + hyperparameters) = kernel->getParameter(k);
00942 }
00943 
00944 MetaSVM::MetaSVM(MultiClassSVM* pSVM, unsigned int numberOfHyperParameters)
00945 {
00946     svm = pSVM;
00947     kernel = pSVM->getKernel();
00948     hyperparameters = numberOfHyperParameters;
00949 
00950     unsigned int k, kc = kernel->getParameterDimension();
00951     parameter.resize(hyperparameters + kc, false);
00952     parameter = 0.0;
00953     for (k = 0; k < kc; k++) parameter(k + hyperparameters) = kernel->getParameter(k);
00954 }
00955 
00956 MetaSVM::~MetaSVM()
00957 {
00958 }
00959 
00960 
00961 void MetaSVM::model(const Array<double>& input, Array<double>& output)
00962 {
00963     svm->model(input, output);
00964 }
00965 
00966 void MetaSVM::setParameter(unsigned int index, double value)
00967 {
00968     Model::setParameter(index, value);
00969     if (index >= hyperparameters)
00970     {
00971         // set a kernel parameter
00972         kernel->setParameter(index - hyperparameters, value);
00973     }
00974 }
00975 
00976 bool MetaSVM::isFeasible()
00977 {
00978     return (kernel->isFeasible());
00979 }
00980 
00981 
00983 
00984 
00985 C_SVM::C_SVM(SVM* pSVM, double Cplus, double Cminus, bool norm2, bool unconst)
00986 : MetaSVM(pSVM, 1)
00987 {
00988     C_ratio = Cminus / Cplus;
00989     norm2penalty = norm2;
00990     exponential = unconst;
00991 
00992     setParameter(0, (exponential) ? log(Cplus) : Cplus);
00993 }
00994 
00995 C_SVM::~C_SVM()
00996 {
00997 }
00998 
00999 
01000 void C_SVM::PrepareDerivative()
01001 {
01002     if (norm2penalty) throw SHARKEXCEPTION("[C_SVM::PrepareDerivative] PrepareDerivative works only in the 1-norm case.");
01003 
01004     SVM* svm = getSVM();
01005     const Array<double>& pt = svm->getPoints();
01006     int a, ac = svm->getExamples();
01007     int s, sc = svm->getParameterDimension();
01008     int k, kc = kernel->getParameterDimension();
01009     int pc = getParameterDimension();
01010     Array<double> der;
01011 
01012     alpha_b_Derivative.resize(sc, pc, false);
01013     alpha_b_Derivative = 0.0;
01014 
01015     // determine free and bounded support vectors
01016     std::vector<int> fi;
01017     std::vector<int> ri;
01018     for (a=0; a<ac; a++)
01019     {
01020         double alpha = svm->getAlpha(a);
01021         if (alpha != 0.0)
01022         {
01023             if ((alpha == -C_minus) || (alpha == C_plus)) ri.push_back(a);
01024             else fi.push_back(a);
01025         }
01026     }
01027     int f, fc = fi.size();
01028     int r, rc = ri.size();
01029     fi.push_back(ac);
01030 
01031     Array2D<double> H(fc+1, fc+1);
01032     Array2D<double> H_inv(fc+1, fc+1);
01033     Array2D<double> R(fc+1, rc);
01034     Array<double> dH(kc, fc+1, fc+1);
01035     Array<double> dR(kc, fc+1, rc);
01036     Array<double> alpha_f(fc+1);
01037     Array<double> alpha_r(rc);
01038     for (f=0; f<fc; f++) alpha_f(f) = svm->getAlpha(fi[f]);
01039     alpha_f(fc) = svm->getOffset();
01040     for (r=0; r<rc; r++) alpha_r(r) = svm->getAlpha(ri[r]);
01041 
01042     // initialize H and dH
01043     for (f=0; f<fc; f++)
01044     {
01045         int f2;
01046         for (f2=0; f2<f; f2++)
01047         {
01048             H(f, f2) = H(f2, f) = kernel->evalDerivative(pt[fi[f]], pt[fi[f2]], der);
01049             for (k=0; k<kc; k++)
01050             {
01051                 dH(k, f, f2) = dH(k, f2, f) = der(k);
01052             }
01053         }
01054         H(f, f) = kernel->evalDerivative(pt[fi[f]], pt[fi[f]], der);
01055         for (k=0; k<kc; k++)
01056         {
01057             dH(k, f, f) = der(k);
01058         }
01059         H(fc, f) = H(f, fc) = 1.0;
01060         for (k=0; k<kc; k++) dH(k, fc, f) = dH(k, f, fc) = 0.0;
01061     }
01062     H(fc, fc) = 0.0;
01063     for (k=0; k<kc; k++) dH(k, fc, fc) = 0.0;
01064 
01065     // compute H_inv
01066 //  invertSymm(H_inv, H);
01067     g_inverse(H, H_inv, 200, 1e-10, true);
01068 
01069     // initialize R and dR
01070     for (r=0; r<rc; r++)
01071     {
01072         for (f=0; f<fc; f++)
01073         {
01074             R(f, r) = kernel->evalDerivative(pt[fi[f]], pt[ri[r]], der);
01075             for (k=0; k<kc; k++)
01076             {
01077                 dR(k, f, r) = der(k);
01078             }
01079         }
01080         R(fc, r) = 1.0;
01081         for (k=0; k<kc; k++) dR(k, fc, r) = 0.0;
01082     }
01083 
01084     der.resize(fc+1, false);
01085 
01086     // compute the derivative of (\alpha, b) w.r.t. C
01087     if (rc > 0)
01088     {
01089         Array<double> y(rc);
01090         for (r=0; r<rc; r++) y(r) = ((alpha_r(r) > 0.0) ? 1.0 : -1.0);
01091         Array<double> Ry(fc+1);
01092         matColVec(Ry, R, y);
01093         matColVec(der, H_inv, Ry);
01094         for (f=0; f<=fc; f++) alpha_b_Derivative(fi[f], 0) = -der(f);
01095         for (r=0; r<rc; r++) alpha_b_Derivative(ri[r], 0) = ((alpha_r(r) > 0.0) ? 1.0 : -C_ratio);
01096         if (exponential)
01097         {
01098             for (s=0; s<sc; s++) alpha_b_Derivative(s, 0) *= C_plus;
01099         }
01100     }
01101 
01102     // compute the derivative of (\alpha, b) w.r.t. the kernel parameters
01103     for (k=0; k<kc; k++)
01104     {
01105         Array<double> sum(fc+1);
01106         Array<double> tmp(fc+1);
01107         matColVec(sum, dH[k], alpha_f);
01108         matColVec(tmp, dR[k], alpha_r);
01109         for (f=0; f<=fc; f++) sum(f) += tmp(f);
01110         matColVec(der, H_inv, sum);
01111         for (f=0; f<=fc; f++) alpha_b_Derivative(fi[f], k+1) = -der(f);
01112     }
01113 }
01114 
01115 void C_SVM::modelDerivative(const Array<double>& input, Array<double>& derivative)
01116 {
01117     SVM* svm = getSVM();
01118     const Array<double>& pt = svm->getPoints();
01119     int k, kc = kernel->getParameterDimension();
01120     int a, ac = svm->getExamples();
01121     int pc = getParameterDimension();
01122 
01123     derivative.resize(1, pc, false);
01124     for (k=0; k<=kc; k++) derivative(0, k) = alpha_b_Derivative(ac, k);
01125 
01126     Array<double> der(pc);
01127 
01128     for (a=0; a<ac; a++)
01129     {
01130         double alpha = svm->getAlpha(a);
01131         double ker = kernel->evalDerivative(input, pt[a], der);
01132         derivative(0, 0) += ker * alpha_b_Derivative(a, 0);
01133         for (k=0; k<kc; k++)
01134         {
01135             derivative(0, k+1) += alpha * der(k) + ker * alpha_b_Derivative(a, k+1);
01136         }
01137     }
01138 }
01139 
01140 void C_SVM::setParameter(unsigned int index, double value)
01141 {
01142     MetaSVM::setParameter(index, value);
01143     if (index == 0)
01144     {
01145         // set C_+ and C_-
01146         if (exponential)
01147         {
01148             value = exp(value);
01149         }
01150         C_plus = value;
01151         C_minus = C_ratio * value;
01152     }
01153 }
01154 
01155 bool C_SVM::isFeasible()
01156 {
01157     return (C_plus > 0.0 && MetaSVM::isFeasible());
01158 }
01159 
01160 
01162 
01163 
01164 OneClassSVM::OneClassSVM(SVM* pSVM, double fractionNu)
01165 : MetaSVM(pSVM, 1)
01166 {
01167     setParameter(0, fractionNu);
01168 }
01169 
01170 OneClassSVM::~OneClassSVM()
01171 {
01172 }
01173 
01174 
01175 void OneClassSVM::setParameter(unsigned int index, double value)
01176 {
01177     MetaSVM::setParameter(index, value);
01178     if (index == 0)
01179     {
01180         // There is no way to set this value correctly if this function is calles by the standard constructor.
01181         // That is, because of the dependency on the number of training examples.
01182 
01183         // The correct initialization takes place in SVM_Optimizer::optimize(...)
01184         nu = value;
01185     }
01186 }
01187 
01188 bool OneClassSVM::isFeasible()
01189 {
01190     return (nu > 0.0 && MetaSVM::isFeasible());
01191 }
01192 
01193 
01195 
01196 
01197 Epsilon_SVM::Epsilon_SVM(SVM* pSVM, double C, double epsilon, bool unconst)
01198 : MetaSVM(pSVM, 2)
01199 {
01200     this->C = C;
01201     this->epsilon = epsilon;
01202 
01203     exponential = unconst;
01204 
01205     setParameter(0, (exponential) ? log(C) : C);
01206     setParameter(1, (exponential) ? log(epsilon) : epsilon);
01207 }
01208 
01209 Epsilon_SVM::~Epsilon_SVM()
01210 {
01211 }
01212 
01213 
01214 void Epsilon_SVM::setParameter(unsigned int index, double value)
01215 {
01216     if (index < hyperparameters)
01217     {
01218         Model::setParameter(index, value);
01219         if (index == 0)
01220         {
01221             if (exponential) C = exp(value); else C = value;
01222         }
01223         else
01224         {
01225             if (exponential) epsilon = exp(value); else epsilon = value;
01226         }
01227     }
01228     else
01229     {
01230         MetaSVM::setParameter(index, value);
01231     }
01232 }
01233 
01234 bool Epsilon_SVM::isFeasible()
01235 {
01236     return (C > 0.0 && epsilon > 0.0 && MetaSVM::isFeasible());
01237 }
01238 
01239 
01241 
01242 
01243 RegularizationNetwork::RegularizationNetwork(SVM* pSVM, double gamma)
01244 : MetaSVM(pSVM, 1)
01245 {
01246     parameter(0) = gamma;
01247 }
01248 
01249 RegularizationNetwork::~RegularizationNetwork()
01250 {
01251 }
01252 
01253 
01254 bool RegularizationNetwork::isFeasible()
01255 {
01256     return (parameter(0) > 0.0 && MetaSVM::isFeasible());
01257 }
01258 
01259 
01261 
01262 
01263 AllInOneMcSVM::AllInOneMcSVM(MultiClassSVM* pSVM, double C)
01264 : MetaSVM(pSVM, 1)
01265 {
01266     parameter(0) = C;
01267 }
01268 
01269 AllInOneMcSVM::~AllInOneMcSVM()
01270 {
01271 }
01272 
01273 
01274 bool AllInOneMcSVM::isFeasible()
01275 {
01276     return (parameter(0) > 0.0 && MetaSVM::isFeasible());
01277 }
01278 
01279 
01281 
01282 
01283 CrammerSingerMcSVM::CrammerSingerMcSVM(MultiClassSVM* pSVM, double beta)
01284 : MetaSVM(pSVM, 1)
01285 {
01286     parameter(0) = beta;
01287 }
01288 
01289 CrammerSingerMcSVM::~CrammerSingerMcSVM()
01290 {
01291 }
01292 
01293 
01294 bool CrammerSingerMcSVM::isFeasible()
01295 {
01296     return (parameter(0) > 0.0 && MetaSVM::isFeasible());
01297 }
01298 
01299 
01301 
01302 
01303 OVAMcSVM::OVAMcSVM(MultiClassSVM* pSVM, double C)
01304 : MetaSVM(pSVM, 1)
01305 {
01306     parameter(0) = C;
01307 }
01308 
01309 OVAMcSVM::~OVAMcSVM()
01310 {
01311 }
01312 
01313 
01314 bool OVAMcSVM::isFeasible()
01315 {
01316     return (parameter(0) > 0.0 && MetaSVM::isFeasible());
01317 }
01318 
01319 
01321 
01322 
01323 OCCMcSVM::OCCMcSVM(MultiClassSVM* pSVM, double C)
01324 : MetaSVM(pSVM, 1)
01325 {
01326     parameter(0) = C;
01327 }
01328 
01329 OCCMcSVM::~OCCMcSVM()
01330 {
01331 }
01332 
01333 
01334 bool OCCMcSVM::isFeasible()
01335 {
01336     return (parameter(0) > 0.0 && MetaSVM::isFeasible());
01337 }
01338 
01339 
01341 
01342 
01343 SVM_Optimizer::SVM_Optimizer()
01344 {
01345     solver = NULL;
01346     matrix = NULL;
01347     cache = NULL;
01348 
01349     printInfo = false;
01350     accuracy = 0.001;
01351     cacheMB = 100;
01352     maxIter = -1;
01353     maxSeconds = -1;
01354 }
01355 
01356 SVM_Optimizer::~SVM_Optimizer()
01357 {
01358     if (solver != NULL)
01359     {
01360         delete solver;
01361         solver = NULL;
01362     }
01363     if (matrix != NULL)
01364     {
01365         delete matrix;
01366         matrix = NULL;
01367     }
01368     if (cache != NULL)
01369     {
01370         delete cache;
01371         cache = NULL;
01372     }
01373 }
01374 
01375 
01376 void SVM_Optimizer::init(Model& model)
01377 {
01378     C_SVM* csvm = dynamic_cast<C_SVM*>(&model);
01379     Epsilon_SVM* esvm = dynamic_cast<Epsilon_SVM*>(&model);
01380     OneClassSVM* OCsvm = dynamic_cast<OneClassSVM*>(&model);
01381     RegularizationNetwork* rn = dynamic_cast<RegularizationNetwork*>(&model);
01382     AllInOneMcSVM* aio = dynamic_cast<AllInOneMcSVM*>(&model);
01383     CrammerSingerMcSVM* cs = dynamic_cast<CrammerSingerMcSVM*>(&model);
01384     OVAMcSVM* ova = dynamic_cast<OVAMcSVM*>(&model);
01385     OCCMcSVM* bc = dynamic_cast<OCCMcSVM*>(&model);
01386 
01387     if (csvm != NULL)
01388     {
01389         if (csvm->is2norm()) mode = eC2;
01390         else mode = eC1;
01391         Cplus = csvm->get_Cplus();
01392         Cminus = csvm->get_Cminus();
01393     }
01394     else if (esvm != NULL)
01395     {
01396         mode = eEpsilon;
01397         C = esvm->get_C();
01398         epsilon = esvm->get_epsilon();
01399     }
01400     else if (OCsvm != NULL)
01401     {
01402         mode = e1Class;
01403         fractionOfOutliers = OCsvm->getNu();
01404     }
01405     else if (rn != NULL)
01406     {
01407         mode = eRegularizationNetwork;
01408         gamma = rn->get_gamma();
01409     }
01410     else if (aio != NULL)
01411     {
01412         mode = eAllInOne;
01413         C = aio->get_C();
01414     }
01415     else if (cs != NULL)
01416     {
01417         mode = eCrammerSinger;
01418         beta = cs->get_beta();
01419     }
01420     else if (ova != NULL)
01421     {
01422         mode = eOVA;
01423         C = ova->get_C();
01424     }
01425     else if (bc != NULL)
01426     {
01427         mode = eOCC;
01428         C = bc->get_C();
01429     }
01430     else throw SHARKEXCEPTION("[SVM_Optimizer::init] The model is not a valid support vector machine meta model.");
01431 
01432     if (solver != NULL)
01433     {
01434         delete solver;
01435         solver = NULL;
01436     }
01437     if (matrix != NULL)
01438     {
01439         delete matrix;
01440         matrix = NULL;
01441     }
01442     if (cache != NULL)
01443     {
01444         delete cache;
01445         cache = NULL;
01446     }
01447 }
01448 
01449 double SVM_Optimizer::optimize(Model& model, ErrorFunction& error, const Array<double>& input, const Array<double>& target)
01450 {
01451     switch (mode)
01452     {
01453         case eC1:
01454         case eC2:
01455         case eEpsilon:
01456         case eNu:
01457         case eRegularizationNetwork:
01458         case e1Class:
01459         {
01460             SVM* svm = dynamic_cast<SVM*>(&model);
01461             if (svm == NULL)
01462             {
01463                 MetaSVM* meta = dynamic_cast<MetaSVM*>(&model);;
01464                 if (meta != NULL) svm = meta->getSVM();
01465             }
01466             if (svm == NULL) throw SHARKEXCEPTION("[SVM_Optimizer::optimize] The model is not a valid SVM or SVM meta model.");
01467 
01468             return optimize(*svm, input, target);
01469         }
01470 
01471         case eAllInOne:
01472         case eCrammerSinger:
01473         case eOVA:
01474         case eOCC:
01475         {
01476             MultiClassSVM* svm = dynamic_cast<MultiClassSVM*>(&model);
01477             if (svm == NULL)
01478             {
01479                 MetaSVM* meta = dynamic_cast<MetaSVM*>(&model);;
01480                 if (meta != NULL) svm = meta->getMultiClassSVM();
01481             }
01482             if (svm == NULL) throw SHARKEXCEPTION("[SVM_Optimizer::optimize] The model is not a valid MultiClassSVM or SVM meta model.");
01483 
01484             optimize(*svm, input, target);
01485             return 0.0;
01486         }
01487 
01488         default:
01489         {
01490             throw SHARKEXCEPTION("[SVM_Optimizer::optimize] call init(...) before optimize(...)");
01491             return 0.0;
01492         }
01493     }
01494 }
01495 
01496 double SVM_Optimizer::optimize(SVM& model, const Array<double>& input, const Array<double>& target, bool copy)
01497 {
01498     if (solver != NULL)
01499     {
01500         delete solver;
01501         solver = NULL;
01502     }
01503     if (matrix != NULL)
01504     {
01505         delete matrix;
01506         matrix = NULL;
01507     }
01508     if (cache != NULL)
01509     {
01510         delete cache;
01511         cache = NULL;
01512     }
01513 
01514     unsigned int e, examples = input.dim(0);
01515 
01516     model.SetTrainingData(input, copy);
01517     KernelFunction* kernel = model.getKernel();
01518 
01519     Array<double> alpha(examples);
01520     double b = 0.0;
01521     double ret = 0.0;
01522 
01523     if (mode == eC1)
01524     {
01525         // C-SVM with 1-norm penalty
01526         Array<double> linear(examples);
01527         Array<double> lower(examples);
01528         Array<double> upper(examples);
01529         for (e = 0; e < examples; e++)
01530         {
01531             alpha(e) = model.getParameter(e);
01532             linear(e) = target(e, 0);
01533             if (target(e, 0) > 0.0)
01534             {
01535                 lower(e) = 0.0;
01536                 upper(e) = Cplus;
01537             }
01538             else
01539             {
01540                 lower(e) = -Cminus;
01541                 upper(e) = 0.0;
01542             }
01543         }
01544 
01545         matrix = new KernelMatrix(kernel, input);
01546         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
01547 
01548         QpSvmDecomp* solver = new QpSvmDecomp(*cache);
01549         this->solver = solver;
01550         solver->setVerbose(printInfo);
01551         solver->setMaxIterations(maxIter);
01552         solver->setMaxSeconds(maxSeconds);
01553 
01554         ret = solver->Solve(linear, lower, upper, alpha, accuracy);
01555 
01556         // computation of b
01557         Array<double> gradient;
01558         solver->getGradient(gradient);
01559         double lowerBound = -1e100;
01560         double upperBound = 1e100;
01561         double sum = 0.0;
01562         unsigned int freeVars = 0;
01563         double value;
01564         for (e = 0; e < examples; e++)
01565         {
01566             value = gradient(e);
01567             if (alpha(e) == lower(e))
01568             {
01569                 if (value > lowerBound) lowerBound = value;
01570             }
01571             else if (alpha(e) == upper(e))
01572             {
01573                 if (value < upperBound) upperBound = value;
01574             }
01575             else
01576             {
01577                 sum += value;
01578                 freeVars++;
01579             }
01580         }
01581 
01582         if (freeVars > 0)
01583             b = sum / freeVars;                     // stabilized exact value
01584         else
01585             b = 0.5 * (lowerBound + upperBound);    // best estimate
01586     }
01587     else if (mode == eC2)
01588     {
01589         // C-SVM with 2-norm penalty
01590         Array<double> linear(examples);
01591         Array<double> diag(examples);
01592         Array<double> lower(examples);
01593         Array<double> upper(examples);
01594         for (e = 0; e < examples; e++)
01595         {
01596             alpha(e) = model.getParameter(e);
01597             linear(e) = target(e, 0);
01598             if (target(e, 0) > 0.0)
01599             {
01600                 diag(e) = 1.0 / Cplus;
01601                 lower(e) = 0.0;
01602                 upper(e) = 1e100;
01603             }
01604             else
01605             {
01606                 diag(e) = 1.0 / Cminus;
01607                 lower(e) = -1e100;
01608                 upper(e) = 0.0;
01609             }
01610         }
01611 
01612         matrix = new RegularizedKernelMatrix(kernel, input, diag);
01613         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
01614 
01615         QpSvmDecomp* solver = new QpSvmDecomp(*cache);
01616         this->solver = solver;
01617         solver->setVerbose(printInfo);
01618         solver->setMaxIterations(maxIter);
01619         solver->setMaxSeconds(maxSeconds);
01620 
01621         ret = solver->Solve(linear, lower, upper, alpha, accuracy);
01622 
01623         // computation of b
01624         Array<double> gradient;
01625         solver->getGradient(gradient);
01626         double lowerBound = -1e100;
01627         double upperBound = 1e100;
01628         double sum = 0.0;
01629         unsigned int freeVars = 0;
01630         double value;
01631         for (e = 0; e < examples; e++)
01632         {
01633             value = gradient(e);
01634             if (alpha(e) == lower(e))
01635             {
01636                 if (value > lowerBound) lowerBound = value;
01637             }
01638             else if (alpha(e) == upper(e))
01639             {
01640                 if (value < upperBound) upperBound = value;
01641             }
01642             else
01643             {
01644                 sum += value;
01645                 freeVars++;
01646             }
01647         }
01648 
01649         if (freeVars > 0)
01650             b = sum / freeVars;                     // stabilized exact value
01651         else
01652             b = 0.5 * (lowerBound + upperBound);    // best estimate
01653     }
01654     else if (mode == eEpsilon)
01655     {
01656         // epsilon-SVM
01657         Array<double> solution(2*examples);
01658         Array<double> linear(2*examples);
01659         Array<double> lower(2*examples);
01660         Array<double> upper(2*examples);
01661         for (e = 0; e < examples; e++)
01662         {
01663             double p = model.getParameter(e);
01664             if (p >= 0.0)
01665             {
01666                 solution(e) = p;
01667                 solution(e + examples) = 0.0;
01668             }
01669             else
01670             {
01671                 solution(e) = 0.0;
01672                 solution(e + examples) = -p;
01673             }
01674             linear(e) = target(e, 0) - epsilon;
01675             linear(e + examples) = target(e, 0) + epsilon;
01676             lower(e) = 0.0;
01677             lower(e + examples) = -C;
01678             upper(e) = C;
01679             upper(e + examples) = 0.0;
01680         }
01681 
01682         KernelMatrix* kernelmatrix = new KernelMatrix(kernel, input);
01683         matrix = new QPMatrix2(kernelmatrix);
01684         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
01685 
01686         QpSvmDecomp* solver = new QpSvmDecomp(*cache);
01687         this->solver = solver;
01688         solver->setVerbose(printInfo);
01689         solver->setMaxIterations(maxIter);
01690         solver->setMaxSeconds(maxSeconds);
01691 
01692         ret = solver->Solve(linear, lower, upper, solution);
01693         for (e = 0; e < examples; e++)
01694         {
01695             alpha(e) = solution(e) + solution(e + examples);
01696         }
01697 
01698         // computation of b
01699         Array<double> gradient;
01700         solver->getGradient(gradient);
01701         double lowerBound = -1e100;
01702         double upperBound = 1e100;
01703         double sum = 0.0;
01704         unsigned int freeVars = 0;
01705         double value;
01706         for (e = 0; e < examples; e++)
01707         {
01708             if (solution(e) > 0.0)
01709             {
01710                 value = gradient(e);
01711                 if (solution(e) < C)
01712                 {
01713                     sum += value;
01714                     freeVars++;
01715                 }
01716                 else
01717                 {
01718                     if (value > lowerBound) lowerBound = value;
01719                 }
01720             }
01721             if (solution(e + examples) < 0.0)
01722             {
01723                 value = gradient(e + examples);
01724                 if (solution(e + examples) > -C)
01725                 {
01726                     sum += value;
01727                     freeVars++;
01728                 }
01729                 else
01730                 {
01731                     if (value < upperBound) upperBound = value;
01732                 }
01733             }
01734         }
01735 
01736         if (freeVars > 0)
01737             b = sum / freeVars;                     // stabilized exact value
01738         else
01739             b = 0.5 * (lowerBound + upperBound);    // best estimate
01740     }
01741     else if (mode == e1Class)
01742     {
01743         Array<double> linear(examples);
01744         Array<double> lower(examples);
01745         Array<double> upper(examples);
01746 
01747         unsigned int NrInitialSVs = (unsigned int)(examples * fractionOfOutliers);
01748         double sumAlpha = 0;
01749         double upperBox;
01750 
01751         // NOT NORMALIZED
01752 //      double upperBox = 1 / (examples * fractionOfOutliers);
01753 
01754         // NORMALIZED VERSION, that is \sum_{i}^\ell \alpha_i = NrInitialSVs
01755         upperBox  = 1.0;
01756 
01757         // initialize all patterns 
01758         for (e = 0; e < examples; e++)
01759         {
01760             // linear part of Quadratic Program = 0
01761             linear(e) = 0.0;    
01762 
01763             // both Box constraints (aka. Cplus and Cminus)
01764             lower(e)  = 0.0;
01765             upper(e)  = upperBox;
01766 
01767             // coefficients for optimization
01768             alpha(e)  = upperBox; 
01769         }
01770 
01771         // INITIALIZATION: Check if too much SVs are at bounds
01772         if (NrInitialSVs != examples)
01773         {
01774             int index;
01775             int diff = examples - NrInitialSVs;
01776 
01777             for (int s = 0; s < diff; s++)
01778             {
01779                 // choose randomly a example
01780                 index = Rng::discrete(0, examples-1);
01781 
01782                 while(alpha(index) == 0)
01783                 {
01784                     // try again if the example has already been used
01785                     index = Rng::discrete(0, examples-1);
01786                 }
01787                 // and free the SV
01788                 alpha(index) = 0;
01789             }
01790 
01791             // Check if sumAlpha < e*nu
01792             sumAlpha = 0.0;
01793             for (unsigned int e = 0; e < examples; e++)
01794                 sumAlpha += alpha(e);
01795 
01796             if (sumAlpha < (examples * fractionOfOutliers))
01797             {
01798                 // if yes, find a free example
01799                 index = Rng::discrete(0, examples-1);
01800                 while(alpha(index) != 0)
01801                 {
01802                     // try again if the example already has been used
01803                     index = Rng::discrete(0, examples-1);
01804                 }
01805                 // and now we get sumAlpha = 1
01806                 alpha(index) = (examples * fractionOfOutliers) - sumAlpha;
01807             }
01808         }
01809 
01810         matrix = new KernelMatrix(kernel, input);
01811         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
01812 
01813         QpSvmDecomp* solver = new QpSvmDecomp(*cache);
01814         this->solver = solver;
01815         solver->setVerbose(printInfo);
01816         solver->setMaxIterations(maxIter);
01817         solver->setMaxSeconds(maxSeconds);
01818 
01819         // solve the QP problem
01820         ret = solver->Solve(linear, lower, upper, alpha, accuracy);
01821 
01822         // computation of b
01823         Array<double> gradient;
01824         solver->getGradient(gradient);
01825         double lowerBound = -1e100;
01826         double upperBound = 1e100;
01827         double sum = 0.0;
01828         unsigned int freeVars = 0;
01829         double value;
01830 
01831         for (e = 0; e < examples; e++)
01832         {
01833             value = gradient(e);
01834             if (alpha(e) == lower(e))   // no SVs
01835             {
01836                 if (value > lowerBound) lowerBound = value;
01837             }
01838             else if (alpha(e) == upper(e))  // bounded SVs
01839             {
01840                 if (value < upperBound) upperBound = value;
01841             }
01842             else    // free SVs
01843             {
01844                 sum += value;
01845                 freeVars++;
01846             }
01847         }
01848 
01849         if (freeVars > 0)
01850             b = sum / freeVars; // stabilized exact value
01851         else
01852             b = 0.5 * (lowerBound + upperBound);    // best estimate
01853     }
01854     else if (mode == eRegularizationNetwork)
01855     {
01856         unsigned int f;
01857         Array2D<double> K(examples, examples);
01858         Array2D<double> invK(examples, examples);
01859         for (e = 0; e < examples; e++)
01860         {
01861             for (f = 0; f < e; f++)
01862             {
01863                 double k = kernel->eval(input[e], input[f]);
01864                 K(e, f) = K(f, e) = k;
01865             }
01866             double k = kernel->eval(input[e], input[e]);
01867             K(e, e) = k + gamma;
01868         }
01869         invertSymm(invK, K);
01870         matColVec(alpha, invK, target, 0);
01871 
01872         b = 0.0;
01873         ret = 0.0;
01874     }
01875 
01876     for (e = 0; e < examples; e++) model.setParameter(e, alpha(e));
01877     model.setParameter(examples, b);
01878 
01879     model.MakeSparse();
01880 
01881     return ret;
01882 }
01883 
01884 void SVM_Optimizer::optimize(MultiClassSVM& model, const Array<double>& input, const Array<double>& target, bool copy)
01885 {
01886     if (solver != NULL)
01887     {
01888         delete solver;
01889         solver = NULL;
01890     }
01891     if (matrix != NULL)
01892     {
01893         delete matrix;
01894         matrix = NULL;
01895     }
01896     if (cache != NULL)
01897     {
01898         delete cache;
01899         cache = NULL;
01900     }
01901 
01902     unsigned int i;
01903     unsigned int examples = input.dim(0);
01904     unsigned int classes = model.getClasses();
01905     unsigned int variables = examples * classes;
01906 
01907     model.SetTrainingData(input, copy);
01908     KernelFunction* kernel = model.getKernel();
01909 
01910     if (mode == eAllInOne)
01911     {
01912         Array<double> alpha(variables);
01913         for (i=0; i < variables; i++) alpha(i) = model.getParameter(i);
01914 
01915         matrix = new KernelMatrix(kernel, input);
01916         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
01917         Array<double> prototypes(Matrix::unitmatrix(classes));
01918 
01919         QpBoxAllInOneDecomp* solver = new QpBoxAllInOneDecomp(*cache);
01920         this->solver = solver;
01921         solver->setMaxIterations(maxIter);
01922         solver->Solve(classes, target, prototypes, C, alpha, accuracy);
01923 
01924         i = 0;
01925         unsigned int e, c, p, m;
01926         for (e=0; e<examples; e++)
01927         {
01928             double sum = 0.0;
01929             p = (unsigned int)target(e, 0);
01930             for (c=0; c<classes; c++)
01931             {
01932                 if (c == p)
01933                 {
01934                     i++;
01935                     continue;
01936                 }
01937                 double len2 = 0.0;
01938                 for (m=0; m<classes; m++)
01939                 {
01940                     double diff = prototypes(p, m) - prototypes(c, m);
01941                     len2 += diff * diff;
01942                 }
01943                 double value = 0.5 * alpha(i) / sqrt(len2);
01944                 model.setParameter(i, -value);
01945                 sum += value;
01946                 i++;
01947             }
01948 
01949             model.setParameter(e * classes + p, sum);
01950         }
01951     }
01952     else if (mode == eCrammerSinger)
01953     {
01954         Array<double> alpha(variables);
01955         for (i=0; i < variables; i++) alpha(i) = model.getParameter(i);
01956 
01957         matrix = new KernelMatrix(kernel, input);
01958         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
01959 
01960         QpCrammerSingerDecomp* solver = new QpCrammerSingerDecomp(*cache, target, classes);
01961         solver->setMaxIterations(maxIter);
01962         this->solver = solver;
01963 
01964         // The (beta/2 * accuracy) rule gives the standard accuracy in the binary case.
01965         solver->Solve(alpha, beta, beta / 2.0 * accuracy);
01966 
01967         for (i=0; i < variables; i++) model.setParameter(i, alpha(i));
01968     }
01969     else if (mode == eOVA)
01970     {
01971         Array<double> alpha(examples);
01972         Array<double> linear(examples);
01973         Array<double> lower(examples);
01974         Array<double> upper(examples);
01975 
01976         // train a set of binary classifiers
01977         unsigned int c, e;
01978         matrix = new KernelMatrix(kernel, input);
01979         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
01980 
01981         QpBoxDecomp* solver = new QpBoxDecomp(*cache);
01982         solver->setMaxIterations(maxIter);
01983         this->solver = solver;
01984 
01985         for (c=0; c<classes; c++)
01986         {
01987             for (e=0; e<examples; e++)
01988             {
01989                 alpha(e) = model.getParameter(e * classes + c);
01990                 if (target(e, 0) == c)
01991                 {
01992                     linear(e) = 1.0;
01993                     lower(e) = 0.0;
01994                     upper(e) = C;
01995                 }
01996                 else
01997                 {
01998                     linear(e) = -1.0;
01999                     lower(e) = -C;
02000                     upper(e) = 0.0;
02001                 }
02002             }
02003             solver->Solve(linear, lower, upper, alpha, accuracy);
02004 
02005             for (e=0; e<examples; e++)
02006             {
02007                 model.setParameter(e * classes + c, alpha(e));
02008             }
02009         }
02010     }
02011     else if (mode == eOCC)
02012     {
02013         Array<double> alpha(examples);
02014         Array<double> linear(examples);
02015         Array<double> lower(examples);
02016         Array<double> upper(examples);
02017 
02018         unsigned int c, e;
02019         Array<double> prototypes(Matrix::unitmatrix(classes));
02020         matrix = new InputLabelMatrix(kernel, input, target, prototypes);
02021         cache = new CachedMatrix(matrix, 1048576 * cacheMB / sizeof(float));
02022 
02023         QpBoxDecomp* solver = new QpBoxDecomp(*cache);
02024         solver->setMaxIterations(maxIter);
02025         this->solver = solver;
02026 
02027         for (e=0; e<examples; e++)
02028         {
02029             c = (unsigned int)target(e, 0);
02030             alpha(e) = model.getParameter(e * classes + c);
02031             linear(e) = 1.0;
02032             lower(e) = 0.0;
02033             upper(e) = C;
02034         }
02035         solver->Solve(linear, lower, upper, alpha, accuracy);
02036 
02037         for (e=0; e<examples; e++)
02038         {
02039             unsigned int label = (unsigned int)target(e, 0);
02040             for (c=0; c<classes; c++)
02041             {
02042                 double value = (label == c) ? alpha(e) : 0.0;
02043                 model.setParameter(e * classes + c, value);
02044             }
02045         }
02046     }
02047 
02048     // set the bias term to zero
02049     for (i=0; i < classes; i++) model.setParameter(variables + i, 0.0);
02050 
02051     // TODO:
02052 //  model.MakeSparse();
02053 }
02054 
02055 // static dummy member
02056 ErrorFunction& SVM_Optimizer::dummyError = * ((ErrorFunction*) NULL);
02057 
  • Shark Main Page
  • Array
  • Rng
  • LinAlg
  • FileUtil
  • EALib
  • MOO-EALib
  • ReClaM
  • Fuzzy
  • Mixture
  • Tutorials
  • FAQ