IT++ Logo
Using Mixture of Gaussians (MOG) module to model data

This example demonstrates how to find the parameters of a MOG model via using the kmeans and EM based optimisers. Synthetic data is utilised.

#include <itpp/itstat.h>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <ios>
using std::cout;
using std::endl;
using std::fixed;
using std::setprecision;
using namespace itpp;
int main()
{
bool print_progress = false;
//
// first, let's generate some synthetic data
int N = 100000; // number of vectors
int D = 3; // number of dimensions
int K = 5; // number of Gaussians
Array<vec> X(N);
for (int n = 0;n < N;n++) { X(n).set_size(D); X(n) = 0.0; }
// the means
Array<vec> mu(K);
mu(0) = "-6, -4, -2";
mu(1) = "-4, -2, 0";
mu(2) = "-2, 0, 2";
mu(3) = " 0, +2, +4";
mu(4) = "+2, +4, +6";
// the diagonal variances
Array<vec> var(K);
var(0) = "0.1, 0.2, 0.3";
var(1) = "0.2, 0.3, 0.1";
var(2) = "0.3, 0.1, 0.2";
var(3) = "0.1, 0.2, 0.3";
var(4) = "0.2, 0.3, 0.1";
cout << fixed << setprecision(3);
cout << "user configured means and variances:" << endl;
cout << "mu = " << mu << endl;
cout << "var = " << var << endl;
// randomise the order of Gaussians "generating" the vectors
I_Uniform_RNG rnd_uniform(0, K - 1);
ivec gaus_id = rnd_uniform(N);
ivec gaus_count(K);
gaus_count = 0;
Array<vec> mu_test(K);
for (int k = 0;k < K;k++) { mu_test(k).set_size(D); mu_test(k) = 0.0; }
Array<vec> var_test(K);
for (int k = 0;k < K;k++) { var_test(k).set_size(D); var_test(k) = 0.0; }
Normal_RNG rnd_normal;
for (int n = 0;n < N;n++) {
int k = gaus_id(n);
gaus_count(k)++;
for (int d = 0;d < D;d++) {
rnd_normal.setup(mu(k)(d), var(k)(d));
double tmp = rnd_normal();
X(n)(d) = tmp;
mu_test(k)(d) += tmp;
}
}
//
// find the stats for the generated data
for (int k = 0;k < K;k++) mu_test(k) /= gaus_count(k);
for (int n = 0;n < N;n++) {
int k = gaus_id(n);
for (int d = 0;d < D;d++) {
double tmp = X(n)(d) - mu_test(k)(d);
var_test(k)(d) += tmp * tmp;
}
}
for (int k = 0;k < K;k++) var_test(k) /= (gaus_count(k) - 1.0);
cout << endl << endl;
cout << fixed << setprecision(3);
cout << "stats for X:" << endl;
for (int k = 0;k < K;k++) {
cout << "k = " << k << " count = " << gaus_count(k) << " weight = " << gaus_count(k) / double(N) << endl;
for (int d = 0;d < D;d++) cout << " d = " << d << " mu_test = " << mu_test(k)(d) << " var_test = " << var_test(k)(d) << endl;
cout << endl;
}
// make a model with initial values (zero mean and unit variance)
// the number of gaussians and dimensions of the model is specified here
MOG_diag mog(K, D);
cout << endl;
cout << fixed << setprecision(3);
cout << "mog.avg_log_lhood(X) = " << mog.avg_log_lhood(X) << endl;
//
// find initial parameters via k-means (which are then used as seeds for EM based optimisation)
cout << endl << endl;
cout << "running kmeans optimiser" << endl << endl;
MOG_diag_kmeans(mog, X, 10, 0.5, true, print_progress);
cout << fixed << setprecision(3);
cout << "mog.get_means() = " << endl << mog.get_means() << endl;
cout << "mog.get_diag_covs() = " << endl << mog.get_diag_covs() << endl;
cout << "mog.get_weights() = " << endl << mog.get_weights() << endl;
cout << endl;
cout << "mog.avg_log_lhood(X) = " << mog.avg_log_lhood(X) << endl;
//
// EM ML based optimisation
cout << endl << endl;
cout << "running ML optimiser" << endl << endl;
MOG_diag_ML(mog, X, 10, 0.0, 0.0, print_progress);
cout << fixed << setprecision(3);
cout << "mog.get_means() = " << endl << mog.get_means() << endl;
cout << "mog.get_diag_covs() = " << endl << mog.get_diag_covs() << endl;
cout << "mog.get_weights() = " << endl << mog.get_weights() << endl;
cout << endl;
cout << "mog.avg_log_lhood(X) = " << mog.avg_log_lhood(X) << endl;
return 0;
}
SourceForge Logo

Generated on Sat May 25 2013 16:32:26 for IT++ by Doxygen 1.8.2