// Construct a GMM by reading its parameters from `filename` (presumably via
// GMM::load — body not visible in this extract; confirm against full source).
50 GMM::GMM(std::string filename)
// Construct a GMM with M_in mixture components of dimension d_in.
// NOTE(review): interior lines are elided in this extract.
55 GMM::GMM(
int M_in,
int d_in)
// Per-component initialisation loop (loop body elided).
63 for (
int i = 0;i < M;i++) {
// Initialise the mixture from a VQ codebook: `codebook` holds M concatenated
// vectors of length `dim` that become the component means; weights are made
// uniform. NOTE(review): several interior lines are elided in this extract.
69 void GMM::init_from_vq(
const vec &codebook,
int dim)
// Number of components = total codebook length / vector dimension.
77 M = codebook.length() / dim;
// Uniform weights: each component gets 1/M.
80 w =
ones(M) / double(M);
83 for (i = 0;i < M;i++) {
// Slice out the i-th codebook vector (d consecutive elements).
84 v = codebook.mid(i * d, d);
// Diagonal covariances stored back-to-back in one flat vector of length M*d.
88 sigma.set_length(M*d);
89 for (i = 0;i < M;i++) {
// diag(C): C is presumably a covariance matrix built on elided lines — confirm.
90 sigma.replace_mid(i*d,
diag(C));
// Initialise weights, means and (diagonal) covariances from explicit values.
// m_in and sigma_in are d x M matrices: one column per mixture component.
96 void GMM::init(
const vec &w_in,
const mat &m_in,
const mat &sigma_in)
103 sigma.set_length(M*d);
// Flatten column i of each input matrix into packed storage at offset i*d.
104 for (i = 0;i < M;i++) {
105 for (j = 0;j < d;j++) {
106 m(i*d + j) = m_in(j, i);
107 sigma(i*d + j) = sigma_in(j, i);
// Set all component means at once; m_in is d x M, one column per component.
115 void GMM::set_mean(
const mat &m_in)
123 for (i = 0;i < M;i++) {
124 for (j = 0;j < d;j++) {
// Pack column i of m_in into the flat mean vector at offset i*d.
125 m(i*d + j) = m_in(j, i);
// Replace the mean of component `i`; recompute the cached normalisation
// terms unless the caller defers it with compflag == false (bulk updates).
131 void GMM::set_mean(
int i,
const vec &means,
bool compflag)
// Overwrite the i-th slice (length of `means`) of the packed mean vector.
133 m.replace_mid(i*
length(means), means);
134 if (compflag) compute_internals();
// Set all diagonal covariances at once; sigma_in is d x M, one column per
// component, packed into the flat sigma vector (component i at [i*d, i*d+d)).
137 void GMM::set_covariance(
const mat &sigma_in)
144 sigma.set_length(M*d);
145 for (i = 0;i < M;i++) {
146 for (j = 0;j < d;j++) {
147 sigma(i*d + j) = sigma_in(j, i);
// Replace the diagonal covariance of component `i`; defer the cache refresh
// when compflag is false (useful when updating many components in a row).
153 void GMM::set_covariance(
int i,
const vec &covariances,
bool compflag)
155 sigma.replace_mid(i*
length(covariances), covariances);
156 if (compflag) compute_internals();
// Reduce the model to its first d_new dimensions by keeping only the leading
// d_new mean/covariance entries of every component (valid because the
// covariances are diagonal, so trailing dims marginalise out by truncation).
159 void GMM::marginalize(
int d_new)
161 it_error_if(d_new > d,
"GMM.marginalize: cannot change to a larger dimension");
163 vec mnew(d_new*M), sigmanew(d_new*M);
// Repack: component i moves from stride d to stride d_new.
166 for (i = 0;i < M;i++) {
167 for (j = 0;j < d_new;j++) {
168 mnew(i*d_new + j) = m(i * d + j);
169 sigmanew(i*d_new + j) = sigma(i * d + j);
// Merge another GMM of the same dimension into this one. Both weight vectors
// are rescaled by their share of the total component count, M/(M+M') and
// M'/(M+M'), so the combined weights still sum to one.
// NOTE(review): interior lines elided in this extract.
179 void GMM::join(
const GMM &newgmm)
// Presumably the "this model is empty" branch: copy newgmm wholesale — confirm
// against the elided surrounding condition.
184 sigma = newgmm.sigma;
189 it_error_if(d != newgmm.d,
"GMM.join: cannot join GMMs of different dimension");
// Reweight both sides and concatenate.
191 w =
concat(
double(M) / (M + newgmm.M) * w,
double(newgmm.M) / (M + newgmm.M) * newgmm.w);
194 sigma =
concat(sigma, newgmm.sigma);
// Serialise the model as text: a "M d" header, then the M weights (one per
// line), then the M mean rows and M covariance rows (space-separated).
// NOTE(review): interior lines elided in this extract.
210 void GMM::save(std::string filename)
212 std::ofstream f(filename.c_str());
// Header: number of mixtures and dimension.
215 f << M <<
" " << d << std::endl ;
216 for (i = 0;i < w.length();i++) {
217 f << w(i) << std::endl ;
// Means: j starts at 1 — element 0 is presumably written by an elided line
// before this loop, so the remaining values are space-prefixed.
219 for (i = 0;i < M;i++) {
221 for (j = 1;j < d;j++) {
222 f <<
" " << m(i*d + j) ;
// Covariances, same row layout as the means.
226 for (i = 0;i < M;i++) {
228 for (j = 1;j < d;j++) {
229 f <<
" " << sigma(i*d + j) ;
// Load a model previously written by GMM::save and print a short summary.
// NOTE(review): interior lines (header read, weight read) elided here.
235 void GMM::load(std::string filename)
237 std::ifstream GMMFile(filename.c_str());
240 it_error_if(!GMMFile, std::string(
"GMM::load : cannot open file ") + filename);
// Read the M weights (loop body elided).
246 for (i = 0;i < M;i++) {
// Read the packed means, component-major.
250 for (i = 0;i < M;i++) {
251 for (j = 0;j < d;j++) {
252 GMMFile >> m(i*d + j) ;
// Read the packed diagonal covariances.
255 sigma.set_length(M*d);
256 for (i = 0;i < M;i++) {
257 for (j = 0;j < d;j++) {
258 GMMFile >> sigma(i*d + j) ;
// Summary to stdout for interactive use.
262 std::cout <<
" mixtures:" << M <<
" dim:" << d << std::endl ;
// Total likelihood of x under the mixture: sum over components of
// weight * component pdf.
265 double GMM::likelihood(
const vec &x)
270 for (i = 0;i < M;i++) {
271 fx += w(i) * likelihood_aposteriori(x, i);
// Per-component weighted likelihoods of x, returned as a length-M vector.
276 vec GMM::likelihood_aposteriori(
const vec &x)
281 for (i = 0;i < M;i++) {
282 v(i) = w(i) * likelihood_aposteriori(x, i);
// Gaussian pdf of x under one component (diagonal covariance), using the
// cached terms normweight/normexp — see compute_internals.
287 double GMM::likelihood_aposteriori(
const vec &x,
int mixture)
292 it_error_if(d != x.length(),
"GMM::likelihood_aposteriori : dimensions does not match");
// Exponent: sum_j normexp_j * (x_j - m_j)^2, with normexp_j = -1/(2*sigma_j).
294 for (j = 0;j < d;j++) {
295 s += normexp(mixture * d + j) *
sqr(x(j) - m(mixture * d + j));
// NOTE(review): stray double semicolon below — harmless, but worth cleaning.
297 return normweight(mixture)*
std::exp(s);;
// Precompute per-component pdf terms used by likelihood_aposteriori:
// normexp(i*d+j) = -1/(2*sigma_ij), and (on elided lines, presumably)
// normweight(i) built from `constant`, w(i) and the determinant `s`.
300 void GMM::compute_internals()
// (2*pi)^(-d/2): dimension-dependent Gaussian normalisation constant.
304 double constant = 1.0 /
std::pow(2 *
pi, d / 2.0);
306 normweight.set_length(M);
307 normexp.set_length(M*d);
309 for (i = 0;i < M;i++) {
311 for (j = 0;j < d;j++) {
312 normexp(i*d + j) = -0.5 / sigma(i * d + j);
// s accumulates the determinant of the diagonal covariance of component i.
313 s *= sigma(i * d + j);
// Draw one random sample from the mixture: select a component by inverse-CDF
// lookup on the cumulative weights, then (on elided lines) sample from it.
320 vec GMM::draw_sample()
// One-time build of the cumulative weight table (function-local statics).
322 static bool first =
true;
323 static vec cumweight;
// Pin the last cumulative weight to exactly 1 to guard against floating-point
// rounding leaving it slightly below 1 (which could make the search run off).
331 cumweight(
length(cumweight) - 1) = 1;
// Linear scan for the first component whose cumulative weight exceeds u.
334 while (u > cumweight(k)) k++;
// Train an M-component diagonal-covariance GMM on TrainingData with EM.
// Initialised from vector quantisation; runs at most NOITER iterations or
// until the relative log-likelihood change drops below 1e-6. VERBOSE controls
// progress printing. NOTE(review): large parts of the body are elided here.
339 GMM gmmtrain(Array<vec> &TrainingData,
int M,
int NOITER,
bool VERBOSE)
342 int i, j, d = TrainingData(0).length();
350 double LL = 0, LLold, fx;
// (2*pi)^(-d/2): Gaussian normalisation constant, hoisted out of the loops.
351 double constant = 1.0 /
std::pow(2 *
pi, d / 2.0);
352 int T = TrainingData.length();
358 vec p_aposteriori(M);
// --- Initialisation: means from VQ codebook training.
366 mean =
vqtrain(TrainingData, M, 200000, 0.5, VERBOSE);
// compflag=false defers compute_internals until all parameters are set.
367 for (i = 0;i < M;i++) gmm.set_mean(i, mean.get_col(i),
false);
// Covariances start at half the per-dimension data second moment.
370 for (i = 0;i < TrainingData.length();i++) sig +=
sqr(TrainingData(i));
371 sig /= TrainingData.length();
372 for (i = 0;i < M;i++) gmm.set_covariance(i, 0.5*sig,
false);
// Uniform initial weights.
374 gmm.set_weight(1.0 / M*
ones(M));
// Copy the model parameters into flat working arrays for the EM loop.
379 for (i = 0;i < M;i++) {
380 temp1 = gmm.get_mean(i);
381 temp2 = gmm.get_covariance(i);
382 for (j = 0;j < d;j++) {
383 m(i*d + j) = temp1(j);
384 sigma(i*d + j) = temp2(j);
386 w(i) = gmm.get_weight(i);
// --- EM iterations ---
388 for (n = 0;n < NOITER;n++) {
// Refresh cached pdf terms from current sigma/w (mirrors compute_internals).
389 for (i = 0;i < M;i++) {
391 for (j = 0;j < d;j++) {
392 normexp(i*d + j) = -0.5 / sigma(i * d + j);
// s accumulates the diagonal-covariance determinant for component i.
393 s *= sigma(i * d + j);
395 normweight(i) = constant * w(i) /
std::sqrt(s);
// E-step: component responsibilities for each training sample.
402 for (t = 0;t < T;t++) {
403 x1 = TrainingData(t);
406 for (i = 0;i < M;i++) {
408 for (j = 0;j < d;j++) {
409 s += normexp(i * d + j) *
sqr(x1(j) - m(i * d + j));
411 p_aposteriori(i) = normweight(i) *
std::exp(s);
// fx accumulates the total likelihood of sample t (feeds the log-likelihood).
412 fx += p_aposteriori(i);
// Accumulate sufficient statistics; x2 is presumably sqr(x1) — computed on an
// elided line, confirm against full source.
417 for (i = 0;i < M;i++) {
418 wsum(i) += p_aposteriori(i);
419 for (j = 0;j < d;j++) {
420 msum(i*d + j) += p_aposteriori(i) * x1(j);
421 sigmasum(i*d + j) += p_aposteriori(i) * x2(j);
// M-step: means and variances from the statistics (var = E[x^2] - E[x]^2).
425 for (i = 0;i < M;i++) {
426 for (j = 0;j < d;j++) {
427 m(i*d + j) = msum(i * d + j) / wsum(i);
428 sigma(i*d + j) = sigmasum(i * d + j) / wsum(i) -
sqr(m(i * d + j));
// Converged when the relative log-likelihood change is below 1e-6.
434 if (
std::abs((LL - LLold) / LL) < 1e-6)
break;
// Progress reporting (surrounding VERBOSE conditions are elided).
436 std::cout << n <<
": " << LL <<
" " <<
std::abs((LL - LLold) / LL) <<
" " <<
toc() << std::endl ;
437 std::cout <<
"---------------------------------------" << std::endl ;
441 std::cout << n <<
": LL = " << LL <<
" " <<
std::abs((LL - LLold) / LL) <<
"\r" ;
// Write the trained parameters back into the returned GMM object.
445 for (i = 0;i < M;i++) {
446 gmm.set_mean(i, m.mid(i*d, d),
false);
447 gmm.set_covariance(i, sigma.mid(i*d, d),
false);