IT++ Logo
mog_diag_kmeans.cpp
Go to the documentation of this file.
1 
31 #include <iostream>
32 
33 namespace itpp
34 {
35 
36 inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const
37 {
38  double acc = 0.0;
39  for (int d = 0;d < D;d++) { double tmp = x[d] - y[d]; acc += tmp * tmp; }
40  return(acc);
41 }
42 
44 {
45 
46  for (int k = 0;k < K;k++) c_count[k] = 0;
47 
48  for (int n = 0;n < N;n++) {
49 
50  int k = 0;
51  double min_dist = dist(c_means[k], c_X[n]);
52  int k_winner = k;
53 
54  for (int k = 1;k < K;k++) {
55  double tmp_dist = dist(c_means[k], c_X[n]);
56  if (tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; }
57  }
58 
59  c_partitions[ k_winner ][ count[k_winner] ] = n;
60  c_count[k_winner]++;
61  }
62 }
63 
64 
66 {
67 
68  for (int k = 0;k < K;k++) {
69 
70  for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
71 
72  int Nk = c_count[k];
73 
74  for (int n = 0;n < Nk;n++) {
75  double * x = c_X[ c_partitions[k][n] ];
76  for (int d = 0;d < D;d++) c_tmpvec[d] += x[d];
77  }
78 
79  if (Nk > 0) {
80  double * c_mean = c_means[k];
81  for (int d = 0;d < D;d++) c_mean[d] = c_tmpvec[d] / Nk;
82  }
83  }
84 
85 }
86 
87 
89 {
90 
91  static int counter = 0;
92 
93  bool zombie_mean = false;
94 
95  int k = 0;
96  int max_count = count[k];
97  int k_hog = k;
98 
99  for (int k = 1;k < K;k++) if (c_count[k] > max_count) { max_count = c_count[k]; k_hog = k; }
100 
101  for (int k = 0;k < K;k++) {
102  if (c_count[k] == 0) {
103 
104  zombie_mean = true;
105  if (verbose) {
106  it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean");
107  }
108  if (k_hog == k) {
109  it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k");
110  return(false);
111  }
112 
113  if (counter >= c_count[k_hog]) counter = 0;
114 
115  double * c_mean = c_means[k];
116  double * c_x = c_X[ c_partitions[k_hog][counter] ];
117 
118  for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_means[k_hog][d] + c_x[d]);
119  counter++;
120  }
121 
122  }
123 
124  if (zombie_mean) assign_to_means();
125 
126  return(true);
127 }
128 
129 
131 {
132 
133  double tmp_dist = 0.0;
134  for (int k = 0;k < K;k++) tmp_dist += dist(c_means[k], c_means_old[k]);
135  return(tmp_dist);
136 }
137 
138 
140 {
141 
142  for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
143 
144  for (int n = 0;n < N;n++) {
145  double * c_x = c_X[n];
146  for (int d = 0;d < D;d++) c_tmpvec[d] += c_x[d];
147  }
148 
149  for (int d = 0;d < D;d++) c_tmpvec[d] /= N;
150 
151  int step = int(floor(double(N) / double(K)));
152  for (int k = 0;k < K;k++) {
153  double * c_mean = c_means[k];
154  double * c_x = c_X[k*step];
155 
156  for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_tmpvec[d] + c_x[d]);
157  }
158 }
159 
160 
162 {
163 
164  for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d];
165 
166  for (int i = 0;i < max_iter;i++) {
167 
168  assign_to_means();
169  if (!dezombify_means()) return;
171 
172  double change = measure_change();
173 
174  if (verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << " change = " << change << std::endl;
175  if (change == 0) break;
176 
177  for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d];
178  }
179 
180 }
181 
182 
184 {
185  initial_means();
186  iterate();
187 }
188 
189 
191 {
192 
193  for (int k = 0;k < K;k++) {
194  int Nk = c_count[k];
195 
196  if (Nk >= 2) {
197  double * c_mean = c_means[k];
198 
199  for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
200 
201  for (int n = 0;n < Nk;n++) {
202  double * c_x = c_X[ c_partitions[k][n] ];
203  for (int d = 0;d < D;d++) { double tmp = c_x[d] - c_mean[d]; c_tmpvec[d] += tmp * tmp; }
204  }
205 
206  for (int d = 0;d < D;d++) c_diag_covs[k][d] = trust * (c_tmpvec[d] / (Nk - 1.0)) + (1.0 - trust) * (1.0);
207  }
208  else {
209  for (int d = 0;d < D;d++) c_diag_covs[k][d] = 1.0;
210  }
211  }
212 
213 }
214 
215 
217 {
218  for (int k = 0;k < K;k++) c_weights[k] = trust * (c_count[k] / double(N)) + (1.0 - trust) * (1.0 / K);
219 }
220 
221 
223 {
224 
225  for (int d = 0;d < D;d++) {
226  double acc = 0.0;
227  for (int n = 0;n < N;n++) acc += c_X[n][d];
228  c_norm_mu[d] = acc / N;
229  }
230 
231  for (int d = 0;d < D;d++) {
232  double acc = 0.0;
233  for (int n = 0;n < N;n++) { double tmp = c_X[n][d] - c_norm_mu[d]; acc += tmp * tmp; }
234  c_norm_sd[d] = std::sqrt(acc / (N - 1));
235  }
236 
237  for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) {
238  c_X[n][d] -= c_norm_mu[d];
239  if (c_norm_sd[d] > 0.0) c_X[n][d] /= c_norm_sd[d];
240  }
241 }
242 
243 
245 {
246 
247  for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) {
248  if (c_norm_sd[d] > 0.0) c_X[n][d] *= c_norm_sd[d];
249  c_X[n][d] += c_norm_mu[d];
250  }
251 }
252 
253 
255 {
256 
257  for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) {
258  if (norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d];
259  c_means[k][d] += norm_mu[d];
260  }
261 }
262 
263 
264 void MOG_diag_kmeans_sup::run(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in)
265 {
266 
267  it_assert(model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid");
268  it_assert((max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero");
269  it_assert(((trust_in >= 0.0) && (trust_in <= 1.0)), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)");
270 
271  verbose = verbose_in;
272 
273  Array<vec> means_in = model_in.get_means();
274  Array<vec> diag_covs_in = model_in.get_diag_covs();
275  vec weights_in = model_in.get_weights();
276 
277  init(means_in, diag_covs_in, weights_in);
278 
279  means_in.set_size(0);
280  diag_covs_in.set_size(0);
281  weights_in.set_size(0);
282 
283  it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality");
284 
285  N = X_in.size();
286 
287  if (K > N) {
288  it_warning("MOG_diag_kmeans_sup::run(): K > N");
289  }
290  else {
291  if (K > N / 10) {
292  it_warning("MOG_diag_kmeans_sup::run(): K > N/10");
293  }
294  }
295 
296  max_iter = max_iter_in;
297  trust = trust_in;
298 
300  for (int k = 0;k < K;k++) means_old(k).set_size(D);
301  partitions.set_size(K);
302  for (int k = 0;k < K;k++) partitions(k).set_size(N);
303  count.set_size(K);
304  tmpvec.set_size(D);
305  norm_mu.set_size(D);
306  norm_sd.set_size(D);
307 
308  c_X = enable_c_access(X_in);
312  c_tmpvec = enable_c_access(tmpvec);
313  c_norm_mu = enable_c_access(norm_mu);
314  c_norm_sd = enable_c_access(norm_sd);
315 
316  if (normalise_in) normalise_vectors();
317 
318  calc_means();
319  if (normalise_in) { unnormalise_vectors(); unnormalise_means(); }
320  calc_covs();
321  calc_weights();
322 
323  model_in.init(means, diag_covs, weights);
324 
329  disable_c_access(c_tmpvec);
330  disable_c_access(c_norm_mu);
331  disable_c_access(c_norm_sd);
332 
333  means_old.set_size(0);
334  partitions.set_size(0);
335  count.set_size(0);
336  tmpvec.set_size(0);
337  norm_mu.set_size(0);
338  norm_sd.set_size(0);
339 
340  cleanup();
341 
342 }
343 
344 //
345 // convenience functions
346 
347 void MOG_diag_kmeans(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in)
348 {
350  km.run(model_in, X_in, max_iter_in, trust_in, normalise_in, verbose_in);
351 }
352 
353 
354 }
355 
SourceForge Logo

Generated on Sat May 25 2013 16:32:24 for IT++ by Doxygen 1.8.2