IT++ Logo
transforms.cpp
Go to the documentation of this file.
1 
30 #ifndef _MSC_VER
31 # include <itpp/config.h>
32 #else
33 # include <itpp/config_msvc.h>
34 #endif
35 
36 
37 #if defined(HAVE_FFT_MKL)
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 
42 namespace mkl
43 {
44 # include <mkl_dfti.h>
45 # include <mkl_service.h>
46 # undef DftiCreateDescriptor
47 }
48 
49 #elif defined(HAVE_FFT_ACML)
50 
51 namespace acml
52 {
53 # include <acml.h>
54 }
55 
56 #elif defined(HAVE_FFTW3)
57 
58 # include <fftw3.h>
59 
60 #endif
61 
62 #include <itpp/signal/transforms.h>
63 
65 
66 //multithreading mode selector
67 enum MultithreadingTag {SingleThreaded = 1, OmpThreaded};
68 
69 #ifdef _OPENMP
70 
71 #include <omp.h>
72 
73 static const MultithreadingTag ThreadingTag = OmpThreaded;
74 //number of context records kept per transform type
75 //see comments for Transform_Provider class for futher details
76 static const int contexts_per_transform_type = 10;
77 //specialize mutex for multi-threaded code with OMP
78 class Mutex
79 {
80  omp_lock_t _lck;
81  //disable copy-construction and assignment
82  Mutex(const Mutex&);
83  Mutex& operator=(const Mutex&);
84 public:
85  Mutex() {omp_init_lock(&_lck);}
86  ~Mutex() {omp_destroy_lock(&_lck);}
87  //lock the mutex
88  void lock() {omp_set_lock(&_lck);}
89  //try to lock. returns true if ownership is taken
90  bool try_lock() {return (omp_test_lock(&_lck)) != 0;}
91  //unlock
92  void unlock() {omp_unset_lock(&_lck);}
93 };
94 
95 
96 #else
97 
98 static const MultithreadingTag ThreadingTag = SingleThreaded;
99 //number of context records kept per transform type
100 //see comments for Transform_Provider class for futher details
101 static const int contexts_per_transform_type = 1;
102 
103 //specialize mutex for single-threaded code
104 class Mutex
105 {
106  //disable copy-construction and assignment
107  Mutex(const Mutex&);
108  Mutex& operator=(const Mutex&);
109 public:
110  Mutex() {}
111  ~Mutex() {}
112  void lock() {}
113  bool try_lock() {return true;}
114  void unlock() {}
115 };
116 
117 #endif
118 
119 
120 //mutex-based lock
121 class Lock
122 {
123  Mutex& _m;
124  //disable copy-construction and assignment
125  Lock(const Lock&);
126  Lock& operator=(const Lock&);
127 public:
128  Lock(Mutex& m): _m(m) {_m.lock();}
129  ~Lock() {_m.unlock();}
130 };
131 
132 
133 
134 namespace itpp
135 {
136 
137 //define traits for all supported transform types: FFT complex, FFT real, IFFT Complex, IFFT Real, DCT, IDCT
138 struct FFTCplx_Traits {
139  typedef cvec InType;
140  typedef cvec OutType;
141 };
142 
143 struct IFFTCplx_Traits {
144  typedef cvec InType;
145  typedef cvec OutType;
146 };
147 
148 struct FFTReal_Traits {
149  typedef vec InType;
150  typedef cvec OutType;
151 };
152 
153 struct IFFTReal_Traits {
154  typedef cvec InType;
155  typedef vec OutType;
156 };
157 
158 struct DCT_Traits {
159  typedef vec InType;
160  typedef vec OutType;
161 };
162 
163 struct IDCT_Traits {
164  typedef vec InType;
165  typedef vec OutType;
166 };
167 
168 //generic transforms implementation based on transform type and specific FFT library
169 template<typename TransformTraits> class Transform;
170 //FFT library initializer based on mutithreading model
171 template<MultithreadingTag> inline void init_fft_library();
172 
173 #if defined(HAVE_FFT_MKL)
174 //MKL-specific implementations
175 
176 //MKL FFT-related notes:
177 //If multithreading is enabled on ITPP level (and in user's code) MKL FFT descriptors can be created, committed and freed by multiple threads
178 //In this case Intel recommends to use single-threaded FFT implementation:
179 //1. "Intel® MKL 10.x threading", http://software.intel.com/en-us/articles/intel-mkl-10x-threading,
180 //2. "Examples of Using Multi-Threading for FFT Computation", http://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-00422EBE-93C3-4BC9-A621-9BF0A0E93888.htm
181 //Based on examples, provided by Intel, it seems to be safe to create/commit/free and run FFT on per-thread descriptor
182 //without additional locking
183 
184 template<> inline void init_fft_library<SingleThreaded>() {} //assume no actions required. ITPP does not use threading, so FFT library is free to use it's own threading implementation
185 template<> inline void init_fft_library<OmpThreaded>()
186 {
187  //switch FFT domain of MKL to single-threaded mode as Intel suggests
188  //this should work starting from MKL 10.0
189  mkl::mkl_domain_set_num_threads(1, MKL_FFT);
190 }
191 
192 //---------------------------------------------------------------------------
193 // FFT/IFFT based on MKL
194 //---------------------------------------------------------------------------
195 
196 inline void release_descriptor(mkl::DFTI_DESCRIPTOR* h)
197 {
198  if(h != NULL) {
199  MKL_LONG status = mkl::DftiFreeDescriptor(&h);
200  if(status) {
201  it_info(mkl::DftiErrorMessage(status));
202  it_error("MKL library release_descriptor() failed on DftiFreeDescriptor.");
203  }
204  }
205 }
206 
207 template<> class Transform<FFTCplx_Traits>
208 {
209  mkl::DFTI_DESCRIPTOR* _h;
210  int _transform_length;
211 public:
212  Transform(): _h(NULL), _transform_length(0) {}
213 
214  void compute_transform(const cvec &in, cvec &out) {
215  out.set_size(in.size(), false);
216  if(_transform_length != in.size()) {
217  release_descriptor(_h);
218  _transform_length = in.size();
219 
220  MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_COMPLEX, 1, _transform_length);
221  if(status) {
222  it_info(mkl::DftiErrorMessage(status));
223  it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
224  }
225 
226  mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
227 
228  status = mkl::DftiCommitDescriptor(_h);
229  if(status) {
230  it_info(mkl::DftiErrorMessage(status));
231  it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
232  }
233 
234  }
235  mkl::DftiComputeForward(_h, (void *)in._data(), out._data());
236  }
237 
238  void reset() {release_descriptor(_h); *this = Transform();}
239 };
240 
241 template<> class Transform<IFFTCplx_Traits>
242 {
243  mkl::DFTI_DESCRIPTOR* _h;
244  int _transform_length;
245 public:
246  Transform(): _h(NULL), _transform_length(0) {}
247 
248  void compute_transform(const cvec &in, cvec &out) {
249  out.set_size(in.size(), false);
250  if(_transform_length != in.size()) {
251  release_descriptor(_h);
252  _transform_length = in.size();
253  MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_COMPLEX, 1, _transform_length);
254  if(status) {
255  it_info(mkl::DftiErrorMessage(status));
256  it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
257  }
258 
259  mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
260  mkl::DftiSetValue(_h, mkl::DFTI_BACKWARD_SCALE, 1.0 / _transform_length);
261 
262  status = mkl::DftiCommitDescriptor(_h);
263  if(status) {
264  it_info(mkl::DftiErrorMessage(status));
265  it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
266  }
267 
268  }
269  mkl::DftiComputeBackward(_h, (void *)in._data(), out._data());
270  }
271 
272  void reset() {release_descriptor(_h); *this = Transform();}
273 };
274 
275 template<> class Transform<FFTReal_Traits>
276 {
277  mkl::DFTI_DESCRIPTOR* _h;
278  int _transform_length;
279 public:
280  Transform(): _h(NULL), _transform_length(0) {}
281 
282  void compute_transform(const vec &in, cvec &out) {
283  out.set_size(in.size(), false);
284  if(_transform_length != in.size()) {
285  release_descriptor(_h);
286  _transform_length = in.size();
287 
288  MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_REAL, 1, _transform_length);
289  if(status) {
290  it_info(mkl::DftiErrorMessage(status));
291  it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
292  }
293 
294  mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
295 
296  status = mkl::DftiCommitDescriptor(_h);
297  if(status) {
298  it_info(mkl::DftiErrorMessage(status));
299  it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
300  }
301 
302  }
303  mkl::DftiComputeForward(_h, (void *)in._data(), out._data());
304  // Real FFT does not compute the 2nd half of the FFT points because it
305  // is redundant to the 1st half. However, we want all of the data so we
306  // fill it in. This is consistent with Matlab's functionality
307  int istart = ceil_i(in.size() / 2.0);
308  int idelta = in.size() - istart;
309  out.set_subvector(istart, reverse(conj(out(1, idelta))));
310  }
311 
312  void reset() {release_descriptor(_h); *this = Transform();}
313 };
314 
315 template<> class Transform<IFFTReal_Traits>
316 {
317  mkl::DFTI_DESCRIPTOR* _h;
318  int _transform_length;
319 public:
320  Transform(): _h(NULL), _transform_length(0) {}
321 
322  void compute_transform(const cvec &in, vec &out) {
323  out.set_size(in.size(), false);
324  if(_transform_length != in.size()) {
325  release_descriptor(_h);
326  _transform_length = in.size();
327 
328  MKL_LONG status = mkl::DftiCreateDescriptor(&_h, mkl::DFTI_DOUBLE, mkl::DFTI_REAL, 1, _transform_length);
329  if(status) {
330  it_info(mkl::DftiErrorMessage(status));
331  it_error("MKL library compute_transform() failed on DftiCreateDescriptor.");
332  }
333 
334  mkl::DftiSetValue(_h, mkl::DFTI_PLACEMENT, mkl::DFTI_NOT_INPLACE);
335  mkl::DftiSetValue(_h, mkl::DFTI_BACKWARD_SCALE, 1.0 / _transform_length);
336 
337  status = mkl::DftiCommitDescriptor(_h);
338  if(status) {
339  it_info(mkl::DftiErrorMessage(status));
340  it_error("MKL library compute_transform() failed on DftiCommitDescriptor.");
341  }
342 
343  }
344  mkl::DftiComputeBackward(_h, (void *)in._data(), out._data());
345  }
346 
347  void reset() {release_descriptor(_h); *this = Transform();}
348 };
349 
350 #endif // #ifdef HAVE_FFT_MKL
351 
352 
353 #if defined(HAVE_FFT_ACML)
354 //ACML-specific implementations
355 
356 //ACML FFT-related notes:
357 //ACML documentation is not very verbose regarding the multithreaded use of the library, but multithreaded ifort-built ACML uses
358 //OMP internally. AMD recommends linking with SINGLE-THREADED library if OMP is enabled in user's code. Also, they claim that
359 //single-threaded functions can be used from the multiple threads simultaniously (see http://devgurus.amd.com/thread/141592 for
360 //multi-threading discussion on AMD dev forums) The thread-safety of library functions is also mentioned in ACML release notes (ver 4.4.0).
361 //In the following implementation we assume that ACML transform functions can be run simultaneously from different threads safely if they operate
362 //on different data sets.
363 template<> inline void init_fft_library<SingleThreaded>() {} //assume no actions required.
364 template<> inline void init_fft_library<OmpThreaded>() {}
365 
366 //---------------------------------------------------------------------------
367 // FFT/IFFT based on ACML
368 //---------------------------------------------------------------------------
369 template<> class Transform<FFTCplx_Traits>
370 {
371  cvec _scratchpad;
372  int _transform_length;
373 public:
374  Transform(): _transform_length(0) {}
375 
376  void compute_transform(const cvec &in, cvec &out) {
377  int info;
378  out.set_size(in.size(), false);
379  if(_transform_length != in.size()) {
380  _transform_length = in.size();
381 
382  int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
383  if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
384 
385  acml::zfft1dx(0, 1.0, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
386  (acml::doublecomplex *)out._data(), 1,
387  (acml::doublecomplex *)_scratchpad._data(), &info);
388  }
389  acml::zfft1dx(-1, 1.0, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
390  (acml::doublecomplex *)out._data(), 1,
391  (acml::doublecomplex *)_scratchpad._data(), &info);
392  }
393 
394  void reset() {*this = Transform();}
395 };
396 
397 template<> class Transform<IFFTCplx_Traits>
398 {
399  cvec _scratchpad;
400  int _transform_length;
401 public:
402  Transform(): _transform_length(0) {}
403 
404  void compute_transform(const cvec &in, cvec &out) {
405  int info;
406  out.set_size(in.size(), false);
407  if(_transform_length != in.size()) {
408  _transform_length = in.size();
409 
410  int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
411  if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
412 
413  acml::zfft1dx(0, 1.0 / _transform_length, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
414  (acml::doublecomplex *)out._data(), 1,
415  (acml::doublecomplex *)_scratchpad._data(), &info);
416  }
417  acml::zfft1dx(1, 1.0 / _transform_length, false, _transform_length, (acml::doublecomplex *)in._data(), 1,
418  (acml::doublecomplex *)out._data(), 1,
419  (acml::doublecomplex *)_scratchpad._data(), &info);
420  }
421 
422  void reset() {*this = Transform();}
423 };
424 
425 template<> class Transform<FFTReal_Traits>
426 {
427  vec _scratchpad;
428  int _transform_length;
429 public:
430  Transform(): _transform_length(0) {}
431 
432  void compute_transform(const vec &in, cvec &out) {
433  vec out_re = in;
434 
435  int info;
436  if(_transform_length != in.size()) {
437  _transform_length = in.size();
438 
439  int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
440  if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
441 
442  acml::dzfft(0, _transform_length, out_re._data(), _scratchpad._data(), &info);
443  }
444  acml::dzfft(1, _transform_length, out_re._data(), _scratchpad._data(), &info);
445 
446  // Normalise output data
447  double factor = std::sqrt(static_cast<double>(_transform_length));
448  out_re *= factor;
449 
450  // Convert the real Hermitian DZFFT's output to the Matlab's complex form
451  vec out_im(_transform_length);
452  out_im(0) = 0.0;
453  if(!(_transform_length % 2)) out_im(_transform_length / 2) = 0.0; //even transform length
454  out_im.set_subvector(1, reverse(out_re(_transform_length / 2 + 1, _transform_length - 1)));
455  out_im.set_subvector(_transform_length / 2 + 1, -out_re(_transform_length / 2 + 1, _transform_length - 1));
456  out_re.set_subvector(_transform_length / 2 + 1, reverse(out_re(1, (_transform_length - 1) / 2)));
457 
458  out.set_size(_transform_length, false);
459  out = to_cvec(out_re, out_im);
460  }
461 
462  void reset() {*this = Transform();}
463 };
464 
465 template<> class Transform<IFFTReal_Traits>
466 {
467  vec _scratchpad;
468  int _transform_length;
469 public:
470  Transform(): _transform_length(0) {}
471 
472  void compute_transform(const cvec &in, vec &out) {
473  // Convert Matlab's complex input to the real Hermitian form
474  out.set_size(in.size());
475  out.set_subvector(0, real(in(0, in.size() / 2)));
476  out.set_subvector(in.size() / 2 + 1, -imag(in(in.size() / 2 + 1, in.size() - 1)));
477 
478  int info;
479  if(_transform_length != in.size()) {
480  _transform_length = in.size();
481 
482  int min_required_size = 5 * _transform_length + 100; //ACML guides suggest 3*size + 100 here, but ITPP code uses 5.
483  if(_scratchpad.size() < min_required_size) _scratchpad.set_size(min_required_size);
484 
485  acml::zdfft(0, _transform_length, out._data(), _scratchpad._data(), &info);
486  }
487  acml::zdfft(1, _transform_length, out._data(), _scratchpad._data(), &info);
488  out.set_subvector(1, reverse(out(1, _transform_length - 1)));
489 
490  // Normalise output data
491  double factor = 1.0 / std::sqrt(static_cast<double>(_transform_length));
492  out *= factor;
493  }
494 
495  void reset() {*this = Transform();}
496 };
497 
498 
499 
500 #endif // defined(HAVE_FFT_ACML)
501 
502 
503 #if defined(HAVE_FFTW3)
504 //FFTW3-specific implementations
505 
506 //FFTW3-related notes:
507 //Based on the FFtW3 documentation, it is thread-safe to call fftw_execute family functions simultaniously from several threads assuming that data sets are different in each thread.
508 //FFTW plans creation-destruction is not thread-safe and should be serialized by the caller. FFTW provides some functions to execute transforms with multiple threads (assuming FFTW
509 // is compiled and linked with multithreading support). Current ITPP implementation does not use any of them.
510 
511 template<> inline void init_fft_library<SingleThreaded>() {} //assume no actions required.
512 template<> inline void init_fft_library<OmpThreaded>() {}
513 //define global lock for operations with FFTW plans.
514 Mutex& get_library_lock()
515 {
516  static Mutex FFTW3LibraryLock;
517  return FFTW3LibraryLock;
518 }
519 //---------------------------------------------------------------------------
520 // FFT/IFFT based on FFTW
521 //---------------------------------------------------------------------------
522 inline void destroy_plan(fftw_plan p)
523 {
524  if(p != NULL) fftw_destroy_plan(p); // destroy the plan
525 }
526 
527 template<> class Transform<FFTCplx_Traits>
528 {
529  fftw_plan _p;
530  int _transform_length;
531 public:
532  Transform(): _p(NULL), _transform_length(0) {}
533 
534  void compute_transform(const cvec &in, cvec &out) {
535  out.set_size(in.size(), false);
536  if(_transform_length != in.size()) {
537  Lock l(get_library_lock()); //apply global library lock on plan changes
538 
539  _transform_length = in.size();
540  destroy_plan(_p); // destroy the previous plan
541  // create a new plan (creation of plan guarantees not to return NULL)
542  _p = fftw_plan_dft_1d(_transform_length, (fftw_complex *)in._data(),
543  (fftw_complex *)out._data(),
544  FFTW_FORWARD, FFTW_ESTIMATE);
545  }
546  //compute FFT using the GURU FFTW interface
547  fftw_execute_dft(_p, (fftw_complex *)in._data(),
548  (fftw_complex *)out._data());
549  }
550 
551  void reset() {destroy_plan(_p); *this = Transform();}
552 };
553 
554 template<> class Transform<IFFTCplx_Traits>
555 {
556  fftw_plan _p;
557  int _transform_length;
558 public:
559  Transform(): _p(NULL), _transform_length(0) {}
560 
561  void compute_transform(const cvec &in, cvec &out) {
562  out.set_size(in.size(), false);
563  if(_transform_length != in.size()) {
564  Lock l(get_library_lock()); //apply global library lock on plan changes
565 
566  _transform_length = in.size();
567  destroy_plan(_p); // destroy the previous plan
568 
569  // create a new plan (creation of plan guarantees not to return NULL)
570  _p = fftw_plan_dft_1d(_transform_length, (fftw_complex *)in._data(),
571  (fftw_complex *)out._data(),
572  FFTW_BACKWARD, FFTW_ESTIMATE);
573  }
574  //compute FFT using the GURU FFTW interface
575  fftw_execute_dft(_p, (fftw_complex *)in._data(),
576  (fftw_complex *)out._data());
577  // scale output
578  double inv_N = 1.0 / _transform_length;
579  out *= inv_N;
580 
581  }
582 
583  void reset() {destroy_plan(_p); *this = Transform();}
584 };
585 
586 template<> class Transform<FFTReal_Traits>
587 {
588  fftw_plan _p;
589  int _transform_length;
590 public:
591  Transform(): _p(NULL), _transform_length(0) {}
592 
593  void compute_transform(const vec &in, cvec &out) {
594  out.set_size(in.size(), false);
595  if(_transform_length != in.size()) {
596  Lock l(get_library_lock()); //apply global library lock on plan changes
597 
598  _transform_length = in.size();
599  destroy_plan(_p); // destroy the previous plan
600 
601  // create a new plan (creation of plan guarantees not to return NULL)
602  _p = fftw_plan_dft_r2c_1d(_transform_length, (double *)in._data(),
603  (fftw_complex *)out._data(),
604  FFTW_ESTIMATE);
605  }
606  //compute FFT using the GURU FFTW interface
607  fftw_execute_dft_r2c(_p, (double *)in._data(),
608  (fftw_complex *)out._data());
609  // Real FFT does not compute the 2nd half of the FFT points because it
610  // is redundant to the 1st half. However, we want all of the data so we
611  // fill it in. This is consistent with Matlab's functionality
612  int offset = ceil_i(_transform_length / 2.0);
613  int n_elem = _transform_length - offset;
614  for(int i = 0; i < n_elem; ++i) {
615  out(offset + i) = std::conj(out(n_elem - i));
616  }
617  }
618 
619  void reset() {destroy_plan(_p); *this = Transform();}
620 };
621 
622 template<> class Transform<IFFTReal_Traits>
623 {
624  fftw_plan _p;
625  int _transform_length;
626 public:
627  Transform(): _p(NULL), _transform_length(0) {}
628 
629  void compute_transform(const cvec &in, vec &out) {
630  out.set_size(in.size(), false);
631  if(_transform_length != in.size()) {
632  Lock l(get_library_lock()); //apply global library lock on plan changes
633 
634  _transform_length = in.size();
635  destroy_plan(_p); // destroy the previous plan
636 
637  // create a new plan (creation of plan guarantees not to return NULL)
638  _p = fftw_plan_dft_c2r_1d(_transform_length, (fftw_complex *)in._data(),
639  (double *)out._data(),
640  FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
641  }
642  //compute FFT using the GURU FFTW interface
643  fftw_execute_dft_c2r(_p, (fftw_complex *)in._data(),
644  (double *)out._data());
645  // scale output
646  double inv_N = 1.0 / _transform_length;
647  out *= inv_N;
648 
649  }
650 
651  void reset() {destroy_plan(_p); *this = Transform();}
652 };
653 
654 //---------------------------------------------------------------------------
655 // DCT/IDCT based on FFTW
656 //---------------------------------------------------------------------------
657 template<> class Transform<DCT_Traits>
658 {
659  fftw_plan _p;
660  int _transform_length;
661 public:
662  Transform(): _p(NULL), _transform_length(0) {}
663 
664  void compute_transform(const vec &in, vec &out) {
665  out.set_size(in.size(), false);
666  if(_transform_length != in.size()) {
667  Lock l(get_library_lock()); //apply global library lock on plan changes
668 
669  _transform_length = in.size();
670  destroy_plan(_p); // destroy the previous plan
671 
672  // create a new plan (creation of plan guarantees not to return NULL)
673  _p = fftw_plan_r2r_1d(_transform_length, (double *)in._data(),
674  (double *)out._data(),
675  FFTW_REDFT10, FFTW_ESTIMATE);
676  }
677  // compute FFT using the GURU FFTW interface
678  fftw_execute_r2r(_p, (double *)in._data(), (double *)out._data());
679 
680  // Scale to matlab definition format
681  out /= std::sqrt(2.0 * _transform_length);
682  out(0) /= std::sqrt(2.0);
683  }
684 
685  void reset() {destroy_plan(_p); *this = Transform();}
686 };
687 
688 template<> class Transform<IDCT_Traits>
689 {
690  fftw_plan _p;
691  int _transform_length;
692 public:
693  Transform(): _p(NULL), _transform_length(0) {}
694 
695  void compute_transform(const vec &in, vec &out) {
696  out = in;
697 
698  // Rescale to FFTW format
699  out(0) *= std::sqrt(2.0);
700  out /= std::sqrt(2.0 * in.size());
701 
702  if(_transform_length != in.size()) {
703  Lock l(get_library_lock()); //apply global library lock on plan changes
704 
705  _transform_length = in.size();
706  destroy_plan(_p); // destroy the previous plan
707 
708  // create a new plan (creation of plan guarantees not to return NULL)
709  _p = fftw_plan_r2r_1d(_transform_length, (double *)in._data(),
710  (double *)out._data(),
711  FFTW_REDFT01, FFTW_ESTIMATE);
712  }
713  // compute FFT using the GURU FFTW interface
714  fftw_execute_r2r(_p, (double *)out._data(), (double *)out._data());
715  }
716 
717  void reset() {destroy_plan(_p); *this = Transform();}
718 };
719 
720 #endif // defined(HAVE_FFTW3)
721 
722 #if defined(HAVE_FFT_MKL) || defined(HAVE_FFT_ACML)
723 
724 //---------------------------------------------------------------------------
725 // DCT/IDCT based on MKL or ACML
726 //---------------------------------------------------------------------------
727 
728 //use FFT on real values to perform DCT
729 template<> class Transform<DCT_Traits>
730 {
731  Transform<FFTReal_Traits> _tr;
732 public:
733  Transform() {}
734 
735  void compute_transform(const vec &in, vec &out) {
736  int N = in.size();
737  if(N == 1)
738  out = in;
739  else {
740  cvec c;
741  _tr.compute_transform(concat(in, reverse(in)), c);
742  c.set_size(N, true);
743  for(int i = 0; i < N; i++) {
744  c(i) *= std::complex<double>(std::cos(pi * i / N / 2), std::sin(-pi * i / N / 2))
745  / std::sqrt(2.0 * N);
746  }
747  out = real(c);
748  out(0) /= std::sqrt(2.0);
749  }
750  }
751 
752  void reset() {_tr.reset();}
753 };
754 
755 //use IFFT with real output to perform IDCT
756 template<> class Transform<IDCT_Traits>
757 {
758  Transform<IFFTReal_Traits> _tr;
759 public:
760  Transform() {}
761 
762  void compute_transform(const vec &in, vec &out) {
763  int N = in.size();
764  if(N == 1)
765  out = in;
766  else {
767  cvec c = to_cvec(in);
768  c.set_size(2 * N, true);
769  c(0) *= std::sqrt(2.0);
770  for(int i = 0; i < N; i++) {
771  c(i) *= std::complex<double>(std::cos(pi * i / N / 2), std::sin(pi * i / N / 2))
772  * std::sqrt(2.0 * N);
773  }
774  for(int i = N - 1; i >= 1; i--) {
775  c(c.size() - i) = c(i) * std::complex<double>(std::cos(pi * i / N),
776  std::sin(-pi * i / N));
777  }
778  _tr.compute_transform(c, out);
779  out.set_size(N, true);
780  }
781  }
782 
783  void reset() {_tr.reset();}
784 };
785 
786 #endif
787 
788 #if defined(HAVE_FFT)
789 //lock-protected transform to serialize accesses to the context from several threads
790 template<typename TransformTraits> class Locked_Transform : private Transform<TransformTraits>
791 {
792  typedef Transform<TransformTraits> Base;
793  Mutex _m;
794 public:
795  Locked_Transform() {}
796  //release context
797  void release_context() {Lock l(_m); Base::reset();}
798  void run_transform(const typename TransformTraits::InType& in, typename TransformTraits::OutType& out) {Lock l(_m); Base::compute_transform(in, out);}
799 };
800 
801 //Typical multithreaded application creates several threads upon entry to parallel region and join them upon exit from it.
802 //Threads used to perform parallel computations can either be terminated upon exit from the parallel region or left in the
803 //parked state, so in the next parallel region application can reuse already created threads from the team instead of the
804 //time-consuming creation of new threads. There is no way to control threads creation-destruction with OMP and, therefore
805 //there is no way to implement automatic clean-up of transform computation contexts for each thread (basically, this means that
806 //we can not appropriately release FFT library resources and this results in memory leak)
807 
808 //In order to solve this problem and implement the FFT transforms in multithreded environment library relyes on the statically
809 //created pool of transform contexts.
810 
811 //Each thread willing to run the transfrom queries the context index from transfrom provider. Thread uses assigned index and
812 //corresponding context to compute the transforms during it's lifetime. Provider assigns contexts in round-robbin fashion, so
813 //context used by the exited threads are reused by newly created ones.
814 
815 //Single context can be reused by multiple threads if application created more then contexts_per_transform_type threads performing
816 //some type of transform.
817 static bool is_library_initialized = false;
818 
819 template<typename TransformTraits> class Transform_Provider
820 {
821  typedef Locked_Transform<TransformTraits> Transform;
822  Transform _transforms[contexts_per_transform_type];
823  int _id;
824 public:
825  Transform_Provider(): _id(0) {
826  if(!is_library_initialized) {
827  //initialize FFT library on first conctruction of any of Transform_Provider objects
828  init_fft_library<ThreadingTag>();
829  is_library_initialized = true;
830  }
831  }
832  int get_context_id() {
833  //assign id in round-robin fashion.
834  int ret = _id + 1;
835  if(ret == contexts_per_transform_type)
836  _id = 0;
837  else
838  _id = ret;
839  return ret;
840  }
841  void run_transform(int id, const typename TransformTraits::InType& in, typename TransformTraits::OutType& out) {
842  _transforms[id - 1].run_transform(in, out);
843  }
844  //provider destructor. releases context resources.
845  //destructor is called after the main() exits, so there is no need to protect context release with mutex
846  ~Transform_Provider() {
847  for(int i = 0; i < contexts_per_transform_type; ++i)
848  _transforms[i].release_context();
849  }
850 };
851 
852 //Transform_Provider is constructed upon the first request
853 template<typename TransformTraits> Transform_Provider<TransformTraits>& get_transform_provider()
854 {
855  static Transform_Provider<TransformTraits> p;
856  return p;
857 }
858 
859 void fft(const cvec &in, cvec &out)
860 {
861  static int context_id = 0;
862  #pragma omp threadprivate(context_id)
863 
864  if(context_id == 0) {
865  //first-time transform call
866  #pragma omp critical
867  {
868  //serialize access to transform provider to get the id
869  context_id = get_transform_provider<FFTCplx_Traits>().get_context_id();
870  }
871  }
872  it_assert(in.size() > 0, "fft(): zero-sized input detected");
873  //there is no need to serialize here, since provider is constructed at this point
874  get_transform_provider<FFTCplx_Traits>().run_transform(context_id, in, out);
875 }
876 
877 void ifft(const cvec &in, cvec &out)
878 {
879  static int context_id = 0;
880  #pragma omp threadprivate(context_id)
881 
882  if(context_id == 0) {
883  //first-time transform call
884  #pragma omp critical
885  {
886  //serialize access to transform provider to get the id
887  context_id = get_transform_provider<IFFTCplx_Traits>().get_context_id();
888  }
889  }
890  it_assert(in.size() > 0, "ifft(): zero-sized input detected");
891  //there is no need to serialize here, since provider is constructed at this point
892  get_transform_provider<IFFTCplx_Traits>().run_transform(context_id, in, out);
893 }
894 
895 void fft_real(const vec &in, cvec &out)
896 {
897  static int context_id = 0;
898  #pragma omp threadprivate(context_id)
899 
900  if(context_id == 0) {
901  //first-time transform call
902  #pragma omp critical
903  {
904  //serialize access to transform provider to get the id
905  context_id = get_transform_provider<FFTReal_Traits>().get_context_id();
906  }
907  }
908  it_assert(in.size() > 0, "fft_real(): zero-sized input detected");
909  //there is no need to serialize here, since provider is constructed at this point
910  get_transform_provider<FFTReal_Traits>().run_transform(context_id, in, out);
911 }
912 
913 void ifft_real(const cvec &in, vec &out)
914 {
915  static int context_id = 0;
916  #pragma omp threadprivate(context_id)
917 
918  if(context_id == 0) {
919  //first-time transform call
920  #pragma omp critical
921  {
922  //serialize access to transform provider to get the id
923  context_id = get_transform_provider<IFFTReal_Traits>().get_context_id();
924  }
925  }
926  it_assert(in.size() > 0, "ifft_real(): zero-sized input detected");
927  //there is no need to serialize here, since provider is constructed at this point
928  get_transform_provider<IFFTReal_Traits>().run_transform(context_id, in, out);
929 }
930 
931 void dct(const vec &in, vec &out)
932 {
933  static int context_id = 0;
934  #pragma omp threadprivate(context_id)
935 
936  if(context_id == 0) {
937  //first-time transform call
938  #pragma omp critical
939  {
940  //serialize access to transform provider to get the id
941  context_id = get_transform_provider<DCT_Traits>().get_context_id();
942  }
943  }
944  it_assert(in.size() > 0, "dct(): zero-sized input detected");
945  //there is no need to serialize here, since provider is definitely constructed at this point
946  get_transform_provider<DCT_Traits>().run_transform(context_id, in, out);
947 }
948 
949 void idct(const vec &in, vec &out)
950 {
951  static int context_id = 0;
952  #pragma omp threadprivate(context_id)
953 
954  if(context_id == 0) {
955  //first-time transform call
956  #pragma omp critical
957  {
958  //serialize access to transform provider to get the id
959  context_id = get_transform_provider<IDCT_Traits>().get_context_id();
960  }
961  }
962  it_assert(in.size() > 0, "dct(): zero-sized input detected");
963  //there is no need to serialize here, since provider is definitely constructed at this point
964  get_transform_provider<IDCT_Traits>().run_transform(context_id, in, out);
965 }
966 
967 bool have_fourier_transforms() {return true;}
968 bool have_cosine_transforms() {return true;}
969 #else
970 
971 void fft(const cvec &in, cvec &out)
972 {
973  it_error("FFT library is needed to use fft() function");
974 }
975 
976 void ifft(const cvec &in, cvec &out)
977 {
978  it_error("FFT library is needed to use ifft() function");
979 }
980 
981 void fft_real(const vec &in, cvec &out)
982 {
983  it_error("FFT library is needed to use fft_real() function");
984 }
985 
986 void ifft_real(const cvec &in, vec & out)
987 {
988  it_error("FFT library is needed to use ifft_real() function");
989 }
990 
991 void dct(const vec &in, vec &out)
992 {
993  it_error("FFT library is needed to use dct() function");
994 }
995 
996 void idct(const vec &in, vec &out)
997 {
998  it_error("FFT library is needed to use idct() function");
999 }
1000 
1001 bool have_fourier_transforms() {return false;}
1002 bool have_cosine_transforms() {return false;}
1003 
1004 #endif // defined(HAVE_FFT)
1005 
1006 
1007 cvec fft(const cvec &in)
1008 {
1009  cvec out;
1010  fft(in, out);
1011  return out;
1012 }
1013 
1014 cvec fft(const cvec &in, const int N)
1015 {
1016  cvec in2 = in;
1017  cvec out;
1018  in2.set_size(N, true);
1019  fft(in2, out);
1020  return out;
1021 }
1022 
1023 cvec ifft(const cvec &in)
1024 {
1025  cvec out;
1026  ifft(in, out);
1027  return out;
1028 }
1029 
1030 cvec ifft(const cvec &in, const int N)
1031 {
1032  cvec in2 = in;
1033  cvec out;
1034  in2.set_size(N, true);
1035  ifft(in2, out);
1036  return out;
1037 }
1038 
1039 cvec fft_real(const vec& in)
1040 {
1041  cvec out;
1042  fft_real(in, out);
1043  return out;
1044 }
1045 
1046 cvec fft_real(const vec& in, const int N)
1047 {
1048  vec in2 = in;
1049  cvec out;
1050  in2.set_size(N, true);
1051  fft_real(in2, out);
1052  return out;
1053 }
1054 
1055 vec ifft_real(const cvec &in)
1056 {
1057  vec out;
1058  ifft_real(in, out);
1059  return out;
1060 }
1061 
1062 vec ifft_real(const cvec &in, const int N)
1063 {
1064  cvec in2 = in;
1065  in2.set_size(N, true);
1066  vec out;
1067  ifft_real(in2, out);
1068  return out;
1069 }
1070 
1071 vec dct(const vec &in)
1072 {
1073  vec out;
1074  dct(in, out);
1075  return out;
1076 }
1077 
1078 vec dct(const vec &in, const int N)
1079 {
1080  vec in2 = in;
1081  vec out;
1082  in2.set_size(N, true);
1083  dct(in2, out);
1084  return out;
1085 }
1086 
1087 
1088 vec idct(const vec &in)
1089 {
1090  vec out;
1091  idct(in, out);
1092  return out;
1093 }
1094 
1095 vec idct(const vec &in, const int N)
1096 {
1097  vec in2 = in;
1098  vec out;
1099  in2.set_size(N, true);
1100  idct(in2, out);
1101  return out;
1102 }
1103 // ----------------------------------------------------------------------
1104 // Instantiation
1105 // ----------------------------------------------------------------------
1106 
1107 template ITPP_EXPORT vec dht(const vec &v);
1108 template ITPP_EXPORT cvec dht(const cvec &v);
1109 
1110 template ITPP_EXPORT void dht(const vec &vin, vec &vout);
1111 template ITPP_EXPORT void dht(const cvec &vin, cvec &vout);
1112 
1113 template ITPP_EXPORT void self_dht(vec &v);
1114 template ITPP_EXPORT void self_dht(cvec &v);
1115 
1116 template ITPP_EXPORT vec dwht(const vec &v);
1117 template ITPP_EXPORT cvec dwht(const cvec &v);
1118 
1119 template ITPP_EXPORT void dwht(const vec &vin, vec &vout);
1120 template ITPP_EXPORT void dwht(const cvec &vin, cvec &vout);
1121 
1122 template ITPP_EXPORT void self_dwht(vec &v);
1123 template ITPP_EXPORT void self_dwht(cvec &v);
1124 
1125 template ITPP_EXPORT mat dht2(const mat &m);
1126 template ITPP_EXPORT cmat dht2(const cmat &m);
1127 
1128 template ITPP_EXPORT mat dwht2(const mat &m);
1129 template ITPP_EXPORT cmat dwht2(const cmat &m);
1130 
1131 } // namespace itpp
1132 
SourceForge Logo

Generated on Sat May 25 2013 16:32:23 for IT++ by Doxygen 1.8.2