// TensorFunctors.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
12 
13 namespace Eigen {
14 namespace internal {
15 
16 
// Functor computing a % divisor for a divisor fixed at construction time.
// NOTE(review): operator% requires an integral (or %-supporting) Scalar type;
// floating-point scalars would need fmod — confirm callers use integers only.
template <typename Scalar>
struct scalar_mod_op {
  EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {}
  // Returns a modulo the stored divisor.
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; }
  const Scalar m_divisor;
};
// Cost model for scalar_mod_op: modulo is priced as two multiplies; no
// vectorized (packet) path is provided.
template <typename Scalar>
struct functor_traits<scalar_mod_op<Scalar> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
29 
30 
35 template <typename T>
36 struct scalar_sigmoid_op {
37  EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
38  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
39  const T one = T(1);
40  return one / (one + std::exp(-x));
41  }
42 
43  template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
44  Packet packetOp(const Packet& x) const {
45  const Packet one = pset1<Packet>(1);
46  return pdiv(one, padd(one, pexp(pnegate(x))));
47  }
48 };
49 
// Cost and vectorization traits for scalar_sigmoid_op: the packet path is
// enabled only when the packet type supports add, div, negate and exp.
template <typename T>
struct functor_traits<scalar_sigmoid_op<T> > {
  enum {
    Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
    PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
                   packet_traits<T>::HasNegate && packet_traits<T>::HasExp
  };
};
58 
59 
60 // Standard reduction functors
61 template <typename T> struct SumReducer
62 {
63  static const bool PacketAccess = true;
64  static const bool IsStateful = false;
65 
66  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
67  (*accum) += t;
68  }
69  template <typename Packet>
70  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
71  (*accum) = padd<Packet>(*accum, p);
72  }
73 
74  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
75  return static_cast<T>(0);
76  }
77  template <typename Packet>
78  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
79  return pset1<Packet>(0);
80  }
81  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
82  return accum;
83  }
84  template <typename Packet>
85  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
86  return vaccum;
87  }
88  template <typename Packet>
89  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
90  return saccum + predux(vaccum);
91  }
92 };
93 
// Reduction functor computing the arithmetic mean.
// Stateful: it counts how many scalars and packets were folded in, so each
// evaluation thread must own its copy of the reducer.
template <typename T> struct MeanReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  MeanReducer() : scalarCount_(0), packetCount_(0) { }

  // Accumulate one scalar and remember how many were seen.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    (*accum) += t;
    scalarCount_++;
  }
  // Accumulate one packet; packetCount_ counts packets, not scalar lanes.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
    (*accum) = padd<Packet>(*accum, p);
    packetCount_++;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }
  // NOTE(review): divides by the respective count; if finalize() runs before
  // any reduce() the count is zero — presumably the reduction evaluators
  // guarantee at least one element. Confirm against callers.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum / scalarCount_;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, pset1<Packet>(packetCount_));
  }
  // Combined mean: total sum over total scalar element count
  // (each packet contributes unpacket_traits<Packet>::size scalars).
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
  }

 protected:
  int scalarCount_;
  int packetCount_;
};
135 
136 template <typename T> struct MaxReducer
137 {
138  static const bool PacketAccess = true;
139  static const bool IsStateful = false;
140 
141  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
142  if (t > *accum) { *accum = t; }
143  }
144  template <typename Packet>
145  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
146  (*accum) = pmax<Packet>(*accum, p);
147  }
148 
149  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
150  return -(std::numeric_limits<T>::max)();
151  }
152  template <typename Packet>
153  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
154  return pset1<Packet>(-(std::numeric_limits<T>::max)());
155  }
156  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
157  return accum;
158  }
159  template <typename Packet>
160  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
161  return vaccum;
162  }
163  template <typename Packet>
164  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
165  return numext::maxi(saccum, predux_max(vaccum));
166  }
167 };
168 
169 template <typename T> struct MinReducer
170 {
171  static const bool PacketAccess = true;
172  static const bool IsStateful = false;
173 
174  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
175  if (t < *accum) { *accum = t; }
176  }
177  template <typename Packet>
178  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
179  (*accum) = pmin<Packet>(*accum, p);
180  }
181 
182  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
183  return (std::numeric_limits<T>::max)();
184  }
185  template <typename Packet>
186  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
187  return pset1<Packet>((std::numeric_limits<T>::max)());
188  }
189  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
190  return accum;
191  }
192  template <typename Packet>
193  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
194  return vaccum;
195  }
196  template <typename Packet>
197  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
198  return numext::mini(saccum, predux_min(vaccum));
199  }
200 };
201 
202 
203 template <typename T> struct ProdReducer
204 {
205  static const bool PacketAccess = true;
206  static const bool IsStateful = false;
207 
208  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
209  (*accum) *= t;
210  }
211  template <typename Packet>
212  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
213  (*accum) = pmul<Packet>(*accum, p);
214  }
215 
216  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
217  return static_cast<T>(1);
218  }
219  template <typename Packet>
220  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
221  return pset1<Packet>(1);
222  }
223  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
224  return accum;
225  }
226  template <typename Packet>
227  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
228  return vaccum;
229  }
230  template <typename Packet>
231  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
232  return saccum * predux_mul(vaccum);
233  }
234 };
235 
236 
// Boolean AND reduction: result is true iff every reduced value is true.
struct AndReducer
{
  static const bool PacketAccess = false;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
    *accum = *accum && t;
  }
  // Identity element of AND.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
    return accum;
  }
};
250 
// Boolean OR reduction: result is true iff any reduced value is true.
struct OrReducer {
  static const bool PacketAccess = false;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
    *accum = *accum || t;
  }
  // Identity element of OR.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
    return false;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
    return accum;
  }
};
263 
264 // Argmin/Argmax reducers
265 template <typename T> struct ArgMaxTupleReducer
266 {
267  static const bool PacketAccess = false;
268  static const bool IsStateful = false;
269 
270  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
271  if (t.second > accum->second) { *accum = t; }
272  }
273  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
274  return T(0, NumTraits<typename T::second_type>::lowest());
275  }
276  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
277  return accum;
278  }
279 };
280 
// Reducer keeping the (index, value) tuple with the smallest value.
// T is expected to be a pair-like type: first = index, second = value.
template <typename T> struct ArgMinTupleReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
    if (t.second < accum->second) { *accum = t; }
  }
  // Start at index 0 with the highest representable value, so any real
  // element wins the first comparison.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return T(0, NumTraits<typename T::second_type>::highest());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
    return accum;
  }
};
296 
297 
298 // Random number generation
// Best-effort, time-based seed used when a generator is created in
// non-deterministic mode. Internal linkage (anonymous namespace).
namespace {
#ifdef __CUDA_ARCH__
// Device-side variant: clock() returns the per-SM cycle counter.
__device__ int get_random_seed() {
  return clock();
}
#else
int get_random_seed() {
#ifdef _WIN32
  // Windows: combine seconds and milliseconds of the current system time.
  SYSTEMTIME st;
  GetSystemTime(&st);
  return st.wSecond + 1000 * st.wMilliseconds;
#elif defined __APPLE__
  // macOS: monotonic tick count, truncated to int.
  return static_cast<int>(mach_absolute_time());
#else
  // Other POSIX: nanosecond component of the realtime clock.
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return static_cast<int>(ts.tv_nsec);
#endif
}
#endif
}
320 
321 #if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
322 // We're not compiling a cuda kernel
// Generic host-side uniform random generator, used when not compiling a CUDA
// kernel. Backed by Eigen's random<T>() (rand()-based), seeded globally.
template <typename T> class UniformRandomGenerator {

 public:
  static const bool PacketAccess = true;

  // In non-deterministic mode the global C RNG is re-seeded from the clock;
  // in deterministic mode the default rand() sequence is reused.
  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      srand(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
  }

  // Indices are ignored: every call returns a fresh pseudo-random value.
  template<typename Index>
  T operator()(Index, Index = 0) const {
    return random<T>();
  }
  // Packet path: fill an aligned scratch buffer and load it as one packet.
  template<typename Index>
  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
    const int packetSize = internal::packet_traits<T>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = random<T>();
    }
    return internal::pload<typename internal::packet_traits<T>::type>(values);
  }

 private:
  bool m_deterministic;
};
354 
355 #if __cplusplus > 199711
// C++11 host specialization for float: per-instance std::mt19937 with a
// uniform real distribution (default range [0, 1)).
template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  // Copies are re-seeded from a draw of the source generator, so a copy
  // produces a different stream than its source.
  UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
    m_generator.seed(other(0, 0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }

  // Indices are ignored: each call returns the next sample.
  template<typename Index>
  float operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  // Packet path: fill an aligned scratch buffer and load it as one packet.
  template<typename Index>
  typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
    const int packetSize = internal::packet_traits<float>::size;
    EIGEN_ALIGN_MAX float values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i, j);
    }
    return internal::pload<typename internal::packet_traits<float>::type>(values);
  }

 private:
  // Non-copyable by assignment (declared, not defined).
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
  // Make sure m_deterministic comes first to match the layout of the cpu
  // version of the code.
  bool m_deterministic;
  // mutable: drawing samples mutates RNG state inside const operator().
  mutable std::mt19937 m_generator;
  mutable std::uniform_real_distribution<float> m_distribution;
};
392 
// C++11 host specialization for double: mirrors the float specialization.
template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  // Copies are re-seeded from a draw of the source generator, so a copy
  // produces a different stream than its source.
  UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
    m_generator.seed(other(0, 0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }

  // Indices are ignored: each call returns the next sample.
  template<typename Index>
  double operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  // Packet path: fill an aligned scratch buffer and load it as one packet.
  template<typename Index>
  typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
    const int packetSize = internal::packet_traits<double>::size;
    EIGEN_ALIGN_MAX double values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i, j);
    }
    return internal::pload<typename internal::packet_traits<double>::type>(values);
  }

 private:
  // Non-copyable by assignment (declared, not defined).
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
  // Make sure m_deterministic comes first to match the layout of the cpu
  // version of the code.
  bool m_deterministic;
  // mutable: drawing samples mutates RNG state inside const operator().
  mutable std::mt19937 m_generator;
  mutable std::uniform_real_distribution<double> m_distribution;
};
429 #endif
430 
431 #else
432 
433 // We're compiling a cuda kernel
434 template <typename T> class UniformRandomGenerator;
435 
// Device-side uniform generator for float, backed by cuRAND's Philox counter
// RNG. Each thread uses its own subsequence (tid) of the same seed.
template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  // NOTE(review): copying re-runs curand_init with the same seed/sequence, so
  // a copy restarts the stream rather than continuing it — confirm this is
  // the intended semantics for evaluator copies.
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  // Indices are ignored: each call advances the per-thread stream.
  template<typename Index>
  __device__ float operator()(Index, Index = 0) const {
    return curand_uniform(&m_state);
  }
  // Four samples at once, matching the float4 packet type on CUDA.
  template<typename Index>
  __device__ float4 packetOp(Index, Index = 0) const {
    return curand_uniform4(&m_state);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
466 
// Device-side uniform generator for double, backed by cuRAND Philox.
template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  // Copying restarts the per-thread stream (same seed and sequence).
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ double operator()(Index, Index = 0) const {
    return curand_uniform_double(&m_state);
  }
  // Two samples at once, matching the double2 packet type on CUDA.
  template<typename Index>
  __device__ double2 packetOp(Index, Index = 0) const {
    return curand_uniform2_double(&m_state);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
495 
// Device-side uniform generator for complex<float>: draws four floats and
// uses two of them as the real and imaginary parts. No packet path.
template <> class UniformRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  // Copying restarts the per-thread stream (same seed and sequence).
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<float> operator()(Index, Index = 0) const {
    float4 vals = curand_uniform4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
521 
// Device-side uniform generator for complex<double>: draws two doubles as
// the real and imaginary parts. No packet path.
template <> class UniformRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  // Copying restarts the per-thread stream (same seed and sequence).
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<double> operator()(Index, Index = 0) const {
    double2 vals = curand_uniform2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
547 
548 #endif
549 
550 
551 #if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711
552 // We're not compiling a cuda kernel
// Host-side (C++11) standard-normal N(0, 1) generator backed by std::mt19937.
template <typename T> class NormalRandomGenerator {
 public:
  static const bool PacketAccess = true;

  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  // Copies are re-seeded from a draw of the source generator so they emit a
  // different stream. NOTE(review): the draw is normal-distributed and can be
  // negative before the UINT_MAX scaling — confirm the resulting seed
  // conversion is intended.
  NormalRandomGenerator(const NormalRandomGenerator& other)
      : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution) {
    m_generator.seed(other(0, 0) * UINT_MAX);
  }

  // Indices are ignored: each call returns the next N(0, 1) sample.
  template<typename Index>
  T operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  // Packet path: fill an aligned scratch buffer and load it as one packet.
  template<typename Index>
  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
    const int packetSize = internal::packet_traits<T>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = m_distribution(m_generator);
    }
    return internal::pload<typename internal::packet_traits<T>::type>(values);
  }

 private:
  bool m_deterministic;
  // mutable: sampling mutates distribution/engine state inside const methods.
  mutable std::normal_distribution<T> m_distribution;
  mutable std::mt19937 m_generator;
};
586 
587 #elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
588 
589 // We're compiling a cuda kernel
590 template <typename T> class NormalRandomGenerator;
591 
// Device-side N(0, 1) generator for float, backed by cuRAND Philox.
// Each thread uses its own subsequence (tid) of the same seed.
template <> class NormalRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  // Copying restarts the per-thread stream (same seed and sequence).
  __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ float operator()(Index, Index = 0) const {
    return curand_normal(&m_state);
  }
  // Four samples at once, matching the float4 packet type on CUDA.
  template<typename Index>
  __device__ float4 packetOp(Index, Index = 0) const {
    return curand_normal4(&m_state);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
620 
// Device-side N(0, 1) generator for double, backed by cuRAND Philox.
template <> class NormalRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  // Copying restarts the per-thread stream (same seed and sequence).
  __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ double operator()(Index, Index = 0) const {
    return curand_normal_double(&m_state);
  }
  // Two samples at once, matching the double2 packet type on CUDA.
  template<typename Index>
  __device__ double2 packetOp(Index, Index = 0) const {
    return curand_normal2_double(&m_state);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
649 
// Device-side N(0, 1) generator for complex<float>: draws four floats and
// uses two of them as the real and imaginary parts. No packet path.
template <> class NormalRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  // Copying restarts the per-thread stream (same seed and sequence).
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<float> operator()(Index, Index = 0) const {
    float4 vals = curand_normal4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
675 
// Device-side N(0, 1) generator for complex<double>: draws two doubles as
// the real and imaginary parts. No packet path.
template <> class NormalRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  // Copying restarts the per-thread stream (same seed and sequence).
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<double> operator()(Index, Index = 0) const {
    double2 vals = curand_normal2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  // mutable: curand state advances inside const sampling methods.
  mutable curandStatePhilox4_32_10_t m_state;
};
701 
702 #else
703 
// Placeholder used when neither the host C++11 path nor the CUDA path is
// available: it only records the deterministic flag and provides no sampling
// methods, so instantiating a normal-random expression will fail to compile.
template <typename T> class NormalRandomGenerator {
 public:
  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {}

 private:
  bool m_deterministic;
};
711 
712 #endif
713 
714 
715 template <typename T, typename Index, size_t NumDims>
716 class GaussianGenerator {
717  public:
718  static const bool PacketAccess = false;
719 
720  EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
721  const array<T, NumDims>& std_devs)
722  : m_means(means)
723  {
724  for (size_t i = 0; i < NumDims; ++i) {
725  m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
726  }
727  }
728 
729  T operator()(const array<Index, NumDims>& coordinates) const {
730  T tmp = T(0);
731  for (size_t i = 0; i < NumDims; ++i) {
732  T offset = coordinates[i] - m_means[i];
733  tmp += offset * offset / m_two_sigmas[i];
734  }
735  return std::exp(-tmp);
736  }
737 
738  private:
739  array<T, NumDims> m_means;
740  array<T, NumDims> m_two_sigmas;
741 };
742 
743 
744 } // end namespace internal
745 } // end namespace Eigen
746 
747 #endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
// Namespace containing all symbols from the Eigen library.
// Definition: CXX11Meta.h:13