Eigen  3.2.92
ProductEvaluators.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6 // Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 
13 #ifndef EIGEN_PRODUCTEVALUATORS_H
14 #define EIGEN_PRODUCTEVALUATORS_H
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
// Generic evaluator for any Product expression: it simply forwards to
// product_evaluator, whose partial specializations dispatch on the product
// kind (Options) and, further below, on the operand shapes.
28 template<typename Lhs, typename Rhs, int Options>
29 struct evaluator<Product<Lhs, Rhs, Options> >
30  : public product_evaluator<Product<Lhs, Rhs, Options> >
31 {
32  typedef Product<Lhs, Rhs, Options> XprType;
33  typedef product_evaluator<XprType> Base;
34 
35  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
36 };
37 
38 // Catch scalar * ( A * B ) and transform it to (A*scalar) * B
39 // TODO we should apply that rule only if that's really helpful
// Traits for the scalar*(A*B) expression: it must be treated exactly like a
// plain product w.r.t. aliasing, i.e., evaluated into a temporary unless the
// user wrote noalias().
40 template<typename Lhs, typename Rhs, typename Scalar>
41 struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
42  : evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
43 {
44  enum { AssumeAliasing = 1 };
45 };
// Evaluator for scalar*(A*B): folds the scalar into the left factor so the
// whole expression is evaluated as the single product (scalar*A)*B.
46 template<typename Lhs, typename Rhs, typename Scalar>
47 struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
48  : public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> >
49 {
50  typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
51  typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;
52 
53  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
 // s * lhs * rhs parses as (s*lhs)*rhs, which matches Base's Product type.
54  : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
55  {}
56 };
57 
58 
// Evaluator for the diagonal of a product: only a few coefficients of A*B are
// needed, so the DefaultProduct is rewrapped as a LazyProduct and evaluated
// coefficient-wise instead of materializing the full product in a temporary.
59 template<typename Lhs, typename Rhs, int DiagIndex>
60 struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
61  : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
62 {
63  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
64  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
65 
66  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
67  : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
68  Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
69  xpr.index() ))
70  {}
71 };
72 
73 
74 // Helper class to perform a matrix product with the destination at hand.
75 // Depending on the sizes of the factors, there are different evaluation strategies
76 // as controlled by internal::product_type.
// Forward declaration only: the partial specializations below (and in other
// Eigen headers) supply the static evalTo/addTo/subTo/scaleAndAddTo members.
// LhsShape/RhsShape and ProductType default from the operands' traits.
77 template< typename Lhs, typename Rhs,
78  typename LhsShape = typename evaluator_traits<Lhs>::Shape,
79  typename RhsShape = typename evaluator_traits<Rhs>::Shape,
80  int ProductType = internal::product_type<Lhs,Rhs>::value>
81 struct generic_product_impl;
82 
// A DefaultProduct may alias its destination (e.g. m = m*m), so the
// assignment machinery must evaluate it into a temporary first.
83 template<typename Lhs, typename Rhs>
84 struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> >
85  : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
86 {
87  enum { AssumeAliasing = 1 };
88 };
89 
// An AliasFreeProduct promises the destination does not alias the operands
// (as with noalias()), so no protective temporary is needed.
90 template<typename Lhs, typename Rhs>
91 struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
92  : evaluator_traits_base<Product<Lhs, Rhs, AliasFreeProduct> >
93 {
94  enum { AssumeAliasing = 0 };
95 };
96 
97 // This is the default evaluator implementation for products:
98 // It creates a temporary and call generic_product_impl
99 template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
100 struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
101  : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
102 {
103  typedef Product<Lhs, Rhs, Options> XprType;
104  typedef typename XprType::PlainObject PlainObject;
105  typedef evaluator<PlainObject> Base;
106  enum {
 // Force evaluation of the product before it is nested into a larger expression.
107  Flags = Base::Flags | EvalBeforeNestingBit
108  };
109 
110  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
111  : m_result(xpr.rows(), xpr.cols())
112  {
 // m_result was only sized above; re-run the base evaluator's constructor
 // in place so that it references m_result before the product is computed.
113  ::new (static_cast<Base*>(this)) Base(m_result);
114 
115 // FIXME shall we handle nested_eval here?,
116 // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
117 // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
118 // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
119 // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
120 // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
121 //
122 // const LhsNested lhs(xpr.lhs());
123 // const RhsNested rhs(xpr.rhs());
124 //
125 // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
126 
 // Dispatch the actual computation into the temporary.
127  generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
128  }
129 
130 protected:
 // Temporary holding the evaluated product; the base evaluator reads from it.
131  PlainObject m_result;
132 };
133 
134 // Dense = Product
// Dense = Product: bypass any temporary and let generic_product_impl write
// directly into dst. Enabled only for Default/AliasFree products; other
// Options (e.g. LazyProduct) go through coefficient-wise assignment.
135 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
136 struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense,
137  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
138 {
139  typedef Product<Lhs,Rhs,Options> SrcXprType;
140  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
141  {
142  // FIXME shall we handle nested_eval here?
143  generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
144  }
145 };
146 
147 // Dense += Product
// Dense += Product: accumulate the product directly into dst via addTo,
// avoiding a temporary for the product result.
148 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
149 struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense,
150  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
151 {
152  typedef Product<Lhs,Rhs,Options> SrcXprType;
153  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
154  {
155  // FIXME shall we handle nested_eval here?
156  generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
157  }
158 };
159 
160 // Dense -= Product
// Dense -= Product: subtract the product directly from dst via subTo,
// avoiding a temporary for the product result.
161 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
162 struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense,
163  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
164 {
165  typedef Product<Lhs,Rhs,Options> SrcXprType;
166  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
167  {
168  // FIXME shall we handle nested_eval here?
169  generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
170  }
171 };
172 
173 
174 // Dense ?= scalar * Product
175 // TODO we should apply that rule if that's really helpful
176 // for instance, this is not good for inner products
// Dense ?= scalar*(A*B): rewritten as dst ?= (scalar*A)*B so the scaling is
// absorbed into the product kernel instead of requiring an extra pass.
// call_assignment_no_alias is used because the product path already handles
// aliasing via the surrounding machinery.
177 template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis>
178 struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
179  const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar>
180 {
181  typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
182  const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
183  static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
184  {
185  call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func);
186  }
187 };
188 
189 //----------------------------------------
190 // Catch "Dense ?= xpr + Product<>" expression to save one temporary
191 // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
192 
// Helper evaluating "dst ?= other + product" in two passes without a
// temporary for the sum: first dst ?= other (Func1), then the product is
// folded in with Func2 (+= or -=). Assumes dst does not alias the operands
// (hence call_assignment_no_alias).
193 template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
194 struct assignment_from_xpr_plus_product
195 {
196  typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType;
197  static void run(DstXprType &dst, const SrcXprType &src, const Func1& func)
198  {
199  call_assignment_no_alias(dst, src.lhs(), func);
200  call_assignment_no_alias(dst, src.rhs(), Func2());
201  }
202 };
203 
// Dense = other + product  ->  dst = other; dst += product.
204 template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
205 struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
206  const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense>
207  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> >
208 {};
// Dense += other + product  ->  dst += other; dst += product.
209 template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
210 struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
211  const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense>
212  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> >
213 {};
// Dense -= other + product  ->  dst -= other; dst -= product.
214 template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
215 struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
216  const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense>
217  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> >
218 {};
219 //----------------------------------------
220 
// Inner product (1x1 result): computed as the sum of the coefficient-wise
// product of the two vectors, written into dst(0,0).
221 template<typename Lhs, typename Rhs>
222 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
223 {
224  template<typename Dst>
225  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
226  {
227  dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
228  }
229 
230  template<typename Dst>
231  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
232  {
233  dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
234  }
235 
236  template<typename Dst>
237  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
238  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
239 };
240 
241 
242 /***********************************************************************
243 * Implementation of outer dense * dense vector product
244 ***********************************************************************/
245 
246 // Column major result
247 template<typename Dst, typename Lhs, typename Rhs, typename Func>
248 EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
249 {
250  evaluator<Rhs> rhsEval(rhs);
251  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
252  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
253  // FIXME not very good if rhs is real and lhs complex while alpha is real too
254  const Index cols = dst.cols();
255  for (Index j=0; j<cols; ++j)
256  func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs);
257 }
258 
259 // Row major result
260 template<typename Dst, typename Lhs, typename Rhs, typename Func>
261 EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
262 {
263  evaluator<Lhs> lhsEval(lhs);
264  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
265  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
266  // FIXME not very good if lhs is real and rhs complex while alpha is real too
267  const Index rows = dst.rows();
268  for (Index i=0; i<rows; ++i)
269  func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs);
270 }
271 
// Outer product (column-vector * row-vector). The per-column/row update is
// expressed as a small functor (set/add/sub/adds) passed to
// outer_product_selector_run, and the is_row_major tag picks the overload
// that traverses dst along its storage order.
272 template<typename Lhs, typename Rhs>
273 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
274 {
 // Tag type: true_type when Dst is row-major, false_type otherwise.
275  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
276  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
277 
278  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
 // dst slice = src
279  struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
 // dst slice += src
280  struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
 // dst slice -= src
281  struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
 // dst slice += alpha * src (used by scaleAndAddTo)
282  struct adds {
283  Scalar m_scale;
284  explicit adds(const Scalar& s) : m_scale(s) {}
285  template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
286  dst.const_cast_derived() += m_scale * src;
287  }
288  };
289 
290  template<typename Dst>
291  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
292  {
293  internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
294  }
295 
296  template<typename Dst>
297  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
298  {
299  internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
300  }
301 
302  template<typename Dst>
303  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
304  {
305  internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
306  }
307 
308  template<typename Dst>
309  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
310  {
311  internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
312  }
313 
314 };
315 
316 
317 // This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
// CRTP base: derived classes only implement scaleAndAddTo; evalTo/addTo/subTo
// are expressed in terms of it with alpha = 1, 1, -1 respectively.
318 template<typename Lhs, typename Rhs, typename Derived>
319 struct generic_product_impl_base
320 {
321  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
322 
 // dst = lhs*rhs : clear dst then accumulate with alpha = 1.
323  template<typename Dst>
324  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
325  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
326 
 // dst += lhs*rhs
327  template<typename Dst>
328  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
329  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
330 
 // dst -= lhs*rhs
331  template<typename Dst>
332  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
333  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
334 
 // Forward to the derived implementation (static CRTP dispatch).
335  template<typename Dst>
336  static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
337  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
338 
339 };
340 
// Matrix * vector (GEMV): dispatched to gemv_dense_selector, which may call
// optimized BLAS-like kernels when the matrix has usable direct access.
341 template<typename Lhs, typename Rhs>
342 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
343  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
344 {
345  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
 // Side of the vector operand: OnTheLeft when lhs is the vector.
346  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
 // The matrix (non-vector) operand of the product.
347  typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
348 
349  template<typename Dest>
350  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
351  {
352  internal::gemv_dense_selector<Side,
353  (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
354  bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
355  >::run(lhs, rhs, dst, alpha);
356  }
357 };
358 
// Small products: evaluate coefficient-by-coefficient through lazyProduct,
// routed through call_assignment_no_alias (aliasing was already handled by
// the caller, and this form is easier on the compiler than noalias()).
359 template<typename Lhs, typename Rhs>
360 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
361 {
362  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
363 
364  template<typename Dst>
365  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
366  {
367  // Same as: dst.noalias() = lhs.lazyProduct(rhs);
368  // but easier on the compiler side
369  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<Scalar>());
370  }
371 
372  template<typename Dst>
373  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
374  {
375  // dst.noalias() += lhs.lazyProduct(rhs);
376  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<Scalar>());
377  }
378 
379  template<typename Dst>
380  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
381  {
382  // dst.noalias() -= lhs.lazyProduct(rhs);
383  call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<Scalar>());
384  }
385 
386 // template<typename Dst>
387 // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
388 // { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
389 };
390 
391 // This specialization enforces the use of a coefficient-based evaluation strategy
// LazyCoeffBasedProductMode reuses the CoeffBasedProductMode implementation:
// both evaluate coefficient-by-coefficient, they only differ in when the
// mode is selected.
392 template<typename Lhs, typename Rhs>
393 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
394  : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
395 
396 // Case 2: Evaluate coeff by coeff
397 //
398 // This is mostly taken from CoeffBasedProduct.h
399 // The main difference is that we add an extra argument to the etor_product_*_impl::run() function
400 // for the inner dimension of the product, because evaluator object do not know their size.
401 
// Forward declarations for the coefficient-wise product kernels.
// etor_product_coeff_impl: scalar (single coefficient) path.
402 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
403 struct etor_product_coeff_impl;
 
// etor_product_packet_impl: packet path; specialized below on storage order
// and on the unrolling index (N, 1, 0, or Dynamic).
405 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
406 struct etor_product_packet_impl;
407 
// Lazy (coefficient-based) product evaluator: no temporary is created; each
// coefficient (or packet) of the result is computed on demand as a dot
// product over the inner dimension. The enum block replicates the cost and
// vectorization logic of the former CoeffBasedProduct.
408 template<typename Lhs, typename Rhs, int ProductTag>
409 struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
410  : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
411 {
412  typedef Product<Lhs, Rhs, LazyProduct> XprType;
413  typedef typename XprType::Scalar Scalar;
414  typedef typename XprType::CoeffReturnType CoeffReturnType;
415  typedef typename XprType::PacketScalar PacketScalar;
416  typedef typename XprType::PacketReturnType PacketReturnType;
417 
418  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
419  : m_lhs(xpr.lhs()),
420  m_rhs(xpr.rhs()),
421  m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
422  m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
423  // or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
424  m_innerDim(xpr.lhs().cols())
425  {
 // Sanity checks on the compile-time cost model (debug builds only).
426  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
427  EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
428  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
429  }
430 
431  // Everything below here is taken from CoeffBasedProduct.h
432 
433  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
434  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
435 
436  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
437  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
438 
439  typedef evaluator<LhsNestedCleaned> LhsEtorType;
440  typedef evaluator<RhsNestedCleaned> RhsEtorType;
441 
442  enum {
443  RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
444  ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
445  InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
446  MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
447  MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
448 
449  PacketSize = packet_traits<Scalar>::size,
450 
451  LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
452  RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
 // Cost of one result coefficient: InnerSize muls + operand reads, plus
 // InnerSize-1 adds; HugeCost when the inner size is unknown (Dynamic).
453  CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
454  : InnerSize == Dynamic ? HugeCost
455  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
456  + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
457 
 // Unroll the inner loop only when the per-coefficient cost is small.
458  Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
459 
460  LhsFlags = LhsEtorType::Flags,
461  RhsFlags = RhsEtorType::Flags,
462 
463  LhsAlignment = LhsEtorType::Alignment,
464  RhsAlignment = RhsEtorType::Alignment,
465 
466  LhsRowMajor = LhsFlags & RowMajorBit,
467  RhsRowMajor = RhsFlags & RowMajorBit,
468 
469  SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
470 
 // Packet evaluation along rows requires a row-major, packet-capable rhs
 // whose column count is a multiple of the packet size (or Dynamic).
471  CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
472  && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ),
473 
 // Symmetric condition for column-wise packet evaluation of the lhs.
474  CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
475  && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ),
476 
477  EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
478  : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
479  : (RhsRowMajor && !CanVectorizeLhs),
480 
481  Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
482  | (EvalToRowMajor ? RowMajorBit : 0)
483  // TODO enable vectorization for mixed types
484  | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
485  | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
486 
487  LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
488  RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
489 
 // Alignment can only be promised when the vectorized operand's outer
 // stride (in bytes) is a multiple of its alignment.
490  Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
491  : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
492  : 0,
493 
494  /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
495  * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
496  * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
497  * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
498  */
499  CanVectorizeInner = SameType
500  && LhsRowMajor
501  && (!RhsRowMajor)
502  && (LhsFlags & RhsFlags & ActualPacketAccessBit)
503  && (InnerSize % packet_traits<Scalar>::size == 0)
504  };
505 
 // One coefficient = dot product of lhs row and rhs column.
506  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
507  {
508  return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
509  }
510 
511  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
512  * which is why we don't set the LinearAccessBit.
513  * TODO: this seems possible when the result is a vector
514  */
515  EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
516  {
517  const Index row = RowsAtCompileTime == 1 ? 0 : index;
518  const Index col = RowsAtCompileTime == 1 ? index : 0;
519  return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
520  }
521 
 // One packet of coefficients, computed by etor_product_packet_impl; the
 // inner loop is fully unrolled when Unroll is set, dynamic otherwise.
522  template<int LoadMode, typename PacketType>
523  const PacketType packet(Index row, Index col) const
524  {
525  PacketType res;
526  typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
527  Unroll ? int(InnerSize) : Dynamic,
528  LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
529  PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
530  return res;
531  }
532 
533  template<int LoadMode, typename PacketType>
534  const PacketType packet(Index index) const
535  {
536  const Index row = RowsAtCompileTime == 1 ? 0 : index;
537  const Index col = RowsAtCompileTime == 1 ? index : 0;
538  return packet<LoadMode,PacketType>(row,col);
539  }
540 
541 protected:
542  const LhsNested m_lhs;
543  const RhsNested m_rhs;
544 
545  LhsEtorType m_lhsImpl;
546  RhsEtorType m_rhsImpl;
547 
548  // TODO: Get rid of m_innerDim if known at compile time
549  Index m_innerDim;
550 };
551 
// A DefaultProduct that was classified as cheap (LazyCoeffBasedProductMode)
// is evaluated through the lazy coefficient-based evaluator above, but still
// carries EvalBeforeNestingBit since a DefaultProduct may alias.
552 template<typename Lhs, typename Rhs>
553 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
554  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
555 {
556  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
557  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
558  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
559  enum {
560  Flags = Base::Flags | EvalBeforeNestingBit
561  };
562  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
563  : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
564  {}
565 };
566 
567 /****************************************
568 *** Coeff based product, Packet path ***
569 ****************************************/
570 
// Unrolled step, row-major result: recursively accumulate term k-1 of the
// inner dimension as broadcast(lhs(row,k-1)) * packet(rhs(k-1, col..)).
571 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
572 struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
573 {
574  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
575  {
576  etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
577  res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
578  }
579 };
580 
// Unrolled step, column-major result: recursively accumulate term k-1 as
// packet(lhs(row.., k-1)) * broadcast(rhs(k-1, col)).
581 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
582 struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
583 {
584  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
585  {
586  etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
587  res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
588  }
589 };
590 
// Base case of the row-major unrolled recursion: initialize the accumulator
// with the first (k = 0) term, using pmul instead of pmadd.
591 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
592 struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
593 {
594  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
595  {
596  res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
597  }
598 };
599 
// Base case of the column-major unrolled recursion: initialize the
// accumulator with the first (k = 0) term.
600 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
601 struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
602 {
603  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
604  {
605  res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
606  }
607 };
608 
// Degenerate case: inner dimension of size 0, the product is identically 0
// (row-major result).
609 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
610 struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
611 {
612  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
613  {
614  res = pset1<Packet>(0);
615  }
616 };
617 
// Degenerate case: inner dimension of size 0, the product is identically 0
// (column-major result).
618 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
619 struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
620 {
621  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
622  {
623  res = pset1<Packet>(0);
624  }
625 };
626 
// Non-unrolled (runtime-sized) inner loop, row-major result: accumulate all
// innerDim terms with a fused multiply-add.
627 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
628 struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
629 {
630  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
631  {
632  res = pset1<Packet>(0);
633  for(Index i = 0; i < innerDim; ++i)
634  res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
635  }
636 };
637 
// Non-unrolled (runtime-sized) inner loop, column-major result.
638 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
639 struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
640 {
641  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
642  {
643  res = pset1<Packet>(0);
644  for(Index i = 0; i < innerDim; ++i)
645  res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
646  }
647 };
648 
649 
650 /***************************************************************************
651 * Triangular products
652 ***************************************************************************/
// Forward declaration: implemented in the triangular-product headers.
653 template<int Mode, bool LhsIsTriangular,
654  typename Lhs, bool LhsIsVector,
655  typename Rhs, bool RhsIsVector>
656 struct triangular_product_impl;
657 
// Triangular (lhs) * dense (rhs): unwrap the triangular view and forward to
// triangular_product_impl; the last flag tells whether rhs is a column vector.
658 template<typename Lhs, typename Rhs, int ProductTag>
659 struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
660  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
661 {
662  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
663 
664  template<typename Dest>
665  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
666  {
667  triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
668  ::run(dst, lhs.nestedExpression(), rhs, alpha);
669  }
670 };
671 
// Dense (lhs) * triangular (rhs): mirror of the case above; the flag after
// Lhs tells whether lhs is a row vector.
672 template<typename Lhs, typename Rhs, int ProductTag>
673 struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
674 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
675 {
676  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
677 
678  template<typename Dest>
679  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
680  {
681  triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
682  }
683 };
684 
685 
686 /***************************************************************************
687 * SelfAdjoint products
688 ***************************************************************************/
// Forward declaration: implemented in the selfadjoint-product headers.
689 template <typename Lhs, int LhsMode, bool LhsIsVector,
690  typename Rhs, int RhsMode, bool RhsIsVector>
691 struct selfadjoint_product_impl;
692 
// Selfadjoint (lhs) * dense (rhs): unwrap the selfadjoint view and forward
// to selfadjoint_product_impl (mode 0 marks the plain dense operand).
693 template<typename Lhs, typename Rhs, int ProductTag>
694 struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
695  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
696 {
697  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
698 
699  template<typename Dest>
700  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
701  {
702  selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
703  }
704 };
705 
// dense * (selfadjoint view): mirror of the specialization above, with the
// selfadjoint operand on the right-hand side.
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // Accumulates alpha * (lhs * rhs) into dst; the rhs selfadjoint view is
  // unwrapped and its Mode passed at compile time.
  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
  }
};
718 
719 
720 /***************************************************************************
721 * Diagonal products
722 ***************************************************************************/
723 
// Common base of the (diagonal * dense) and (dense * diagonal) product
// evaluators below. It owns the evaluators of the dense matrix and of the
// diagonal's coefficient vector, and provides the shared scalar/packet
// coefficient helpers. ProductOrder tells on which side the diagonal stands
// (OnTheLeft or OnTheRight); Derived is the final evaluator type.
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
struct diagonal_product_evaluator_base
  : evaluator_base<Derived>
{
  // Scalar type of the product of the two operand scalar types.
  typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
public:
  enum {
    // One multiply plus one coefficient read from each operand.
    CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,

    MatrixFlags = evaluator<MatrixType>::Flags,
    DiagFlags = evaluator<DiagonalType>::Flags,
    _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
    // True when, along the matrix's storage order, all coefficients of a
    // packet share the same diagonal entry, so the diagonal can be read one
    // scalar at a time (and broadcast) instead of packet-loaded.
    _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
                          ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
    // FIXME currently we need same types, but in the future the next rule should be the one
    //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
    _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
    // Linear (1D) indexing is only exposed when the dense operand is a
    // compile-time vector, see coeff(Index) below.
    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
    Alignment = evaluator<MatrixType>::Alignment
  };

  diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
    : m_diagImpl(diag), m_matImpl(mat)
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  // Linear-access coefficient: only meaningful in the vector case (gated by
  // _LinearAccessMask above), where matrix index and diagonal index coincide.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
  {
    return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
  }

protected:
  // Packet path when a single diagonal entry covers the whole packet:
  // broadcast diag(id) with pset1 and multiply with the matrix packet.
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
  {
    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
                          internal::pset1<PacketType>(m_diagImpl.coeff(id)));
  }

  // Packet path when consecutive packet lanes map to consecutive diagonal
  // entries: load a packet from the diagonal vector starting at `id`.
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
  {
    enum {
      InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
      DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
    };
    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
                          m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
  }

  evaluator<DiagonalType> m_diagImpl;
  evaluator<MatrixType> m_matImpl;
};
781 
782 // diagonal * dense
// diagonal * dense: coefficient (row,col) is diag(row) * rhs(row,col).
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
{
  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
  using Base::m_diagImpl;
  using Base::m_matImpl;
  using Base::coeff;
  typedef typename Base::Scalar Scalar;

  typedef Product<Lhs, Rhs, ProductKind> XprType;
  typedef typename XprType::PlainObject PlainObject;

  enum {
    StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
  };

  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(xpr.rhs(), xpr.lhs().diagonal())
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
  {
    // Left diagonal scales the rows of the dense factor.
    return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
  }

#ifndef __CUDACC__
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
  {
    // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
    // See also similar calls below.
    // RowMajor: a packet spans columns at a fixed row, so the diagonal entry
    // diag(row) is constant over the packet -> broadcast path (true_type).
    // ColMajor: a packet spans rows -> packet-load from the diagonal.
    return this->template packet_impl<LoadMode,PacketType>(row,col, row,
                 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
  }

  // Linear packet access; maps the 1D index to (idx,0) or (0,idx) depending
  // on the storage order (only reachable in the vector case, see base class).
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
  {
    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
  }
#endif
};
827 
828 // dense * diagonal
// dense * diagonal: coefficient (row,col) is lhs(row,col) * diag(col).
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
{
  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
  using Base::m_diagImpl;
  using Base::m_matImpl;
  using Base::coeff;
  typedef typename Base::Scalar Scalar;

  typedef Product<Lhs, Rhs, ProductKind> XprType;
  typedef typename XprType::PlainObject PlainObject;

  enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };

  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(xpr.lhs(), xpr.rhs().diagonal())
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
  {
    // Right diagonal scales the columns of the dense factor.
    return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
  }

#ifndef __CUDACC__
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
  {
    // ColMajor: a packet spans rows at a fixed column, so diag(col) is
    // constant over the packet -> broadcast path (true_type).
    // RowMajor: a packet spans columns -> packet-load from the diagonal.
    return this->template packet_impl<LoadMode,PacketType>(row,col, col,
                 typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
  }

  // Linear packet access; maps the 1D index to (idx,0) or (0,idx) depending
  // on the storage order (only reachable in the vector case, see base class).
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
  {
    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
  }
#endif
};
869 
870 /***************************************************************************
871 * Products with permutation matrices
872 ***************************************************************************/
873 
// Helper applying a permutation to an expression; specialized per shape of
// the permuted expression. Side selects row (OnTheLeft) vs column
// (OnTheRight) permutation; Transposed=true applies the inverse permutation.
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct permutation_matrix_product;
881 
// Dense specialization: permutes the rows (OnTheLeft) or columns (OnTheRight)
// of xpr into dst, either in place (when dst aliases the evaluated xpr) by
// following the permutation cycles, or out of place by copying one row/column
// at a time.
template<typename ExpressionType, int Side, bool Transposed>
struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
{
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;

  template<typename Dest, typename PermutationType>
  static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
  {
    MatrixType mat(xpr);
    // Number of rows or columns being permuted, depending on the side.
    const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
    // FIXME we need an is_same for expression that is not sensitive to constness. For instance
    // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
    //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
    if(is_same_dense(dst, mat))
    {
      // apply the permutation inplace
      // mask[i] records whether index i has already been visited; the
      // permutation is decomposed into cycles and each cycle is realized as
      // a chain of row/column swaps.
      Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
      mask.fill(false);
      Index r = 0;
      while(r < perm.size())
      {
        // search for the next seed
        while(r<perm.size() && mask[r]) r++;
        if(r>=perm.size())
          break;
        // we got one, let's follow it until we are back to the seed
        Index k0 = r++;
        Index kPrev = k0;
        mask.coeffRef(k0) = true;
        for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
        {
          // Swap the current row/column with the seed or the previously
          // visited one; the (Side ^ Transposed) test picks the direction in
          // which the permutation (or its inverse) is applied.
          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
          .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));

          mask.coeffRef(k) = true;
          kPrev = k;
        }
      }
    }
    else
    {
      // Out-of-place: copy each row/column of mat to its permuted position in
      // dst (or from its permuted position, for the inverse case).
      for(Index i = 0; i < n; ++i)
      {
        Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
             (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)

        =

        Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
             (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
      }
    }
  }
};
938 
// permutation * matrix: permute the rows of rhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
  }
};
948 
// matrix * permutation: permute the columns of lhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
  }
};
958 
// inverse(permutation) * matrix: apply the inverse permutation to the rows of
// rhs (Transposed=true); the Inverse wrapper is unwrapped via nestedExpression().
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
  }
};
968 
// matrix * inverse(permutation): apply the inverse permutation to the columns
// of lhs (Transposed=true).
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
  {
    permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
  }
};
978 
979 
980 /***************************************************************************
981 * Products with transpositions matrices
982 ***************************************************************************/
983 
984 // FIXME could we unify Transpositions and Permutation into a single "shape"??
985 
// Applies a sequence of transpositions (pairwise row/column swaps) to an
// expression. Side selects row (OnTheLeft) vs column (OnTheRight) swaps;
// Transposed=true replays the transpositions in reverse order, which applies
// the inverse of the product of transpositions.
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct transposition_matrix_product
{
  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;

  template<typename Dest, typename TranspositionType>
  static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
  {
    MatrixType mat(xpr);
    typedef typename TranspositionType::StorageIndex StorageIndex;
    const Index size = tr.size();
    StorageIndex j = 0;

    // Copy the source into dst first, unless they already alias each other
    // (in which case the swaps below operate in place).
    if(!(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)))
      dst = mat;

    // Walk the transpositions forward (or backward when Transposed) and swap
    // row/column k with its partner j = tr.coeff(k) whenever they differ.
    for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
      if(Index(j=tr.coeff(k))!=k)
      {
        if(Side==OnTheLeft)        dst.row(k).swap(dst.row(j));
        else if(Side==OnTheRight)  dst.col(k).swap(dst.col(j));
      }
  }
};
1015 
// transpositions * matrix: apply the row swaps to rhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
  }
};
1025 
// matrix * transpositions: apply the column swaps to lhs.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
  }
};
1035 
1036 
// transpose(transpositions) * matrix: replay the row swaps in reverse order
// (Transposed=true), i.e. apply the inverse sequence; the Transpose wrapper
// is unwrapped via nestedExpression().
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
  {
    transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
  }
};
1046 
// matrix * transpose(transpositions): replay the column swaps in reverse
// order (Transposed=true), i.e. apply the inverse sequence.
template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
  {
    transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
  }
};
1056 
1057 } // end namespace internal
1058 
1059 } // end namespace Eigen
1060 
1061 #endif // EIGEN_PRODUCT_EVALUATORS_H
Definition: Constants.h:320
Definition: LDLT.h:16
Definition: Constants.h:230
const unsigned int RowMajorBit
Definition: Constants.h:61
const unsigned int PacketAccessBit
Definition: Constants.h:88
Definition: Constants.h:335
Definition: Constants.h:322
Definition: Eigen_Colamd.h:54
const unsigned int EvalBeforeNestingBit
Definition: Constants.h:65
const unsigned int ActualPacketAccessBit
Definition: Constants.h:99
const unsigned int LinearAccessBit
Definition: Constants.h:124
Definition: Constants.h:333