10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H 27 template<
typename Derived,
typename Device>
30 typedef typename Derived::Index Index;
31 typedef typename Derived::Scalar Scalar;
32 typedef typename Derived::Scalar CoeffReturnType;
33 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
34 typedef typename Derived::Dimensions Dimensions;
37 static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
38 internal::traits<Derived>::NumDimensions : 0;
41 IsAligned = Derived::IsAligned,
42 PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
43 Layout = Derived::Layout,
44 CoordAccess = NumCoords > 0,
49 : m_data(
const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type
>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
53 const Derived& derived()
const {
return m_impl; }
54 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dims; }
56 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
58 m_device.memcpy((
void*)dest, m_data,
sizeof(Scalar) * m_dims.TotalSize());
64 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() { }
66 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const {
71 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
76 template<
int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
77 PacketReturnType packet(Index index)
const 79 return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
82 template <
int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
83 void writePacket(Index index,
const PacketReturnType& x)
85 return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
88 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
const array<DenseIndex, NumCoords>& coords)
const {
90 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
91 return m_data[m_dims.IndexOfColMajor(coords)];
93 return m_data[m_dims.IndexOfRowMajor(coords)];
97 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(
const array<DenseIndex, NumCoords>& coords) {
99 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
100 return m_data[m_dims.IndexOfColMajor(coords)];
102 return m_data[m_dims.IndexOfRowMajor(coords)];
106 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
107 return TensorOpCost(
sizeof(CoeffReturnType), 0, 0, vectorized,
108 internal::unpacket_traits<PacketReturnType>::size);
111 EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::template MakePointer<Scalar>::Type data()
const {
return m_data; }
114 const Device&
device()
const{
return m_device;}
117 typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data;
119 const Device& m_device;
120 const Derived& m_impl;
124 template <
typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
125 T loadConstant(
const T* address) {
129 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 130 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
131 float loadConstant(
const float* address) {
132 return __ldg(address);
134 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
135 double loadConstant(
const double* address) {
136 return __ldg(address);
138 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
139 Eigen::half loadConstant(
const Eigen::half* address) {
140 return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
147 template<
typename Derived,
typename Device>
150 typedef typename Derived::Index Index;
151 typedef typename Derived::Scalar Scalar;
152 typedef typename Derived::Scalar CoeffReturnType;
153 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
154 typedef typename Derived::Dimensions Dimensions;
157 static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
158 internal::traits<Derived>::NumDimensions : 0;
161 IsAligned = Derived::IsAligned,
162 PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
163 Layout = Derived::Layout,
164 CoordAccess = NumCoords > 0,
169 const Derived& derived()
const {
return m_impl; }
172 : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
175 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dims; }
177 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(CoeffReturnType* data) {
178 if (!NumTraits<
typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
179 m_device.memcpy((
void*)data, m_data, m_dims.TotalSize() *
sizeof(Scalar));
185 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() { }
187 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const {
188 eigen_assert(m_data);
189 return loadConstant(m_data+index);
192 template<
int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
193 PacketReturnType packet(Index index)
const 195 return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
198 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
const array<DenseIndex, NumCoords>& coords)
const {
199 eigen_assert(m_data);
200 const Index index = (
static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
201 : m_dims.IndexOfRowMajor(coords);
202 return loadConstant(m_data+index);
205 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
206 return TensorOpCost(
sizeof(CoeffReturnType), 0, 0, vectorized,
207 internal::unpacket_traits<PacketReturnType>::size);
210 EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data()
const {
return m_data; }
213 const Device&
device()
const{
return m_device;}
216 typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data;
218 const Device& m_device;
219 const Derived& m_impl;
227 template<
typename NullaryOp,
typename ArgType,
typename Device>
228 struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
230 typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
234 PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
242 : m_functor(op.functor()), m_argImpl(op.nestedExpression(),
device), m_wrapper()
245 typedef typename XprType::Index Index;
246 typedef typename XprType::Scalar Scalar;
247 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
248 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
249 static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
252 EIGEN_DEVICE_FUNC
const Dimensions& dimensions()
const {
return m_argImpl.dimensions(); }
254 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(CoeffReturnType*) {
return true; }
255 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() { }
257 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index)
const 259 return m_wrapper(m_functor, index);
262 template<
int LoadMode>
263 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const 265 return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
268 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
269 costPerCoeff(
bool vectorized)
const {
270 return TensorOpCost(
sizeof(CoeffReturnType), 0, 0, vectorized,
271 internal::unpacket_traits<PacketReturnType>::size);
274 EIGEN_DEVICE_FUNC CoeffReturnType* data()
const {
return NULL; }
279 NullaryOp functor()
const {
return m_functor; }
283 const NullaryOp m_functor;
285 const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
292 template<
typename UnaryOp,
typename ArgType,
typename Device>
293 struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
295 typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
306 : m_functor(op.functor()),
307 m_argImpl(op.nestedExpression(),
device)
310 typedef typename XprType::Index Index;
311 typedef typename XprType::Scalar Scalar;
312 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
313 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
314 static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
317 EIGEN_DEVICE_FUNC
const Dimensions& dimensions()
const {
return m_argImpl.dimensions(); }
319 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(Scalar*) {
320 m_argImpl.evalSubExprsIfNeeded(NULL);
323 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() {
327 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index)
const 329 return m_functor(m_argImpl.coeff(index));
332 template<
int LoadMode>
333 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const 335 return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
338 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
339 const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
340 return m_argImpl.costPerCoeff(vectorized) +
341 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
344 EIGEN_DEVICE_FUNC CoeffReturnType* data()
const {
return NULL; }
349 UnaryOp functor()
const {
return m_functor; }
353 const UnaryOp m_functor;
360 template<
typename BinaryOp,
typename LeftArgType,
typename RightArgType,
typename Device>
361 struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
363 typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
368 internal::functor_traits<BinaryOp>::PacketAccess,
375 : m_functor(op.functor()),
376 m_leftImpl(op.lhsExpression(),
device),
377 m_rightImpl(op.rhsExpression(),
device)
380 eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
383 typedef typename XprType::Index Index;
384 typedef typename XprType::Scalar Scalar;
385 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
386 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
387 static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
390 EIGEN_DEVICE_FUNC
const Dimensions& dimensions()
const 393 return m_leftImpl.dimensions();
396 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(CoeffReturnType*) {
397 m_leftImpl.evalSubExprsIfNeeded(NULL);
398 m_rightImpl.evalSubExprsIfNeeded(NULL);
401 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() {
402 m_leftImpl.cleanup();
403 m_rightImpl.cleanup();
406 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index)
const 408 return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
410 template<
int LoadMode>
411 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const 413 return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
416 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
417 costPerCoeff(
bool vectorized)
const {
418 const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
419 return m_leftImpl.costPerCoeff(vectorized) +
420 m_rightImpl.costPerCoeff(vectorized) +
421 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
424 EIGEN_DEVICE_FUNC CoeffReturnType* data()
const {
return NULL; }
430 BinaryOp functor()
const {
return m_functor; }
433 const BinaryOp m_functor;
440 template<
typename TernaryOp,
typename Arg1Type,
typename Arg2Type,
typename Arg3Type,
typename Device>
441 struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
443 typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;
448 internal::functor_traits<TernaryOp>::PacketAccess,
455 : m_functor(op.functor()),
456 m_arg1Impl(op.arg1Expression(),
device),
457 m_arg2Impl(op.arg2Expression(),
device),
458 m_arg3Impl(op.arg3Expression(),
device)
462 EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
463 typename internal::traits<Arg2Type>::StorageKind>::value),
464 STORAGE_KIND_MUST_MATCH)
465 EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
466 typename internal::traits<Arg3Type>::StorageKind>::value),
467 STORAGE_KIND_MUST_MATCH)
468 EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::Index,
469 typename internal::traits<Arg2Type>::Index>::value),
470 STORAGE_INDEX_MUST_MATCH)
471 EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::Index,
472 typename internal::traits<Arg3Type>::Index>::value),
473 STORAGE_INDEX_MUST_MATCH)
475 eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
478 typedef typename XprType::Index Index;
479 typedef typename XprType::Scalar Scalar;
480 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
481 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
482 static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
485 EIGEN_DEVICE_FUNC
const Dimensions& dimensions()
const 488 return m_arg1Impl.dimensions();
491 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(CoeffReturnType*) {
492 m_arg1Impl.evalSubExprsIfNeeded(NULL);
493 m_arg2Impl.evalSubExprsIfNeeded(NULL);
494 m_arg3Impl.evalSubExprsIfNeeded(NULL);
497 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() {
498 m_arg1Impl.cleanup();
499 m_arg2Impl.cleanup();
500 m_arg3Impl.cleanup();
503 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index)
const 505 return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
507 template<
int LoadMode>
508 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const 510 return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
511 m_arg2Impl.template packet<LoadMode>(index),
512 m_arg3Impl.template packet<LoadMode>(index));
515 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
516 costPerCoeff(
bool vectorized)
const {
517 const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
518 return m_arg1Impl.costPerCoeff(vectorized) +
519 m_arg2Impl.costPerCoeff(vectorized) +
520 m_arg3Impl.costPerCoeff(vectorized) +
521 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
524 EIGEN_DEVICE_FUNC CoeffReturnType* data()
const {
return NULL; }
534 const TernaryOp m_functor;
543 template<
typename IfArgType,
typename ThenArgType,
typename ElseArgType,
typename Device>
544 struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
546 typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
547 typedef typename XprType::Scalar Scalar;
552 internal::packet_traits<Scalar>::HasBlend,
559 : m_condImpl(op.ifExpression(),
device),
560 m_thenImpl(op.thenExpression(),
device),
561 m_elseImpl(op.elseExpression(),
device)
565 eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
566 eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
569 typedef typename XprType::Index Index;
570 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
571 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
572 static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
575 EIGEN_DEVICE_FUNC
const Dimensions& dimensions()
const 578 return m_condImpl.dimensions();
581 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(CoeffReturnType*) {
582 m_condImpl.evalSubExprsIfNeeded(NULL);
583 m_thenImpl.evalSubExprsIfNeeded(NULL);
584 m_elseImpl.evalSubExprsIfNeeded(NULL);
587 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup() {
588 m_condImpl.cleanup();
589 m_thenImpl.cleanup();
590 m_elseImpl.cleanup();
593 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index)
const 595 return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
597 template<
int LoadMode>
598 EIGEN_DEVICE_FUNC PacketReturnType packet(Index index)
const 600 internal::Selector<PacketSize> select;
601 for (Index i = 0; i < PacketSize; ++i) {
602 select.select[i] = m_condImpl.coeff(index+i);
604 return internal::pblend(select,
605 m_thenImpl.template packet<LoadMode>(index),
606 m_elseImpl.template packet<LoadMode>(index));
609 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
610 costPerCoeff(
bool vectorized)
const {
611 return m_condImpl.costPerCoeff(vectorized) +
612 m_thenImpl.costPerCoeff(vectorized)
613 .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
616 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data()
const {
return NULL; }
#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H