// Include guard for this header, followed by the traits specialization for
// TensorConversionOp<TargetType, XprType>.
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H 23 template<
typename TargetType,
typename XprType>
24 struct traits<TensorConversionOp<TargetType, XprType> >
// The scalar type exposed by the conversion expression is the target type.
27 typedef TargetType Scalar;
// Storage kind and index type are taken unchanged from the wrapped expression's traits.
28 typedef typename traits<XprType>::StorageKind StorageKind;
29 typedef typename traits<XprType>::Index Index;
// Nested is XprType's own nesting type; _Nested is the same type with any reference removed.
30 typedef typename XprType::Nested Nested;
31 typedef typename remove_reference<Nested>::type _Nested;
// Dimension count and layout are copied from the wrapped expression's traits.
32 static const int NumDimensions = traits<XprType>::NumDimensions;
33 static const int Layout = traits<XprType>::Layout;
// eval specialization for TensorConversionOp with Eigen::Dense:
// `type` is a const reference to the conversion expression itself.
37 template<
typename TargetType,
typename XprType>
38 struct eval<TensorConversionOp<TargetType, XprType>,
Eigen::Dense>
40 typedef const TensorConversionOp<TargetType, XprType>& type;
// nested specialization for TensorConversionOp, keyed on the type produced by
// eval<TensorConversionOp<...> >: `type` is the conversion expression by value
// (no reference qualifier).
43 template<
typename TargetType,
typename XprType>
44 struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
46 typedef TensorConversionOp<TargetType, XprType> type;
// Primary PacketConverter template: converts packets read from a tensor
// evaluator into target packets.
// NOTE(review): SrcCoeffRatio / TgtCoeffRatio presumably describe how many
// source packets map to how many target packets (see type_casting_traits);
// this primary template does not use them in the visible code — confirm.
52 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket,
int SrcCoeffRatio,
int TgtCoeffRatio>
53 struct PacketConverter {
// Constructor taking the evaluator to read from (its initializer list is not
// visible in this listing).
54 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
55 PacketConverter(
const TensorEvaluator& impl)
// Loads one source packet from the evaluator at `index` using LoadMode and
// casts it to a target packet with internal::pcast.
58 template<
int LoadMode,
typename Index>
59 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index)
const {
60 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
// The evaluator is held by const reference, not copied.
64 const TensorEvaluator& m_impl;
// PacketConverter specialization for ratios <2, 1>: two source packets are
// combined into a single target packet.
68 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket>
69 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
// Constructor taking the evaluator to read from (its initializer list is not
// visible in this listing).
70 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
71 PacketConverter(
const TensorEvaluator& impl)
// Reads two consecutive source packets starting at `index` and casts them
// together with the two-argument internal::pcast.
74 template<
int LoadMode,
typename Index>
75 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index)
const {
// Number of coefficients per source packet; used as the stride between loads.
76 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
78 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
79 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
// NOTE(review): the statement returning `result` is not visible in this listing.
80 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
// The evaluator is held by const reference, not copied.
85 const TensorEvaluator& m_impl;
// PacketConverter specialization for ratios <4, 1>: four source packets are
// combined into a single target packet.
88 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket>
89 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
// Constructor taking the evaluator to read from (its initializer list is not
// visible in this listing).
90 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
91 PacketConverter(
const TensorEvaluator& impl)
// Reads four consecutive source packets starting at `index` and casts them
// together with the four-argument internal::pcast.
94 template<
int LoadMode,
typename Index>
95 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index)
const {
// Number of coefficients per source packet; used as the stride between loads.
96 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
98 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
99 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
100 SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
101 SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
// NOTE(review): the statement returning `result` is not visible in this listing.
102 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
// The evaluator is held by const reference, not copied.
107 const TensorEvaluator& m_impl;
// PacketConverter specialization for ratios <1, 2>.
// NOTE(review): presumably one source packet holds more coefficients than one
// target packet here, which is why a bounds check precedes the packet load —
// confirm against type_casting_traits.
110 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket>
111 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
// Stores the evaluator and caches its total coefficient count as m_maxIndex.
112 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
113 PacketConverter(
const TensorEvaluator& impl)
114 : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
// Produces one target packet for the coefficients starting at `index`.
116 template<
int LoadMode,
typename Index>
117 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index)
const {
118 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
// Packet path: taken only when the evaluator exposes a non-null data pointer
// and a whole source packet starting at `index` ends strictly before
// m_maxIndex.
122 if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
// The load uses Unaligned rather than the caller-supplied LoadMode.
124 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
// Scalar path: converts TgtPacketSize coefficients one at a time with
// scalar_cast_op into an aligned temporary array, then loads that array as a
// target packet.
126 const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
127 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
128 typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
129 internal::scalar_cast_op<SrcType, TgtType> converter;
130 EIGEN_ALIGN_MAX
typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
131 for (
int i = 0; i < TgtPacketSize; ++i) {
132 values[i] = converter(m_impl.coeff(index+i));
// NOTE(review): the statement returning `rslt` is not visible in this listing.
134 TgtPacket rslt = internal::pload<TgtPacket>(values);
// The evaluator is held by const reference; m_maxIndex is fixed at construction.
140 const TensorEvaluator& m_impl;
141 const typename TensorEvaluator::Index m_maxIndex;
// Members of the TensorConversionOp<TargetType, XprType> expression class.
// NOTE(review): the class declaration line and constructor are not visible in
// this listing.
144 template<
typename TargetType,
typename XprType>
// Scalar, storage kind, index and nesting types come from the internal traits
// and nested helpers for this expression type.
148 typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
149 typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
150 typedef typename internal::traits<TensorConversionOp>::Index Index;
151 typedef typename internal::nested<TensorConversionOp>::type Nested;
// Coefficients are returned by value as Scalar.
152 typedef Scalar CoeffReturnType;
153 typedef typename NumTraits<Scalar>::Real RealScalar;
// Accessor returning the stored sub-expression as a const reference.
159 const typename internal::remove_all<typename XprType::Nested>::type&
160 expression()
const {
return m_xpr; }
// The sub-expression, stored using XprType's own nesting type.
163 typename XprType::Nested m_xpr;
// Primary ConversionSubExprEval template.
// run() ignores its Scalar* argument (the parameter is unnamed) and asks the
// wrapped evaluator to evaluate its sub-expressions with a NULL destination.
// NOTE(review): presumably this is the SameType == false case, since a
// <true, ...> specialization exists; the return statement is not visible in
// this listing.
166 template <
bool SameType,
typename Eval,
typename Scalar>
struct ConversionSubExprEval {
167 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool run(Eval& impl, Scalar*) {
168 impl.evalSubExprsIfNeeded(NULL);
// ConversionSubExprEval specialization for SameType == true.
// run() forwards the caller's destination pointer to the wrapped evaluator and
// returns whatever that evaluator's evalSubExprsIfNeeded returns.
173 template <
typename Eval,
typename Scalar>
struct ConversionSubExprEval<true, Eval, Scalar> {
174 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool run(Eval& impl, Scalar* data) {
175 return impl.evalSubExprsIfNeeded(data);
// Evaluator for a const TensorConversionOp<TargetType, ArgType> on a given
// Device. It wraps the argument's evaluator (m_impl) and converts the values
// it produces to TargetType.
// NOTE(review): several original lines (braces, some declarations and bodies)
// are missing from this listing; comments below describe only what is visible.
181 template<
typename TargetType,
typename ArgType,
typename Device>
182 struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
185 typedef typename XprType::Index Index;
// Both the scalar type and the coefficient return type are the target type.
187 typedef TargetType Scalar;
188 typedef TargetType CoeffReturnType;
// SrcType is the argument expression's scalar type passed through remove_all.
189 typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
// Packet types for the target and source scalars on this device.
190 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
191 typedef typename PacketType<SrcType, Device>::type PacketSourceType;
// Number of coefficients in one target packet.
192 static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
// Builds the argument evaluator from the op's sub-expression and the device.
201 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorEvaluator(
const XprType& op,
const Device& device)
202 : m_impl(op.expression(), device)
// Dimensions are those of the argument evaluator.
206 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_impl.dimensions(); }
// NOTE(review): the bodies of evalSubExprsIfNeeded and cleanup are not visible
// in this listing.
208 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(Scalar* data)
213 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void cleanup()
// Reads one coefficient from the argument evaluator and converts it with
// scalar_cast_op<SrcType, TargetType>.
218 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const 220 internal::scalar_cast_op<SrcType, TargetType> converter;
221 return converter(m_impl.coeff(index));
// Packet access dispatches to PacketConv<LoadMode, Vectorizable>::run.
// NOTE(review): the declaration of Vectorizable is cut off in this listing;
// it appears to be initialised from type_casting_traits<...>::VectorizedCast —
// confirm.
224 template<
int LoadMode>
225 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const 228 internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
229 return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
// Per-coefficient cost: the argument's cost plus the cost of the cast.
232 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
233 costPerCoeff(
bool vectorized)
const {
234 const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
// NOTE(review): two return statements follow but the condition selecting
// between them is not visible in this listing; presumably the first is the
// vectorized case — confirm.
236 const double SrcCoeffRatio =
237 internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
238 const double TgtCoeffRatio =
239 internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
// Scales the argument cost by SrcCoeffRatio / PacketSize and the cast cost by
// TgtCoeffRatio / PacketSize.
240 return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
241 TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
// Unscaled form: argument cost plus one cast.
243 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
// This evaluator never exposes a raw data pointer.
247 EIGEN_DEVICE_FUNC Scalar* data()
const {
return NULL; }
// PacketConv helper (its struct header and run() signature are not visible in
// this listing). The visible body converts PacketSize coefficients one at a
// time with scalar_cast_op into an aligned temporary array and loads that
// array as a PacketReturnType.
250 template <
int LoadMode,
bool ActuallyVectorize>
253 internal::scalar_cast_op<SrcType, TargetType> converter;
254 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
255 for (
int i = 0; i < PacketSize; ++i) {
256 values[i] = converter(impl.coeff(index+i));
258 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
// PacketConv specialization for ActuallyVectorize == true: builds a
// PacketConverter parameterised by the source/target coefficient ratios from
// type_casting_traits and delegates the packet load to it.
263 template <
int LoadMode>
264 struct PacketConv<LoadMode, true> {
266 const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
267 const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
268 PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
269 SrcCoeffRatio, TgtCoeffRatio> converter(impl);
270 return converter.template packet<LoadMode>(index);
279 #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H Namespace containing all symbols from the Eigen library.
Definition: AdolcForward:45
A cost model used to limit the number of threads used for evaluating tensor expressions.
Definition: TensorEvaluator.h:28
Tensor conversion class. This class makes it possible to vectorize type casting operations when the number of scalars per packet in the source and the destination type differ.
Definition: TensorConversion.h:145
The tensor base class.
Definition: TensorBase.h:827