11 #ifndef EIGEN_COMPLEX_NEON_H 12 #define EIGEN_COMPLEX_NEON_H 18 inline uint32x4_t p4ui_CONJ_XOR() {
21 uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
24 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
25 return vld1q_u32( conj_XOR_DATA );
29 inline uint32x2_t p2ui_CONJ_XOR() {
30 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
31 return vld1_u32( conj_XOR_DATA );
37 EIGEN_STRONG_INLINE Packet2cf() {}
38 EIGEN_STRONG_INLINE
explicit Packet2cf(
const Packet4f& a) : v(a) {}
42 template<>
struct packet_traits<
std::complex<float> > : default_packet_traits
44 typedef Packet2cf type;
45 typedef Packet2cf half;
65 template<>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
enum {size=2, alignment=
Aligned16};
typedef Packet2cf half; };
67 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(
const std::complex<float>& from)
70 r64 = vld1_f32((
float *)&from);
72 return Packet2cf(vcombine_f32(r64, r64));
75 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(padd<Packet4f>(a.v,b.v)); }
76 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(psub<Packet4f>(a.v,b.v)); }
77 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(
const Packet2cf& a) {
return Packet2cf(pnegate<Packet4f>(a.v)); }
78 template<> EIGEN_STRONG_INLINE Packet2cf pconj(
const Packet2cf& a)
80 Packet4ui b = vreinterpretq_u32_f32(a.v);
81 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
84 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
89 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
91 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
93 v1 = vmulq_f32(v1, b.v);
95 v2 = vmulq_f32(v2, b.v);
97 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
101 return Packet2cf(vaddq_f32(v1, v2));
104 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
106 return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
108 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
110 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
112 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
114 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
116 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
118 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
121 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet2cf(pload<Packet4f>((
const float*)from)); }
122 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet2cf(ploadu<Packet4f>((
const float*)from)); }
124 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(
const std::complex<float>* from) {
return pset1<Packet2cf>(*from); }
126 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
float*)to, from.v); }
127 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
float*)to, from.v); }
129 template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from,
Index stride)
131 Packet4f res = pset1<Packet4f>(0.f);
132 res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
133 res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
134 res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
135 res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
136 return Packet2cf(res);
139 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from,
Index stride)
141 to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
142 to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
145 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<float> >(
const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((
float *)addr); }
147 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(
const Packet2cf& a)
149 std::complex<float> EIGEN_ALIGN16 x[2];
150 vst1q_f32((
float *)x, a.v);
154 template<> EIGEN_STRONG_INLINE Packet2cf preverse(
const Packet2cf& a)
156 float32x2_t a_lo, a_hi;
159 a_lo = vget_low_f32(a.v);
160 a_hi = vget_high_f32(a.v);
161 a_r128 = vcombine_f32(a_hi, a_lo);
163 return Packet2cf(a_r128);
166 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(
const Packet2cf& a)
168 return Packet2cf(vrev64q_f32(a.v));
171 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(
const Packet2cf& a)
174 std::complex<float> s;
176 a1 = vget_low_f32(a.v);
177 a2 = vget_high_f32(a.v);
178 a2 = vadd_f32(a1, a2);
179 vst1_f32((
float *)&s, a2);
184 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(
const Packet2cf* vecs)
186 Packet4f sum1, sum2, sum;
189 sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
190 sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
191 sum = vaddq_f32(sum1, sum2);
193 return Packet2cf(sum);
196 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(
const Packet2cf& a)
198 float32x2_t a1, a2, v1, v2, prod;
199 std::complex<float> s;
201 a1 = vget_low_f32(a.v);
202 a2 = vget_high_f32(a.v);
204 v1 = vdup_lane_f32(a1, 0);
206 v2 = vdup_lane_f32(a1, 1);
208 v1 = vmul_f32(v1, a2);
210 v2 = vmul_f32(v2, a2);
212 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
216 prod = vadd_f32(v1, v2);
218 vst1_f32((
float *)&s, prod);
224 struct palign_impl<Offset,Packet2cf>
226 EIGEN_STRONG_INLINE
static void run(Packet2cf& first,
const Packet2cf& second)
230 first.v = vextq_f32(first.v, second.v, 2);
235 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
237 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 238 {
return padd(pmul(x,y),c); }
240 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 242 return internal::pmul(a, pconj(b));
246 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
248 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 249 {
return padd(pmul(x,y),c); }
251 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 253 return internal::pmul(pconj(a), b);
257 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
259 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 260 {
return padd(pmul(x,y),c); }
262 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 264 return pconj(internal::pmul(a, b));
268 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
270 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
273 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
277 s = vmulq_f32(b.v, b.v);
278 rev_s = vrev64q_f32(s);
280 return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
283 EIGEN_DEVICE_FUNC
inline void 284 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
285 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
286 kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
287 kernel.packet[1].v = tmp;
291 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG 295 static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
297 const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
298 static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
303 EIGEN_STRONG_INLINE Packet1cd() {}
304 EIGEN_STRONG_INLINE
explicit Packet1cd(
const Packet2d& a) : v(a) {}
308 template<>
struct packet_traits<
std::complex<double> > : default_packet_traits
310 typedef Packet1cd type;
311 typedef Packet1cd half;
331 template<>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
enum {size=1, alignment=
Aligned16};
typedef Packet1cd half; };
333 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet1cd(pload<Packet2d>((
const double*)from)); }
334 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet1cd(ploadu<Packet2d>((
const double*)from)); }
336 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(
const std::complex<double>& from)
337 {
return ploadu<Packet1cd>(&from); }
339 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(padd<Packet2d>(a.v,b.v)); }
340 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(psub<Packet2d>(a.v,b.v)); }
341 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(
const Packet1cd& a) {
return Packet1cd(pnegate<Packet2d>(a.v)); }
342 template<> EIGEN_STRONG_INLINE Packet1cd pconj(
const Packet1cd& a) {
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
344 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
349 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
351 v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
353 v1 = vmulq_f64(v1, b.v);
355 v2 = vmulq_f64(v2, b.v);
357 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
359 v2 = preverse<Packet2d>(v2);
361 return Packet1cd(vaddq_f64(v1, v2));
364 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
366 return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
368 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
370 return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
372 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
374 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
376 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
378 return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
381 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(
const std::complex<double>* from) {
return pset1<Packet1cd>(*from); }
383 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, from.v); }
384 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
double*)to, from.v); }
386 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<double> >(
const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((
double *)addr); }
388 template<> EIGEN_DEVICE_FUNC
inline Packet1cd pgather<std::complex<double>, Packet1cd>(
const std::complex<double>* from,
Index stride)
390 Packet2d res = pset1<Packet2d>(0.0);
391 res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
392 res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
393 return Packet1cd(res);
396 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to,
const Packet1cd& from,
Index stride)
398 to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
402 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(
const Packet1cd& a)
404 std::complex<double> EIGEN_ALIGN16 res;
405 pstore<std::complex<double> >(&res, a);
410 template<> EIGEN_STRONG_INLINE Packet1cd preverse(
const Packet1cd& a) {
return a; }
412 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
414 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(
const Packet1cd* vecs) {
return vecs[0]; }
416 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
419 struct palign_impl<Offset,Packet1cd>
421 static EIGEN_STRONG_INLINE
void run(Packet1cd& ,
const Packet1cd& )
428 template<>
struct conj_helper<Packet1cd, Packet1cd, false,true>
430 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const 431 {
return padd(pmul(x,y),c); }
433 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const 435 return internal::pmul(a, pconj(b));
439 template<>
struct conj_helper<Packet1cd, Packet1cd, true,false>
441 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const 442 {
return padd(pmul(x,y),c); }
444 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const 446 return internal::pmul(pconj(a), b);
450 template<>
struct conj_helper<Packet1cd, Packet1cd, true,true>
452 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const 453 {
return padd(pmul(x,y),c); }
455 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const 457 return pconj(internal::pmul(a, b));
461 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
463 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
466 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
467 Packet2d s = pmul<Packet2d>(b.v, b.v);
468 Packet2d rev_s = preverse<Packet2d>(s);
470 return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
473 EIGEN_STRONG_INLINE Packet1cd pcplxflip(
const Packet1cd& x)
475 return Packet1cd(preverse(Packet2d(x.v)));
478 EIGEN_STRONG_INLINE
void ptranspose(PacketBlock<Packet1cd,2>& kernel)
480 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
481 kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
482 kernel.packet[1].v = tmp;
#endif // EIGEN_ARCH_ARM64

#endif // EIGEN_COMPLEX_NEON_H

// Cross-reference notes that trailed the recovered documentation listing,
// kept verbatim:
//   Definition: Constants.h:230
//   Namespace containing all symbols from the Eigen library.
//   Definition: Core:287
//   EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
//   The Index type as used for the API.
//   Definition: Meta.h:33
//   Definition: Eigen_Colamd.h:50