Vector Optimized Library of Kernels  2.5.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_64f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
74 #define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
75 
76 #include <inttypes.h>
77 #include <stdio.h>
78 
79 #ifdef LV_HAVE_AVX
80 #include <immintrin.h>
81 
82 static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer,
83  double* qBuffer,
84  const lv_32fc_t* complexVector,
85  unsigned int num_points)
86 {
87  unsigned int number = 0;
88 
89  const float* complexVectorPtr = (float*)complexVector;
90  double* iBufferPtr = iBuffer;
91  double* qBufferPtr = qBuffer;
92 
93  const unsigned int quarterPoints = num_points / 4;
94  __m256 cplxValue;
95  __m128 complexH, complexL, fVal;
96  __m256d dVal;
97 
98  for (; number < quarterPoints; number++) {
99 
100  cplxValue = _mm256_loadu_ps(complexVectorPtr);
101  complexVectorPtr += 8;
102 
103  complexH = _mm256_extractf128_ps(cplxValue, 1);
104  complexL = _mm256_extractf128_ps(cplxValue, 0);
105 
106  // Arrange in i1i2i1i2 format
107  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
108  dVal = _mm256_cvtps_pd(fVal);
109  _mm256_storeu_pd(iBufferPtr, dVal);
110 
111  // Arrange in q1q2q1q2 format
112  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
113  dVal = _mm256_cvtps_pd(fVal);
114  _mm256_storeu_pd(qBufferPtr, dVal);
115 
116  iBufferPtr += 4;
117  qBufferPtr += 4;
118  }
119 
120  number = quarterPoints * 4;
121  for (; number < num_points; number++) {
122  *iBufferPtr++ = *complexVectorPtr++;
123  *qBufferPtr++ = *complexVectorPtr++;
124  }
125 }
126 #endif /* LV_HAVE_AVX */
127 
128 #ifdef LV_HAVE_SSE2
129 #include <emmintrin.h>
130 
131 static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer,
132  double* qBuffer,
133  const lv_32fc_t* complexVector,
134  unsigned int num_points)
135 {
136  unsigned int number = 0;
137 
138  const float* complexVectorPtr = (float*)complexVector;
139  double* iBufferPtr = iBuffer;
140  double* qBufferPtr = qBuffer;
141 
142  const unsigned int halfPoints = num_points / 2;
143  __m128 cplxValue, fVal;
144  __m128d dVal;
145 
146  for (; number < halfPoints; number++) {
147 
148  cplxValue = _mm_loadu_ps(complexVectorPtr);
149  complexVectorPtr += 4;
150 
151  // Arrange in i1i2i1i2 format
152  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
153  dVal = _mm_cvtps_pd(fVal);
154  _mm_storeu_pd(iBufferPtr, dVal);
155 
156  // Arrange in q1q2q1q2 format
157  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
158  dVal = _mm_cvtps_pd(fVal);
159  _mm_storeu_pd(qBufferPtr, dVal);
160 
161  iBufferPtr += 2;
162  qBufferPtr += 2;
163  }
164 
165  number = halfPoints * 2;
166  for (; number < num_points; number++) {
167  *iBufferPtr++ = *complexVectorPtr++;
168  *qBufferPtr++ = *complexVectorPtr++;
169  }
170 }
171 #endif /* LV_HAVE_SSE2 */
172 
173 #ifdef LV_HAVE_GENERIC
174 
175 static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer,
176  double* qBuffer,
177  const lv_32fc_t* complexVector,
178  unsigned int num_points)
179 {
180  unsigned int number = 0;
181  const float* complexVectorPtr = (float*)complexVector;
182  double* iBufferPtr = iBuffer;
183  double* qBufferPtr = qBuffer;
184 
185  for (number = 0; number < num_points; number++) {
186  *iBufferPtr++ = (double)*complexVectorPtr++;
187  *qBufferPtr++ = (double)*complexVectorPtr++;
188  }
189 }
190 #endif /* LV_HAVE_GENERIC */
191 
192 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
193 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
194 #define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
195 
196 #include <inttypes.h>
197 #include <stdio.h>
198 
199 #ifdef LV_HAVE_AVX
200 #include <immintrin.h>
201 
202 static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer,
203  double* qBuffer,
204  const lv_32fc_t* complexVector,
205  unsigned int num_points)
206 {
207  unsigned int number = 0;
208 
209  const float* complexVectorPtr = (float*)complexVector;
210  double* iBufferPtr = iBuffer;
211  double* qBufferPtr = qBuffer;
212 
213  const unsigned int quarterPoints = num_points / 4;
214  __m256 cplxValue;
215  __m128 complexH, complexL, fVal;
216  __m256d dVal;
217 
218  for (; number < quarterPoints; number++) {
219 
220  cplxValue = _mm256_load_ps(complexVectorPtr);
221  complexVectorPtr += 8;
222 
223  complexH = _mm256_extractf128_ps(cplxValue, 1);
224  complexL = _mm256_extractf128_ps(cplxValue, 0);
225 
226  // Arrange in i1i2i1i2 format
227  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
228  dVal = _mm256_cvtps_pd(fVal);
229  _mm256_store_pd(iBufferPtr, dVal);
230 
231  // Arrange in q1q2q1q2 format
232  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
233  dVal = _mm256_cvtps_pd(fVal);
234  _mm256_store_pd(qBufferPtr, dVal);
235 
236  iBufferPtr += 4;
237  qBufferPtr += 4;
238  }
239 
240  number = quarterPoints * 4;
241  for (; number < num_points; number++) {
242  *iBufferPtr++ = *complexVectorPtr++;
243  *qBufferPtr++ = *complexVectorPtr++;
244  }
245 }
246 #endif /* LV_HAVE_AVX */
247 
248 #ifdef LV_HAVE_SSE2
249 #include <emmintrin.h>
250 
251 static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer,
252  double* qBuffer,
253  const lv_32fc_t* complexVector,
254  unsigned int num_points)
255 {
256  unsigned int number = 0;
257 
258  const float* complexVectorPtr = (float*)complexVector;
259  double* iBufferPtr = iBuffer;
260  double* qBufferPtr = qBuffer;
261 
262  const unsigned int halfPoints = num_points / 2;
263  __m128 cplxValue, fVal;
264  __m128d dVal;
265 
266  for (; number < halfPoints; number++) {
267 
268  cplxValue = _mm_load_ps(complexVectorPtr);
269  complexVectorPtr += 4;
270 
271  // Arrange in i1i2i1i2 format
272  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
273  dVal = _mm_cvtps_pd(fVal);
274  _mm_store_pd(iBufferPtr, dVal);
275 
276  // Arrange in q1q2q1q2 format
277  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
278  dVal = _mm_cvtps_pd(fVal);
279  _mm_store_pd(qBufferPtr, dVal);
280 
281  iBufferPtr += 2;
282  qBufferPtr += 2;
283  }
284 
285  number = halfPoints * 2;
286  for (; number < num_points; number++) {
287  *iBufferPtr++ = *complexVectorPtr++;
288  *qBufferPtr++ = *complexVectorPtr++;
289  }
290 }
291 #endif /* LV_HAVE_SSE2 */
292 
293 #ifdef LV_HAVE_GENERIC
294 
295 static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer,
296  double* qBuffer,
297  const lv_32fc_t* complexVector,
298  unsigned int num_points)
299 {
300  unsigned int number = 0;
301  const float* complexVectorPtr = (float*)complexVector;
302  double* iBufferPtr = iBuffer;
303  double* qBufferPtr = qBuffer;
304 
305  for (number = 0; number < num_points; number++) {
306  *iBufferPtr++ = (double)*complexVectorPtr++;
307  *qBufferPtr++ = (double)*complexVectorPtr++;
308  }
309 }
310 #endif /* LV_HAVE_GENERIC */
311 
312 #ifdef LV_HAVE_NEONV8
313 #include <arm_neon.h>
314 
/*!
 * \brief Deinterleaves a complex float vector into two double-precision
 * buffers using ARMv8 NEON intrinsics.
 *
 * The input is walked as a flat sequence of floats: the first float of each
 * pair is widened to double and written to iBuffer, the second to qBuffer.
 *
 * \param iBuffer       Output buffer receiving num_points doubles (first float
 *                      of every complex sample).
 * \param qBuffer       Output buffer receiving num_points doubles (second float
 *                      of every complex sample).
 * \param complexVector Input vector of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer,
                                                      double* qBuffer,
                                                      const lv_32fc_t* complexVector,
                                                      unsigned int num_points)
{
    unsigned int number;
    // Each vector iteration consumes 2 complex samples (4 floats).
    unsigned int half_points = num_points / 2;
    // Keep the const qualifier when viewing the complex input as plain floats.
    const float* complexVectorPtr = (const float*)complexVector;
    double* iBufferPtr = iBuffer;
    double* qBufferPtr = qBuffer;
    float32x2x2_t complexInput;
    float64x2_t iVal, qVal;

    for (number = 0; number < half_points; number++) {
        // vld2 deinterleaves on load: val[0] = {i0, i1}, val[1] = {q0, q1}.
        complexInput = vld2_f32(complexVectorPtr);

        // Widen each pair of floats to a pair of doubles.
        iVal = vcvt_f64_f32(complexInput.val[0]);
        qVal = vcvt_f64_f32(complexInput.val[1]);

        vst1q_f64(iBufferPtr, iVal);
        vst1q_f64(qBufferPtr, qVal);

        complexVectorPtr += 4;
        iBufferPtr += 2;
        qBufferPtr += 2;
    }

    // Scalar tail for the single leftover sample when num_points is odd.
    for (number = half_points * 2; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
347 #endif /* LV_HAVE_NEONV8 */
348 
349 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
static void volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:202
static void volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:131
static void volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:175
static void volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:251
static void volk_32fc_deinterleave_64f_x2_a_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:295
static void volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:82
float complex lv_32fc_t
Definition: volk_complex.h:65