Vector Optimized Library of Kernels  2.5.0
Architecture-tuned implementations of math kernels
volk_32f_s32f_normalize.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
70 #ifndef INCLUDED_volk_32f_s32f_normalize_a_H
71 #define INCLUDED_volk_32f_s32f_normalize_a_H
72 
73 #include <inttypes.h>
74 #include <stdio.h>
75 
76 #ifdef LV_HAVE_AVX
77 #include <immintrin.h>
78 
79 static inline void volk_32f_s32f_normalize_a_avx(float* vecBuffer,
80  const float scalar,
81  unsigned int num_points)
82 {
83  unsigned int number = 0;
84  float* inputPtr = vecBuffer;
85 
86  const float invScalar = 1.0 / scalar;
87  __m256 vecScalar = _mm256_set1_ps(invScalar);
88 
89  __m256 input1;
90 
91  const uint64_t eighthPoints = num_points / 8;
92  for (; number < eighthPoints; number++) {
93 
94  input1 = _mm256_load_ps(inputPtr);
95 
96  input1 = _mm256_mul_ps(input1, vecScalar);
97 
98  _mm256_store_ps(inputPtr, input1);
99 
100  inputPtr += 8;
101  }
102 
103  number = eighthPoints * 8;
104  for (; number < num_points; number++) {
105  *inputPtr *= invScalar;
106  inputPtr++;
107  }
108 }
109 #endif /* LV_HAVE_AVX */
110 
111 #ifdef LV_HAVE_SSE
112 #include <xmmintrin.h>
113 
/*
 * Scale a float buffer in place by the reciprocal of `scalar`, using aligned
 * SSE loads and stores.
 *
 * vecBuffer  - samples to scale; each element is overwritten with its scaled
 *              value. Accessed through _mm_load_ps/_mm_store_ps, which
 *              require a 16-byte aligned address.
 * scalar     - divisor; its reciprocal is computed once and used as the
 *              multiplier for every element.
 * num_points - number of floats in vecBuffer to process.
 */
static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    const float invScalar = 1.0 / scalar;
    const __m128 factor = _mm_set_ps1(invScalar);

    /* Largest multiple of 4 not exceeding num_points: the part handled with
     * full 4-lane vectors. */
    const unsigned int vectorCount = num_points - (num_points % 4);

    unsigned int idx = 0;
    while (idx < vectorCount) {
        float* lane = vecBuffer + idx;
        _mm_store_ps(lane, _mm_mul_ps(_mm_load_ps(lane), factor));
        idx += 4;
    }

    /* Remaining (at most 3) samples are scaled one at a time. */
    while (idx < num_points) {
        vecBuffer[idx] *= invScalar;
        idx++;
    }
}
144 #endif /* LV_HAVE_SSE */
145 
146 #ifdef LV_HAVE_GENERIC
147 
/*
 * Portable scalar implementation: scale a float buffer in place by the
 * reciprocal of `scalar`.
 *
 * vecBuffer  - samples to scale; each element is overwritten with its scaled
 *              value.
 * scalar     - divisor; its reciprocal is computed once and used as the
 *              multiplier for every element.
 * num_points - number of floats in vecBuffer to process.
 */
static inline void volk_32f_s32f_normalize_generic(float* vecBuffer,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    const float invScalar = 1.0 / scalar;
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        vecBuffer[idx] *= invScalar;
    }
}
160 #endif /* LV_HAVE_GENERIC */
161 
162 #ifdef LV_HAVE_ORC
163 
/* Implemented outside this header; declared here so the wrapper below can
 * call it. */
extern void volk_32f_s32f_normalize_a_orc_impl(float* dst,
                                               float* src,
                                               const float scalar,
                                               unsigned int num_points);

/*
 * ORC-backed normalization wrapper.
 *
 * Computes the reciprocal of `scalar` and forwards to
 * volk_32f_s32f_normalize_a_orc_impl, passing vecBuffer as both destination
 * and source so the operation is applied in place.
 *
 * vecBuffer  - buffer handed to the implementation as dst and src.
 * scalar     - divisor; only its reciprocal is passed on.
 * num_points - element count forwarded unchanged.
 */
static inline void volk_32f_s32f_normalize_u_orc(float* vecBuffer,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    const float reciprocal = 1.0 / scalar;

    volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, reciprocal, num_points);
}
175 #endif /* LV_HAVE_ORC */
176 
177 #endif /* INCLUDED_volk_32f_s32f_normalize_a_H */
178 
179 #ifndef INCLUDED_volk_32f_s32f_normalize_u_H
180 #define INCLUDED_volk_32f_s32f_normalize_u_H
181 
182 #include <inttypes.h>
183 #include <stdio.h>
184 #ifdef LV_HAVE_AVX
185 #include <immintrin.h>
186 
187 static inline void volk_32f_s32f_normalize_u_avx(float* vecBuffer,
188  const float scalar,
189  unsigned int num_points)
190 {
191  unsigned int number = 0;
192  float* inputPtr = vecBuffer;
193 
194  const float invScalar = 1.0 / scalar;
195  __m256 vecScalar = _mm256_set1_ps(invScalar);
196 
197  __m256 input1;
198 
199  const uint64_t eighthPoints = num_points / 8;
200  for (; number < eighthPoints; number++) {
201 
202  input1 = _mm256_loadu_ps(inputPtr);
203 
204  input1 = _mm256_mul_ps(input1, vecScalar);
205 
206  _mm256_storeu_ps(inputPtr, input1);
207 
208  inputPtr += 8;
209  }
210 
211  number = eighthPoints * 8;
212  for (; number < num_points; number++) {
213  *inputPtr *= invScalar;
214  inputPtr++;
215  }
216 }
217 #endif /* LV_HAVE_AVX */
218 
219 
220 #endif /* INCLUDED_volk_32f_s32f_normalize_u_H */
static void volk_32f_s32f_normalize_a_avx(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:79
static void volk_32f_s32f_normalize_u_avx(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:187
static void volk_32f_s32f_normalize_generic(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:148
static void volk_32f_s32f_normalize_a_sse(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:114