71 #ifndef INCLUDED_volk_32f_index_min_16u_a_H
72 #define INCLUDED_volk_32f_index_min_16u_a_H
80 #include <immintrin.h>
/*
 * Body fragment of an AVX kernel that finds the index of the smallest float
 * in `source` and writes it to target[0] as a uint16_t.
 * Presumably this is volk_32f_index_min_16u_a_avx (the cross-reference at the
 * end of this listing places that definition at line 83) -- confirm.
 *
 * NOTE(review): this listing skips source lines (the embedded numbers jump,
 * e.g. 97 -> 103 and 105 -> 107). The function signature, the declarations of
 * currentValues, index, minValuesBuffer and minIndexesBuffer, any advance of
 * inputPtr inside the loop, the index update in the tail loop, and the closing
 * braces are not visible here -- verify against the full header.
 */
// Clamp the element count so every candidate index fits in the uint16_t result.
85 num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
// Number of complete groups of 8 floats handled by the vector loop.
86 const uint32_t eighthPoints = num_points / 8;
// Cursor used for the vector loads.
88 float* inputPtr = (
float*)source;
// Every lane index grows by 8 per vector iteration.
90 __m256 indexIncrementValues = _mm256_set1_ps(8);
// Lanes start at -8..-1 so that the first add inside the loop yields 0..7
// (_mm256_set_ps takes its arguments from the highest lane down to the lowest).
91 __m256 currentIndexes = _mm256_set_ps(-1, -2, -3, -4, -5, -6, -7, -8);
// Seed the running minimum with the first element.
// NOTE(review): source[0] is read before any check on num_points; presumably
// callers guarantee num_points >= 1 -- confirm.
93 float min = source[0];
// Per-lane running minimum (seeded with source[0]) and the index it came from.
95 __m256 minValues = _mm256_set1_ps(min);
96 __m256 minValuesIndex = _mm256_setzero_ps();
97 __m256 compareResults;
103 for (uint32_t number = 0; number < eighthPoints; number++) {
// Aligned load of 8 floats; _mm256_load_ps requires a 32-byte aligned address.
105 currentValues = _mm256_load_ps(inputPtr);
107 currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
// Lane mask set where the new value is strictly smaller than the lane minimum;
// strict less-than means an equal later value does not replace an earlier one.
109 compareResults = _mm256_cmp_ps(currentValues, minValues, _CMP_LT_OS);
// Where the mask is set take the new index/value, otherwise keep the old one.
111 minValuesIndex = _mm256_blendv_ps(minValuesIndex, currentIndexes, compareResults);
112 minValues = _mm256_blendv_ps(minValues, currentValues, compareResults);
// Spill the per-lane minima and their indices so they can be reduced in scalar code.
116 _mm256_store_ps(minValuesBuffer, minValues);
117 _mm256_store_ps(minIndexesBuffer, minValuesIndex);
// Reduce the 8 lanes to one winner; on equal values the lower index is kept.
119 for (uint32_t number = 0; number < 8; number++) {
120 if (minValuesBuffer[number] < min) {
121 index = minIndexesBuffer[number];
122 min = minValuesBuffer[number];
123 }
else if (minValuesBuffer[number] == min) {
124 if (index > minIndexesBuffer[number])
125 index = minIndexesBuffer[number];
// Scalar pass over the elements left after the last full group of 8.
129 for (uint32_t number = eighthPoints * 8; number < num_points; number++) {
130 if (source[number] < min) {
132 min = source[number];
// Publish the winning index, converted to uint16_t.
135 target[0] = (uint16_t)index;
140 #ifdef LV_HAVE_SSE4_1
141 #include <smmintrin.h>
/*
 * SSE4.1 kernel that finds the index of the smallest float in `source` and
 * writes it to target[0] as a uint16_t. It processes 4 floats per vector
 * iteration and finishes the leftover elements in a scalar loop.
 *
 * NOTE(review): this listing skips source lines (the embedded numbers jump,
 * e.g. 143 -> 147 and 167 -> 169). The rest of the parameter list, the
 * declarations of index, minValuesBuffer and minIndexesBuffer, any advance of
 * inputPtr inside the loop, the index update in the tail loop, and the closing
 * braces are not visible here -- verify against the full header.
 */
143 static inline void volk_32f_index_min_16u_a_sse4_1(uint16_t* target,
// Clamp the element count so every candidate index fits in the uint16_t result.
147 num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
// Number of complete groups of 4 floats handled by the vector loop.
148 const uint32_t quarterPoints = num_points / 4;
// Cursor used for the vector loads.
150 float* inputPtr = (
float*)source;
// Every lane index grows by 4 per vector iteration.
152 __m128 indexIncrementValues = _mm_set1_ps(4);
// Lanes start at -4..-1 so that the first add inside the loop yields 0..3
// (_mm_set_ps takes its arguments from the highest lane down to the lowest).
153 __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
// Seed the running minimum with the first element.
// NOTE(review): source[0] is read before any check on num_points; presumably
// callers guarantee num_points >= 1 -- confirm.
155 float min = source[0];
// Per-lane running minimum (seeded with source[0]) and the index it came from.
157 __m128 minValues = _mm_set1_ps(min);
158 __m128 minValuesIndex = _mm_setzero_ps();
159 __m128 compareResults;
160 __m128 currentValues;
165 for (uint32_t number = 0; number < quarterPoints; number++) {
// Aligned load of 4 floats; _mm_load_ps requires a 16-byte aligned address.
167 currentValues = _mm_load_ps(inputPtr);
169 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
// Lane mask set where the new value is strictly smaller than the lane minimum;
// strict less-than means an equal later value does not replace an earlier one.
171 compareResults = _mm_cmplt_ps(currentValues, minValues);
// Where the mask is set take the new index/value, otherwise keep the old one.
173 minValuesIndex = _mm_blendv_ps(minValuesIndex, currentIndexes, compareResults);
174 minValues = _mm_blendv_ps(minValues, currentValues, compareResults);
// Spill the per-lane minima and their indices so they can be reduced in scalar code.
178 _mm_store_ps(minValuesBuffer, minValues);
179 _mm_store_ps(minIndexesBuffer, minValuesIndex);
// Reduce the 4 lanes to one winner; on equal values the lower index is kept.
181 for (uint32_t number = 0; number < 4; number++) {
182 if (minValuesBuffer[number] < min) {
183 index = minIndexesBuffer[number];
184 min = minValuesBuffer[number];
185 }
else if (minValuesBuffer[number] == min) {
186 if (index > minIndexesBuffer[number])
187 index = minIndexesBuffer[number];
// Scalar pass over the elements left after the last full group of 4.
191 for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
192 if (source[number] < min) {
194 min = source[number];
// Publish the winning index, converted to uint16_t.
197 target[0] = (uint16_t)index;
205 #include <xmmintrin.h>
/*
 * Body fragment of an SSE kernel that finds the index of the smallest float
 * in `source` and writes it to target[0] as a uint16_t.
 * Presumably this is volk_32f_index_min_16u_a_sse (the cross-reference at the
 * end of this listing places that definition at line 208) -- confirm.
 *
 * NOTE(review): this listing skips source lines (the embedded numbers jump,
 * e.g. 223 -> 228 and 230 -> 232). The function signature, the declarations of
 * index, minValuesBuffer and minIndexesBuffer, any advance of inputPtr inside
 * the loop, the index update in the tail loop, and the closing braces are not
 * visible here -- verify against the full header.
 */
// Clamp the element count so every candidate index fits in the uint16_t result.
210 num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
// Number of complete groups of 4 floats handled by the vector loop.
211 const uint32_t quarterPoints = num_points / 4;
// Cursor used for the vector loads.
213 float* inputPtr = (
float*)source;
// Every lane index grows by 4 per vector iteration.
215 __m128 indexIncrementValues = _mm_set1_ps(4);
// Lanes start at -4..-1 so that the first add inside the loop yields 0..3
// (_mm_set_ps takes its arguments from the highest lane down to the lowest).
216 __m128 currentIndexes = _mm_set_ps(-1, -2, -3, -4);
// Seed the running minimum with the first element.
// NOTE(review): source[0] is read before any check on num_points; presumably
// callers guarantee num_points >= 1 -- confirm.
218 float min = source[0];
// Per-lane running minimum (seeded with source[0]) and the index it came from.
220 __m128 minValues = _mm_set1_ps(min);
221 __m128 minValuesIndex = _mm_setzero_ps();
222 __m128 compareResults;
223 __m128 currentValues;
228 for (uint32_t number = 0; number < quarterPoints; number++) {
// Aligned load of 4 floats; _mm_load_ps requires a 16-byte aligned address.
230 currentValues = _mm_load_ps(inputPtr);
232 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
// Lane mask (all ones / all zeros) set where the new value is strictly smaller.
234 compareResults = _mm_cmplt_ps(currentValues, minValues);
// Mask-select built from and/andnot/or: (mask & new) | (~mask & old).
236 minValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, currentIndexes),
237 _mm_andnot_ps(compareResults, minValuesIndex));
238 minValues = _mm_or_ps(_mm_and_ps(compareResults, currentValues),
239 _mm_andnot_ps(compareResults, minValues));
// Spill the per-lane minima and their indices so they can be reduced in scalar code.
243 _mm_store_ps(minValuesBuffer, minValues);
244 _mm_store_ps(minIndexesBuffer, minValuesIndex);
// Reduce the 4 lanes to one winner; on equal values the lower index is kept.
246 for (uint32_t number = 0; number < 4; number++) {
247 if (minValuesBuffer[number] < min) {
248 index = minIndexesBuffer[number];
249 min = minValuesBuffer[number];
250 }
else if (minValuesBuffer[number] == min) {
251 if (index > minIndexesBuffer[number])
252 index = minIndexesBuffer[number];
// Scalar pass over the elements left after the last full group of 4.
256 for (uint32_t number = quarterPoints * 4; number < num_points; number++) {
257 if (source[number] < min) {
259 min = source[number];
// Publish the winning index, converted to uint16_t.
262 target[0] = (uint16_t)index;
268 #ifdef LV_HAVE_GENERIC
/*
 * Body fragment of a scalar minimum search over `source`.
 * Presumably this is volk_32f_index_min_16u_generic (the cross-reference at
 * the end of this listing places that definition at line 271) -- confirm.
 *
 * NOTE(review): the listing stops inside the `if`; the function signature, the
 * index bookkeeping, the final store of the result and the closing braces are
 * not visible here -- verify against the full header.
 */
// Clamp the element count to USHRT_MAX.
273 num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
// Seed the running minimum with the first element.
// NOTE(review): source[0] is read before any check on num_points; presumably
// callers guarantee num_points >= 1 -- confirm.
275 float min = source[0];
// Start at 1 because element 0 already seeded `min`.
278 for (uint32_t
i = 1;
i < num_points; ++
i) {
// Strict less-than: an equal later value does not take this branch.
279 if (source[
i] < min) {
293 #ifndef INCLUDED_volk_32f_index_min_16u_u_H
294 #define INCLUDED_volk_32f_index_min_16u_u_H
296 #include <inttypes.h>
302 #include <immintrin.h>
/*
 * Body fragment of an AVX kernel that finds the index of the smallest float
 * in `source` and writes it to target[0] as a uint16_t, using unaligned
 * loads and stores (_mm256_loadu_ps / _mm256_storeu_ps).
 * Presumably this is volk_32f_index_min_16u_u_avx (the cross-reference at the
 * end of this listing places that definition at line 305) -- confirm.
 *
 * NOTE(review): this listing skips source lines (the embedded numbers jump,
 * e.g. 320 -> 325 and 327 -> 329). The function signature, the declarations of
 * index, minValuesBuffer and minIndexesBuffer, any advance of inputPtr inside
 * the loop, the index update in the tail loop, and the closing braces are not
 * visible here -- verify against the full header.
 */
// Clamp the element count so every candidate index fits in the uint16_t result.
307 num_points = (num_points > USHRT_MAX) ? USHRT_MAX : num_points;
// Number of complete groups of 8 floats handled by the vector loop.
308 const uint32_t eighthPoints = num_points / 8;
// Cursor used for the vector loads.
310 float* inputPtr = (
float*)source;
// Every lane index grows by 8 per vector iteration.
312 __m256 indexIncrementValues = _mm256_set1_ps(8);
// Lanes start at -8..-1 so that the first add inside the loop yields 0..7
// (_mm256_set_ps takes its arguments from the highest lane down to the lowest).
313 __m256 currentIndexes = _mm256_set_ps(-1, -2, -3, -4, -5, -6, -7, -8);
// Seed the running minimum with the first element.
// NOTE(review): source[0] is read before any check on num_points; presumably
// callers guarantee num_points >= 1 -- confirm.
315 float min = source[0];
// Per-lane running minimum (seeded with source[0]) and the index it came from.
317 __m256 minValues = _mm256_set1_ps(min);
318 __m256 minValuesIndex = _mm256_setzero_ps();
319 __m256 compareResults;
320 __m256 currentValues;
325 for (uint32_t number = 0; number < eighthPoints; number++) {
// Unaligned load of 8 floats; no alignment requirement on inputPtr.
327 currentValues = _mm256_loadu_ps(inputPtr);
329 currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
// Lane mask set where the new value is strictly smaller than the lane minimum;
// strict less-than means an equal later value does not replace an earlier one.
331 compareResults = _mm256_cmp_ps(currentValues, minValues, _CMP_LT_OS);
// Where the mask is set take the new index/value, otherwise keep the old one.
333 minValuesIndex = _mm256_blendv_ps(minValuesIndex, currentIndexes, compareResults);
334 minValues = _mm256_blendv_ps(minValues, currentValues, compareResults);
// Spill the per-lane minima and their indices so they can be reduced in scalar code.
338 _mm256_storeu_ps(minValuesBuffer, minValues);
339 _mm256_storeu_ps(minIndexesBuffer, minValuesIndex);
// Reduce the 8 lanes to one winner; on equal values the lower index is kept.
341 for (uint32_t number = 0; number < 8; number++) {
342 if (minValuesBuffer[number] < min) {
343 index = minIndexesBuffer[number];
344 min = minValuesBuffer[number];
345 }
else if (minValuesBuffer[number] == min) {
346 if (index > minIndexesBuffer[number])
347 index = minIndexesBuffer[number];
// Scalar pass over the elements left after the last full group of 8.
351 for (uint32_t number = eighthPoints * 8; number < num_points; number++) {
352 if (source[number] < min) {
354 min = source[number];
// Publish the winning index, converted to uint16_t.
357 target[0] = (uint16_t)index;
static void volk_32f_index_min_16u_a_avx(uint16_t *target, const float *source, uint32_t num_points)
Definition: volk_32f_index_min_16u.h:83
static void volk_32f_index_min_16u_generic(uint16_t *target, const float *source, uint32_t num_points)
Definition: volk_32f_index_min_16u.h:271
static void volk_32f_index_min_16u_a_sse(uint16_t *target, const float *source, uint32_t num_points)
Definition: volk_32f_index_min_16u.h:208
static void volk_32f_index_min_16u_u_avx(uint16_t *target, const float *source, uint32_t num_points)
Definition: volk_32f_index_min_16u.h:305
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
for i
Definition: volk_config_fixed.tmpl.h:25