/*
 * Listing fragment of volk_32f_s32f_convert_32i_u_avx (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:81").
 * Visible behavior: each input float is multiplied by `scalar`, clamped
 * between min_val and max_val, and written out as a 32-bit integer, eight
 * elements per vector iteration with a scalar loop for the remainder.
 * Unaligned load/store intrinsics are used throughout.
 * NOTE(review): the embedded line numbers skip (82 -> 84, 92 -> 95,
 * 113 -> 118, ...), so the function header, the declarations of inputVal1,
 * intInputVal1 and r, and the closing braces are not visible here.
 */
70 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H 71 #define INCLUDED_volk_32f_s32f_convert_32i_u_H 78 #include <immintrin.h> 82 const float scalar,
unsigned int num_points)
84 unsigned int number = 0;
/* Number of complete 8-float groups handled by the vector loop. */
86 const unsigned int eighthPoints = num_points / 8;
88 const float* inputVectorPtr = (
const float*)inputVector;
89 int32_t* outputVectorPtr = outputVector;
/*
 * Clamp bounds held as floats.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31; clamping to
 * it may not keep values inside the int32_t range -- confirm.
 */
91 float min_val = INT_MIN;
92 float max_val = INT_MAX;
/* Replicate the scale factor and both bounds into every vector lane. */
95 __m256 vScalar = _mm256_set1_ps(scalar);
98 __m256 vmin_val = _mm256_set1_ps(min_val);
99 __m256 vmax_val = _mm256_set1_ps(max_val);
101 for(;number < eighthPoints; number++){
/* Unaligned load of 8 floats, then advance the input cursor. */
102 inputVal1 = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8;
/* Scale, cap at vmax_val, then raise to at least vmin_val. */
104 inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* Convert the 8 packed floats to packed 32-bit integers. */
105 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
/* Unaligned store of the 8 results, then advance the output cursor. */
107 _mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1);
108 outputVectorPtr += 8;
/* Resume at the first element the vector loop did not cover. */
111 number = eighthPoints * 8;
112 for(; number < num_points; number++){
113 r = inputVector[number] * scalar;
/*
 * NOTE(review): listing lines 114-117 are absent; the scalar clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
118 outputVector[number] = (int32_t)
rintf(r);
/*
 * Listing fragment of volk_32f_s32f_convert_32i_u_sse2 (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:128").
 * Visible behavior: scale each float by `scalar`, clamp between min_val and
 * max_val, convert to 32-bit integers four at a time with unaligned
 * load/store intrinsics, then finish the remainder in a scalar loop.
 * NOTE(review): the function header, the declarations of inputVal1 and r,
 * and the closing braces are missing from this listing.
 */
125 #include <emmintrin.h> 129 const float scalar,
unsigned int num_points)
131 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
133 const unsigned int quarterPoints = num_points / 4;
135 const float* inputVectorPtr = (
const float*)inputVector;
136 int32_t* outputVectorPtr = outputVector;
/*
 * Clamp bounds held as floats.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31; clamping to
 * it may not keep values inside the int32_t range -- confirm.
 */
138 float min_val = INT_MIN;
139 float max_val = INT_MAX;
/* Replicate the scale factor and both bounds into every vector lane. */
142 __m128 vScalar = _mm_set_ps1(scalar);
144 __m128i intInputVal1;
145 __m128 vmin_val = _mm_set_ps1(min_val);
146 __m128 vmax_val = _mm_set_ps1(max_val);
148 for(;number < quarterPoints; number++){
/* Unaligned load of 4 floats, then advance the input cursor. */
149 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
/* Scale, cap at vmax_val, then raise to at least vmin_val. */
151 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* Convert the 4 packed floats to packed 32-bit integers. */
152 intInputVal1 = _mm_cvtps_epi32(inputVal1);
/* Unaligned store of the 4 results, then advance the output cursor. */
154 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
155 outputVectorPtr += 4;
/* Resume at the first element the vector loop did not cover. */
158 number = quarterPoints * 4;
159 for(; number < num_points; number++){
160 r = inputVector[number] * scalar;
/*
 * NOTE(review): listing lines 161-164 are absent; the scalar clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
165 outputVector[number] = (int32_t)
rintf(r);
/*
 * Listing fragment of volk_32f_s32f_convert_32i_u_sse (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:176").
 * Visible behavior: the scale and clamp are done on 4-float vectors; the
 * clamped floats are spilled to outputFloatBuffer and each lane is rounded
 * with rintf and cast to int32_t individually. A scalar loop handles the
 * remainder.
 * NOTE(review): the function header, the declarations of ret, r and
 * outputFloatBuffer, and the closing braces are missing from this listing.
 */
173 #include <xmmintrin.h> 177 const float scalar,
unsigned int num_points)
179 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
181 const unsigned int quarterPoints = num_points / 4;
183 const float* inputVectorPtr = (
const float*)inputVector;
184 int32_t* outputVectorPtr = outputVector;
/*
 * Clamp bounds held as floats.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31; clamping to
 * it may leave a value the (int32_t) cast below cannot represent -- confirm.
 */
186 float min_val = INT_MIN;
187 float max_val = INT_MAX;
/* Replicate the scale factor and both bounds into every vector lane. */
190 __m128 vScalar = _mm_set_ps1(scalar);
192 __m128 vmin_val = _mm_set_ps1(min_val);
193 __m128 vmax_val = _mm_set_ps1(max_val);
197 for(;number < quarterPoints; number++){
/*
 * Unaligned load of 4 floats.
 * NOTE(review): no advance of inputVectorPtr is visible in this loop;
 * listing lines 199-200 are absent, so it presumably happened there.
 */
198 ret = _mm_loadu_ps(inputVectorPtr);
/* Scale, cap at vmax_val, then raise to at least vmin_val. */
201 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
/*
 * Aligned store into the scratch buffer (_mm_store_ps needs a 16-byte
 * aligned destination; the buffer's declaration is not visible here).
 */
203 _mm_store_ps(outputFloatBuffer, ret);
/* Round and narrow each of the four lanes in order. */
204 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
205 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
206 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
207 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
/* Resume at the first element the vector loop did not cover. */
210 number = quarterPoints * 4;
211 for(; number < num_points; number++){
212 r = inputVector[number] * scalar;
/*
 * NOTE(review): listing lines 213-216 are absent; the scalar clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
217 outputVector[number] = (int32_t)
rintf(r);
/*
 * Listing fragment of volk_32f_s32f_convert_32i_generic (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:227").
 * Visible behavior: a plain scalar loop that multiplies each input by
 * `scalar`, rounds with rintf, casts to int32_t and stores the result.
 * NOTE(review): the function header, the declaration of r, the closing
 * braces and the matching #endif are missing from this listing.
 */
224 #ifdef LV_HAVE_GENERIC 228 const float scalar,
unsigned int num_points)
230 int32_t* outputVectorPtr = outputVector;
231 const float* inputVectorPtr = inputVector;
232 unsigned int number = 0;
/*
 * Clamp bounds held as floats; no use of them is visible in this fragment.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31 -- confirm.
 */
233 float min_val = INT_MIN;
234 float max_val = INT_MAX;
237 for(number = 0; number < num_points; number++){
/* Read the next input, advance the input cursor, and scale. */
238 r = *inputVectorPtr++ * scalar;
/*
 * NOTE(review): listing lines 239-242 are absent; the clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
243 *outputVectorPtr++ = (int32_t)
rintf(r);
/*
 * Listing fragment of volk_32f_s32f_convert_32i_a_avx (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:263").
 * Visible behavior: same scale / clamp / convert pipeline as the other
 * 8-wide fragment in this file, but using the aligned load and store
 * intrinsics, which require 32-byte-aligned addresses.
 * NOTE(review): the function header, the declarations of inputVal1 and r,
 * and the closing braces are missing from this listing.
 */
252 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H 253 #define INCLUDED_volk_32f_s32f_convert_32i_a_H 256 #include <inttypes.h> 260 #include <immintrin.h> 264 const float scalar,
unsigned int num_points)
266 unsigned int number = 0;
/* Number of complete 8-float groups handled by the vector loop. */
268 const unsigned int eighthPoints = num_points / 8;
270 const float* inputVectorPtr = (
const float*)inputVector;
271 int32_t* outputVectorPtr = outputVector;
/*
 * Clamp bounds held as floats.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31; clamping to
 * it may not keep values inside the int32_t range -- confirm.
 */
273 float min_val = INT_MIN;
274 float max_val = INT_MAX;
/* Replicate the scale factor and both bounds into every vector lane. */
277 __m256 vScalar = _mm256_set1_ps(scalar);
279 __m256i intInputVal1;
280 __m256 vmin_val = _mm256_set1_ps(min_val);
281 __m256 vmax_val = _mm256_set1_ps(max_val);
283 for(;number < eighthPoints; number++){
/* Aligned load of 8 floats, then advance the input cursor. */
284 inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
/* Scale, cap at vmax_val, then raise to at least vmin_val. */
286 inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* Convert the 8 packed floats to packed 32-bit integers. */
287 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
/* Aligned store of the 8 results, then advance the output cursor. */
289 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
290 outputVectorPtr += 8;
/* Resume at the first element the vector loop did not cover. */
293 number = eighthPoints * 8;
294 for(; number < num_points; number++){
295 r = inputVector[number] * scalar;
/*
 * NOTE(review): listing lines 296-299 are absent; the scalar clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
300 outputVector[number] = (int32_t)
rintf(r);
/*
 * Listing fragment of volk_32f_s32f_convert_32i_a_sse2 (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:311").
 * Visible behavior: scale each float by `scalar`, clamp between min_val and
 * max_val, convert to 32-bit integers four at a time, then finish the
 * remainder in a scalar loop. The aligned load/store intrinsics used here
 * require 16-byte-aligned addresses.
 * NOTE(review): the function header, the declarations of inputVal1 and r,
 * and the closing braces are missing from this listing.
 */
308 #include <emmintrin.h> 312 const float scalar,
unsigned int num_points)
314 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
316 const unsigned int quarterPoints = num_points / 4;
318 const float* inputVectorPtr = (
const float*)inputVector;
319 int32_t* outputVectorPtr = outputVector;
/*
 * Clamp bounds held as floats.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31; clamping to
 * it may not keep values inside the int32_t range -- confirm.
 */
321 float min_val = INT_MIN;
322 float max_val = INT_MAX;
/* Replicate the scale factor and both bounds into every vector lane. */
325 __m128 vScalar = _mm_set_ps1(scalar);
327 __m128i intInputVal1;
328 __m128 vmin_val = _mm_set_ps1(min_val);
329 __m128 vmax_val = _mm_set_ps1(max_val);
331 for(;number < quarterPoints; number++){
/* Aligned load of 4 floats, then advance the input cursor. */
332 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
/* Scale, cap at vmax_val, then raise to at least vmin_val. */
334 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* Convert the 4 packed floats to packed 32-bit integers. */
335 intInputVal1 = _mm_cvtps_epi32(inputVal1);
/* Aligned store of the 4 results, then advance the output cursor. */
337 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
338 outputVectorPtr += 4;
/* Resume at the first element the vector loop did not cover. */
341 number = quarterPoints * 4;
342 for(; number < num_points; number++){
343 r = inputVector[number] * scalar;
/*
 * NOTE(review): listing lines 344-347 are absent; the scalar clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
348 outputVector[number] = (int32_t)
rintf(r);
/*
 * Listing fragment of volk_32f_s32f_convert_32i_a_sse (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:359").
 * Visible behavior: the scale and clamp are done on 4-float vectors read
 * with an aligned load; the clamped floats are spilled to outputFloatBuffer
 * and each lane is rounded with rintf and cast to int32_t individually.
 * A scalar loop handles the remainder.
 * NOTE(review): the function header, the declarations of ret, r and
 * outputFloatBuffer, and the closing braces are missing from this listing.
 */
356 #include <xmmintrin.h> 360 const float scalar,
unsigned int num_points)
362 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
364 const unsigned int quarterPoints = num_points / 4;
366 const float* inputVectorPtr = (
const float*)inputVector;
367 int32_t* outputVectorPtr = outputVector;
/*
 * Clamp bounds held as floats.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31; clamping to
 * it may leave a value the (int32_t) cast below cannot represent -- confirm.
 */
369 float min_val = INT_MIN;
370 float max_val = INT_MAX;
/* Replicate the scale factor and both bounds into every vector lane. */
373 __m128 vScalar = _mm_set_ps1(scalar);
375 __m128 vmin_val = _mm_set_ps1(min_val);
376 __m128 vmax_val = _mm_set_ps1(max_val);
380 for(;number < quarterPoints; number++){
/*
 * Aligned load of 4 floats (requires a 16-byte-aligned address).
 * NOTE(review): no advance of inputVectorPtr is visible in this loop;
 * listing lines 382-383 are absent, so it presumably happened there.
 */
381 ret = _mm_load_ps(inputVectorPtr);
/* Scale, cap at vmax_val, then raise to at least vmin_val. */
384 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
/*
 * Aligned store into the scratch buffer (_mm_store_ps needs a 16-byte
 * aligned destination; the buffer's declaration is not visible here).
 */
386 _mm_store_ps(outputFloatBuffer, ret);
/* Round and narrow each of the four lanes in order. */
387 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
388 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
389 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
390 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
/* Resume at the first element the vector loop did not cover. */
393 number = quarterPoints * 4;
394 for(; number < num_points; number++){
395 r = inputVector[number] * scalar;
/*
 * NOTE(review): listing lines 396-399 are absent; the scalar clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
400 outputVector[number] = (int32_t)
rintf(r);
/*
 * Listing fragment of volk_32f_s32f_convert_32i_a_generic (named by the
 * cross-reference entry near the end of this file, "Definition: ...h:410").
 * Visible behavior: a plain scalar loop that multiplies each input by
 * `scalar`, rounds with rintf, casts to int32_t and stores the result.
 * No aligned-access intrinsic appears in the visible lines.
 * NOTE(review): the function header, the declaration of r, the closing
 * braces and the matching #endif are missing from this listing.
 */
407 #ifdef LV_HAVE_GENERIC 411 const float scalar,
unsigned int num_points)
413 int32_t* outputVectorPtr = outputVector;
414 const float* inputVectorPtr = inputVector;
415 unsigned int number = 0;
/*
 * Clamp bounds held as floats; no use of them is visible in this fragment.
 * NOTE(review): assuming a 32-bit int, INT_MAX (2^31 - 1) has no exact
 * float representation, so max_val presumably rounds to 2^31 -- confirm.
 */
416 float min_val = INT_MIN;
417 float max_val = INT_MAX;
420 for(number = 0; number < num_points; number++){
/* Read the next input, advance the input cursor, and scale. */
421 r = *inputVectorPtr++ * scalar;
/*
 * NOTE(review): listing lines 422-425 are absent; the clamp against
 * min_val/max_val presumably lived there -- confirm against the real file.
 */
426 *outputVectorPtr++ = (int32_t)
rintf(r);
static void volk_32f_s32f_convert_32i_u_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:81
static float rintf(float x)
Definition: config.h:31
static void volk_32f_s32f_convert_32i_u_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:128
static void volk_32f_s32f_convert_32i_a_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:311
static void volk_32f_s32f_convert_32i_a_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:263
static void volk_32f_s32f_convert_32i_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:227
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:47
static void volk_32f_s32f_convert_32i_u_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:176
static void volk_32f_s32f_convert_32i_a_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:410
static void volk_32f_s32f_convert_32i_a_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:359