/*
 * Statements of _mm256_polar_sign_mask_avx2(__m128i fbits); the function
 * signature and braces are not part of this listing.
 *
 * Builds a per-float sign mask: 32-bit element i (i = 0..7) of the result is
 * 0x80000000 when signed byte i of fbits is greater than zero, and 0
 * otherwise. Bytes 8..15 of fbits are never selected by the shuffle below.
 */
28 #ifndef INCLUDE_VOLK_VOLK_AVX2_INTRINSICS_H_ 29 #define INCLUDE_VOLK_VOLK_AVX2_INTRINSICS_H_ 30 #include <immintrin.h> 35 const __m128i zeros = _mm_set1_epi8(0x00);
36 const __m128i sign_extract = _mm_set1_epi8(0x80);
/*
 * Shuffle control, applied independently to each 128-bit lane: a 0xff control
 * byte (high bit set) zeroes the destination byte, any other value selects
 * that source byte of the same lane. Each selected byte lands in the most
 * significant byte of a 32-bit element: the low lane takes bytes 0..3, the
 * high lane takes bytes 4..7.
 */
37 const __m256i shuffle_mask = _mm256_setr_epi8(0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x01, 0xff, 0xff, 0xff, 0x02, 0xff, 0xff, 0xff, 0x03,
38 0xff, 0xff, 0xff, 0x04, 0xff, 0xff, 0xff, 0x05, 0xff, 0xff, 0xff, 0x06, 0xff, 0xff, 0xff, 0x07);
39 __m256i sign_bits = _mm256_setzero_si256();
/* Signed compare: each byte becomes 0xff where fbits > 0, else 0x00. */
41 fbits = _mm_cmpgt_epi8(fbits, zeros);
/* Keep only bit 7, so every positive byte is reduced to 0x80. */
42 fbits = _mm_and_si128(fbits, sign_extract);
/*
 * Copy fbits into both 128-bit halves: _mm256_shuffle_epi8 cannot cross
 * lanes, so the high lane needs its own copy to read bytes 4..7 from.
 */
43 sign_bits = _mm256_insertf128_si256(sign_bits,fbits,0);
44 sign_bits = _mm256_insertf128_si256(sign_bits,fbits,1);
45 sign_bits = _mm256_shuffle_epi8(sign_bits, shuffle_mask);
/* Bit-pattern reinterpretation only; no integer-to-float conversion. */
47 return _mm256_castsi256_ps(sign_bits);
/*
 * Partial body of _mm256_polar_fsign_add_llrs_avx2(src0, src1, fbits); the
 * statements that define llr0, llr1 and sign_mask, and the return, are not
 * part of this listing.
 * NOTE(review): sign_mask is presumably the result of
 * _mm256_polar_sign_mask_avx2(fbits), and llr0/llr1 presumably come from
 * _mm256_polar_deinterleave - confirm against the full header.
 */
/* Flip every bit of llr0 that is set in sign_mask. */
59 llr0 = _mm256_xor_ps(llr0, sign_mask);
/* Element-wise sum of the (possibly sign-flipped) llr0 and llr1. */
60 __m256 dst = _mm256_add_ps(llr0, llr1);
/*
 * Statements of _mm256_magnitudesquared_ps_avx2(cplxValue0, cplxValue1); the
 * function signature and braces are not part of this listing.
 *
 * Returns eight floats: the sums of adjacent squared element pairs of
 * cplxValue0 (pairs 0..3) followed by those of cplxValue1 (pairs 0..3).
 * NOTE(review): the parameter names suggest interleaved real/imaginary
 * input, making each sum re^2 + im^2 - confirm against callers.
 */
/*
 * _mm256_set_epi32 lists elements from highest to lowest, so this index
 * vector reads (low to high) 0,1,4,5,2,3,6,7.
 */
66 const __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
67 const __m256 squared0 = _mm256_mul_ps(cplxValue0, cplxValue0);
68 const __m256 squared1 = _mm256_mul_ps(cplxValue1, cplxValue1);
/*
 * Horizontal add works per 128-bit lane, so the result is interleaved:
 * [a01, a23, b01, b23 | a45, a67, b45, b67] with a = squared0, b = squared1.
 */
69 const __m256 complex_result = _mm256_hadd_ps(squared0, squared1);
/* Cross-lane permute restores input order: all four a sums, then all four b sums. */
70 return _mm256_permutevar8x32_ps(complex_result, idx);
/*
 * Partial body of _mm256_scaled_norm_dist_ps_avx2(symbols0, symbols1,
 * points0, points1, scalar); the signature, braces and the statement that
 * defines norms are not part of this listing.
 */
/* Element-wise differences between the symbols and the reference points. */
80 const __m256 diff0 = _mm256_sub_ps(symbols0, points0);
81 const __m256 diff1 = _mm256_sub_ps(symbols1, points1);
/*
 * NOTE(review): norms is defined on a line missing from this listing,
 * presumably as _mm256_magnitudesquared_ps_avx2(diff0, diff1) - confirm.
 * The result is norms scaled element-wise by scalar.
 */
83 return _mm256_mul_ps(norms, scalar);
static __m256 _mm256_polar_sign_mask_avx2(__m128i fbits)
Definition: volk_avx2_intrinsics.h:34
static void _mm256_polar_deinterleave(__m256 *llr0, __m256 *llr1, __m256 src0, __m256 src1)
Definition: volk_avx_intrinsics.h:115
static __m256 _mm256_scaled_norm_dist_ps_avx2(const __m256 symbols0, const __m256 symbols1, const __m256 points0, const __m256 points1, const __m256 scalar)
Definition: volk_avx2_intrinsics.h:74
static __m256 _mm256_magnitudesquared_ps_avx2(const __m256 cplxValue0, const __m256 cplxValue1)
Definition: volk_avx2_intrinsics.h:65
static __m256 _mm256_polar_fsign_add_llrs_avx2(__m256 src0, __m256 src1, __m128i fbits)
Definition: volk_avx2_intrinsics.h:51