70 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H
71 #define INCLUDED_volk_32f_binary_slicer_32i_H
74 #ifdef LV_HAVE_GENERIC
78 unsigned int num_points)
81 const float* aPtr = aVector;
82 unsigned int number = 0;
84 for (number = 0; number < num_points; number++) {
95 #ifdef LV_HAVE_GENERIC
99 unsigned int num_points)
102 const float* aPtr = aVector;
103 unsigned int number = 0;
105 for (number = 0; number < num_points; number++) {
106 *cPtr++ = (*aPtr++ >= 0);
113 #include <emmintrin.h>
116 const float* aVector,
117 unsigned int num_points)
120 const float* aPtr = aVector;
121 unsigned int number = 0;
123 unsigned int quarter_points = num_points / 4;
125 __m128i res_i, binary_i;
127 zero_val = _mm_set1_ps(0.0f);
129 for (number = 0; number < quarter_points; number++) {
130 a_val = _mm_load_ps(aPtr);
132 res_f = _mm_cmpge_ps(a_val, zero_val);
133 res_i = _mm_cvtps_epi32(res_f);
134 binary_i = _mm_srli_epi32(res_i, 31);
136 _mm_store_si128((__m128i*)cPtr, binary_i);
142 for (number = quarter_points * 4; number < num_points; number++) {
154 #include <immintrin.h>
157 const float* aVector,
158 unsigned int num_points)
161 const float* aPtr = aVector;
162 unsigned int number = 0;
164 unsigned int quarter_points = num_points / 8;
165 __m256 a_val, res_f, binary_f;
167 __m256 zero_val, one_val;
168 zero_val = _mm256_set1_ps(0.0f);
169 one_val = _mm256_set1_ps(1.0f);
171 for (number = 0; number < quarter_points; number++) {
172 a_val = _mm256_load_ps(aPtr);
174 res_f = _mm256_cmp_ps(a_val, zero_val, _CMP_GE_OS);
175 binary_f = _mm256_and_ps(res_f, one_val);
176 binary_i = _mm256_cvtps_epi32(binary_f);
178 _mm256_store_si256((__m256i*)cPtr, binary_i);
184 for (number = quarter_points * 8; number < num_points; number++) {
196 #include <emmintrin.h>
199 const float* aVector,
200 unsigned int num_points)
203 const float* aPtr = aVector;
204 unsigned int number = 0;
206 unsigned int quarter_points = num_points / 4;
208 __m128i res_i, binary_i;
210 zero_val = _mm_set1_ps(0.0f);
212 for (number = 0; number < quarter_points; number++) {
213 a_val = _mm_loadu_ps(aPtr);
215 res_f = _mm_cmpge_ps(a_val, zero_val);
216 res_i = _mm_cvtps_epi32(res_f);
217 binary_i = _mm_srli_epi32(res_i, 31);
219 _mm_storeu_si128((__m128i*)cPtr, binary_i);
225 for (number = quarter_points * 4; number < num_points; number++) {
237 #include <immintrin.h>
240 const float* aVector,
241 unsigned int num_points)
244 const float* aPtr = aVector;
245 unsigned int number = 0;
247 unsigned int quarter_points = num_points / 8;
248 __m256 a_val, res_f, binary_f;
250 __m256 zero_val, one_val;
251 zero_val = _mm256_set1_ps(0.0f);
252 one_val = _mm256_set1_ps(1.0f);
254 for (number = 0; number < quarter_points; number++) {
255 a_val = _mm256_loadu_ps(aPtr);
257 res_f = _mm256_cmp_ps(a_val, zero_val, _CMP_GE_OS);
258 binary_f = _mm256_and_ps(res_f, one_val);
259 binary_i = _mm256_cvtps_epi32(binary_f);
261 _mm256_storeu_si256((__m256i*)cPtr, binary_i);
267 for (number = quarter_points * 8; number < num_points; number++) {