Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_16ic_s32f_magnitude_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
55 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
56 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
57 
58 #include <inttypes.h>
59 #include <math.h>
60 #include <stdio.h>
61 #include <volk/volk_common.h>
62 
63 #ifdef LV_HAVE_AVX2
64 #include <immintrin.h>
65 
66 static inline void volk_16ic_s32f_magnitude_32f_a_avx2(float* magnitudeVector,
67  const lv_16sc_t* complexVector,
68  const float scalar,
69  unsigned int num_points)
70 {
71  unsigned int number = 0;
72  const unsigned int eighthPoints = num_points / 8;
73 
74  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
75  float* magnitudeVectorPtr = magnitudeVector;
76 
77  __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
78 
79  __m256 cplxValue1, cplxValue2, result;
80  __m256i int1, int2;
81  __m128i short1, short2;
82  __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
83 
84  for (; number < eighthPoints; number++) {
85 
86  int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
87  complexVectorPtr += 16;
88  short1 = _mm256_extracti128_si256(int1, 0);
89  short2 = _mm256_extracti128_si256(int1, 1);
90 
91  int1 = _mm256_cvtepi16_epi32(short1);
92  int2 = _mm256_cvtepi16_epi32(short2);
93  cplxValue1 = _mm256_cvtepi32_ps(int1);
94  cplxValue2 = _mm256_cvtepi32_ps(int2);
95 
96  cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
97  cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
98 
99  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
100  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
101 
102  result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
103  result = _mm256_permutevar8x32_ps(result, idx);
104 
105  result = _mm256_sqrt_ps(result); // Square root the values
106 
107  _mm256_store_ps(magnitudeVectorPtr, result);
108 
109  magnitudeVectorPtr += 8;
110  }
111 
112  number = eighthPoints * 8;
113  magnitudeVectorPtr = &magnitudeVector[number];
114  complexVectorPtr = (const int16_t*)&complexVector[number];
115  for (; number < num_points; number++) {
116  float val1Real = (float)(*complexVectorPtr++) / scalar;
117  float val1Imag = (float)(*complexVectorPtr++) / scalar;
118  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
119  }
120 }
121 #endif /* LV_HAVE_AVX2 */
122 
123 
124 #ifdef LV_HAVE_SSE3
125 #include <pmmintrin.h>
126 
127 static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector,
128  const lv_16sc_t* complexVector,
129  const float scalar,
130  unsigned int num_points)
131 {
132  unsigned int number = 0;
133  const unsigned int quarterPoints = num_points / 4;
134 
135  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
136  float* magnitudeVectorPtr = magnitudeVector;
137 
138  __m128 invScalar = _mm_set_ps1(1.0 / scalar);
139 
140  __m128 cplxValue1, cplxValue2, result;
141 
142  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
143 
144  for (; number < quarterPoints; number++) {
145 
146  inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
147  inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
148  inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
149  inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
150 
151  inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
152  inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
153  inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
154  inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
155 
156  cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
157  cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
158 
159  complexVectorPtr += 8;
160 
161  cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
162  cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
163 
164  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
165  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
166 
167  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
168 
169  result = _mm_sqrt_ps(result); // Square root the values
170 
171  _mm_store_ps(magnitudeVectorPtr, result);
172 
173  magnitudeVectorPtr += 4;
174  }
175 
176  number = quarterPoints * 4;
177  magnitudeVectorPtr = &magnitudeVector[number];
178  complexVectorPtr = (const int16_t*)&complexVector[number];
179  for (; number < num_points; number++) {
180  float val1Real = (float)(*complexVectorPtr++) / scalar;
181  float val1Imag = (float)(*complexVectorPtr++) / scalar;
182  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
183  }
184 }
185 #endif /* LV_HAVE_SSE3 */
186 
187 #ifdef LV_HAVE_SSE
188 #include <xmmintrin.h>
189 
190 static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector,
191  const lv_16sc_t* complexVector,
192  const float scalar,
193  unsigned int num_points)
194 {
195  unsigned int number = 0;
196  const unsigned int quarterPoints = num_points / 4;
197 
198  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
199  float* magnitudeVectorPtr = magnitudeVector;
200 
201  const float iScalar = 1.0 / scalar;
202  __m128 invScalar = _mm_set_ps1(iScalar);
203 
204  __m128 cplxValue1, cplxValue2, result, re, im;
205 
206  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
207 
208  for (; number < quarterPoints; number++) {
209  inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
210  inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
211  inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
212  inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
213 
214  inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
215  inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
216  inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
217  inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
218 
219  cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
220  cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
221 
222  re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
223  im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
224 
225  complexVectorPtr += 8;
226 
227  cplxValue1 = _mm_mul_ps(re, invScalar);
228  cplxValue2 = _mm_mul_ps(im, invScalar);
229 
230  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
231  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
232 
233  result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
234 
235  result = _mm_sqrt_ps(result); // Square root the values
236 
237  _mm_store_ps(magnitudeVectorPtr, result);
238 
239  magnitudeVectorPtr += 4;
240  }
241 
242  number = quarterPoints * 4;
243  magnitudeVectorPtr = &magnitudeVector[number];
244  complexVectorPtr = (const int16_t*)&complexVector[number];
245  for (; number < num_points; number++) {
246  float val1Real = (float)(*complexVectorPtr++) * iScalar;
247  float val1Imag = (float)(*complexVectorPtr++) * iScalar;
248  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
249  }
250 }
251 
252 
253 #endif /* LV_HAVE_SSE */
254 
255 #ifdef LV_HAVE_GENERIC
256 
257 static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
258  const lv_16sc_t* complexVector,
259  const float scalar,
260  unsigned int num_points)
261 {
262  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
263  float* magnitudeVectorPtr = magnitudeVector;
264  unsigned int number = 0;
265  const float invScalar = 1.0 / scalar;
266  for (number = 0; number < num_points; number++) {
267  float real = ((float)(*complexVectorPtr++)) * invScalar;
268  float imag = ((float)(*complexVectorPtr++)) * invScalar;
269  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
270  }
271 }
272 #endif /* LV_HAVE_GENERIC */
273 
274 #ifdef LV_HAVE_ORC_DISABLED
275 
276 extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector,
277  const lv_16sc_t* complexVector,
278  const float scalar,
279  unsigned int num_points);
280 
281 static inline void volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector,
282  const lv_16sc_t* complexVector,
283  const float scalar,
284  unsigned int num_points)
285 {
286  volk_16ic_s32f_magnitude_32f_a_orc_impl(
287  magnitudeVector, complexVector, scalar, num_points);
288 }
289 #endif /* LV_HAVE_ORC_DISABLED */
290 
291 
292 #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
293 
294 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
295 #define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
296 
297 #include <inttypes.h>
298 #include <math.h>
299 #include <stdio.h>
300 #include <volk/volk_common.h>
301 
302 #ifdef LV_HAVE_AVX2
303 #include <immintrin.h>
304 
305 static inline void volk_16ic_s32f_magnitude_32f_u_avx2(float* magnitudeVector,
306  const lv_16sc_t* complexVector,
307  const float scalar,
308  unsigned int num_points)
309 {
310  unsigned int number = 0;
311  const unsigned int eighthPoints = num_points / 8;
312 
313  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
314  float* magnitudeVectorPtr = magnitudeVector;
315 
316  __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
317 
318  __m256 cplxValue1, cplxValue2, result;
319  __m256i int1, int2;
320  __m128i short1, short2;
321  __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
322 
323  for (; number < eighthPoints; number++) {
324 
325  int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
326  complexVectorPtr += 16;
327  short1 = _mm256_extracti128_si256(int1, 0);
328  short2 = _mm256_extracti128_si256(int1, 1);
329 
330  int1 = _mm256_cvtepi16_epi32(short1);
331  int2 = _mm256_cvtepi16_epi32(short2);
332  cplxValue1 = _mm256_cvtepi32_ps(int1);
333  cplxValue2 = _mm256_cvtepi32_ps(int2);
334 
335  cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
336  cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
337 
338  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
339  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
340 
341  result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
342  result = _mm256_permutevar8x32_ps(result, idx);
343 
344  result = _mm256_sqrt_ps(result); // Square root the values
345 
346  _mm256_storeu_ps(magnitudeVectorPtr, result);
347 
348  magnitudeVectorPtr += 8;
349  }
350 
351  number = eighthPoints * 8;
352  magnitudeVectorPtr = &magnitudeVector[number];
353  complexVectorPtr = (const int16_t*)&complexVector[number];
354  for (; number < num_points; number++) {
355  float val1Real = (float)(*complexVectorPtr++) / scalar;
356  float val1Imag = (float)(*complexVectorPtr++) / scalar;
357  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
358  }
359 }
360 #endif /* LV_HAVE_AVX2 */
361 
362 #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_u_H */
volk_16ic_s32f_magnitude_32f_a_sse3
static void volk_16ic_s32f_magnitude_32f_a_sse3(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:127
volk_16ic_s32f_magnitude_32f_generic
static void volk_16ic_s32f_magnitude_32f_generic(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:257
__VOLK_ATTR_ALIGNED
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
lv_16sc_t
short complex lv_16sc_t
Definition: volk_complex.h:67
volk_common.h
volk_16ic_s32f_magnitude_32f_a_sse
static void volk_16ic_s32f_magnitude_32f_a_sse(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:190