72 unsigned int num_points)
74 unsigned int number = 0;
76 const float* complexVectorPtr = (
float*)complexVector;
77 double* iBufferPtr = iBuffer;
78 double* qBufferPtr = qBuffer;
80 const unsigned int quarterPoints = num_points / 4;
82 __m128 complexH, complexL, fVal;
85 for (; number < quarterPoints; number++) {
87 cplxValue = _mm256_loadu_ps(complexVectorPtr);
88 complexVectorPtr += 8;
90 complexH = _mm256_extractf128_ps(cplxValue, 1);
91 complexL = _mm256_extractf128_ps(cplxValue, 0);
94 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
95 dVal = _mm256_cvtps_pd(fVal);
96 _mm256_storeu_pd(iBufferPtr, dVal);
99 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
100 dVal = _mm256_cvtps_pd(fVal);
101 _mm256_storeu_pd(qBufferPtr, dVal);
107 number = quarterPoints * 4;
108 for (; number < num_points; number++) {
109 *iBufferPtr++ = *complexVectorPtr++;
110 *qBufferPtr++ = *complexVectorPtr++;
// Fragment of a routine whose name and leading parameters are not visible in
// this excerpt. The visible body reads consecutive float pairs from
// complexVector and writes the first float of each pair to iBuffer and the
// second to qBuffer, widening each value to double. num_points is the number
// of pairs to process.
121 unsigned int num_points)
// Counts SIMD iterations in the first loop and pairs in the scalar loop.
123 unsigned int number = 0;
// Walk the input as a flat array of floats (two floats per point).
125 const float* complexVectorPtr = (
float*)complexVector;
126 double* iBufferPtr = iBuffer;
127 double* qBufferPtr = qBuffer;
// Each SIMD iteration consumes 4 floats, i.e. 2 points.
129 const unsigned int halfPoints = num_points / 2;
// NOTE(review): the declaration of dVal is not visible in this excerpt.
130 __m128 cplxValue, fVal;
133 for (; number < halfPoints; number++) {
// Unaligned load of 4 floats: two consecutive (first, second) pairs.
135 cplxValue = _mm_loadu_ps(complexVectorPtr);
136 complexVectorPtr += 4;
// Select floats 0 and 2 (the first element of each pair) into the low two
// lanes; _mm_cvtps_pd widens only those two low lanes to double.
139 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
140 dVal = _mm_cvtps_pd(fVal);
// Unaligned store of 2 doubles.
141 _mm_storeu_pd(iBufferPtr, dVal);
// Same again for floats 1 and 3 (the second element of each pair).
144 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
145 dVal = _mm_cvtps_pd(fVal);
146 _mm_storeu_pd(qBufferPtr, dVal);
// NOTE(review): no advance of iBufferPtr/qBufferPtr is visible inside the loop
// in this excerpt - confirm both are moved forward by 2 per iteration.
// Resume the count at the number of points consumed by the SIMD loop.
152 number = halfPoints * 2;
// Scalar tail: handles the leftover point when num_points is odd. The
// float-to-double widening happens through the implicit assignment conversion.
153 for (; number < num_points; number++) {
154 *iBufferPtr++ = *complexVectorPtr++;
155 *qBufferPtr++ = *complexVectorPtr++;
// Fragment of a routine whose name and leading parameters are not visible in
// this excerpt. The visible body reads consecutive float pairs from
// complexVector and writes the first float of each pair to iBuffer and the
// second to qBuffer, widening each value to double. It uses the aligned
// 256-bit load/store intrinsics, which require 32-byte-aligned addresses.
192 unsigned int num_points)
// Counts SIMD iterations in the first loop and pairs in the scalar loop.
194 unsigned int number = 0;
// Walk the input as a flat array of floats (two floats per point).
196 const float* complexVectorPtr = (
float*)complexVector;
197 double* iBufferPtr = iBuffer;
198 double* qBufferPtr = qBuffer;
// Each SIMD iteration consumes 8 floats, i.e. 4 points.
200 const unsigned int quarterPoints = num_points / 4;
// NOTE(review): the declarations of cplxValue and dVal are not visible in this
// excerpt.
202 __m128 complexH, complexL, fVal;
205 for (; number < quarterPoints; number++) {
// Aligned load of 8 floats: four consecutive (first, second) pairs.
207 cplxValue = _mm256_load_ps(complexVectorPtr);
208 complexVectorPtr += 8;
// Split the 256-bit register into its upper (floats 4..7) and lower
// (floats 0..3) 128-bit halves so the SSE shuffle can be applied.
210 complexH = _mm256_extractf128_ps(cplxValue, 1);
211 complexL = _mm256_extractf128_ps(cplxValue, 0);
// Gather floats 0 and 2 of each half -> the first element of all four pairs,
// then widen the four floats to four doubles.
214 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
215 dVal = _mm256_cvtps_pd(fVal);
// Aligned store of 4 doubles.
216 _mm256_store_pd(iBufferPtr, dVal);
// Gather floats 1 and 3 of each half -> the second element of all four pairs.
219 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
220 dVal = _mm256_cvtps_pd(fVal);
221 _mm256_store_pd(qBufferPtr, dVal);
// NOTE(review): no advance of iBufferPtr/qBufferPtr is visible inside the loop
// in this excerpt - confirm both are moved forward by 4 per iteration.
// Resume the count at the number of points consumed by the SIMD loop.
227 number = quarterPoints * 4;
// Scalar tail: handles the up-to-three leftover points. The float-to-double
// widening happens through the implicit assignment conversion.
228 for (; number < num_points; number++) {
229 *iBufferPtr++ = *complexVectorPtr++;
230 *qBufferPtr++ = *complexVectorPtr++;
// Fragment of a routine whose name and leading parameters are not visible in
// this excerpt. The visible body reads consecutive float pairs from
// complexVector and writes the first float of each pair to iBuffer and the
// second to qBuffer, widening each value to double. It uses the aligned
// 128-bit load/store intrinsics, which require 16-byte-aligned addresses.
241 unsigned int num_points)
// Counts SIMD iterations in the first loop and pairs in the scalar loop.
243 unsigned int number = 0;
// Walk the input as a flat array of floats (two floats per point).
245 const float* complexVectorPtr = (
float*)complexVector;
246 double* iBufferPtr = iBuffer;
247 double* qBufferPtr = qBuffer;
// Each SIMD iteration consumes 4 floats, i.e. 2 points.
249 const unsigned int halfPoints = num_points / 2;
// NOTE(review): the declaration of dVal is not visible in this excerpt.
250 __m128 cplxValue, fVal;
253 for (; number < halfPoints; number++) {
// Aligned load of 4 floats: two consecutive (first, second) pairs.
255 cplxValue = _mm_load_ps(complexVectorPtr);
256 complexVectorPtr += 4;
// Select floats 0 and 2 (the first element of each pair) into the low two
// lanes; _mm_cvtps_pd widens only those two low lanes to double.
259 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
260 dVal = _mm_cvtps_pd(fVal);
// Aligned store of 2 doubles.
261 _mm_store_pd(iBufferPtr, dVal);
// Same again for floats 1 and 3 (the second element of each pair).
264 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
265 dVal = _mm_cvtps_pd(fVal);
266 _mm_store_pd(qBufferPtr, dVal);
// NOTE(review): no advance of iBufferPtr/qBufferPtr is visible inside the loop
// in this excerpt - confirm both are moved forward by 2 per iteration.
// Resume the count at the number of points consumed by the SIMD loop.
272 number = halfPoints * 2;
// Scalar tail: handles the leftover point when num_points is odd. The
// float-to-double widening happens through the implicit assignment conversion.
273 for (; number < num_points; number++) {
274 *iBufferPtr++ = *complexVectorPtr++;
275 *qBufferPtr++ = *complexVectorPtr++;