00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef SHARP_COMPLEX_HACKS_H
00033 #define SHARP_COMPLEX_HACKS_H
00034
00035 #ifdef __cplusplus
00036 #error This header file cannot be included from C++, only from C
00037 #endif
00038
00039 #include <math.h>
00040 #include <complex.h>
00041 #include "sharp_vecsupport.h"
00042
00043 #define UNSAFE_CODE
00044
00045 #if (VLEN==1)
00046
00047 static inline complex double vhsum_cmplx(Tv a, Tv b)
00048 { return a+_Complex_I*b; }
00049
00050 static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
00051 complex double * restrict c1, complex double * restrict c2)
00052 { *c1 += a+_Complex_I*b; *c2 += c+_Complex_I*d; }
00053
00054 #endif
00055
00056 #if (VLEN==2)
00057
00058 static inline complex double vhsum_cmplx (Tv a, Tv b)
00059 {
00060 #if defined(__SSE3__)
00061 Tv tmp = _mm_hadd_pd(a,b);
00062 #else
00063 Tv tmp = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
00064 _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
00065 #endif
00066 union {Tv v; complex double c; } u;
00067 u.v=tmp; return u.c;
00068 }
00069
00070 static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c,
00071 Tv d, complex double * restrict c1, complex double * restrict c2)
00072 {
00073 #ifdef UNSAFE_CODE
00074 #if defined(__SSE3__)
00075 vaddeq(*((__m128d *)c1),_mm_hadd_pd(a,b));
00076 vaddeq(*((__m128d *)c2),_mm_hadd_pd(c,d));
00077 #else
00078 vaddeq(*((__m128d *)c1),vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
00079 _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0))));
00080 vaddeq(*((__m128d *)c2),vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
00081 _mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0))));
00082 #endif
00083 #else
00084 union {Tv v; complex double c; } u1, u2;
00085 #if defined(__SSE3__)
00086 u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d);
00087 #else
00088 u1.v = vadd(_mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)),
00089 _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)));
00090 u2.v = vadd(_mm_shuffle_pd(c,d,_MM_SHUFFLE2(0,1)),
00091 _mm_shuffle_pd(c,d,_MM_SHUFFLE2(1,0)));
00092 #endif
00093 *c1+=u1.c; *c2+=u2.c;
00094 #endif
00095 }
00096
00097 #endif
00098
00099 #if (VLEN==4)
00100
00101 static inline complex double vhsum_cmplx (Tv a, Tv b)
00102 {
00103 Tv tmp=_mm256_hadd_pd(a,b);
00104 Tv tmp2=_mm256_permute2f128_pd(tmp,tmp,1);
00105 tmp=_mm256_add_pd(tmp,tmp2);
00106 #ifdef UNSAFE_CODE
00107 complex double ret;
00108 *((__m128d *)&ret)=_mm256_extractf128_pd(tmp, 0);
00109 return ret;
00110 #else
00111 union {Tv v; complex double c[2]; } u;
00112 u.v=tmp; return u.c[0];
00113 #endif
00114 }
00115
00116 static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d,
00117 complex double * restrict c1, complex double * restrict c2)
00118 {
00119 Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d);
00120 Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49),
00121 tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32);
00122 tmp1=vadd(tmp3,tmp4);
00123 #ifdef UNSAFE_CODE
00124 *((__m128d *)c1)=_mm_add_pd(*((__m128d *)c1),_mm256_extractf128_pd(tmp1, 0));
00125 *((__m128d *)c2)=_mm_add_pd(*((__m128d *)c2),_mm256_extractf128_pd(tmp1, 1));
00126 #else
00127 union {Tv v; complex double c[2]; } u;
00128 u.v=tmp1;
00129 *c1+=u.c[0]; *c2+=u.c[1];
00130 #endif
00131 }
00132
00133 #endif
00134
00135 #endif