sse_utils.h

Go to the documentation of this file.
00001 /*
00002  *  This file is part of libc_utils.
00003  *
00004  *  libc_utils is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  libc_utils is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with libc_utils; if not, write to the Free Software
00016  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00017  */
00018 
00019 /*
00020  *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
00021  *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
00022  *  (DLR).
00023  */
00024 
00025 /*! \file sse_utils.h
00026  *  SSE/SSE2/SSE3-related functionality
00027  *
00028  *  Copyright (C) 2010,2011 Max-Planck-Society
00029  *  \author Martin Reinecke
00030  */
00031 
00032 #ifndef PLANCK_SSE_UTILS_H
00033 #define PLANCK_SSE_UTILS_H
00034 
00035 #if (defined(__SSE__))
00036 
00037 #include <xmmintrin.h>
00038 
00039 #ifdef __cplusplus
00040 extern "C" {
00041 #endif
00042 
00043 typedef __m128 v4sf; /* v4sf: SSE1 vector of 4 single-precision floats (alias for __m128) */
00044 
00045 typedef union { /* V4SF: overlay giving per-element access to a v4sf */
00046   float f[4]; /* the four floats as a plain array */
00047   v4sf v; /* the same storage viewed as one SSE vector */
00048 } V4SF;
00049 
00050 static inline v4sf build_v4sf (float a, float b, float c, float d)
00051   { return _mm_set_ps(d,c,b,a); }
00052 static inline void read_v4sf (v4sf v, float *a, float *b, float *c, float *d)
00053   {
00054   V4SF tmp;
00055   tmp.v = v;
00056   if (a) *a=tmp.f[0];
00057   if (b) *b=tmp.f[1];
00058   if (c) *c=tmp.f[2];
00059   if (d) *d=tmp.f[3];
00060   }
00061 
00062 #ifdef __cplusplus
00063 }
00064 #endif
00065 
00066 #endif
00067 
00068 #if (defined(__SSE2__))
00069 
00070 #include <emmintrin.h>
00071 
00072 #ifdef __cplusplus
00073 extern "C" {
00074 #endif
00075 
00076 typedef __m128d v2df; /* v2df: SSE2 vector of 2 doubles (alias for __m128d) */
00077 
00078 typedef union { /* V2DF: overlay giving per-element access to a v2df */
00079   double d[2]; /* the two doubles as a plain array */
00080   v2df v; /* the same storage viewed as one SSE2 vector */
00081 } V2DF;
00082 
00083 typedef struct { /* v2df2: a pair of SSE2 vectors */
00084   v2df a,b; /* the two vectors, four doubles in total */
00085 } v2df2;
00086 typedef struct { /* V2DF2: like v2df2, but each member is a V2DF union */
00087   V2DF a,b; /* elements are reachable as a.d[i] / b.d[i], vectors as a.v / b.v */
00088 } V2DF2;
00089 
00090 #define V2DF_SIGNMASK _mm_set1_pd(-0.0) /* both elements -0.0: in IEEE 754 only the sign bit of each double is set */
00091 
00092 static inline v2df build_v2df (double a, double b)
00093   { return _mm_set_pd(b,a); }
00094 static inline void read_v2df (v2df v, double *a, double *b)
00095   { _mm_store_sd(a,v); _mm_storeh_pd(b,v); }
00096 
00097 static inline int v2df_any_gt (v2df a, v2df b)
00098   {
00099   return (_mm_movemask_pd(_mm_cmpgt_pd(_mm_andnot_pd(V2DF_SIGNMASK,a),b))!=0);
00100   }
00101 static inline int v2df_all_ge (v2df a, v2df b)
00102   {
00103   return (_mm_movemask_pd(_mm_cmplt_pd(_mm_andnot_pd(V2DF_SIGNMASK,a),b))==0);
00104   }
00105 static inline V2DF to_V2DF (v2df x)
00106   { V2DF X; X.v=x; return X; }
00107 static inline V2DF2 to_V2DF2 (v2df2 x)
00108   { V2DF2 X; X.a.v=x.a; X.b.v=x.b; return X; }
00109 static inline v2df2 to_v2df2 (V2DF2 X)
00110   { v2df2 x; x.a=X.a.v; x.b=X.b.v; return x; }
00111 static inline v2df2 zero_v2df2(void)
00112   { v2df2 x; x.a=x.b=_mm_setzero_pd(); return x; }
00113 
00114 #ifdef __cplusplus
00115 }
00116 #endif
00117 
00118 #endif
00119 
00120 #if (defined(__SSE3__))
00121 
00122 #include <pmmintrin.h>
00123 
00124 #endif
00125 
00126 #endif

Generated on Thu Oct 8 14:48:49 2015 for LevelS C support library