![]() |
NFFT
3.3.2
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00019 #include <stdio.h> 00020 #include <math.h> 00021 #include <string.h> 00022 #include <stdlib.h> 00023 #include <complex.h> 00024 00025 #include "nfft3.h" 00026 #include "infft.h" 00027 #ifdef _OPENMP 00028 #include <omp.h> 00029 #endif 00030 00031 void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f) 00032 { 00033 double re,im; 00034 int k, n, j, t; 00035 nfsft_plan plan; 00036 00037 fscanf(infile, "%d %d %d", trafo_adjoint, N, M); 00038 *x = (double *)nfft_malloc(2*(*M)*sizeof(double)); 00039 *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C)); 00040 *f = (C*)nfft_malloc((*M)*sizeof(C)); 00041 00042 memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C)); 00043 memset(*f,0U,(*M)*sizeof(C)); 00044 00045 #ifdef _OPENMP 00046 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan"); 00047 #else 00048 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan"); 00049 #endif 00050 00051 nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F | 00052 NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT, 00053 PRE_PHI_HUT | FFTW_INIT | FFT_OUT_OF_PLACE, 6); 00054 00055 #ifdef _OPENMP 00056 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan"); 00057 #else 00058 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan"); 00059 #endif 00060 00061 for (j=0; j < *M; j++) 00062 for (t=0; t < 2; t++) 00063 fscanf(infile, "%lg", (*x)+2*j+t); 00064 00065 if (trafo_adjoint==0) 00066 { 00067 for (k = 0; k <= *N; k++) 00068 for (n = -k; n <= k; n++) 00069 { 00070 fscanf(infile, "%lg %lg", &re, &im); 00071 (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im; 00072 } 00073 } 00074 else 00075 { 00076 for (j=0; j < *M; j++) 00077 { 00078 fscanf(infile, "%lg %lg", &re, &im); 00079 (*f)[j] = re + _Complex_I * im; 00080 } 00081 } 00082 00083 nfsft_finalize(&plan); 00084 } 00085 00086 void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags) 00087 { 00088 nfsft_plan plan; 00089 int k, n; 00090 // int N, M, trafo_adjoint; 00091 int t, j; 00092 ticks t0, t1; 00093 double tt_total, tt_pre; 00094 00095 // fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M); 00096 00097 /*#ifdef _OPENMP 00098 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan"); 00099 #else 00100 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan"); 00101 #endif*/ 00102 00103 /* precomputation (for fast polynomial transform) */ 00104 // nfsft_precompute(N,1000.0,0U,0U); 00105 00106 /* Initialize transform plan using the guru interface. All input and output 00107 * arrays are allocated by nfsft_init_guru(). Computations are performed with 00108 * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical 00109 * Fourier coefficients is preserved during transformations. The NFFT uses a 00110 * cut-off parameter m = 6. See the NFFT 3 manual for details. 00111 */ 00112 nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F | 00113 NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT, 00114 PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m); 00115 00116 /*#ifdef _OPENMP 00117 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan"); 00118 #else 00119 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan"); 00120 #endif*/ 00121 00122 for (j=0; j < plan.M_total; j++) 00123 { 00124 for (t=0; t < 2; t++) 00125 // fscanf(infile, "%lg", plan.x+2*j+t); 00126 plan.x[2*j+t] = x[2*j+t]; 00127 } 00128 00129 if (trafo_adjoint==0) 00130 { 00131 memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex)); 00132 for (k = 0; k <= plan.N; k++) 00133 for (n = -k; n <= k; n++) 00134 { 00135 // fscanf(infile, "%lg %lg", &re, &im); 00136 // plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im; 00137 plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)]; 00138 } 00139 } 00140 else 00141 { 00142 for (j=0; j < plan.M_total; j++) 00143 { 00144 // fscanf(infile, "%lg %lg", &re, &im); 00145 // plan.f[j] = re + _Complex_I * im; 00146 plan.f[j] = f[j]; 00147 } 00148 00149 memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex)); 00150 } 00151 00152 t0 = getticks(); 00153 /* precomputation (for NFFT, node-dependent) */ 00154 nfsft_precompute_x(&plan); 00155 t1 = getticks(); 00156 tt_pre = nfft_elapsed_seconds(t1,t0); 00157 00158 if (trafo_adjoint==0) 00159 nfsft_trafo(&plan); 00160 else 00161 nfsft_adjoint(&plan); 00162 t1 = getticks(); 00163 tt_total = nfft_elapsed_seconds(t1,t0); 00164 00165 #ifndef MEASURE_TIME 00166 plan.MEASURE_TIME_t[0] = 0.0; 00167 plan.MEASURE_TIME_t[2] = 0.0; 00168 #endif 00169 00170 #ifndef MEASURE_TIME_FFTW 00171 plan.MEASURE_TIME_t[1] = 0.0; 00172 #endif 00173 00174 printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total); 00175 00177 nfsft_finalize(&plan); 00178 } 00179 00180 int main(int argc, char **argv) 00181 { 00182 int m, nfsft_flags, psi_flags; 00183 int nrepeat; 00184 int trafo_adjoint, N, M, r; 00185 double *x; 00186 C *f_hat, *f; 00187 #ifdef _OPENMP 00188 int nthreads; 00189 00190 if (argc != 6) 00191 return 1; 00192 00193 nthreads = atoi(argv[5]); 00194 fftw_init_threads(); 00195 omp_set_num_threads(nthreads); 00196 #else 00197 if (argc != 5) 00198 return 1; 00199 #endif 00200 00201 m = atoi(argv[1]); 00202 nfsft_flags = atoi(argv[2]); 00203 psi_flags = atoi(argv[3]); 00204 nrepeat = atoi(argv[4]); 00205 00206 bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f); 00207 00208 /* precomputation (for fast polynomial transform) */ 00209 nfsft_precompute(N,1000.0,0U,0U); 00210 00211 for (r = 0; r < nrepeat; r++) 00212 bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags); 00213 00214 return 0; 00215 }