![]() |
NFFT
3.3.2
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 #include <stdio.h> 00019 #include <stdlib.h> 00020 #include <string.h> 00021 #include <unistd.h> 00022 00023 #include "config.h" 00024 00025 #include "nfft3.h" 00026 #include "infft.h" 00027 00028 #define NREPEAT 5 00029 00030 #if defined(_WIN32) || defined(_WIN64) 00031 const char *CMD_CREATEDATASET = "nfft_benchomp_createdataset.exe"; 00032 const char *CMD_DETAIL_SINGLE = "nfft_benchomp_detail_single.exe"; 00033 const char *CMD_DETAIL_THREADS = "nfft_benchomp_detail_threads.exe"; 00034 #else 00035 const char *CMD_CREATEDATASET = "./nfft_benchomp_createdataset"; 00036 const char *CMD_DETAIL_SINGLE = "./nfft_benchomp_detail_single"; 00037 const char *CMD_DETAIL_THREADS = "./nfft_benchomp_detail_threads"; 00038 #endif 00039 00040 static FILE* file_out_tex = NULL; 00041 00042 int get_nthreads_array(int **arr) 00043 { 00044 int max_threads = NFFT(get_num_threads)(); 00045 int alloc_num = 2; 00046 int k; 00047 int ret_number = 0; 00048 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0; 00049 00050 if (max_threads <= 5) 00051 { 00052 *arr = (int*) malloc(max_threads*sizeof(int)); 00053 for (k = 0; k < max_threads; k++) 00054 *(*arr + k) = k+1; 00055 return max_threads; 00056 } 00057 00058 for (k = 1; k <= max_threads; k*=2, alloc_num++); 00059 00060 *arr = (int*) malloc(alloc_num*sizeof(int)); 00061 00062 for (k = 1; k <= max_threads; k*=2) 00063 { 00064 if (k != max_threads && 2*k > max_threads && max_threads_pw2) 00065 { 00066 *(*arr + ret_number) = max_threads/2; 00067 ret_number++; 00068 } 00069 00070 *(*arr + ret_number) = k; 00071 ret_number++; 00072 00073 if (k != max_threads && 2*k > max_threads) 00074 { 00075 *(*arr + ret_number) = max_threads; 00076 ret_number++; 00077 break; 00078 } 00079 } 00080 00081 return ret_number; 00082 } 00083 00084 00085 void check_result_value(const int val, const int ok, const char *msg) 00086 { 00087 if (val != ok) 00088 { 00089 fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok); 00090 00091 exit(1); 00092 } 00093 } 00094 00095 void run_test_create(int d, int trafo_adjoint, int N, int M, double sigma) 00096 { 00097 char cmd[1025]; 00098 00099 if (d==1) 00100 snprintf(cmd, 1024, "%s %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, M, sigma); 00101 else if (d==2) 00102 snprintf(cmd, 1024, "%s %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, M, sigma); 00103 else if (d==3) 00104 snprintf(cmd, 1024, "%s %d %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, N, M, sigma); 00105 else if (d==4) 00106 snprintf(cmd, 1024, "%s %d %d %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, N, N, M, sigma); 00107 else 00108 exit(1); 00109 fprintf(stderr, "%s\n", cmd); 00110 check_result_value(system(cmd), 0, "createdataset"); 00111 } 00112 00113 void run_test_init_output() 00114 { 00115 FILE *f = fopen("nfft_benchomp_test.result", "w"); 00116 if (f!= NULL) 00117 fclose(f); 00118 } 00119 00120 typedef struct 00121 { 00122 int d; 00123 int trafo_adjoint; 00124 int N; 00125 int M; 00126 double sigma; 00127 int m; 00128 int flags; 00129 } s_param; 00130 00131 typedef struct 00132 { 00133 double avg; 00134 double min; 00135 double max; 00136 } s_resval; 00137 00138 typedef struct 00139 { 00140 int nthreads; 00141 s_resval resval[6]; 00142 } s_result; 00143 00144 typedef struct 00145 { 00146 s_param param; 00147 s_result *results; 00148 int nresults; 00149 } s_testset; 00150 00151 void run_test(s_resval *res, int nrepeat, int m, int flags, int nthreads) 00152 { 00153 char cmd[1025]; 00154 int r,t; 00155 00156 for (t = 0; t < 6; t++) 00157 { 00158 res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0; 00159 } 00160 00161 if (nthreads < 2) 00162 snprintf(cmd, 1024, "%s %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", CMD_DETAIL_SINGLE, m, flags); 00163 else 00164 snprintf(cmd, 1024, "%s %d %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", CMD_DETAIL_THREADS, m, flags, nthreads); 00165 fprintf(stderr, "%s\n", cmd); 00166 check_result_value(system(cmd), 0, cmd); 00167 00168 for (r = 0; r < nrepeat; r++) 00169 { 00170 int retval; 00171 double v[6]; 00172 FILE *f; 00173 check_result_value(system(cmd), 0, cmd); 00174 f = fopen("nfft_benchomp_test.out", "r"); 00175 retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5); 00176 check_result_value(retval, 6, "read nfft_benchomp_test.out"); 00177 fclose(f); 00178 00179 for (t = 0; t < 6; t++) 00180 { 00181 res[t].avg += v[t]; 00182 if (res[t].min > v[t]) 00183 res[t].min = v[t]; 00184 if (res[t].max < v[t]) 00185 res[t].max = v[t]; 00186 } 00187 } 00188 00189 for (t = 0; t < 6; t++) 00190 res[t].avg /= nrepeat; 00191 00192 fprintf(stderr, "%d %d: ", nthreads, nrepeat); 00193 for (t = 0; t < 6; t++) 00194 fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max); 00195 fprintf(stderr, "\n"); 00196 } 00197 00198 const char *get_psi_string(int flags) 00199 { 00200 if (flags & PRE_ONE_PSI) 00201 return "unknownPSI"; 00202 00203 return "nopsi"; 00204 } 00205 const char *get_sort_string(int flags) 00206 { 00207 if (flags & NFFT_SORT_NODES) 00208 return "sorted"; 00209 00210 return "unsorted"; 00211 } 00212 00213 const char *get_adjoint_omp_string(int flags) 00214 { 00215 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT) 00216 return "blockwise"; 00217 00218 return ""; 00219 } 00220 00221 #define MASK_D (1U<<0) 00222 #define MASK_TA (1U<<1) 00223 #define MASK_N (1U<<2) 00224 #define MASK_SIGMA (1U<<3) 00225 #define MASK_M (1U<<4) 00226 #define MASK_WINM (1U<<5) 00227 #define MASK_FLAGS_PSI (1U<<6) 00228 #define MASK_FLAGS_SORT (1U<<7) 00229 #define MASK_FLAGS_BW (1U<<8) 00230 00231 unsigned int determine_different_parameters(s_testset *testsets, int ntestsets) 00232 { 00233 int t; 00234 unsigned int mask = 0; 00235 00236 if (ntestsets < 2) 00237 return 0; 00238 00239 for (t = 1; t < ntestsets; t++) 00240 { 00241 if (testsets[t-1].param.d != testsets[t].param.d) 00242 mask |= MASK_D; 00243 if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint) 00244 mask |= MASK_TA; 00245 if (testsets[t-1].param.N != testsets[t].param.N) 00246 mask |= MASK_N; 00247 if (testsets[t-1].param.sigma != testsets[t].param.sigma) 00248 mask |= MASK_SIGMA; 00249 if (testsets[t-1].param.M != testsets[t].param.M) 00250 mask |= MASK_M; 00251 if (testsets[t-1].param.m != testsets[t].param.m) 00252 mask |= MASK_WINM; 00253 if ((testsets[t-1].param.flags & PRE_ONE_PSI) != (testsets[t].param.flags & PRE_ONE_PSI)) 00254 mask |= MASK_FLAGS_PSI; 00255 if ((testsets[t-1].param.flags & NFFT_SORT_NODES) != (testsets[t].param.flags & NFFT_SORT_NODES)) 00256 mask |= MASK_FLAGS_SORT; 00257 if ((testsets[t-1].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT)) 00258 mask |= MASK_FLAGS_BW; 00259 } 00260 00261 return mask; 00262 } 00263 00264 void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask) 00265 { 00266 unsigned int mask = ~diff_mask; 00267 int offset = 0; 00268 int len; 00269 00270 len = snprintf(outstr, maxlen, "%s", hostname); 00271 if (len < 0 || len+offset >= maxlen-1) return; 00272 offset += len; 00273 00274 if (mask & MASK_D) 00275 { 00276 len = snprintf(outstr+offset, maxlen-offset, " %dd", param.d); 00277 if (len < 0 || len+offset >= maxlen-1) return; 00278 offset += len; 00279 } 00280 00281 if (mask & MASK_TA) 00282 { 00283 len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFFT}%s$", param.trafo_adjoint==0?"":"^\\top"); 00284 if (len < 0 || len+offset >= maxlen-1) return; 00285 offset += len; 00286 } 00287 00288 if (mask & MASK_N) 00289 { 00290 len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N); 00291 if (len < 0 || len+offset >= maxlen-1) return; 00292 offset += len; 00293 } 00294 00295 if (mask & MASK_SIGMA) 00296 { 00297 len = snprintf(outstr+offset, maxlen-offset, " N=%g", param.sigma); 00298 if (len < 0 || len+offset >= maxlen-1) return; 00299 offset += len; 00300 } 00301 00302 if (mask & MASK_M) 00303 { 00304 len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M); 00305 if (len < 0 || len+offset >= maxlen-1) return; 00306 offset += len; 00307 } 00308 00309 if (mask & MASK_WINM) 00310 { 00311 len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m); 00312 if (len < 0 || len+offset >= maxlen-1) return; 00313 offset += len; 00314 } 00315 00316 if (mask & MASK_FLAGS_PSI) 00317 { 00318 len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.flags)); 00319 if (len < 0 || len+offset >= maxlen-1) return; 00320 offset += len; 00321 } 00322 00323 if (mask & MASK_FLAGS_SORT) 00324 { 00325 len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.flags)); 00326 if (len < 0 || len+offset >= maxlen-1) return; 00327 offset += len; 00328 } 00329 00330 if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.flags)) > 0) 00331 { 00332 len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.flags)); 00333 if (len < 0 || len+offset >= maxlen-1) return; 00334 offset += len; 00335 } 00336 } 00337 00338 void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, double tref) 00339 { 00340 int i, t; 00341 char hostname[1025]; 00342 char plottitle[1025]; 00343 unsigned int diff_mask = determine_different_parameters(testsets, ntestsets); 00344 00345 #ifdef HAVE_GETHOSTNAME 00346 if (gethostname(hostname, 1024) != 0) 00347 #endif 00348 strncpy(hostname, "unnamed", 1024); 00349 00350 get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask | MASK_FLAGS_SORT); 00351 00352 fprintf(out, "\\begin{tikzpicture}\n"); 00353 fprintf(out, "\\begin{axis}["); 00354 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, "); 00355 fprintf(out, " title={%s}", plottitle); 00356 fprintf(out, " ]\n"); 00357 00358 for (t = 0; t < ntestsets; t++) 00359 { 00360 s_testset testset = testsets[t]; 00361 fprintf(stderr, "%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags)); 00362 fprintf(stderr, "\n"); 00363 00364 fprintf(out, "\\addplot coordinates {"); 00365 for (i = 0; i < testset.nresults; i++) 00366 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg); 00367 fprintf(out, "};\n"); 00368 00369 for (i = 0; i < testset.nresults; i++) 00370 { 00371 fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg); 00372 } 00373 fprintf(stderr, "\n\n"); 00374 } 00375 00376 fprintf(out, "\\legend{{"); 00377 for (t = 0; t < ntestsets; t++) 00378 { 00379 char title[256]; 00380 if (t > 0) 00381 fprintf(out, "},{"); 00382 get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask | MASK_FLAGS_SORT)); 00383 fprintf(out, "%s", title); 00384 } 00385 fprintf(out, "}}\n"); 00386 fprintf(out, "\\end{axis}\n"); 00387 fprintf(out, "\\end{tikzpicture}\n"); 00388 fprintf(out, "\n\n"); 00389 00390 fflush(out); 00391 } 00392 00393 void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets) 00394 { 00395 double tref = 1.0/0.0; 00396 int t, k; 00397 00398 for (t = 0; t < ntestsets; t++) 00399 for (k = 0; k < testsets[t].nresults; k++) 00400 if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref) 00401 tref = testsets[t].results[k].resval[5].avg; 00402 00403 print_output_speedup_total_tref(out, testsets, ntestsets, tref); 00404 } 00405 00406 void print_output_histo_DFBRT(FILE *out, s_testset testset) 00407 { 00408 int i, size = testset.nresults; 00409 char hostname[1025]; 00410 00411 #ifdef HAVE_GETHOSTNAME 00412 if (gethostname(hostname, 1024) != 0) 00413 #endif 00414 strncpy(hostname, "unnamed", 1024); 00415 00416 fprintf(out, "\\begin{tikzpicture}\n"); 00417 fprintf(out, "\\begin{axis}["); 00418 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, "); 00419 fprintf(out, "symbolic x coords={"); 00420 for (i = 0; i < size; i++) 00421 if (i > 0) 00422 fprintf(out, ",%d", testset.results[i].nthreads); 00423 else 00424 fprintf(out, "%d", testset.results[i].nthreads); 00425 fprintf(stderr, "FLAGS: %d\n", testset.param.flags); 00426 00427 fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, "); 00428 fprintf(out, " title={%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags)); 00429 fprintf(out, " ]\n"); 00430 fprintf(out, "\\addplot coordinates {"); 00431 for (i = 0; i < size; i++) 00432 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg); 00433 fprintf(out, "};\n"); 00434 00435 fprintf(out, "\\addplot coordinates {"); 00436 for (i = 0; i < size; i++) 00437 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg); 00438 fprintf(out, "};\n"); 00439 00440 fprintf(out, "\\addplot coordinates {"); 00441 for (i = 0; i < size; i++) 00442 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg); 00443 fprintf(out, "};\n"); 00444 00445 fprintf(out, "\\addplot coordinates {"); 00446 for (i = 0; i < size; i++) 00447 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg); 00448 fprintf(out, "};\n"); 00449 00450 fprintf(out, "\\addplot coordinates {"); 00451 for (i = 0; i < size; i++) 00452 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg); 00453 fprintf(out, "};\n"); 00454 fprintf(out, "\\legend{D,F,B,rest,total}\n"); 00455 fprintf(out, "\\end{axis}\n"); 00456 fprintf(out, "\\end{tikzpicture}\n"); 00457 fprintf(out, "\n\n"); 00458 00459 fflush(out); 00460 } 00461 00462 void run_testset(s_testset *testset, int d, int trafo_adjoint, int N, int M, double sigma, int m, int flags, int *nthreads_array, int n_threads_array_size) 00463 { 00464 int i; 00465 testset->param.d = d; 00466 testset->param.trafo_adjoint = trafo_adjoint; 00467 testset->param.N = N; 00468 testset->param.M = M; 00469 testset->param.sigma = sigma; 00470 testset->param.m = m; 00471 testset->param.flags = flags; 00472 00473 testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result)); 00474 testset->nresults = n_threads_array_size; 00475 00476 run_test_create(testset->param.d, testset->param.trafo_adjoint, testset->param.N, testset->param.M, testset->param.sigma); 00477 for (i = 0; i < n_threads_array_size; i++) 00478 { 00479 testset->results[i].nthreads = nthreads_array[i]; 00480 run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.flags, testset->results[i].nthreads = nthreads_array[i]); 00481 } 00482 00483 } 00484 00485 void test1(int *nthreads_array, int n_threads_array_size, int m) 00486 { 00487 s_testset testsets[15]; 00488 00489 run_testset(&testsets[0], 1, 0, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00490 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00491 print_output_histo_DFBRT(file_out_tex, testsets[0]); 00492 #endif 00493 00494 run_testset(&testsets[1], 1, 0, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00495 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00496 print_output_histo_DFBRT(file_out_tex, testsets[1]); 00497 #endif 00498 00499 print_output_speedup_total(file_out_tex, testsets, 2); 00500 00501 run_testset(&testsets[2], 1, 1, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00502 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00503 print_output_histo_DFBRT(file_out_tex, testsets[2]); 00504 #endif 00505 00506 run_testset(&testsets[3], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00507 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00508 print_output_histo_DFBRT(file_out_tex, testsets[3]); 00509 #endif 00510 00511 run_testset(&testsets[4], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00512 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00513 print_output_histo_DFBRT(file_out_tex, testsets[4]); 00514 #endif 00515 00516 print_output_speedup_total(file_out_tex, testsets+2, 3); 00517 00518 run_testset(&testsets[5], 2, 0, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size); 00519 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00520 print_output_histo_DFBRT(file_out_tex, testsets[5]); 00521 #endif 00522 00523 run_testset(&testsets[6], 2, 0, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00524 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00525 print_output_histo_DFBRT(file_out_tex, testsets[6]); 00526 #endif 00527 00528 print_output_speedup_total(file_out_tex, testsets+5, 2); 00529 00530 run_testset(&testsets[7], 2, 1, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size); 00531 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00532 print_output_histo_DFBRT(file_out_tex, testsets[7]); 00533 #endif 00534 00535 run_testset(&testsets[8], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00536 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00537 print_output_histo_DFBRT(file_out_tex, testsets[8]); 00538 #endif 00539 00540 run_testset(&testsets[9], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00541 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00542 print_output_histo_DFBRT(file_out_tex, testsets[9]); 00543 #endif 00544 00545 print_output_speedup_total(file_out_tex, testsets+7, 3); 00546 00547 run_testset(&testsets[10], 3, 0, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00548 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00549 print_output_histo_DFBRT(file_out_tex, testsets[10]); 00550 #endif 00551 00552 run_testset(&testsets[11], 3, 0, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00553 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00554 print_output_histo_DFBRT(file_out_tex, testsets[11]); 00555 #endif 00556 00557 print_output_speedup_total(file_out_tex, testsets+10, 2); 00558 00559 run_testset(&testsets[12], 3, 1, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00560 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00561 print_output_histo_DFBRT(file_out_tex, testsets[12]); 00562 #endif 00563 00564 run_testset(&testsets[13], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00565 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00566 print_output_histo_DFBRT(file_out_tex, testsets[13]); 00567 #endif 00568 00569 run_testset(&testsets[14], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00570 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00571 print_output_histo_DFBRT(file_out_tex, testsets[14]); 00572 #endif 00573 00574 print_output_speedup_total(file_out_tex, testsets+12, 3); 00575 00576 } 00577 00578 void test2(int *nthreads_array, int n_threads_array_size, int m) 00579 { 00580 s_testset testsets[15]; 00581 00582 run_testset(&testsets[0], 1, 0, 16777216, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00583 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00584 print_output_histo_DFBRT(file_out_tex, testsets[0]); 00585 #endif 00586 00587 run_testset(&testsets[1], 1, 0, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00588 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00589 print_output_histo_DFBRT(file_out_tex, testsets[1]); 00590 #endif 00591 00592 print_output_speedup_total(file_out_tex, testsets, 2); 00593 00594 run_testset(&testsets[2], 1, 1, 16777216, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00595 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00596 print_output_histo_DFBRT(file_out_tex, testsets[2]); 00597 #endif 00598 00599 run_testset(&testsets[3], 1, 1, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00600 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00601 print_output_histo_DFBRT(file_out_tex, testsets[3]); 00602 #endif 00603 00604 run_testset(&testsets[4], 1, 1, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00605 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00606 print_output_histo_DFBRT(file_out_tex, testsets[4]); 00607 #endif 00608 00609 print_output_speedup_total(file_out_tex, testsets+2, 3); 00610 00611 run_testset(&testsets[5], 2, 0, 4096, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size); 00612 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00613 print_output_histo_DFBRT(file_out_tex, testsets[5]); 00614 #endif 00615 00616 run_testset(&testsets[6], 2, 0, 4096, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00617 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00618 print_output_histo_DFBRT(file_out_tex, testsets[6]); 00619 #endif 00620 00621 print_output_speedup_total(file_out_tex, testsets+5, 2); 00622 00623 run_testset(&testsets[7], 2, 1, 4096, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size); 00624 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00625 print_output_histo_DFBRT(file_out_tex, testsets[7]); 00626 #endif 00627 00628 run_testset(&testsets[8], 2, 1, 4096, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00629 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00630 print_output_histo_DFBRT(file_out_tex, testsets[8]); 00631 #endif 00632 00633 run_testset(&testsets[9], 2, 1, 4096, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00634 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00635 print_output_histo_DFBRT(file_out_tex, testsets[9]); 00636 #endif 00637 00638 print_output_speedup_total(file_out_tex, testsets+7, 3); 00639 00640 run_testset(&testsets[10], 3, 0, 256, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00641 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00642 print_output_histo_DFBRT(file_out_tex, testsets[10]); 00643 #endif 00644 00645 run_testset(&testsets[11], 3, 0, 256, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00646 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00647 print_output_histo_DFBRT(file_out_tex, testsets[11]); 00648 #endif 00649 00650 print_output_speedup_total(file_out_tex, testsets+10, 2); 00651 00652 run_testset(&testsets[12], 3, 1, 256, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size); 00653 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00654 print_output_histo_DFBRT(file_out_tex, testsets[12]); 00655 #endif 00656 00657 run_testset(&testsets[13], 3, 1, 256, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size); 00658 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00659 print_output_histo_DFBRT(file_out_tex, testsets[13]); 00660 #endif 00661 00662 run_testset(&testsets[14], 3, 1, 256, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size); 00663 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 00664 print_output_histo_DFBRT(file_out_tex, testsets[14]); 00665 #endif 00666 00667 print_output_speedup_total(file_out_tex, testsets+12, 3); 00668 00669 } 00670 00671 int main(int argc, char** argv) 00672 { 00673 int *nthreads_array; 00674 int n_threads_array_size = get_nthreads_array(&nthreads_array); 00675 int k; 00676 00677 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW) 00678 fprintf(stderr, "WARNING: Detailed time measurements for NFFT are not activated.\n"); 00679 fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n"); 00680 fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n"); 00681 fprintf(stderr, "and run \"make clean all\"\n\n"); 00682 #endif 00683 00684 for (k = 0; k < n_threads_array_size; k++) 00685 fprintf(stderr, "%d ", nthreads_array[k]); 00686 fprintf(stderr, "\n"); 00687 00688 file_out_tex = fopen("nfft_benchomp_results_plots.tex", "w"); 00689 00690 test1(nthreads_array, n_threads_array_size, 2); 00691 test1(nthreads_array, n_threads_array_size, 4); 00692 test1(nthreads_array, n_threads_array_size, 6); 00693 // test2(nthreads_array, n_threads_array_size, 2); 00694 00695 fclose(file_out_tex); 00696 00697 return 0; 00698 }