NFFT  3.3.2
nfft_benchomp.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <string.h>
00021 #include <unistd.h>
00022 
00023 #include "config.h"
00024 
00025 #include "nfft3.h"
00026 #include "infft.h"
00027 
/* Number of measured repetitions per benchmark configuration. */
#define NREPEAT 5

/*
 * Helper executables launched through system(). On Windows they carry an
 * ".exe" suffix and are looked up by bare name; elsewhere they are started
 * from the current directory.
 */
#if defined(_WIN32) || defined(_WIN64)
#define BENCHOMP_CMD(name) name ".exe"
#else
#define BENCHOMP_CMD(name) "./" name
#endif

const char *CMD_CREATEDATASET = BENCHOMP_CMD("nfft_benchomp_createdataset");
const char *CMD_DETAIL_SINGLE = BENCHOMP_CMD("nfft_benchomp_detail_single");
const char *CMD_DETAIL_THREADS = BENCHOMP_CMD("nfft_benchomp_detail_threads");

/* Stream that receives the generated TikZ plots; opened in main(). */
static FILE *file_out_tex = NULL;
00041 
00042 int get_nthreads_array(int **arr)
00043 {
00044   int max_threads = NFFT(get_num_threads)();
00045   int alloc_num = 2;
00046   int k;
00047   int ret_number = 0;
00048   int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
00049 
00050   if (max_threads <= 5)
00051   {
00052     *arr = (int*) malloc(max_threads*sizeof(int));
00053     for (k = 0; k < max_threads; k++)
00054       *(*arr + k) = k+1;
00055     return max_threads;
00056   }
00057 
00058   for (k = 1; k <= max_threads; k*=2, alloc_num++);
00059 
00060   *arr = (int*) malloc(alloc_num*sizeof(int));
00061 
00062   for (k = 1; k <= max_threads; k*=2)
00063   {
00064     if (k != max_threads && 2*k > max_threads && max_threads_pw2)
00065     {
00066       *(*arr + ret_number) = max_threads/2;
00067       ret_number++;
00068     }
00069 
00070     *(*arr + ret_number) = k;
00071     ret_number++;
00072 
00073     if (k != max_threads && 2*k > max_threads)
00074     {
00075       *(*arr + ret_number) = max_threads;
00076       ret_number++;
00077       break;
00078     }
00079   }
00080 
00081   return ret_number;
00082 } 
00083   
00084 
/*
 * Abort the program unless val equals the expected value ok.
 *
 * val  value that was obtained.
 * ok   value that was expected.
 * msg  description of the checked operation, used in the error message.
 *
 * On mismatch an error line is written to stderr and exit(1) is called.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
00094 
00095 void run_test_create(int d, int trafo_adjoint, int N, int M, double sigma)
00096 {
00097   char cmd[1025];
00098 
00099   if (d==1)
00100     snprintf(cmd, 1024, "%s %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, M, sigma);
00101   else if (d==2)  
00102     snprintf(cmd, 1024, "%s %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, M, sigma);
00103   else if (d==3)  
00104     snprintf(cmd, 1024, "%s %d %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, N, M, sigma);
00105   else if (d==4)  
00106     snprintf(cmd, 1024, "%s %d %d %d %d %d %d %d %lg > nfft_benchomp_test.data", CMD_CREATEDATASET, d, trafo_adjoint, N, N, N, N, M, sigma);
00107   else
00108     exit(1);
00109   fprintf(stderr, "%s\n", cmd);
00110   check_result_value(system(cmd), 0, "createdataset");
00111 }
00112 
/*
 * Create or truncate the file nfft_benchomp_test.result.
 * A failure to open the file is silently ignored.
 */
void run_test_init_output()
{
  FILE *result_file = fopen("nfft_benchomp_test.result", "w");

  if (result_file == NULL)
    return;

  fclose(result_file);
}
00119 
/* Parameters describing one benchmark configuration. */
typedef struct s_param
{
  int d;              /* spatial dimension */
  int trafo_adjoint;  /* 0 is labelled NFFT, anything else NFFT^T in the plots */
  int N;              /* bandwidth, used for every dimension */
  int M;              /* number of nodes */
  double sigma;       /* oversampling factor */
  int m;              /* window parameter handed to the benchmark programs */
  int flags;          /* NFFT flags handed to the benchmark programs */
} s_param;

/* Statistics of one timing value over all repetitions. */
typedef struct s_resval
{
  double avg;
  double min;
  double max;
} s_resval;

/*
 * Timings obtained with one particular number of threads.
 * print_output_histo_DFBRT() plots resval[1], resval[2] and resval[3] as
 * "D", "F" and "B", resval[0] + resval[4] as "rest" and resval[5] as "total".
 */
typedef struct s_result
{
  int nthreads;
  s_resval resval[6];
} s_result;

/* A benchmark configuration together with its results per thread count. */
typedef struct s_testset
{
  s_param param;
  s_result *results;  /* array of nresults entries */
  int nresults;
} s_testset;
00150 
00151 void run_test(s_resval *res, int nrepeat, int m, int flags, int nthreads)
00152 {
00153   char cmd[1025];
00154   int r,t;
00155   
00156   for (t = 0; t < 6; t++)
00157   {
00158     res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
00159   }
00160 
00161   if (nthreads < 2)
00162     snprintf(cmd, 1024, "%s %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", CMD_DETAIL_SINGLE, m, flags);
00163   else
00164     snprintf(cmd, 1024, "%s %d %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", CMD_DETAIL_THREADS, m, flags, nthreads);
00165   fprintf(stderr, "%s\n", cmd);
00166   check_result_value(system(cmd), 0, cmd);
00167 
00168   for (r = 0; r < nrepeat; r++)
00169   {
00170     int retval;
00171     double v[6];
00172     FILE *f;
00173     check_result_value(system(cmd), 0, cmd);
00174     f = fopen("nfft_benchomp_test.out", "r");
00175     retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
00176     check_result_value(retval, 6, "read nfft_benchomp_test.out");
00177     fclose(f);
00178 
00179     for (t = 0; t < 6; t++)
00180     {
00181       res[t].avg += v[t];
00182       if (res[t].min > v[t])
00183         res[t].min = v[t];
00184       if (res[t].max < v[t])
00185         res[t].max = v[t];
00186     }
00187   }
00188 
00189   for (t = 0; t < 6; t++)
00190     res[t].avg /= nrepeat;
00191 
00192   fprintf(stderr, "%d %d: ", nthreads, nrepeat);
00193   for (t = 0; t < 6; t++)
00194     fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
00195   fprintf(stderr, "\n");
00196 }
00197 
00198 const char *get_psi_string(int flags)
00199 {
00200   if (flags & PRE_ONE_PSI)
00201     return "unknownPSI";
00202 
00203   return "nopsi";
00204 }
00205 const char *get_sort_string(int flags)
00206 {
00207   if (flags & NFFT_SORT_NODES)
00208     return "sorted";
00209 
00210     return "unsorted";
00211 }
00212 
00213 const char *get_adjoint_omp_string(int flags)
00214 {
00215   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00216     return "blockwise";
00217 
00218     return "";
00219 }
00220 
/*
 * Bit flags naming the individual benchmark parameters. They are combined
 * into the masks produced by determine_different_parameters() and consumed
 * by get_plot_title().
 */
#define MASK_D          0x001U  /* dimension d */
#define MASK_TA         0x002U  /* trafo_adjoint */
#define MASK_N          0x004U  /* bandwidth N */
#define MASK_SIGMA      0x008U  /* oversampling factor sigma */
#define MASK_M          0x010U  /* number of nodes M */
#define MASK_WINM       0x020U  /* window parameter m */
#define MASK_FLAGS_PSI  0x040U  /* PRE_ONE_PSI part of the flags */
#define MASK_FLAGS_SORT 0x080U  /* NFFT_SORT_NODES flag */
#define MASK_FLAGS_BW   0x100U  /* NFFT_OMP_BLOCKWISE_ADJOINT flag */
00230 
00231 unsigned int determine_different_parameters(s_testset *testsets, int ntestsets)
00232 {
00233   int t;
00234   unsigned int mask = 0;
00235 
00236   if (ntestsets < 2)
00237     return 0;
00238 
00239   for (t = 1; t < ntestsets; t++)
00240   {
00241     if (testsets[t-1].param.d != testsets[t].param.d)
00242       mask |= MASK_D;
00243     if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
00244       mask |= MASK_TA;
00245     if (testsets[t-1].param.N != testsets[t].param.N)
00246       mask |= MASK_N;
00247     if (testsets[t-1].param.sigma != testsets[t].param.sigma)
00248       mask |= MASK_SIGMA;
00249     if (testsets[t-1].param.M != testsets[t].param.M)
00250       mask |= MASK_M;
00251     if (testsets[t-1].param.m != testsets[t].param.m)
00252       mask |= MASK_WINM;
00253     if ((testsets[t-1].param.flags & PRE_ONE_PSI) != (testsets[t].param.flags & PRE_ONE_PSI))
00254       mask |= MASK_FLAGS_PSI;
00255     if ((testsets[t-1].param.flags & NFFT_SORT_NODES) != (testsets[t].param.flags & NFFT_SORT_NODES))
00256       mask |= MASK_FLAGS_SORT;
00257     if ((testsets[t-1].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT))
00258       mask |= MASK_FLAGS_BW;
00259   }
00260 
00261   return mask;
00262 }
00263 
00264 void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
00265 {
00266   unsigned int mask = ~diff_mask;
00267   int offset = 0;
00268   int len;
00269 
00270   len = snprintf(outstr, maxlen, "%s", hostname);
00271   if (len < 0 || len+offset >= maxlen-1) return;
00272   offset += len;
00273 
00274   if (mask & MASK_D)
00275   {
00276     len = snprintf(outstr+offset, maxlen-offset, " %dd", param.d);
00277     if (len < 0 || len+offset >= maxlen-1) return;
00278     offset += len;
00279   }
00280 
00281   if (mask & MASK_TA)
00282   {
00283     len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFFT}%s$", param.trafo_adjoint==0?"":"^\\top");
00284     if (len < 0 || len+offset >= maxlen-1) return;
00285     offset += len;
00286   }
00287 
00288   if (mask & MASK_N)
00289   {
00290     len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N);
00291     if (len < 0 || len+offset >= maxlen-1) return;
00292     offset += len;
00293   }
00294 
00295   if (mask & MASK_SIGMA)
00296   {
00297     len = snprintf(outstr+offset, maxlen-offset, " N=%g", param.sigma);
00298     if (len < 0 || len+offset >= maxlen-1) return;
00299     offset += len;
00300   }
00301 
00302   if (mask & MASK_M)
00303   {
00304     len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
00305     if (len < 0 || len+offset >= maxlen-1) return;
00306     offset += len;
00307   }
00308 
00309   if (mask & MASK_WINM)
00310   {
00311     len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
00312     if (len < 0 || len+offset >= maxlen-1) return;
00313     offset += len;
00314   }
00315 
00316   if (mask & MASK_FLAGS_PSI)
00317   {
00318     len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.flags));
00319     if (len < 0 || len+offset >= maxlen-1) return;
00320     offset += len;
00321   }
00322 
00323   if (mask & MASK_FLAGS_SORT)
00324   {
00325     len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.flags));
00326     if (len < 0 || len+offset >= maxlen-1) return;
00327     offset += len;
00328   }
00329 
00330   if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.flags)) > 0)
00331   {
00332     len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.flags));
00333     if (len < 0 || len+offset >= maxlen-1) return;
00334     offset += len;
00335   }
00336 }
00337 
00338 void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, double tref)
00339 {
00340   int i, t;
00341   char hostname[1025];
00342   char plottitle[1025];
00343   unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
00344 
00345 #ifdef HAVE_GETHOSTNAME
00346   if (gethostname(hostname, 1024) != 0)
00347 #endif
00348     strncpy(hostname, "unnamed", 1024);
00349 
00350   get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask | MASK_FLAGS_SORT);
00351 
00352   fprintf(out, "\\begin{tikzpicture}\n");
00353   fprintf(out, "\\begin{axis}[");
00354   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
00355   fprintf(out, " title={%s}", plottitle);
00356   fprintf(out, " ]\n");
00357 
00358   for (t = 0; t < ntestsets; t++)
00359   {
00360     s_testset testset = testsets[t];
00361     fprintf(stderr, "%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
00362     fprintf(stderr, "\n");
00363 
00364     fprintf(out, "\\addplot coordinates {");
00365     for (i = 0; i < testset.nresults; i++)
00366       fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
00367     fprintf(out, "};\n");
00368 
00369     for (i = 0; i < testset.nresults; i++)
00370     {
00371       fprintf(stderr, "%d:%.3f  ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
00372     }
00373     fprintf(stderr, "\n\n");
00374   }
00375 
00376   fprintf(out, "\\legend{{");
00377   for (t = 0; t < ntestsets; t++)
00378   {
00379     char title[256];
00380     if (t > 0)
00381       fprintf(out, "},{");
00382     get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask | MASK_FLAGS_SORT));
00383     fprintf(out, "%s", title);
00384   }
00385   fprintf(out, "}}\n");
00386   fprintf(out, "\\end{axis}\n");
00387   fprintf(out, "\\end{tikzpicture}\n");
00388   fprintf(out, "\n\n");
00389 
00390   fflush(out);
00391 }
00392 
00393 void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets)
00394 {
00395   double tref = 1.0/0.0;
00396   int t, k;
00397 
00398   for (t = 0; t < ntestsets; t++)
00399     for (k = 0; k < testsets[t].nresults; k++)
00400       if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
00401         tref = testsets[t].results[k].resval[5].avg;
00402 
00403   print_output_speedup_total_tref(out, testsets, ntestsets, tref);
00404 }
00405 
00406 void print_output_histo_DFBRT(FILE *out, s_testset testset)
00407 {
00408   int i, size = testset.nresults;
00409   char hostname[1025];
00410 
00411 #ifdef HAVE_GETHOSTNAME
00412   if (gethostname(hostname, 1024) != 0)
00413 #endif
00414     strncpy(hostname, "unnamed", 1024);
00415 
00416   fprintf(out, "\\begin{tikzpicture}\n");
00417   fprintf(out, "\\begin{axis}[");
00418   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00419   fprintf(out, "symbolic x coords={");
00420   for (i = 0; i < size; i++)
00421     if (i > 0)
00422       fprintf(out, ",%d", testset.results[i].nthreads);
00423     else
00424       fprintf(out, "%d", testset.results[i].nthreads);
00425 fprintf(stderr, "FLAGS: %d\n", testset.param.flags);
00426 
00427   fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00428   fprintf(out, " title={%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
00429   fprintf(out, " ]\n");
00430   fprintf(out, "\\addplot coordinates {");
00431   for (i = 0; i < size; i++)
00432     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
00433   fprintf(out, "};\n");
00434 
00435   fprintf(out, "\\addplot coordinates {");
00436   for (i = 0; i < size; i++)
00437     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
00438   fprintf(out, "};\n");
00439 
00440   fprintf(out, "\\addplot coordinates {");
00441   for (i = 0; i < size; i++)
00442     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
00443   fprintf(out, "};\n");
00444 
00445   fprintf(out, "\\addplot coordinates {");
00446   for (i = 0; i < size; i++)
00447     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
00448   fprintf(out, "};\n");
00449 
00450   fprintf(out, "\\addplot coordinates {");
00451   for (i = 0; i < size; i++)
00452     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
00453   fprintf(out, "};\n");
00454   fprintf(out, "\\legend{D,F,B,rest,total}\n");
00455   fprintf(out, "\\end{axis}\n");
00456   fprintf(out, "\\end{tikzpicture}\n");
00457   fprintf(out, "\n\n");
00458 
00459   fflush(out);
00460 }
00461 
00462 void run_testset(s_testset *testset, int d, int trafo_adjoint, int N, int M, double sigma, int m, int flags, int *nthreads_array, int n_threads_array_size)
00463 {
00464   int i;
00465   testset->param.d = d;
00466   testset->param.trafo_adjoint = trafo_adjoint;
00467   testset->param.N = N;
00468   testset->param.M = M;
00469   testset->param.sigma = sigma;
00470   testset->param.m = m;
00471   testset->param.flags = flags;
00472 
00473   testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
00474   testset->nresults = n_threads_array_size;
00475 
00476   run_test_create(testset->param.d, testset->param.trafo_adjoint, testset->param.N, testset->param.M, testset->param.sigma);
00477   for (i = 0; i < n_threads_array_size; i++)
00478   {
00479     testset->results[i].nthreads = nthreads_array[i];
00480     run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.flags, testset->results[i].nthreads = nthreads_array[i]);
00481   }
00482 
00483 }
00484 
00485 void test1(int *nthreads_array, int n_threads_array_size, int m)
00486 {
00487   s_testset testsets[15];
00488 
00489   run_testset(&testsets[0], 1, 0, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00490 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00491   print_output_histo_DFBRT(file_out_tex, testsets[0]);
00492 #endif
00493 
00494   run_testset(&testsets[1], 1, 0, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00495 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00496   print_output_histo_DFBRT(file_out_tex, testsets[1]);
00497 #endif
00498 
00499   print_output_speedup_total(file_out_tex, testsets, 2);
00500 
00501   run_testset(&testsets[2], 1, 1, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00502 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00503   print_output_histo_DFBRT(file_out_tex, testsets[2]);
00504 #endif
00505 
00506   run_testset(&testsets[3], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00507 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00508   print_output_histo_DFBRT(file_out_tex, testsets[3]);
00509 #endif
00510 
00511   run_testset(&testsets[4], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00512 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00513   print_output_histo_DFBRT(file_out_tex, testsets[4]);
00514 #endif
00515 
00516   print_output_speedup_total(file_out_tex, testsets+2, 3);
00517 
00518   run_testset(&testsets[5], 2, 0, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
00519 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00520   print_output_histo_DFBRT(file_out_tex, testsets[5]);
00521 #endif
00522 
00523   run_testset(&testsets[6], 2, 0, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00524 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00525   print_output_histo_DFBRT(file_out_tex, testsets[6]);
00526 #endif
00527 
00528   print_output_speedup_total(file_out_tex, testsets+5, 2);
00529 
00530   run_testset(&testsets[7], 2, 1, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
00531 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00532   print_output_histo_DFBRT(file_out_tex, testsets[7]);
00533 #endif
00534 
00535   run_testset(&testsets[8], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00536 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00537   print_output_histo_DFBRT(file_out_tex, testsets[8]);
00538 #endif
00539 
00540   run_testset(&testsets[9], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00541 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00542   print_output_histo_DFBRT(file_out_tex, testsets[9]);
00543 #endif
00544 
00545   print_output_speedup_total(file_out_tex, testsets+7, 3);
00546 
00547   run_testset(&testsets[10], 3, 0, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00548 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00549   print_output_histo_DFBRT(file_out_tex, testsets[10]);
00550 #endif
00551 
00552   run_testset(&testsets[11], 3, 0, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00553 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00554   print_output_histo_DFBRT(file_out_tex, testsets[11]);
00555 #endif
00556 
00557   print_output_speedup_total(file_out_tex, testsets+10, 2);
00558 
00559   run_testset(&testsets[12], 3, 1, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00560 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00561   print_output_histo_DFBRT(file_out_tex, testsets[12]);
00562 #endif
00563 
00564   run_testset(&testsets[13], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00565 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00566   print_output_histo_DFBRT(file_out_tex, testsets[13]);
00567 #endif
00568 
00569   run_testset(&testsets[14], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00570 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00571   print_output_histo_DFBRT(file_out_tex, testsets[14]);
00572 #endif
00573 
00574   print_output_speedup_total(file_out_tex, testsets+12, 3);
00575 
00576 }
00577 
00578 void test2(int *nthreads_array, int n_threads_array_size, int m)
00579 {
00580   s_testset testsets[15];
00581 
00582   run_testset(&testsets[0], 1, 0, 16777216, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00583 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00584   print_output_histo_DFBRT(file_out_tex, testsets[0]);
00585 #endif
00586 
00587   run_testset(&testsets[1], 1, 0, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00588 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00589   print_output_histo_DFBRT(file_out_tex, testsets[1]);
00590 #endif
00591 
00592   print_output_speedup_total(file_out_tex, testsets, 2);
00593 
00594   run_testset(&testsets[2], 1, 1, 16777216, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00595 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00596   print_output_histo_DFBRT(file_out_tex, testsets[2]);
00597 #endif
00598 
00599   run_testset(&testsets[3], 1, 1, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00600 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00601   print_output_histo_DFBRT(file_out_tex, testsets[3]);
00602 #endif
00603 
00604   run_testset(&testsets[4], 1, 1, 16777216, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00605 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00606   print_output_histo_DFBRT(file_out_tex, testsets[4]);
00607 #endif
00608 
00609   print_output_speedup_total(file_out_tex, testsets+2, 3);
00610 
00611   run_testset(&testsets[5], 2, 0, 4096, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
00612 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00613   print_output_histo_DFBRT(file_out_tex, testsets[5]);
00614 #endif
00615 
00616   run_testset(&testsets[6], 2, 0, 4096, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00617 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00618   print_output_histo_DFBRT(file_out_tex, testsets[6]);
00619 #endif
00620 
00621   print_output_speedup_total(file_out_tex, testsets+5, 2);
00622 
00623   run_testset(&testsets[7], 2, 1, 4096, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
00624 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00625   print_output_histo_DFBRT(file_out_tex, testsets[7]);
00626 #endif
00627 
00628   run_testset(&testsets[8], 2, 1, 4096, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00629 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00630   print_output_histo_DFBRT(file_out_tex, testsets[8]);
00631 #endif
00632 
00633   run_testset(&testsets[9], 2, 1, 4096, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00634 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00635   print_output_histo_DFBRT(file_out_tex, testsets[9]);
00636 #endif
00637 
00638   print_output_speedup_total(file_out_tex, testsets+7, 3);
00639 
00640   run_testset(&testsets[10], 3, 0, 256, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00641 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00642   print_output_histo_DFBRT(file_out_tex, testsets[10]);
00643 #endif
00644 
00645   run_testset(&testsets[11], 3, 0, 256, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00646 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00647   print_output_histo_DFBRT(file_out_tex, testsets[11]);
00648 #endif
00649 
00650   print_output_speedup_total(file_out_tex, testsets+10, 2);
00651 
00652   run_testset(&testsets[12], 3, 1, 256, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
00653 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00654   print_output_histo_DFBRT(file_out_tex, testsets[12]);
00655 #endif
00656 
00657   run_testset(&testsets[13], 3, 1, 256, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00658 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00659   print_output_histo_DFBRT(file_out_tex, testsets[13]);
00660 #endif
00661 
00662   run_testset(&testsets[14], 3, 1, 256, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00663 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00664   print_output_histo_DFBRT(file_out_tex, testsets[14]);
00665 #endif
00666 
00667   print_output_speedup_total(file_out_tex, testsets+12, 3);
00668 
00669 }
00670 
00671 int main(int argc, char** argv)
00672 {
00673   int *nthreads_array;
00674   int n_threads_array_size = get_nthreads_array(&nthreads_array);
00675   int k;
00676 
00677 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
00678   fprintf(stderr, "WARNING: Detailed time measurements for NFFT are not activated.\n");
00679   fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n");
00680   fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
00681   fprintf(stderr, "and run \"make clean all\"\n\n");
00682 #endif
00683 
00684   for (k = 0; k < n_threads_array_size; k++)
00685     fprintf(stderr, "%d ", nthreads_array[k]);
00686   fprintf(stderr, "\n");
00687 
00688   file_out_tex = fopen("nfft_benchomp_results_plots.tex", "w");
00689 
00690   test1(nthreads_array, n_threads_array_size, 2);
00691   test1(nthreads_array, n_threads_array_size, 4);
00692   test1(nthreads_array, n_threads_array_size, 6);
00693 //  test2(nthreads_array, n_threads_array_size, 2);
00694 
00695   fclose(file_out_tex);
00696 
00697   return 0;
00698 }