NFFT  3.3.2
fastgauss.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00025 #include <stdio.h>
00026 #include <stdlib.h>
00027 #include <math.h>
00028 #include <complex.h>
00029 
00030 #define NFFT_PRECISION_DOUBLE
00031 
00032 #include "nfft3mp.h"
00033 
00042 #define DGT_PRE_CEXP     (1U<< 0)
00043 
00053 #define FGT_NDFT         (1U<< 1)
00054 
00063 #define FGT_APPROX_B     (1U<< 2)
00064 
00066 typedef struct
00067 {
00068   int N; 
00069   int M; 
00071   NFFT_C *alpha; 
00072   NFFT_C *f; 
00074   unsigned flags; 
00077   NFFT_C sigma; 
00079   NFFT_R *x; 
00080   NFFT_R *y; 
00082   NFFT_C *pre_cexp; 
00084   int n; 
00085   NFFT_R p; 
00087   NFFT_C *b; 
00089   NFFT(plan) *nplan1; 
00090   NFFT(plan) *nplan2; 
00092 } fgt_plan;
00093 
00101 static void dgt_trafo(fgt_plan *ths)
00102 {
00103   NFFT_INT j, k, l;
00104 
00105   for (j = 0; j < ths->M; j++)
00106     ths->f[j] = NFFT_K(0.0);
00107 
00108   if (ths->flags & DGT_PRE_CEXP)
00109     for (j = 0, l = 0; j < ths->M; j++)
00110       for (k = 0; k < ths->N; k++, l++)
00111         ths->f[j] += ths->alpha[k] * ths->pre_cexp[l];
00112   else
00113     for (j = 0; j < ths->M; j++)
00114       for (k = 0; k < ths->N; k++)
00115         ths->f[j] += ths->alpha[k]
00116             * NFFT_M(cexp)(
00117                 -ths->sigma * (ths->y[j] - ths->x[k])
00118                     * (ths->y[j] - ths->x[k]));
00119 }
00120 
00128 static void fgt_trafo(fgt_plan *ths)
00129 {
00130   NFFT_INT l;
00131 
00132   if (ths->flags & FGT_NDFT)
00133   {
00134     NFFT(adjoint_direct)(ths->nplan1);
00135 
00136     for (l = 0; l < ths->n; l++)
00137       ths->nplan1->f_hat[l] *= ths->b[l];
00138 
00139     NFFT(trafo_direct)(ths->nplan2);
00140   }
00141   else
00142   {
00143     NFFT(adjoint)(ths->nplan1);
00144 
00145     for (l = 0; l < ths->n; l++)
00146       ths->nplan1->f_hat[l] *= ths->b[l];
00147 
00148     NFFT(trafo)(ths->nplan2);
00149   }
00150 }
00151 
00166 static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m,
00167     unsigned flags)
00168 {
00169   int j, n_fftw;
00170   FFTW(plan) fplan;
00171 
00172   ths->M = M;
00173   ths->N = N;
00174   ths->sigma = sigma;
00175   ths->flags = flags;
00176 
00177   ths->x = (NFFT_R*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_R));
00178   ths->y = (NFFT_R*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_R));
00179   ths->alpha = (NFFT_C*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_C));
00180   ths->f = (NFFT_C*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_C));
00181 
00182   ths->n = n;
00183   ths->p = p;
00184 
00185   ths->b = (NFFT_C*) NFFT(malloc)((size_t)(ths->n) * sizeof(NFFT_C));
00186 
00187   ths->nplan1 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));
00188   ths->nplan2 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));
00189 
00190   n_fftw = (int)NFFT(next_power_of_2)(2 * ths->n);
00191 
00192   {
00193     NFFT(init_guru)(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT |
00194     PRE_PSI | MALLOC_X | MALLOC_F_HAT | FFTW_INIT, FFTW_MEASURE);
00195     NFFT(init_guru)(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT |
00196     PRE_PSI | MALLOC_X | FFTW_INIT, FFTW_MEASURE);
00197   }
00198 
00199   ths->nplan1->f = ths->alpha;
00200   ths->nplan2->f_hat = ths->nplan1->f_hat;
00201   ths->nplan2->f = ths->f;
00202 
00203   if (ths->flags & FGT_APPROX_B)
00204   {
00205     fplan = FFTW(plan_dft_1d)(ths->n, ths->b, ths->b, FFTW_FORWARD,
00206     FFTW_MEASURE);
00207 
00208     for (j = 0; j < ths->n; j++)
00209       ths->b[j] = NFFT_M(cexp)(
00210           -ths->p * ths->p * ths->sigma * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0))
00211               / ((NFFT_R) (ths->n * ths->n))) / ((NFFT_R)(ths->n));
00212 
00213     NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);
00214     FFTW(execute)(fplan);
00215     NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);
00216 
00217     FFTW(destroy_plan)(fplan);
00218   }
00219   else
00220   {
00221     for (j = 0; j < ths->n; j++)
00222       ths->b[j] = NFFT_K(1.0) / ths->p * NFFT_M(csqrt)(NFFT_KPI / ths->sigma)
00223           * NFFT_M(cexp)(
00224               -NFFT_KPI * NFFT_KPI * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0))
00225                   / (ths->p * ths->p * ths->sigma));
00226   }
00227 }
00228 
00240 static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
00241 {
00242   NFFT_R p;
00243   int n;
00244 
00245   p = NFFT_K(0.5) + NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma));
00246 
00247   if (p < NFFT_K(1.0))
00248     p = NFFT_K(1.0);
00249 
00250   n = (int)(2 * (NFFT_M(lrint)(NFFT_M(ceil)(p * NFFT_M(cabs)(sigma) / NFFT_KPI * NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma))))));
00251 
00252   fgt_init_guru(ths, N, M, sigma, n, p, 7, N * M <= ((NFFT_INT) (1U << 20)) ? DGT_PRE_CEXP : 0);
00253 }
00254 
00261 static void fgt_init_node_dependent(fgt_plan *ths)
00262 {
00263   int j, k, l;
00264 
00265   if (ths->flags & DGT_PRE_CEXP)
00266   {
00267     ths->pre_cexp = (NFFT_C*) NFFT(malloc)((size_t)(ths->M * ths->N) * sizeof(NFFT_C));
00268 
00269     for (j = 0, l = 0; j < ths->M; j++)
00270       for (k = 0; k < ths->N; k++, l++)
00271         ths->pre_cexp[l] = NFFT_M(cexp)(
00272             -ths->sigma * (ths->y[j] - ths->x[k]) * (ths->y[j] - ths->x[k]));
00273   }
00274 
00275   for (j = 0; j < ths->nplan1->M_total; j++)
00276     ths->nplan1->x[j] = ths->x[j] / ths->p;
00277   for (j = 0; j < ths->nplan2->M_total; j++)
00278     ths->nplan2->x[j] = ths->y[j] / ths->p;
00279 
00280   if (ths->nplan1->flags & PRE_PSI)
00281     NFFT(precompute_psi)(ths->nplan1);
00282   if (ths->nplan2->flags & PRE_PSI)
00283     NFFT(precompute_psi)(ths->nplan2);
00284 }
00285 
00292 static void fgt_finalize(fgt_plan *ths)
00293 {
00294   NFFT(finalize)(ths->nplan2);
00295   NFFT(finalize)(ths->nplan1);
00296 
00297   NFFT(free)(ths->nplan2);
00298   NFFT(free)(ths->nplan1);
00299 
00300   NFFT(free)(ths->b);
00301 
00302   NFFT(free)(ths->f);
00303   NFFT(free)(ths->y);
00304 
00305   NFFT(free)(ths->alpha);
00306   NFFT(free)(ths->x);
00307 }
00308 
00315 static void fgt_test_init_rand(fgt_plan *ths)
00316 {
00317   NFFT_INT j, k;
00318 
00319   for (k = 0; k < ths->N; k++)
00320     ths->x[k] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
00321 
00322   for (j = 0; j < ths->M; j++)
00323     ths->y[j] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
00324 
00325   for (k = 0; k < ths->N; k++)
00326     ths->alpha[k] = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0)
00327         + _Complex_I * (NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0));
00328 }
00329 
00338 static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
00339 {
00340   int r;
00341   NFFT_R t0, t1, time_diff;
00342   NFFT_R t_out;
00343   NFFT_R tau = NFFT_K(0.01);
00344 
00345   t_out = NFFT_K(0.0);
00346   r = 0;
00347   while (t_out < tau)
00348   {
00349     r++;
00350     t0 = NFFT(clock_gettime_seconds)();
00351     if (dgt)
00352       dgt_trafo(ths);
00353     else
00354       fgt_trafo(ths);
00355     t1 = NFFT(clock_gettime_seconds)();
00356     time_diff = t1 - t0;
00357     t_out += time_diff;
00358   }
00359   t_out /= (NFFT_R)(r);
00360 
00361   return t_out;
00362 }
00363 
00373 static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
00374 {
00375   fgt_plan my_plan;
00376   NFFT_C *swap_dgt;
00377 
00378   fgt_init(&my_plan, N, M, sigma, eps);
00379   swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C));
00380 
00381   fgt_test_init_rand(&my_plan);
00382   fgt_init_node_dependent(&my_plan);
00383 
00384   NFFT_CSWAP(swap_dgt, my_plan.f);
00385   dgt_trafo(&my_plan);
00386   NFFT(vpr_complex)(my_plan.f, my_plan.M, "discrete gauss transform");
00387   NFFT_CSWAP(swap_dgt, my_plan.f);
00388 
00389   fgt_trafo(&my_plan);
00390   NFFT(vpr_complex)(my_plan.f, my_plan.M, "fast gauss transform");
00391 
00392   printf("\n relative error: %1.3" NFFT__FES__ "\n",
00393       NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M,
00394           my_plan.alpha, my_plan.N));
00395 
00396   NFFT(free)(swap_dgt);
00397   fgt_finalize(&my_plan);
00398 }
00399 
00409 static void fgt_test_andersson(void)
00410 {
00411   fgt_plan my_plan;
00412   NFFT_C *swap_dgt;
00413   int N;
00414 
00415   NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
00416   int n = 128;
00417   int N_dgt_pre_exp = (int) (1U << 11);
00418   int N_dgt = (int) (1U << 19);
00419 
00420   printf("n=%d, sigma=%1.3" NFFT__FES__ "+i%1.3" NFFT__FES__ "\n", n, NFFT_M(creal)(sigma), NFFT_M(cimag)(sigma));
00421 
00422   for (N = ((NFFT_INT) (1U << 6)); N < ((NFFT_INT) (1U << 22)); N = N << 1)
00423   {
00424     printf("$%d$\t & ", N);
00425 
00426     fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, N < N_dgt_pre_exp ? DGT_PRE_CEXP : 0);
00427 
00428     swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C));
00429 
00430     fgt_test_init_rand(&my_plan);
00431 
00432     fgt_init_node_dependent(&my_plan);
00433 
00434     if (N < N_dgt)
00435     {
00436       NFFT_CSWAP(swap_dgt, my_plan.f);
00437       if (N < N_dgt_pre_exp)
00438         my_plan.flags ^= DGT_PRE_CEXP;
00439 
00440       printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
00441       if (N < N_dgt_pre_exp)
00442         my_plan.flags ^= DGT_PRE_CEXP;
00443       NFFT_CSWAP(swap_dgt, my_plan.f);
00444     }
00445     else
00446       printf("\t\t & ");
00447 
00448     if (N < N_dgt_pre_exp)
00449       printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
00450     else
00451       printf("\t\t & ");
00452 
00453     my_plan.flags ^= FGT_NDFT;
00454     printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));
00455     my_plan.flags ^= FGT_NDFT;
00456 
00457     printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));
00458 
00459     printf("$%1.1" NFFT__FES__ "$\t \\\\ \n",
00460     NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00461     fflush(stdout);
00462 
00463     NFFT(free)(swap_dgt);
00464     fgt_finalize(&my_plan);
00465     FFTW(cleanup)();
00466   }
00467 }
00468 
00475 static void fgt_test_error(void)
00476 {
00477   fgt_plan my_plan;
00478   NFFT_C *swap_dgt;
00479   int n, mi;
00480 
00481   NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
00482   int N = 1000;
00483   int M = 1000;
00484   int m[2] = { 7, 3 };
00485 
00486   printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),
00487       NFFT_M(cimag)(sigma));
00488   printf("error=[\n");
00489 
00490   swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));
00491 
00492   for (n = 8; n <= 128; n += 4)
00493   {
00494     printf("%d\t", n);
00495     for (mi = 0; mi < 2; mi++)
00496     {
00497       fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
00498       fgt_test_init_rand(&my_plan);
00499       fgt_init_node_dependent(&my_plan);
00500 
00501       NFFT_CSWAP(swap_dgt, my_plan.f);
00502       dgt_trafo(&my_plan);
00503       NFFT_CSWAP(swap_dgt, my_plan.f);
00504 
00505       fgt_trafo(&my_plan);
00506 
00507       printf("%1.3" NFFT__FES__ "\t",
00508         NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00509       fflush(stdout);
00510 
00511       fgt_finalize(&my_plan);
00512       FFTW(cleanup)();
00513     }
00514     printf("\n");
00515   }
00516   printf("];\n");
00517 
00518   NFFT(free)(swap_dgt);
00519 }
00520 
00527 static void fgt_test_error_p(void)
00528 {
00529   fgt_plan my_plan;
00530   NFFT_C *swap_dgt;
00531   int n, pi;
00532 
00533   NFFT_C sigma = NFFT_K(20.0) + NFFT_K(40.0) * _Complex_I;
00534   int N = 1000;
00535   int M = 1000;
00536   NFFT_R p[3] = {NFFT_K(1.0), NFFT_K(1.5), NFFT_K(2.0)};
00537 
00538   printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),
00539       NFFT_M(cimag)(sigma));
00540   printf("error=[\n");
00541 
00542   swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));
00543 
00544   for (n = 8; n <= 128; n += 4)
00545   {
00546     printf("%d\t", n);
00547     for (pi = 0; pi < 3; pi++)
00548     {
00549       fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
00550       fgt_test_init_rand(&my_plan);
00551       fgt_init_node_dependent(&my_plan);
00552 
00553       NFFT_CSWAP(swap_dgt, my_plan.f);
00554       dgt_trafo(&my_plan);
00555       NFFT_CSWAP(swap_dgt, my_plan.f);
00556 
00557       fgt_trafo(&my_plan);
00558 
00559       printf("%1.3" NFFT__FES__ "\t",
00560       NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00561       fflush(stdout);
00562 
00563       fgt_finalize(&my_plan);
00564       FFTW(cleanup)();
00565     }
00566     printf("\n");
00567   }
00568   printf("];\n");
00569 }
00570 
00576 int main(int argc, char **argv)
00577 {
00578   if (argc != 2)
00579   {
00580     fprintf(stderr, "fastgauss type\n");
00581     fprintf(stderr, " type\n");
00582     fprintf(stderr, "  0 - Simple test.\n");
00583     fprintf(stderr, "  1 - Compares accuracy and execution time.\n");
00584     fprintf(stderr, "      Pipe to output_andersson.tex\n");
00585     fprintf(stderr, "  2 - Compares accuracy.\n");
00586     fprintf(stderr, "      Pipe to output_error.m\n");
00587     fprintf(stderr, "  3 - Compares accuracy.\n");
00588     fprintf(stderr, "      Pipe to output_error_p.m\n");
00589     return EXIT_FAILURE;
00590   }
00591 
00592   if (atoi(argv[1]) == 0)
00593     fgt_test_simple(10, 10, NFFT_K(5.0) + NFFT_K(3.0) * _Complex_I, NFFT_K(0.001));
00594 
00595   if (atoi(argv[1]) == 1)
00596     fgt_test_andersson();
00597 
00598   if (atoi(argv[1]) == 2)
00599     fgt_test_error();
00600 
00601   if (atoi(argv[1]) == 3)
00602     fgt_test_error_p();
00603 
00604   return EXIT_SUCCESS;
00605 }
00606 /* \} */