NFFT  3.3.2
nfst.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* Nonequispaced fast sine transform */
00020 
00021 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
00022 
00023 /* configure header */
00024 #include "config.h"
00025 
00026 /* complex datatype (maybe) */
00027 #ifdef HAVE_COMPLEX_H
00028 #include<complex.h>
00029 #endif
00030 
00031 /* NFFT headers */
00032 #include "nfft3.h"
00033 #include "infft.h"
00034 
00035 #ifdef _OPENMP
00036 #include <omp.h>
00037 #endif
00038 
00039 #ifdef OMP_ASSERT
00040 #include <assert.h>
00041 #endif
00042 
/* Route the generic name mangler X() to the NFST namespace: every X(foo)
 * used below expands through NFST(foo). */
#undef X
#define X(name) NFST(name)
00045 
00047 static inline INT intprod(const INT *vec, const INT a, const INT d)
00048 {
00049   INT t, p;
00050 
00051   p = 1;
00052   for (t = 0; t < d; t++)
00053     p *= vec[t] - a;
00054 
00055   return p;
00056 }
00057 
/* handy shortcuts */

/* Basis function of the transform. */
#define BASE(x) SIN(x)
/* x + 1; the argument and the result are fully parenthesized so the macro is
 * safe inside larger expressions and with compound arguments. */
#define NN(x) ((x) + 1)
/* Shift between a zero-based coefficient index and its frequency. */
#define OFFSET 1
/* FFTW r2r kind used for every dimension. */
#define FOURIER_TRAFO FFTW_RODFT00
/* Parenthesized so the flag set survives being embedded in an expression. */
#define FFTW_DEFAULT_FLAGS (FFTW_ESTIMATE | FFTW_DESTROY_INPUT)

/* Coordinate r of node p. */
#define NODE(p,r) (ths->x[(p) * ths->d + (r)])

/* Window value taken from the per-node table fg_psi. */
#define MACRO_with_FG_PSI fg_psi[t][lj[t]]
/* Window value taken from the precomputed table ths->psi (2m+2 per node and
 * dimension). */
#define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
/* Window value computed on the fly: PHI evaluated at the node coordinate minus
 * (lj[t] + u[t]) / (2 * NN(n[t])). */
#define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
  - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
/* Same evaluation as MACRO_without_PRE_PSI, written with NODE(). */
#define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
00072 
00088 void X(trafo_direct)(const X(plan) *ths)
00089 {
00090   R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
00091 
00092   memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
00093 
00094   if (ths->d == 1)
00095   {
00096     /* specialize for univariate case, rationale: faster */
00097     INT j;
00098 #ifdef _OPENMP
00099     #pragma omp parallel for default(shared) private(j)
00100 #endif
00101     for (j = 0; j < ths->M_total; j++)
00102     {
00103       INT k_L;
00104       for (k_L = 0; k_L < ths->N_total; k_L++)
00105       {
00106         R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
00107         f[j] += f_hat[k_L] * BASE(omega);
00108       }
00109     }
00110   }
00111   else
00112   {
00113     /* multivariate case */
00114     INT j;
00115 #ifdef _OPENMP
00116     #pragma omp parallel for default(shared) private(j)
00117 #endif
00118     for (j = 0; j < ths->M_total; j++)
00119     {
00120       R x[ths->d], omega, Omega[ths->d + 1];
00121       INT t, t2, k_L, k[ths->d];
00122       Omega[0] = K(1.0);
00123       for (t = 0; t < ths->d; t++)
00124       {
00125         k[t] = OFFSET;
00126         x[t] = K2PI * ths->x[j * ths->d + t];
00127         Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
00128       }
00129       omega = Omega[ths->d];
00130 
00131       for (k_L = 0; k_L < ths->N_total; k_L++)
00132       {
00133         f[j] += f_hat[k_L] * omega;
00134         {
00135           for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
00136             k[t] = OFFSET;
00137 
00138           k[t]++;
00139 
00140           for (t2 = t; t2 < ths->d; t2++)
00141             Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
00142 
00143           omega = Omega[ths->d];
00144         }
00145       }
00146     }
00147   }
00148 }
00149 
00150 void X(adjoint_direct)(const X(plan) *ths)
00151 {
00152   R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
00153 
00154   memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
00155 
00156   if (ths->d == 1)
00157   {
00158     /* specialize for univariate case, rationale: faster */
00159 #ifdef _OPENMP
00160       INT k_L;
00161       #pragma omp parallel for default(shared) private(k_L)
00162       for (k_L = 0; k_L < ths->N_total; k_L++)
00163       {
00164         INT j;
00165         for (j = 0; j < ths->M_total; j++)
00166         {
00167           R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
00168           f_hat[k_L] += f[j] * BASE(omega);
00169         }
00170       }
00171 #else
00172       INT j;
00173       for (j = 0; j < ths->M_total; j++)
00174       {
00175         INT k_L;
00176         for (k_L = 0; k_L < ths->N_total; k_L++)
00177         {
00178           R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
00179           f_hat[k_L] += f[j] * BASE(omega);
00180         }
00181       }
00182 #endif
00183   }
00184   else
00185   {
00186     /* multivariate case */
00187     INT j, k_L;
00188 #ifdef _OPENMP
00189     #pragma omp parallel for default(shared) private(j, k_L)
00190     for (k_L = 0; k_L < ths->N_total; k_L++)
00191     {
00192       INT k[ths->d], k_temp, t;
00193 
00194       k_temp = k_L;
00195 
00196       for (t = ths->d - 1; t >= 0; t--)
00197       {
00198         k[t] = k_temp % ths->N[t];
00199         k_temp /= ths->N[t];
00200       }
00201 
00202       for (j = 0; j < ths->M_total; j++)
00203       {
00204         R omega = K(1.0);
00205         for (t = 0; t < ths->d; t++)
00206           omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
00207         f_hat[k_L] += f[j] * omega;
00208       }
00209     }
00210 #else
00211     for (j = 0; j < ths->M_total; j++)
00212     {
00213       R x[ths->d], omega, Omega[ths->d+1];
00214       INT t, t2, k[ths->d];
00215       Omega[0] = K(1.0);
00216       for (t = 0; t < ths->d; t++)
00217       {
00218         k[t] = OFFSET;
00219         x[t] = K2PI * ths->x[j * ths->d + t];
00220         Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
00221       }
00222       omega = Omega[ths->d];
00223       for (k_L = 0; k_L < ths->N_total; k_L++)
00224       {
00225         f_hat[k_L] += f[j] * omega;
00226 
00227         for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
00228           k[t] = OFFSET;
00229 
00230         k[t]++;
00231 
00232         for (t2 = t; t2 < ths->d; t2++)
00233           Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
00234 
00235         omega = Omega[ths->d];
00236       }
00237     }
00238 #endif
00239   }
00240 }
00241 
00261 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
00262   const INT act_dim)
00263 {
00264   const R xj = ths->x[j * ths->d + act_dim];
00265   INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
00266 
00267   (*up) = c - (ths->m);
00268   (*op) = c + 1 + (ths->m);
00269 }
00270 
/* D step, direction A: scale coefficient k_L and store it at its position in
 * g_hat. */
#define MACRO_D_compute_A \
{ \
  g_hat[kg_plain[ths->d]] = c_phi_inv_k[ths->d] * f_hat[k_L]; \
}

/* D step, direction T: read the entry of g_hat and store its scaled value as
 * coefficient k_L. */
#define MACRO_D_compute_T \
{ \
  f_hat[k_L] = c_phi_inv_k[ths->d] * g_hat[kg_plain[ths->d]]; \
}

/* Clear the output array of D_A (n_total entries of g_hat). */
#define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));

/* Clear the output array of D_T (N_total entries of f_hat). */
#define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));

/* Scaling factor for dimension t, read from the precomputed table. */
#define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]

/* Scaling factor for dimension t, computed on the fly as 1 / PHI_HUT at
 * frequency kg[t] + OFFSET. */
#define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
00288 
/* Reset the multi-index kg to zero and mark dimension 0 as the first one
 * whose partial results must be recomputed. */
#define MACRO_init_k_ks \
{ \
  i = 0; \
  for (t = 0; t < ths->d; t++) \
    kg[t] = 0; \
}

/* Recompute, from dimension i onwards, the partial scaling products and the
 * partial plain index into g_hat (row length n[t] per dimension). */
#define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
{ \
  for (t = i; t < ths->d; t++) \
  { \
    MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
    kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
  } \
}

/* One step of the scaling product for direction A: half the previous partial
 * product times the factor of dimension t. */
#define MACRO_update_c_phi_inv_k_A(which_phi) \
{ \
  c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
}

/* One step of the scaling product for direction T; same formula as for A. */
#define MACRO_update_c_phi_inv_k_T(which_phi) \
{ \
  c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
}

/* Advance the multi-index kg by one (last dimension fastest). A dimension
 * that reaches N[i] - 1 wraps to 0 and carries into the previous one; on exit
 * i is the lowest dimension that changed. */
#define MACRO_count_k_ks \
{ \
  i = ths->d - 1; \
  kg[i]++; \
\
  while ((i > 0) && (kg[i] == ths->N[i] - 1)) \
  { \
    kg[i] = 0; \
    i--; \
    kg[i]++; \
  } \
}
00329 
00330 /* sub routines for the fast transforms  matrix vector multiplication with D, D^T */
00331 #define MACRO_D(which_one) \
00332 static inline void D_ ## which_one (X(plan) *ths) \
00333 { \
00334   R *g_hat, *f_hat; /* local copy */ \
00335   R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
00336   INT t; /* index dimensions */ \
00337   INT i; \
00338   INT k_L; /* plain index */ \
00339   INT kg[ths->d]; /* multi index in g_hat */ \
00340   INT kg_plain[ths->d+1]; /* postfix plain index */ \
00341 \
00342   f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
00343   MACRO_D_init_result_ ## which_one; \
00344 \
00345   c_phi_inv_k[0] = K(1.0); \
00346   kg_plain[0] = 0; \
00347 \
00348   MACRO_init_k_ks; \
00349 \
00350   if (ths->flags & PRE_PHI_HUT) \
00351   { \
00352     for (k_L = 0; k_L < ths->N_total; k_L++) \
00353     { \
00354       MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
00355       MACRO_D_compute_ ## which_one; \
00356       MACRO_count_k_ks; \
00357     } \
00358   } \
00359   else \
00360   { \
00361     for (k_L = 0; k_L < ths->N_total; k_L++) \
00362     { \
00363       MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
00364       MACRO_D_compute_ ## which_one; \
00365       MACRO_count_k_ks; \
00366     } \
00367   } \
00368 }
00369 
00370 MACRO_D(A)
00371 MACRO_D(T)
00372 
/* Building blocks for B_A and B_T, the sparse window-matrix step of the fast
 * transforms. */

/* Clear the output array of B_A (M_total entries of f). */
#define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
/* Clear the output array of B_T (n_total entries of g). */
#define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));

/* Direction A with fully precomputed rows: gather one weighted grid value
 * into the current node value. */
#define MACRO_B_PRE_FULL_PSI_compute_A \
{ \
  (*fj) += g[ths->psi_index_g[ix]] * ths->psi[ix]; \
}

/* Direction T with fully precomputed rows: scatter the weighted node value
 * onto one grid entry. */
#define MACRO_B_PRE_FULL_PSI_compute_T \
{ \
  g[ths->psi_index_g[ix]] += (*fj) * ths->psi[ix]; \
}

/* Direction A, weight taken from the running product phi_prod. */
#define MACRO_B_compute_A \
{ \
  (*fj) += g[ll_plain[ths->d]] * phi_prod[ths->d]; \
}

/* Direction T, weight taken from the running product phi_prod. */
#define MACRO_B_compute_T \
{ \
  g[ll_plain[ths->d]] += (*fj) * phi_prod[ths->d]; \
}
00396 
/* Per-node setup of the window walk. For every dimension t2 this computes the
 * window bounds u/o, reduces u modulo 2 * NN(n) (negative u is lifted by one
 * period), maps values above NN(n) to their negative counterpart, and derives
 * from the resulting lg_offset the start index l (its absolute value) and the
 * initial walking direction count_lg (its sign, with 0 counted as negative).
 * The window counters lj are cleared and t2 is left at 0. */
#define MACRO_init_uo_l_lj_t \
{ \
  for (t2 = 0; t2 < ths->d; t2++) \
  { \
    uo(ths, j, &u[t2], &o[t2], t2); \
 \
    /* index in the g-array corresponding to u[t2] */ \
    lg_offset[t2] = u[t2] % (2 * NN(ths->n[t2])); \
    if (u[t2] < 0) \
      lg_offset[t2] += 2 * NN(ths->n[t2]); \
 \
    /* upper half of the period: use the negative counterpart */ \
    if (lg_offset[t2] > NN(ths->n[t2])) \
      lg_offset[t2] -= 2 * NN(ths->n[t2]); \
 \
    if (lg_offset[t2] > 0) \
    { \
      l[t2] = +lg_offset[t2]; \
      count_lg[t2] = +1; \
    } \
    else \
    { \
      l[t2] = -lg_offset[t2]; \
      count_lg[t2] = -1; \
    } \
 \
    lj[t2] = 0; \
  } \
  t2 = 0; \
}
00427 
/* Sign factor applied to the window value in direction A: the current walking
 * direction of dimension t. */
#define FOO_A ((R)count_lg[t])

/* Sign factor applied in direction T; same as for A. */
#define FOO_T ((R)count_lg[t])

/* Recompute, from dimension t2 onwards, the running window product phi_prod
 * and the running plain index ll_plain into g. At the boundary positions
 * l[t] == 0 and l[t] == NN(n[t]) the product is set to zero; otherwise the
 * signed window value is multiplied in and l[t] - 1 is used as the index
 * within the dimension. */
#define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
{ \
  for (t = t2; t < ths->d; t++) \
  { \
    if ((l[t] == 0) || (l[t] == NN(ths->n[t]))) \
    { \
      phi_prod[t+1] = K(0.0); \
      ll_plain[t+1] = ll_plain[t] * ths->n[t]; \
    } \
    else \
    { \
      phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
      ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t] - 1; \
    } \
  } \
}
00448 
/* Advance the window walk by one position (last dimension fastest). The
 * array index l of a dimension moves by count_lg, whose sign flips whenever l
 * sits on one of the boundaries 0 or NN(n). When a window counter lj reaches
 * 2m+2 it wraps to 0, the previous dimension is advanced, and the wrapped
 * dimension is reset to its start position derived from lg_offset. On exit
 * t2 is the lowest dimension that changed. */
#define MACRO_count_uo_l_lj_t \
{ \
  t2 = ths->d - 1; \
 \
  /* turn around if we hit one of the boundaries */ \
  if ((l[t2] == 0) || (l[t2] == NN(ths->n[t2]))) \
    count_lg[t2] = -count_lg[t2]; \
 \
  /* move array index */ \
  l[t2] += count_lg[t2]; \
 \
  lj[t2]++; \
 \
  while ((t2 > 0) && (lj[t2] == (2 * ths->m + 2))) \
  { \
    /* carry into the previous dimension */ \
    lj[t2] = 0; \
    lj[t2 - 1]++; \
 \
    /* turn around if we hit one of the boundaries */ \
    if ((l[t2 - 1] == 0) || (l[t2 - 1] == NN(ths->n[t2 - 1]))) \
      count_lg[t2 - 1] = -count_lg[t2 - 1]; \
    /* move array index */ \
    l[t2 - 1] += count_lg[t2 - 1]; \
 \
    /* put the wrapped dimension back to its initial value */ \
    if (lg_offset[t2] > 0) \
    { \
      l[t2] = +lg_offset[t2]; \
      count_lg[t2] = +1; \
    } \
    else \
    { \
      l[t2] = -lg_offset[t2]; \
      count_lg[t2] = -1; \
    } \
 \
    t2--; \
  } \
}
00488 
00489 #define MACRO_B(which_one) \
00490 static inline void B_ ## which_one (X(plan) *ths) \
00491 { \
00492   INT lprod; /* 'regular bandwidth' of matrix B  */ \
00493   INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
00494   INT t, t2; /* index dimensions */ \
00495   INT j; /* index nodes */ \
00496   INT l_L, ix; /* index one row of B */ \
00497   INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
00498   INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
00499   INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
00500   R phi_prod[ths->d+1]; /* postfix product of PHI */ \
00501   R *f, *g; /* local copy */ \
00502   R *fj; /* local copy */ \
00503   R y[ths->d]; \
00504   R fg_psi[ths->d][2*ths->m+2]; \
00505   R fg_exp_l[ths->d][2*ths->m+2]; \
00506   INT l_fg,lj_fg; \
00507   R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
00508   R ip_w; \
00509   INT ip_u; \
00510   INT ip_s = ths->K/(ths->m+2); \
00511   INT lg_offset[ths->d]; /* offset in g according to u */ \
00512   INT count_lg[ths->d]; /* count summands (2m+2) */ \
00513 \
00514   f = (R*)ths->f; g = (R*)ths->g; \
00515 \
00516   MACRO_B_init_result_ ## which_one \
00517 \
00518   if (ths->flags & PRE_FULL_PSI) \
00519   { \
00520     for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
00521     { \
00522       for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
00523       { \
00524         MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
00525       } \
00526     } \
00527     return; \
00528   } \
00529 \
00530   phi_prod[0] = K(1.0); \
00531   ll_plain[0] = 0; \
00532 \
00533   for (t = 0, lprod = 1; t < ths->d; t++) \
00534     lprod *= (2 * ths->m + 2); \
00535 \
00536   if (ths->flags & PRE_PSI) \
00537   { \
00538     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00539     { \
00540       MACRO_init_uo_l_lj_t; \
00541  \
00542       for (l_L = 0; l_L < lprod; l_L++) \
00543       { \
00544         MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
00545  \
00546         MACRO_B_compute_ ## which_one; \
00547  \
00548         MACRO_count_uo_l_lj_t; \
00549       } /* for(l_L) */ \
00550     } /* for(j) */ \
00551     return; \
00552   } /* if(PRE_PSI) */ \
00553  \
00554   if (ths->flags & PRE_FG_PSI) \
00555   { \
00556     for (t = 0; t < ths->d; t++) \
00557     { \
00558       tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
00559       tmpEXP2sq = tmpEXP2 * tmpEXP2; \
00560       tmp2 = K(1.0); \
00561       tmp3 = K(1.0); \
00562       fg_exp_l[t][0] = K(1.0); \
00563  \
00564       for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
00565       { \
00566         tmp3 = tmp2 * tmpEXP2; \
00567         tmp2 *= tmpEXP2sq; \
00568         fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
00569       } \
00570     } \
00571  \
00572     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00573     { \
00574       MACRO_init_uo_l_lj_t; \
00575  \
00576       for (t = 0; t < ths->d; t++) \
00577       { \
00578         fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
00579         tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
00580         tmp1 = K(1.0); \
00581  \
00582         for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
00583         { \
00584           tmp1 *= tmpEXP1; \
00585           fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
00586         } \
00587       } \
00588  \
00589       for (l_L= 0; l_L < lprod; l_L++) \
00590       { \
00591         MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
00592  \
00593         MACRO_B_compute_ ## which_one; \
00594  \
00595         MACRO_count_uo_l_lj_t; \
00596       } \
00597     } \
00598     return; \
00599   } \
00600  \
00601   if (ths->flags & FG_PSI) \
00602   { \
00603     for (t = 0; t < ths->d; t++) \
00604     { \
00605       tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
00606       tmpEXP2sq = tmpEXP2 * tmpEXP2; \
00607       tmp2 = K(1.0); \
00608       tmp3 = K(1.0); \
00609       fg_exp_l[t][0] = K(1.0); \
00610       for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
00611       { \
00612         tmp3 = tmp2 * tmpEXP2; \
00613         tmp2 *= tmpEXP2sq; \
00614         fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
00615       } \
00616     } \
00617  \
00618     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00619     { \
00620       MACRO_init_uo_l_lj_t; \
00621  \
00622       for (t = 0; t < ths->d; t++) \
00623       { \
00624         fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
00625  \
00626         tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
00627         tmp1 = K(1.0); \
00628         for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
00629         { \
00630           tmp1 *= tmpEXP1; \
00631           fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
00632         } \
00633       } \
00634   \
00635       for (l_L = 0; l_L < lprod; l_L++) \
00636       { \
00637         MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
00638  \
00639         MACRO_B_compute_ ## which_one; \
00640  \
00641         MACRO_count_uo_l_lj_t; \
00642       } \
00643     } \
00644     return; \
00645   } \
00646  \
00647   if (ths->flags & PRE_LIN_PSI) \
00648   { \
00649     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00650     { \
00651       MACRO_init_uo_l_lj_t; \
00652   \
00653       for (t = 0; t < ths->d; t++) \
00654       { \
00655         y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
00656                 * ((R)ths->K))/(ths->m + 2); \
00657         ip_u  = LRINT(FLOOR(y[t])); \
00658         ip_w  = y[t]-ip_u; \
00659         for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
00660         { \
00661           fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
00662             * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
00663             * (ip_w); \
00664         } \
00665       } \
00666   \
00667       for (l_L = 0; l_L < lprod; l_L++) \
00668       { \
00669         MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
00670  \
00671         MACRO_B_compute_ ## which_one; \
00672  \
00673         MACRO_count_uo_l_lj_t; \
00674       }  /* for(l_L) */  \
00675     } /* for(j) */  \
00676     return; \
00677   } /* if(PRE_LIN_PSI) */ \
00678   \
00679   /* no precomputed psi at all */ \
00680   for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
00681   { \
00682     MACRO_init_uo_l_lj_t; \
00683  \
00684     for (l_L = 0; l_L < lprod; l_L++) \
00685     { \
00686       MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
00687  \
00688       MACRO_B_compute_ ## which_one; \
00689  \
00690       MACRO_count_uo_l_lj_t; \
00691     } /* for (l_L) */ \
00692   } /* for (j) */ \
00693 } /* B */
00694 
00695 MACRO_B(A)
00696 MACRO_B(T)
00697 
00701 void X(trafo)(X(plan) *ths)
00702 {
00703   switch(ths->d)
00704   {
00705     default:
00706     {
00707       /* use ths->my_fftw_r2r_plan */
00708       ths->g_hat = ths->g1;
00709       ths->g = ths->g2;
00710 
00711       /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
00712        * k \in I_N \f$ */
00713       TIC(0)
00714       D_A(ths);
00715       TOC(0)
00716 
00717       /* Compute by d-variate discrete Fourier transform
00718        * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
00719        * \text{ for } l \in I_n \f$ */
00720       TIC_FFTW(1)
00721       FFTW(execute)(ths->my_fftw_r2r_plan);
00722       TOC_FFTW(1)
00723 
00724       /*if (ths->flags & PRE_FULL_PSI)
00725         full_psi__A(ths);*/
00726 
00727       /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
00728        * \text{ for } j=0,\hdots,M-1 \f$ */
00729       TIC(2)
00730       B_A(ths);
00731       TOC(2)
00732 
00733       /*if (ths->flags & PRE_FULL_PSI)
00734       {
00735         Y(free)(ths->psi_index_g);
00736         Y(free)(ths->psi_index_f);
00737       }*/
00738     }
00739   }
00740 } /* trafo */
00741 
00742 void X(adjoint)(X(plan) *ths)
00743 {
00744   switch(ths->d)
00745   {
00746     default:
00747     {
00748       /* use ths->my_fftw_plan */
00749       ths->g_hat = ths->g2;
00750       ths->g = ths->g1;
00751 
00752       /*if (ths->flags & PRE_FULL_PSI)
00753         full_psi__T(ths);*/
00754 
00755       /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
00756        * \text{ for } l \in I_n,m(x_j) \f$ */
00757       TIC(2)
00758       B_T(ths);
00759       TOC(2)
00760 
00761       /* Compute by d-variate discrete cosine transform
00762        * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
00763        * \text{ for }  k \in I_N\f$ */
00764       TIC_FFTW(1)
00765       FFTW(execute)(ths->my_fftw_r2r_plan);
00766       TOC_FFTW(1)
00767 
00768       /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
00769        * k \in I_N \f$ */
00770       TIC(0)
00771       D_T(ths);
00772       TOC(0)
00773     }
00774   }
00775 } /* adjoint */
00776 
00779 static inline void precompute_phi_hut(X(plan) *ths)
00780 {
00781   INT ks[ths->d]; /* index over all frequencies */
00782   INT t; /* index over all dimensions */
00783 
00784   ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
00785 
00786   for (t = 0; t < ths->d; t++)
00787   {
00788     ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
00789 
00790     for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
00791     {
00792       ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
00793     }
00794   }
00795 } /* phi_hut */
00796 
00802 void X(precompute_lin_psi)(X(plan) *ths)
00803 {
00804   INT t; 
00805   INT j; 
00806   R step; 
00808   for (t = 0; t < ths->d; t++)
00809   {
00810     step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
00811 
00812     for (j = 0; j <= ths->K; j++)
00813     {
00814       ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
00815     } /* for(j) */
00816   } /* for(t) */
00817 }
00818 
00819 void X(precompute_fg_psi)(X(plan) *ths)
00820 {
00821   INT t; /* index over all dimensions */
00822   INT u, o; /* depends on x_j */
00823 
00824 //  sort(ths);
00825 
00826   for (t = 0; t < ths->d; t++)
00827   {
00828     INT j;
00829 //    #pragma omp parallel for default(shared) private(j,u,o)
00830     for (j = 0; j < ths->M_total; j++)
00831     {
00832       uo(ths, j, &u, &o, t);
00833 
00834       ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
00835       ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
00836       } /* for(j) */
00837   }
00838   /* for(t) */
00839 } /* nfft_precompute_fg_psi */
00840 
00841 void X(precompute_psi)(X(plan) *ths)
00842 {
00843   INT t; /* index over all dimensions */
00844   INT lj; /* index 0<=lj<u+o+1 */
00845   INT u, o; /* depends on x_j */
00846 
00847   //sort(ths);
00848 
00849   for (t = 0; t < ths->d; t++)
00850   {
00851     INT j;
00852 
00853     for (j = 0; j < ths->M_total; j++)
00854     {
00855       uo(ths, j, &u, &o, t);
00856 
00857       for(lj = 0; lj < (2 * ths->m + 2); lj++)
00858         ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
00859             (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
00860     } /* for (j) */
00861   } /* for (t) */
00862 } /* precompute_psi */
00863 
00864 void X(precompute_full_psi)(X(plan) *ths)
00865 {
00866 //#ifdef _OPENMP
00867 //  sort(ths);
00868 //
00869 //  nfft_precompute_full_psi_omp(ths);
00870 //#else
00871   INT t, t2; /* index over all dimensions */
00872   INT j; /* index over all nodes */
00873   INT l_L; /* plain index 0 <= l_L < lprod */
00874   INT l[ths->d]; /* multi index u<=l<=o */
00875   INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
00876   INT ll_plain[ths->d+1]; /* postfix plain index */
00877   INT lprod; /* 'bandwidth' of matrix B */
00878   INT u[ths->d], o[ths->d]; /* depends on x_j */
00879   INT count_lg[ths->d];
00880   INT lg_offset[ths->d];
00881 
00882   R phi_prod[ths->d+1];
00883 
00884   INT ix, ix_old;
00885 
00886   //sort(ths);
00887 
00888   phi_prod[0] = K(1.0);
00889   ll_plain[0]  = 0;
00890 
00891   for (t = 0, lprod = 1; t < ths->d; t++)
00892     lprod *= 2 * ths->m + 2;
00893 
00894   for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
00895   {
00896     MACRO_init_uo_l_lj_t;
00897 
00898     for (l_L = 0; l_L < lprod; l_L++, ix++)
00899     {
00900       MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
00901 
00902       ths->psi_index_g[ix] = ll_plain[ths->d];
00903       ths->psi[ix] = phi_prod[ths->d];
00904 
00905       MACRO_count_uo_l_lj_t;
00906     } /* for (l_L) */
00907 
00908     ths->psi_index_f[j] = ix - ix_old;
00909     ix_old = ix;
00910   } /* for(j) */
00911 //#endif
00912 }
00913 
00914 void X(precompute_one_psi)(X(plan) *ths)
00915 {
00916   if(ths->flags & PRE_PSI)
00917     X(precompute_psi)(ths);
00918   if(ths->flags & PRE_FULL_PSI)
00919     X(precompute_full_psi)(ths);
00920   if(ths->flags & PRE_FG_PSI)
00921     X(precompute_fg_psi)(ths);
00922   if(ths->flags & PRE_LIN_PSI)
00923     X(precompute_lin_psi)(ths);
00924 }
00925 
00926 static inline void init_help(X(plan) *ths)
00927 {
00928   INT t; /* index over all dimensions */
00929   INT lprod; /* 'bandwidth' of matrix B */
00930 
00931   if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00932     ths->flags |= NFFT_SORT_NODES;
00933 
00934   ths->N_total = intprod(ths->N, OFFSET, ths->d);
00935   ths->n_total = intprod(ths->n, 0, ths->d);
00936 
00937   ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
00938 
00939   for (t = 0; t < ths->d; t++)
00940     ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
00941 
00942   /* Assign r2r transform kinds for each dimension */
00943   ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
00944   for (t = 0; t < ths->d; t++)
00945     ths->r2r_kind[t] = FOURIER_TRAFO;
00946 
00947   WINDOW_HELP_INIT;
00948 
00949   if (ths->flags & MALLOC_X)
00950     ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
00951 
00952   if (ths->flags & MALLOC_F_HAT)
00953     ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
00954 
00955   if (ths->flags & MALLOC_F)
00956     ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
00957 
00958   if (ths->flags & PRE_PHI_HUT)
00959     precompute_phi_hut(ths);
00960 
00961   if(ths->flags & PRE_LIN_PSI)
00962   {
00963       ths->K = (1U<< 10) * (ths->m+2);
00964       ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
00965   }
00966 
00967   if(ths->flags & PRE_FG_PSI)
00968     ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
00969 
00970   if (ths->flags & PRE_PSI)
00971     ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
00972 
00973   if(ths->flags & PRE_FULL_PSI)
00974   {
00975       for (t = 0, lprod = 1; t < ths->d; t++)
00976         lprod *= 2 * ths->m + 2;
00977 
00978       ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
00979 
00980       ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
00981       ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
00982   }
00983 
00984   if (ths->flags & FFTW_INIT)
00985   {
00986     ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
00987 
00988     if (ths->flags & FFT_OUT_OF_PLACE)
00989       ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
00990     else
00991       ths->g2 = ths->g1;
00992 
00993     {
00994       int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
00995 
00996       for (t = 0; t < ths->d; t++)
00997         _n[t] = (int)(ths->n[t]);
00998 
00999       ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
01000       Y(free)(_n);
01001     }
01002   }
01003 
01004 //  if(ths->flags & NFFT_SORT_NODES)
01005 //    ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
01006 //  else
01007 //    ths->index_x = NULL;
01008 
01009   ths->mv_trafo = (void (*) (void* ))X(trafo);
01010   ths->mv_adjoint = (void (*) (void* ))X(adjoint);
01011 }
01012 
01013 void X(init)(X(plan) *ths, int d, int *N, int M_total)
01014 {
01015   int t; /* index over all dimensions */
01016 
01017   ths->d = (INT)d;
01018 
01019   ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
01020 
01021   for (t = 0; t < d; t++)
01022     ths->N[t] = (INT)N[t];
01023 
01024   ths->M_total = (INT)M_total;
01025 
01026   ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
01027 
01028   for (t = 0; t < d; t++)
01029     ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
01030 
01031   ths->m = WINDOW_HELP_ESTIMATE_m;
01032 
01033   if (d > 1)
01034   {
01035 //#ifdef _OPENMP
01036 //    ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
01037 //                      FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES |
01038 //          NFFT_OMP_BLOCKWISE_ADJOINT;
01039 //#else
01040     ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
01041                       FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES;
01042 //#endif
01043   }
01044   else
01045     ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
01046                       FFTW_INIT | FFT_OUT_OF_PLACE;
01047 
01048   ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
01049 
01050   init_help(ths);
01051 }
01052 
01053 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
01054   unsigned flags, unsigned fftw_flags)
01055 {
01056   INT t; /* index over all dimensions */
01057 
01058   ths->d = (INT)d;
01059   ths->M_total = (INT)M_total;
01060   ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
01061 
01062   for (t = 0; t < d; t++)
01063     ths->N[t] = (INT)N[t];
01064 
01065   ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
01066 
01067   for (t = 0; t < d; t++)
01068     ths->n[t] = (INT)n[t];
01069 
01070   ths->m = (INT)m;
01071 
01072   ths->flags = flags;
01073   ths->fftw_flags = fftw_flags;
01074 
01075   init_help(ths);
01076 }
01077 
01078 void X(init_1d)(X(plan) *ths, int N1, int M_total)
01079 {
01080   int N[1];
01081 
01082   N[0] = N1;
01083 
01084   X(init)(ths, 1, N, M_total);
01085 }
01086 
01087 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
01088 {
01089   int N[2];
01090 
01091   N[0] = N1;
01092   N[1] = N2;
01093 
01094   X(init)(ths, 2, N, M_total);
01095 }
01096 
01097 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
01098 {
01099   int N[3];
01100 
01101   N[0] = N1;
01102   N[1] = N2;
01103   N[2] = N3;
01104 
01105   X(init)(ths, 3, N, M_total);
01106 }
01107 
01108 const char* X(check)(X(plan) *ths)
01109 {
01110   INT j;
01111 
01112   if (!ths->f)
01113       return "Member f not initialized.";
01114 
01115   if (!ths->x)
01116       return "Member x not initialized.";
01117 
01118   if (!ths->f_hat)
01119       return "Member f_hat not initialized.";
01120 
01121   for (j = 0; j < ths->M_total * ths->d; j++)
01122   {
01123     if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
01124     {
01125       return "ths->x out of range [0.0,0.5)";
01126     }
01127   }
01128 
01129   for (j = 0; j < ths->d; j++)
01130   {
01131     if (ths->sigma[j] <= 1)
01132       return "Oversampling factor too small";
01133 
01134     if(ths->N[j] - 1 <= ths->m)
01135       return "Polynomial degree N is smaller than cut-off m";
01136 
01137     if(ths->N[j]%2 == 1)
01138       return "polynomial degree N has to be even";
01139   }
01140   return 0;
01141 }
01142 
01143 void X(finalize)(X(plan) *ths)
01144 {
01145   INT t; /* index over dimensions */
01146 
01147 //  if(ths->flags & NFFT_SORT_NODES)
01148 //    Y(free)(ths->index_x);
01149 
01150   if (ths->flags & FFTW_INIT)
01151   {
01152 #ifdef _OPENMP
01153     #pragma omp critical (nfft_omp_critical_fftw_plan)
01154 #endif
01155     FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
01156 
01157     if (ths->flags & FFT_OUT_OF_PLACE)
01158       Y(free)(ths->g2);
01159 
01160     Y(free)(ths->g1);
01161   }
01162 
01163   if(ths->flags & PRE_FULL_PSI)
01164   {
01165     Y(free)(ths->psi_index_g);
01166     Y(free)(ths->psi_index_f);
01167     Y(free)(ths->psi);
01168   }
01169 
01170   if (ths->flags & PRE_PSI)
01171     Y(free)(ths->psi);
01172 
01173   if(ths->flags & PRE_FG_PSI)
01174     Y(free)(ths->psi);
01175 
01176   if(ths->flags & PRE_LIN_PSI)
01177     Y(free)(ths->psi);
01178 
01179   if (ths->flags & PRE_PHI_HUT)
01180   {
01181     for (t = 0; t < ths->d; t++)
01182       Y(free)(ths->c_phi_inv[t]);
01183     Y(free)(ths->c_phi_inv);
01184   }
01185 
01186   if (ths->flags & MALLOC_F)
01187     Y(free)(ths->f);
01188 
01189   if(ths->flags & MALLOC_F_HAT)
01190     Y(free)(ths->f_hat);
01191 
01192   if (ths->flags & MALLOC_X)
01193     Y(free)(ths->x);
01194 
01195   WINDOW_HELP_FINALIZE;
01196 
01197   Y(free)(ths->N);
01198   Y(free)(ths->n);
01199   Y(free)(ths->sigma);
01200 
01201   Y(free)(ths->r2r_kind);
01202 } /* finalize */