![]() |
NFFT
3.3.2
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00019 /* Nonequispaced fast cosine transform */ 00020 00021 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */ 00022 00023 /* configure header */ 00024 #include "config.h" 00025 00026 /* complex datatype (maybe) */ 00027 #ifdef HAVE_COMPLEX_H 00028 #include<complex.h> 00029 #endif 00030 00031 /* NFFT headers */ 00032 #include "nfft3.h" 00033 #include "infft.h" 00034 00035 #ifdef _OPENMP 00036 #include <omp.h> 00037 #endif 00038 00039 #ifdef OMP_ASSERT 00040 #include <assert.h> 00041 #endif 00042 00043 #undef X 00044 #define X(name) NFST(name) 00045 00047 static inline INT intprod(const INT *vec, const INT a, const INT d) 00048 { 00049 INT t, p; 00050 00051 p = 1; 00052 for (t = 0; t < d; t++) 00053 p *= vec[t] - a; 00054 00055 return p; 00056 } 00057 00058 /* handy shortcuts */ 00059 #define BASE(x) SIN(x) 00060 #define NN(x) (x + 1) 00061 #define OFFSET 1 00062 #define FOURIER_TRAFO FFTW_RODFT00 00063 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT 00064 00065 #define NODE(p,r) (ths->x[(p) * ths->d + (r)]) 00066 00067 #define MACRO_with_FG_PSI fg_psi[t][lj[t]] 00068 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]] 00069 #define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \ 00070 - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t) 00071 #define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t) 00072 00088 void X(trafo_direct)(const X(plan) *ths) 00089 { 00090 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f; 00091 00092 memset(f, 0, (size_t)(ths->M_total) * sizeof(R)); 00093 00094 if (ths->d == 1) 00095 { 00096 /* specialize for univariate case, rationale: faster */ 00097 INT j; 00098 #ifdef _OPENMP 00099 #pragma omp parallel for default(shared) private(j) 00100 #endif 00101 for (j = 0; j < ths->M_total; j++) 00102 { 00103 INT k_L; 00104 for (k_L = 0; k_L < ths->N_total; k_L++) 00105 { 00106 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j]; 00107 f[j] += f_hat[k_L] * BASE(omega); 00108 } 00109 } 00110 } 00111 else 00112 { 00113 /* multivariate case */ 00114 INT j; 00115 #ifdef _OPENMP 00116 #pragma omp parallel for default(shared) private(j) 00117 #endif 00118 for (j = 0; j < ths->M_total; j++) 00119 { 00120 R x[ths->d], omega, Omega[ths->d + 1]; 00121 INT t, t2, k_L, k[ths->d]; 00122 Omega[0] = K(1.0); 00123 for (t = 0; t < ths->d; t++) 00124 { 00125 k[t] = OFFSET; 00126 x[t] = K2PI * ths->x[j * ths->d + t]; 00127 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t]; 00128 } 00129 omega = Omega[ths->d]; 00130 00131 for (k_L = 0; k_L < ths->N_total; k_L++) 00132 { 00133 f[j] += f_hat[k_L] * omega; 00134 { 00135 for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--) 00136 k[t] = OFFSET; 00137 00138 k[t]++; 00139 00140 for (t2 = t; t2 < ths->d; t2++) 00141 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2]; 00142 00143 omega = Omega[ths->d]; 00144 } 00145 } 00146 } 00147 } 00148 } 00149 00150 void X(adjoint_direct)(const X(plan) *ths) 00151 { 00152 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f; 00153 00154 memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R)); 00155 00156 if (ths->d == 1) 00157 { 00158 /* specialize for univariate case, rationale: faster */ 00159 #ifdef _OPENMP 00160 INT k_L; 00161 #pragma omp parallel for default(shared) private(k_L) 00162 for (k_L = 0; k_L < ths->N_total; k_L++) 00163 { 00164 INT j; 00165 for (j = 0; j < ths->M_total; j++) 00166 { 00167 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j]; 00168 f_hat[k_L] += f[j] * BASE(omega); 00169 } 00170 } 00171 #else 00172 INT j; 00173 for (j = 0; j < ths->M_total; j++) 00174 { 00175 INT k_L; 00176 for (k_L = 0; k_L < ths->N_total; k_L++) 00177 { 00178 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j]; 00179 f_hat[k_L] += f[j] * BASE(omega); 00180 } 00181 } 00182 #endif 00183 } 00184 else 00185 { 00186 /* multivariate case */ 00187 INT j, k_L; 00188 #ifdef _OPENMP 00189 #pragma omp parallel for default(shared) private(j, k_L) 00190 for (k_L = 0; k_L < ths->N_total; k_L++) 00191 { 00192 INT k[ths->d], k_temp, t; 00193 00194 k_temp = k_L; 00195 00196 for (t = ths->d - 1; t >= 0; t--) 00197 { 00198 k[t] = k_temp % ths->N[t]; 00199 k_temp /= ths->N[t]; 00200 } 00201 00202 for (j = 0; j < ths->M_total; j++) 00203 { 00204 R omega = K(1.0); 00205 for (t = 0; t < ths->d; t++) 00206 omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]); 00207 f_hat[k_L] += f[j] * omega; 00208 } 00209 } 00210 #else 00211 for (j = 0; j < ths->M_total; j++) 00212 { 00213 R x[ths->d], omega, Omega[ths->d+1]; 00214 INT t, t2, k[ths->d]; 00215 Omega[0] = K(1.0); 00216 for (t = 0; t < ths->d; t++) 00217 { 00218 k[t] = OFFSET; 00219 x[t] = K2PI * ths->x[j * ths->d + t]; 00220 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t]; 00221 } 00222 omega = Omega[ths->d]; 00223 for (k_L = 0; k_L < ths->N_total; k_L++) 00224 { 00225 f_hat[k_L] += f[j] * omega; 00226 00227 for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--) 00228 k[t] = OFFSET; 00229 00230 k[t]++; 00231 00232 for (t2 = t; t2 < ths->d; t2++) 00233 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2]; 00234 00235 omega = Omega[ths->d]; 00236 } 00237 } 00238 #endif 00239 } 00240 } 00241 00261 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op, 00262 const INT act_dim) 00263 { 00264 const R xj = ths->x[j * ths->d + act_dim]; 00265 INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)]))); 00266 00267 (*up) = c - (ths->m); 00268 (*op) = c + 1 + (ths->m); 00269 } 00270 00271 #define MACRO_D_compute_A \ 00272 { \ 00273 g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \ 00274 } 00275 00276 #define MACRO_D_compute_T \ 00277 { \ 00278 f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \ 00279 } 00280 00281 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R)); 00282 00283 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R)); 00284 00285 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]] 00286 00287 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t))) 00288 00289 #define MACRO_init_k_ks \ 00290 { \ 00291 for (t = 0; t < ths->d; t++) \ 00292 { \ 00293 kg[t] = 0; \ 00294 } \ 00295 i = 0; \ 00296 } 00297 00298 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \ 00299 { \ 00300 for (t = i; t < ths->d; t++) \ 00301 { \ 00302 MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \ 00303 kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \ 00304 } \ 00305 } 00306 00307 #define MACRO_update_c_phi_inv_k_A(which_phi) \ 00308 { \ 00309 c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \ 00310 } 00311 00312 #define MACRO_update_c_phi_inv_k_T(which_phi) \ 00313 { \ 00314 c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \ 00315 } 00316 00317 #define MACRO_count_k_ks \ 00318 { \ 00319 kg[ths->d - 1]++; \ 00320 i = ths->d - 1; \ 00321 \ 00322 while ((kg[i] == ths->N[i] - 1) && (i > 0)) \ 00323 { \ 00324 kg[i - 1]++; \ 00325 kg[i] = 0; \ 00326 i--; \ 00327 } \ 00328 } 00329 00330 /* sub routines for the fast transforms matrix vector multiplication with D, D^T */ 00331 #define MACRO_D(which_one) \ 00332 static inline void D_ ## which_one (X(plan) *ths) \ 00333 { \ 00334 R *g_hat, *f_hat; /* local copy */ \ 00335 R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \ 00336 INT t; /* index dimensions */ \ 00337 INT i; \ 00338 INT k_L; /* plain index */ \ 00339 INT kg[ths->d]; /* multi index in g_hat */ \ 00340 INT kg_plain[ths->d+1]; /* postfix plain index */ \ 00341 \ 00342 f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \ 00343 MACRO_D_init_result_ ## which_one; \ 00344 \ 00345 c_phi_inv_k[0] = K(1.0); \ 00346 kg_plain[0] = 0; \ 00347 \ 00348 MACRO_init_k_ks; \ 00349 \ 00350 if (ths->flags & PRE_PHI_HUT) \ 00351 { \ 00352 for (k_L = 0; k_L < ths->N_total; k_L++) \ 00353 { \ 00354 MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \ 00355 MACRO_D_compute_ ## which_one; \ 00356 MACRO_count_k_ks; \ 00357 } \ 00358 } \ 00359 else \ 00360 { \ 00361 for (k_L = 0; k_L < ths->N_total; k_L++) \ 00362 { \ 00363 MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \ 00364 MACRO_D_compute_ ## which_one; \ 00365 MACRO_count_k_ks; \ 00366 } \ 00367 } \ 00368 } 00369 00370 MACRO_D(A) 00371 MACRO_D(T) 00372 00373 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */ 00374 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R)); 00375 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R)); 00376 00377 #define MACRO_B_PRE_FULL_PSI_compute_A \ 00378 { \ 00379 (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \ 00380 } 00381 00382 #define MACRO_B_PRE_FULL_PSI_compute_T \ 00383 { \ 00384 g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj); \ 00385 } 00386 00387 #define MACRO_B_compute_A \ 00388 { \ 00389 (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \ 00390 } 00391 00392 #define MACRO_B_compute_T \ 00393 { \ 00394 g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \ 00395 } 00396 00397 #define MACRO_init_uo_l_lj_t \ 00398 { \ 00399 for (t2 = 0; t2 < ths->d; t2++) \ 00400 { \ 00401 uo(ths, j, &u[t2], &o[t2], t2); \ 00402 \ 00403 /* determine index in g-array corresponding to u[(t2)] */ \ 00404 if (u[(t2)] < 0) \ 00405 lg_offset[(t2)] = \ 00406 (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \ 00407 else \ 00408 lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \ 00409 if (lg_offset[(t2)] > NN(ths->n[(t2)])) \ 00410 lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \ 00411 \ 00412 if (lg_offset[t2] <= 0) \ 00413 { \ 00414 l[t2] = -lg_offset[t2]; \ 00415 count_lg[t2] = -1; \ 00416 } \ 00417 else \ 00418 { \ 00419 l[t2] = +lg_offset[t2]; \ 00420 count_lg[t2] = +1; \ 00421 } \ 00422 \ 00423 lj[t2] = 0; \ 00424 } \ 00425 t2 = 0; \ 00426 } 00427 00428 #define FOO_A ((R)count_lg[t]) 00429 00430 #define FOO_T ((R)count_lg[t]) 00431 00432 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \ 00433 { \ 00434 for (t = t2; t < ths->d; t++) \ 00435 { \ 00436 if ((l[t] != 0) && (l[t] != NN(ths->n[t]))) \ 00437 { \ 00438 phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \ 00439 ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t] - 1; \ 00440 } \ 00441 else \ 00442 { \ 00443 phi_prod[t + 1] = K(0.0); \ 00444 ll_plain[t+1] = ll_plain[t] * ths->n[t]; \ 00445 } \ 00446 } \ 00447 } 00448 00449 #define MACRO_count_uo_l_lj_t \ 00450 { \ 00451 /* turn around if we hit one of the boundaries */ \ 00452 if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \ 00453 count_lg[(ths->d-1)] *= -1; \ 00454 \ 00455 /* move array index */ \ 00456 l[(ths->d-1)] += count_lg[(ths->d-1)]; \ 00457 \ 00458 lj[ths->d - 1]++; \ 00459 t2 = ths->d - 1; \ 00460 \ 00461 while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \ 00462 { \ 00463 lj[t2 - 1]++; \ 00464 lj[t2] = 0; \ 00465 /* ansonsten lg[i-1] verschieben */ \ 00466 \ 00467 /* turn around if we hit one of the boundaries */ \ 00468 if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \ 00469 count_lg[(t2 - 1)] *= -1; \ 00470 /* move array index */ \ 00471 l[(t2 - 1)] += count_lg[(t2 - 1)]; \ 00472 \ 00473 /* lg[i] = anfangswert */ \ 00474 if (lg_offset[t2] <= 0) \ 00475 { \ 00476 l[t2] = -lg_offset[t2]; \ 00477 count_lg[t2] = -1; \ 00478 } \ 00479 else \ 00480 { \ 00481 l[t2] = +lg_offset[t2]; \ 00482 count_lg[t2] = +1; \ 00483 } \ 00484 \ 00485 t2--; \ 00486 } \ 00487 } 00488 00489 #define MACRO_B(which_one) \ 00490 static inline void B_ ## which_one (X(plan) *ths) \ 00491 { \ 00492 INT lprod; /* 'regular bandwidth' of matrix B */ \ 00493 INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \ 00494 INT t, t2; /* index dimensions */ \ 00495 INT j; /* index nodes */ \ 00496 INT l_L, ix; /* index one row of B */ \ 00497 INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \ 00498 INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \ 00499 INT ll_plain[ths->d+1]; /* postfix plain index in g */ \ 00500 R phi_prod[ths->d+1]; /* postfix product of PHI */ \ 00501 R *f, *g; /* local copy */ \ 00502 R *fj; /* local copy */ \ 00503 R y[ths->d]; \ 00504 R fg_psi[ths->d][2*ths->m+2]; \ 00505 R fg_exp_l[ths->d][2*ths->m+2]; \ 00506 INT l_fg,lj_fg; \ 00507 R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \ 00508 R ip_w; \ 00509 INT ip_u; \ 00510 INT ip_s = ths->K/(ths->m+2); \ 00511 INT lg_offset[ths->d]; /* offset in g according to u */ \ 00512 INT count_lg[ths->d]; /* count summands (2m+2) */ \ 00513 \ 00514 f = (R*)ths->f; g = (R*)ths->g; \ 00515 \ 00516 MACRO_B_init_result_ ## which_one \ 00517 \ 00518 if (ths->flags & PRE_FULL_PSI) \ 00519 { \ 00520 for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00521 { \ 00522 for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \ 00523 { \ 00524 MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \ 00525 } \ 00526 } \ 00527 return; \ 00528 } \ 00529 \ 00530 phi_prod[0] = K(1.0); \ 00531 ll_plain[0] = 0; \ 00532 \ 00533 for (t = 0, lprod = 1; t < ths->d; t++) \ 00534 lprod *= (2 * ths->m + 2); \ 00535 \ 00536 if (ths->flags & PRE_PSI) \ 00537 { \ 00538 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00539 { \ 00540 MACRO_init_uo_l_lj_t; \ 00541 \ 00542 for (l_L = 0; l_L < lprod; l_L++) \ 00543 { \ 00544 MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \ 00545 \ 00546 MACRO_B_compute_ ## which_one; \ 00547 \ 00548 MACRO_count_uo_l_lj_t; \ 00549 } /* for(l_L) */ \ 00550 } /* for(j) */ \ 00551 return; \ 00552 } /* if(PRE_PSI) */ \ 00553 \ 00554 if (ths->flags & PRE_FG_PSI) \ 00555 { \ 00556 for (t = 0; t < ths->d; t++) \ 00557 { \ 00558 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \ 00559 tmpEXP2sq = tmpEXP2 * tmpEXP2; \ 00560 tmp2 = K(1.0); \ 00561 tmp3 = K(1.0); \ 00562 fg_exp_l[t][0] = K(1.0); \ 00563 \ 00564 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \ 00565 { \ 00566 tmp3 = tmp2 * tmpEXP2; \ 00567 tmp2 *= tmpEXP2sq; \ 00568 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \ 00569 } \ 00570 } \ 00571 \ 00572 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00573 { \ 00574 MACRO_init_uo_l_lj_t; \ 00575 \ 00576 for (t = 0; t < ths->d; t++) \ 00577 { \ 00578 fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \ 00579 tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \ 00580 tmp1 = K(1.0); \ 00581 \ 00582 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \ 00583 { \ 00584 tmp1 *= tmpEXP1; \ 00585 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \ 00586 } \ 00587 } \ 00588 \ 00589 for (l_L= 0; l_L < lprod; l_L++) \ 00590 { \ 00591 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \ 00592 \ 00593 MACRO_B_compute_ ## which_one; \ 00594 \ 00595 MACRO_count_uo_l_lj_t; \ 00596 } \ 00597 } \ 00598 return; \ 00599 } \ 00600 \ 00601 if (ths->flags & FG_PSI) \ 00602 { \ 00603 for (t = 0; t < ths->d; t++) \ 00604 { \ 00605 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \ 00606 tmpEXP2sq = tmpEXP2 * tmpEXP2; \ 00607 tmp2 = K(1.0); \ 00608 tmp3 = K(1.0); \ 00609 fg_exp_l[t][0] = K(1.0); \ 00610 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \ 00611 { \ 00612 tmp3 = tmp2 * tmpEXP2; \ 00613 tmp2 *= tmpEXP2sq; \ 00614 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \ 00615 } \ 00616 } \ 00617 \ 00618 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00619 { \ 00620 MACRO_init_uo_l_lj_t; \ 00621 \ 00622 for (t = 0; t < ths->d; t++) \ 00623 { \ 00624 fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\ 00625 \ 00626 tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \ 00627 tmp1 = K(1.0); \ 00628 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \ 00629 { \ 00630 tmp1 *= tmpEXP1; \ 00631 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \ 00632 } \ 00633 } \ 00634 \ 00635 for (l_L = 0; l_L < lprod; l_L++) \ 00636 { \ 00637 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \ 00638 \ 00639 MACRO_B_compute_ ## which_one; \ 00640 \ 00641 MACRO_count_uo_l_lj_t; \ 00642 } \ 00643 } \ 00644 return; \ 00645 } \ 00646 \ 00647 if (ths->flags & PRE_LIN_PSI) \ 00648 { \ 00649 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00650 { \ 00651 MACRO_init_uo_l_lj_t; \ 00652 \ 00653 for (t = 0; t < ths->d; t++) \ 00654 { \ 00655 y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \ 00656 * ((R)ths->K))/(ths->m + 2); \ 00657 ip_u = LRINT(FLOOR(y[t])); \ 00658 ip_w = y[t]-ip_u; \ 00659 for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \ 00660 { \ 00661 fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \ 00662 * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \ 00663 * (ip_w); \ 00664 } \ 00665 } \ 00666 \ 00667 for (l_L = 0; l_L < lprod; l_L++) \ 00668 { \ 00669 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \ 00670 \ 00671 MACRO_B_compute_ ## which_one; \ 00672 \ 00673 MACRO_count_uo_l_lj_t; \ 00674 } /* for(l_L) */ \ 00675 } /* for(j) */ \ 00676 return; \ 00677 } /* if(PRE_LIN_PSI) */ \ 00678 \ 00679 /* no precomputed psi at all */ \ 00680 for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \ 00681 { \ 00682 MACRO_init_uo_l_lj_t; \ 00683 \ 00684 for (l_L = 0; l_L < lprod; l_L++) \ 00685 { \ 00686 MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \ 00687 \ 00688 MACRO_B_compute_ ## which_one; \ 00689 \ 00690 MACRO_count_uo_l_lj_t; \ 00691 } /* for (l_L) */ \ 00692 } /* for (j) */ \ 00693 } /* B */ 00694 00695 MACRO_B(A) 00696 MACRO_B(T) 00697 00701 void X(trafo)(X(plan) *ths) 00702 { 00703 switch(ths->d) 00704 { 00705 default: 00706 { 00707 /* use ths->my_fftw_r2r_plan */ 00708 ths->g_hat = ths->g1; 00709 ths->g = ths->g2; 00710 00711 /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for } 00712 * k \in I_N \f$ */ 00713 TIC(0) 00714 D_A(ths); 00715 TOC(0) 00716 00717 /* Compute by d-variate discrete Fourier transform 00718 * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}} 00719 * \text{ for } l \in I_n \f$ */ 00720 TIC_FFTW(1) 00721 FFTW(execute)(ths->my_fftw_r2r_plan); 00722 TOC_FFTW(1) 00723 00724 /*if (ths->flags & PRE_FULL_PSI) 00725 full_psi__A(ths);*/ 00726 00727 /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right) 00728 * \text{ for } j=0,\hdots,M-1 \f$ */ 00729 TIC(2) 00730 B_A(ths); 00731 TOC(2) 00732 00733 /*if (ths->flags & PRE_FULL_PSI) 00734 { 00735 Y(free)(ths->psi_index_g); 00736 Y(free)(ths->psi_index_f); 00737 }*/ 00738 } 00739 } 00740 } /* trafo */ 00741 00742 void X(adjoint)(X(plan) *ths) 00743 { 00744 switch(ths->d) 00745 { 00746 default: 00747 { 00748 /* use ths->my_fftw_plan */ 00749 ths->g_hat = ths->g2; 00750 ths->g = ths->g1; 00751 00752 /*if (ths->flags & PRE_FULL_PSI) 00753 full_psi__T(ths);*/ 00754 00755 /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right) 00756 * \text{ for } l \in I_n,m(x_j) \f$ */ 00757 TIC(2) 00758 B_T(ths); 00759 TOC(2) 00760 00761 /* Compute by d-variate discrete cosine transform 00762 * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}} 00763 * \text{ for } k \in I_N\f$ */ 00764 TIC_FFTW(1) 00765 FFTW(execute)(ths->my_fftw_r2r_plan); 00766 TOC_FFTW(1) 00767 00768 /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for } 00769 * k \in I_N \f$ */ 00770 TIC(0) 00771 D_T(ths); 00772 TOC(0) 00773 } 00774 } 00775 } /* adjoint */ 00776 00779 static inline void precompute_phi_hut(X(plan) *ths) 00780 { 00781 INT ks[ths->d]; /* index over all frequencies */ 00782 INT t; /* index over all dimensions */ 00783 00784 ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*)); 00785 00786 for (t = 0; t < ths->d; t++) 00787 { 00788 ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R)); 00789 00790 for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++) 00791 { 00792 ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t))); 00793 } 00794 } 00795 } /* phi_hut */ 00796 00802 void X(precompute_lin_psi)(X(plan) *ths) 00803 { 00804 INT t; 00805 INT j; 00806 R step; 00808 for (t = 0; t < ths->d; t++) 00809 { 00810 step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t]))); 00811 00812 for (j = 0; j <= ths->K; j++) 00813 { 00814 ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t); 00815 } /* for(j) */ 00816 } /* for(t) */ 00817 } 00818 00819 void X(precompute_fg_psi)(X(plan) *ths) 00820 { 00821 INT t; /* index over all dimensions */ 00822 INT u, o; /* depends on x_j */ 00823 00824 // sort(ths); 00825 00826 for (t = 0; t < ths->d; t++) 00827 { 00828 INT j; 00829 // #pragma omp parallel for default(shared) private(j,u,o) 00830 for (j = 0; j < ths->M_total; j++) 00831 { 00832 uo(ths, j, &u, &o, t); 00833 00834 ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t))); 00835 ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]); 00836 } /* for(j) */ 00837 } 00838 /* for(t) */ 00839 } /* nfft_precompute_fg_psi */ 00840 00841 void X(precompute_psi)(X(plan) *ths) 00842 { 00843 INT t; /* index over all dimensions */ 00844 INT lj; /* index 0<=lj<u+o+1 */ 00845 INT u, o; /* depends on x_j */ 00846 00847 //sort(ths); 00848 00849 for (t = 0; t < ths->d; t++) 00850 { 00851 INT j; 00852 00853 for (j = 0; j < ths->M_total; j++) 00854 { 00855 uo(ths, j, &u, &o, t); 00856 00857 for(lj = 0; lj < (2 * ths->m + 2); lj++) 00858 ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] = 00859 (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t)); 00860 } /* for (j) */ 00861 } /* for (t) */ 00862 } /* precompute_psi */ 00863 00864 void X(precompute_full_psi)(X(plan) *ths) 00865 { 00866 //#ifdef _OPENMP 00867 // sort(ths); 00868 // 00869 // nfft_precompute_full_psi_omp(ths); 00870 //#else 00871 INT t, t2; /* index over all dimensions */ 00872 INT j; /* index over all nodes */ 00873 INT l_L; /* plain index 0 <= l_L < lprod */ 00874 INT l[ths->d]; /* multi index u<=l<=o */ 00875 INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */ 00876 INT ll_plain[ths->d+1]; /* postfix plain index */ 00877 INT lprod; /* 'bandwidth' of matrix B */ 00878 INT u[ths->d], o[ths->d]; /* depends on x_j */ 00879 INT count_lg[ths->d]; 00880 INT lg_offset[ths->d]; 00881 00882 R phi_prod[ths->d+1]; 00883 00884 INT ix, ix_old; 00885 00886 //sort(ths); 00887 00888 phi_prod[0] = K(1.0); 00889 ll_plain[0] = 0; 00890 00891 for (t = 0, lprod = 1; t < ths->d; t++) 00892 lprod *= 2 * ths->m + 2; 00893 00894 for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++) 00895 { 00896 MACRO_init_uo_l_lj_t; 00897 00898 for (l_L = 0; l_L < lprod; l_L++, ix++) 00899 { 00900 MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI); 00901 00902 ths->psi_index_g[ix] = ll_plain[ths->d]; 00903 ths->psi[ix] = phi_prod[ths->d]; 00904 00905 MACRO_count_uo_l_lj_t; 00906 } /* for (l_L) */ 00907 00908 ths->psi_index_f[j] = ix - ix_old; 00909 ix_old = ix; 00910 } /* for(j) */ 00911 //#endif 00912 } 00913 00914 void X(precompute_one_psi)(X(plan) *ths) 00915 { 00916 if(ths->flags & PRE_PSI) 00917 X(precompute_psi)(ths); 00918 if(ths->flags & PRE_FULL_PSI) 00919 X(precompute_full_psi)(ths); 00920 if(ths->flags & PRE_FG_PSI) 00921 X(precompute_fg_psi)(ths); 00922 if(ths->flags & PRE_LIN_PSI) 00923 X(precompute_lin_psi)(ths); 00924 } 00925 00926 static inline void init_help(X(plan) *ths) 00927 { 00928 INT t; /* index over all dimensions */ 00929 INT lprod; /* 'bandwidth' of matrix B */ 00930 00931 if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT) 00932 ths->flags |= NFFT_SORT_NODES; 00933 00934 ths->N_total = intprod(ths->N, OFFSET, ths->d); 00935 ths->n_total = intprod(ths->n, 0, ths->d); 00936 00937 ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R)); 00938 00939 for (t = 0; t < ths->d; t++) 00940 ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t]; 00941 00942 /* Assign r2r transform kinds for each dimension */ 00943 ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind))); 00944 for (t = 0; t < ths->d; t++) 00945 ths->r2r_kind[t] = FOURIER_TRAFO; 00946 00947 WINDOW_HELP_INIT; 00948 00949 if (ths->flags & MALLOC_X) 00950 ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R)); 00951 00952 if (ths->flags & MALLOC_F_HAT) 00953 ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R)); 00954 00955 if (ths->flags & MALLOC_F) 00956 ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R)); 00957 00958 if (ths->flags & PRE_PHI_HUT) 00959 precompute_phi_hut(ths); 00960 00961 if(ths->flags & PRE_LIN_PSI) 00962 { 00963 ths->K = (1U<< 10) * (ths->m+2); 00964 ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R)); 00965 } 00966 00967 if(ths->flags & PRE_FG_PSI) 00968 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R)); 00969 00970 if (ths->flags & PRE_PSI) 00971 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R)); 00972 00973 if(ths->flags & PRE_FULL_PSI) 00974 { 00975 for (t = 0, lprod = 1; t < ths->d; t++) 00976 lprod *= 2 * ths->m + 2; 00977 00978 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R)); 00979 00980 ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT)); 00981 ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT)); 00982 } 00983 00984 if (ths->flags & FFTW_INIT) 00985 { 00986 ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R)); 00987 00988 if (ths->flags & FFT_OUT_OF_PLACE) 00989 ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R)); 00990 else 00991 ths->g2 = ths->g1; 00992 00993 { 00994 int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int)); 00995 00996 for (t = 0; t < ths->d; t++) 00997 _n[t] = (int)(ths->n[t]); 00998 00999 ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags); 01000 Y(free)(_n); 01001 } 01002 } 01003 01004 // if(ths->flags & NFFT_SORT_NODES) 01005 // ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total); 01006 // else 01007 // ths->index_x = NULL; 01008 01009 ths->mv_trafo = (void (*) (void* ))X(trafo); 01010 ths->mv_adjoint = (void (*) (void* ))X(adjoint); 01011 } 01012 01013 void X(init)(X(plan) *ths, int d, int *N, int M_total) 01014 { 01015 int t; /* index over all dimensions */ 01016 01017 ths->d = (INT)d; 01018 01019 ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT)); 01020 01021 for (t = 0; t < d; t++) 01022 ths->N[t] = (INT)N[t]; 01023 01024 ths->M_total = (INT)M_total; 01025 01026 ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT)); 01027 01028 for (t = 0; t < d; t++) 01029 ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET; 01030 01031 ths->m = WINDOW_HELP_ESTIMATE_m; 01032 01033 if (d > 1) 01034 { 01035 //#ifdef _OPENMP 01036 // ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F | 01037 // FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES | 01038 // NFFT_OMP_BLOCKWISE_ADJOINT; 01039 //#else 01040 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F | 01041 FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES; 01042 //#endif 01043 } 01044 else 01045 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F | 01046 FFTW_INIT | FFT_OUT_OF_PLACE; 01047 01048 ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT; 01049 01050 init_help(ths); 01051 } 01052 01053 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m, 01054 unsigned flags, unsigned fftw_flags) 01055 { 01056 INT t; /* index over all dimensions */ 01057 01058 ths->d = (INT)d; 01059 ths->M_total = (INT)M_total; 01060 ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT)); 01061 01062 for (t = 0; t < d; t++) 01063 ths->N[t] = (INT)N[t]; 01064 01065 ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT)); 01066 01067 for (t = 0; t < d; t++) 01068 ths->n[t] = (INT)n[t]; 01069 01070 ths->m = (INT)m; 01071 01072 ths->flags = flags; 01073 ths->fftw_flags = fftw_flags; 01074 01075 init_help(ths); 01076 } 01077 01078 void X(init_1d)(X(plan) *ths, int N1, int M_total) 01079 { 01080 int N[1]; 01081 01082 N[0] = N1; 01083 01084 X(init)(ths, 1, N, M_total); 01085 } 01086 01087 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total) 01088 { 01089 int N[2]; 01090 01091 N[0] = N1; 01092 N[1] = N2; 01093 01094 X(init)(ths, 2, N, M_total); 01095 } 01096 01097 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total) 01098 { 01099 int N[3]; 01100 01101 N[0] = N1; 01102 N[1] = N2; 01103 N[2] = N3; 01104 01105 X(init)(ths, 3, N, M_total); 01106 } 01107 01108 const char* X(check)(X(plan) *ths) 01109 { 01110 INT j; 01111 01112 if (!ths->f) 01113 return "Member f not initialized."; 01114 01115 if (!ths->x) 01116 return "Member x not initialized."; 01117 01118 if (!ths->f_hat) 01119 return "Member f_hat not initialized."; 01120 01121 for (j = 0; j < ths->M_total * ths->d; j++) 01122 { 01123 if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5))) 01124 { 01125 return "ths->x out of range [0.0,0.5)"; 01126 } 01127 } 01128 01129 for (j = 0; j < ths->d; j++) 01130 { 01131 if (ths->sigma[j] <= 1) 01132 return "Oversampling factor too small"; 01133 01134 if(ths->N[j] - 1 <= ths->m) 01135 return "Polynomial degree N is smaller than cut-off m"; 01136 01137 if(ths->N[j]%2 == 1) 01138 return "polynomial degree N has to be even"; 01139 } 01140 return 0; 01141 } 01142 01143 void X(finalize)(X(plan) *ths) 01144 { 01145 INT t; /* index over dimensions */ 01146 01147 // if(ths->flags & NFFT_SORT_NODES) 01148 // Y(free)(ths->index_x); 01149 01150 if (ths->flags & FFTW_INIT) 01151 { 01152 #ifdef _OPENMP 01153 #pragma omp critical (nfft_omp_critical_fftw_plan) 01154 #endif 01155 FFTW(destroy_plan)(ths->my_fftw_r2r_plan); 01156 01157 if (ths->flags & FFT_OUT_OF_PLACE) 01158 Y(free)(ths->g2); 01159 01160 Y(free)(ths->g1); 01161 } 01162 01163 if(ths->flags & PRE_FULL_PSI) 01164 { 01165 Y(free)(ths->psi_index_g); 01166 Y(free)(ths->psi_index_f); 01167 Y(free)(ths->psi); 01168 } 01169 01170 if (ths->flags & PRE_PSI) 01171 Y(free)(ths->psi); 01172 01173 if(ths->flags & PRE_FG_PSI) 01174 Y(free)(ths->psi); 01175 01176 if(ths->flags & PRE_LIN_PSI) 01177 Y(free)(ths->psi); 01178 01179 if (ths->flags & PRE_PHI_HUT) 01180 { 01181 for (t = 0; t < ths->d; t++) 01182 Y(free)(ths->c_phi_inv[t]); 01183 Y(free)(ths->c_phi_inv); 01184 } 01185 01186 if (ths->flags & MALLOC_F) 01187 Y(free)(ths->f); 01188 01189 if(ths->flags & MALLOC_F_HAT) 01190 Y(free)(ths->f_hat); 01191 01192 if (ths->flags & MALLOC_X) 01193 Y(free)(ths->x); 01194 01195 WINDOW_HELP_FINALIZE; 01196 01197 Y(free)(ths->N); 01198 Y(free)(ths->n); 01199 Y(free)(ths->sigma); 01200 01201 Y(free)(ths->r2r_kind); 01202 } /* finalize */