![]() |
NFFT
3.3.2
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00019 /* Nonequispaced fast cosine transform */ 00020 00021 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */ 00022 00023 /* configure header */ 00024 #include "config.h" 00025 00026 /* complex datatype (maybe) */ 00027 #ifdef HAVE_COMPLEX_H 00028 #include<complex.h> 00029 #endif 00030 00031 /* NFFT headers */ 00032 #include "nfft3.h" 00033 #include "infft.h" 00034 00035 #ifdef _OPENMP 00036 #include <omp.h> 00037 #endif 00038 00039 #ifdef OMP_ASSERT 00040 #include <assert.h> 00041 #endif 00042 00043 #undef X 00044 #define X(name) NFCT(name) 00045 00047 static inline INT intprod(const INT *vec, const INT a, const INT d) 00048 { 00049 INT t, p; 00050 00051 p = 1; 00052 for (t = 0; t < d; t++) 00053 p *= vec[t] - a; 00054 00055 return p; 00056 } 00057 00058 /* handy shortcuts */ 00059 #define BASE(x) COS(x) 00060 #define NN(x) (x - 1) 00061 #define OFFSET 0 00062 #define FOURIER_TRAFO FFTW_REDFT00 00063 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT 00064 00065 #define NODE(p,r) (ths->x[(p) * ths->d + (r)]) 00066 00067 #define MACRO_with_FG_PSI fg_psi[t][lj[t]] 00068 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]] 00069 #define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \ 00070 - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t) 00071 #define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t) 00072 00088 void X(trafo_direct)(const X(plan) *ths) 00089 { 00090 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f; 00091 00092 memset(f, 0, (size_t)(ths->M_total) * sizeof(R)); 00093 00094 if (ths->d == 1) 00095 { 00096 /* specialize for univariate case, rationale: faster */ 00097 INT j; 00098 #ifdef _OPENMP 00099 #pragma omp parallel for default(shared) private(j) 00100 #endif 00101 for (j = 0; j < ths->M_total; j++) 00102 { 00103 INT k_L; 00104 for (k_L = 0; k_L < ths->N_total; k_L++) 00105 { 00106 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j]; 00107 f[j] += f_hat[k_L] * BASE(omega); 00108 } 00109 } 00110 } 00111 else 00112 { 00113 /* multivariate case */ 00114 INT j; 00115 #ifdef _OPENMP 00116 #pragma omp parallel for default(shared) private(j) 00117 #endif 00118 for (j = 0; j < ths->M_total; j++) 00119 { 00120 R x[ths->d], omega, Omega[ths->d + 1]; 00121 INT t, t2, k_L, k[ths->d]; 00122 Omega[0] = K(1.0); 00123 for (t = 0; t < ths->d; t++) 00124 { 00125 k[t] = OFFSET; 00126 x[t] = K2PI * ths->x[j * ths->d + t]; 00127 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t]; 00128 } 00129 omega = Omega[ths->d]; 00130 00131 for (k_L = 0; k_L < ths->N_total; k_L++) 00132 { 00133 f[j] += f_hat[k_L] * omega; 00134 { 00135 for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--) 00136 k[t] = OFFSET; 00137 00138 k[t]++; 00139 00140 for (t2 = t; t2 < ths->d; t2++) 00141 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2]; 00142 00143 omega = Omega[ths->d]; 00144 } 00145 } 00146 } 00147 } 00148 } 00149 00150 void X(adjoint_direct)(const X(plan) *ths) 00151 { 00152 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f; 00153 00154 memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R)); 00155 00156 if (ths->d == 1) 00157 { 00158 /* specialize for univariate case, rationale: faster */ 00159 #ifdef _OPENMP 00160 INT k_L; 00161 #pragma omp parallel for default(shared) private(k_L) 00162 for (k_L = 0; k_L < ths->N_total; k_L++) 00163 { 00164 INT j; 00165 for (j = 0; j < ths->M_total; j++) 00166 { 00167 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j]; 00168 f_hat[k_L] += f[j] * BASE(omega); 00169 } 00170 } 00171 #else 00172 INT j; 00173 for (j = 0; j < ths->M_total; j++) 00174 { 00175 INT k_L; 00176 for (k_L = 0; k_L < ths->N_total; k_L++) 00177 { 00178 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j]; 00179 f_hat[k_L] += f[j] * BASE(omega); 00180 } 00181 } 00182 #endif 00183 } 00184 else 00185 { 00186 /* multivariate case */ 00187 INT j, k_L; 00188 #ifdef _OPENMP 00189 #pragma omp parallel for default(shared) private(j, k_L) 00190 for (k_L = 0; k_L < ths->N_total; k_L++) 00191 { 00192 INT k[ths->d], k_temp, t; 00193 00194 k_temp = k_L; 00195 00196 for (t = ths->d - 1; t >= 0; t--) 00197 { 00198 k[t] = k_temp % ths->N[t]; 00199 k_temp /= ths->N[t]; 00200 } 00201 00202 for (j = 0; j < ths->M_total; j++) 00203 { 00204 R omega = K(1.0); 00205 for (t = 0; t < ths->d; t++) 00206 omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]); 00207 f_hat[k_L] += f[j] * omega; 00208 } 00209 } 00210 #else 00211 for (j = 0; j < ths->M_total; j++) 00212 { 00213 R x[ths->d], omega, Omega[ths->d+1]; 00214 INT t, t2, k[ths->d]; 00215 Omega[0] = K(1.0); 00216 for (t = 0; t < ths->d; t++) 00217 { 00218 k[t] = OFFSET; 00219 x[t] = K2PI * ths->x[j * ths->d + t]; 00220 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t]; 00221 } 00222 omega = Omega[ths->d]; 00223 for (k_L = 0; k_L < ths->N_total; k_L++) 00224 { 00225 f_hat[k_L] += f[j] * omega; 00226 00227 for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--) 00228 k[t] = OFFSET; 00229 00230 k[t]++; 00231 00232 for (t2 = t; t2 < ths->d; t2++) 00233 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2]; 00234 00235 omega = Omega[ths->d]; 00236 } 00237 } 00238 #endif 00239 } 00240 } 00241 00261 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op, 00262 const INT act_dim) 00263 { 00264 const R xj = ths->x[j * ths->d + act_dim]; 00265 INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)]))); 00266 00267 (*up) = c - (ths->m); 00268 (*op) = c + 1 + (ths->m); 00269 } 00270 00271 #define MACRO_D_compute_A \ 00272 { \ 00273 g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \ 00274 } 00275 00276 #define MACRO_D_compute_T \ 00277 { \ 00278 f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \ 00279 } 00280 00281 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R)); 00282 00283 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R)); 00284 00285 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]] 00286 00287 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t))) 00288 00289 #define MACRO_init_k_ks \ 00290 { \ 00291 for (t = 0; t < ths->d; t++) \ 00292 { \ 00293 kg[t] = 0; \ 00294 } \ 00295 i = 0; \ 00296 } 00297 00298 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \ 00299 { \ 00300 for (t = i; t < ths->d; t++) \ 00301 { \ 00302 MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \ 00303 kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \ 00304 } \ 00305 } 00306 00307 #define MACRO_update_c_phi_inv_k_A(which_phi) \ 00308 { \ 00309 c_phi_inv_k[t+1] = (kg[t] == 0 ? K(1.0) : K(0.5)) * c_phi_inv_k[t] * MACRO_ ## which_phi; \ 00310 } 00311 00312 #define MACRO_update_c_phi_inv_k_T(which_phi) \ 00313 { \ 00314 c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \ 00315 } 00316 00317 #define MACRO_count_k_ks \ 00318 { \ 00319 kg[ths->d - 1]++; \ 00320 i = ths->d - 1; \ 00321 \ 00322 while ((kg[i] == ths->N[i]) && (i > 0)) \ 00323 { \ 00324 kg[i - 1]++; \ 00325 kg[i] = 0; \ 00326 i--; \ 00327 } \ 00328 } 00329 00330 /* sub routines for the fast transforms matrix vector multiplication with D, D^T */ 00331 #define MACRO_D(which_one) \ 00332 static inline void D_ ## which_one (X(plan) *ths) \ 00333 { \ 00334 R *g_hat, *f_hat; /* local copy */ \ 00335 R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \ 00336 INT t; /* index dimensions */ \ 00337 INT i; \ 00338 INT k_L; /* plain index */ \ 00339 INT kg[ths->d]; /* multi index in g_hat */ \ 00340 INT kg_plain[ths->d+1]; /* postfix plain index */ \ 00341 \ 00342 f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \ 00343 MACRO_D_init_result_ ## which_one; \ 00344 \ 00345 c_phi_inv_k[0] = K(1.0); \ 00346 kg_plain[0] = 0; \ 00347 \ 00348 MACRO_init_k_ks; \ 00349 \ 00350 if (ths->flags & PRE_PHI_HUT) \ 00351 { \ 00352 for (k_L = 0; k_L < ths->N_total; k_L++) \ 00353 { \ 00354 MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \ 00355 MACRO_D_compute_ ## which_one; \ 00356 MACRO_count_k_ks; \ 00357 } \ 00358 } \ 00359 else \ 00360 { \ 00361 for (k_L = 0; k_L < ths->N_total; k_L++) \ 00362 { \ 00363 MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \ 00364 MACRO_D_compute_ ## which_one; \ 00365 MACRO_count_k_ks; \ 00366 } \ 00367 } \ 00368 } 00369 00370 MACRO_D(A) 00371 MACRO_D(T) 00372 00373 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */ 00374 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R)); 00375 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R)); 00376 00377 #define MACRO_B_PRE_FULL_PSI_compute_A \ 00378 { \ 00379 (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \ 00380 } 00381 00382 #define MACRO_B_PRE_FULL_PSI_compute_T \ 00383 { \ 00384 R factor = K(1.0); \ 00385 INT d = ths->psi_index_g[ix]; \ 00386 for (t = ths->d - 1; t >= 0; t--) \ 00387 { \ 00388 INT m = d % ths->n[t]; \ 00389 if (m != 0 && m != ths->n[t] - 1) \ 00390 factor *= K(0.5); \ 00391 d = d / ths->n[t]; \ 00392 } \ 00393 g[ths->psi_index_g[ix]] += factor * ths->psi[ix] * (*fj); \ 00394 } 00395 00396 #define MACRO_B_compute_A \ 00397 { \ 00398 (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \ 00399 } 00400 00401 #define MACRO_B_compute_T \ 00402 { \ 00403 g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \ 00404 } 00405 00406 #define MACRO_init_uo_l_lj_t \ 00407 { \ 00408 for (t2 = 0; t2 < ths->d; t2++) \ 00409 { \ 00410 uo(ths, j, &u[t2], &o[t2], t2); \ 00411 \ 00412 /* determine index in g-array corresponding to u[(t2)] */ \ 00413 if (u[(t2)] < 0) \ 00414 lg_offset[(t2)] = \ 00415 (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \ 00416 else \ 00417 lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \ 00418 if (lg_offset[(t2)] > NN(ths->n[(t2)])) \ 00419 lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \ 00420 \ 00421 if (lg_offset[t2] <= 0) \ 00422 { \ 00423 l[t2] = -lg_offset[t2]; \ 00424 count_lg[t2] = -1; \ 00425 } \ 00426 else \ 00427 { \ 00428 l[t2] = +lg_offset[t2]; \ 00429 count_lg[t2] = +1; \ 00430 } \ 00431 \ 00432 lj[t2] = 0; \ 00433 } \ 00434 t2 = 0; \ 00435 } 00436 00437 #define FOO_A K(1.0) 00438 00439 #define FOO_T ((l[t] == 0 || l[t] == ths->n[t] - 1) ? K(1.0) : K(0.5)) 00440 00441 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \ 00442 { \ 00443 for (t = t2; t < ths->d; t++) \ 00444 { \ 00445 phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \ 00446 ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t]; \ 00447 } \ 00448 } 00449 00450 #define MACRO_count_uo_l_lj_t \ 00451 { \ 00452 /* turn around if we hit one of the boundaries */ \ 00453 if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \ 00454 count_lg[(ths->d-1)] *= -1; \ 00455 \ 00456 /* move array index */ \ 00457 l[(ths->d-1)] += count_lg[(ths->d-1)]; \ 00458 \ 00459 lj[ths->d - 1]++; \ 00460 t2 = ths->d - 1; \ 00461 \ 00462 while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \ 00463 { \ 00464 lj[t2 - 1]++; \ 00465 lj[t2] = 0; \ 00466 /* ansonsten lg[i-1] verschieben */ \ 00467 \ 00468 /* turn around if we hit one of the boundaries */ \ 00469 if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \ 00470 count_lg[(t2 - 1)] *= -1; \ 00471 /* move array index */ \ 00472 l[(t2 - 1)] += count_lg[(t2 - 1)]; \ 00473 \ 00474 /* lg[i] = anfangswert */ \ 00475 if (lg_offset[t2] <= 0) \ 00476 { \ 00477 l[t2] = -lg_offset[t2]; \ 00478 count_lg[t2] = -1; \ 00479 } \ 00480 else \ 00481 { \ 00482 l[t2] = +lg_offset[t2]; \ 00483 count_lg[t2] = +1; \ 00484 } \ 00485 \ 00486 t2--; \ 00487 } \ 00488 } 00489 00490 #define MACRO_B(which_one) \ 00491 static inline void B_ ## which_one (X(plan) *ths) \ 00492 { \ 00493 INT lprod; /* 'regular bandwidth' of matrix B */ \ 00494 INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \ 00495 INT t, t2; /* index dimensions */ \ 00496 INT j; /* index nodes */ \ 00497 INT l_L, ix; /* index one row of B */ \ 00498 INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \ 00499 INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \ 00500 INT ll_plain[ths->d+1]; /* postfix plain index in g */ \ 00501 R phi_prod[ths->d+1]; /* postfix product of PHI */ \ 00502 R *f, *g; /* local copy */ \ 00503 R *fj; /* local copy */ \ 00504 R y[ths->d]; \ 00505 R fg_psi[ths->d][2*ths->m+2]; \ 00506 R fg_exp_l[ths->d][2*ths->m+2]; \ 00507 INT l_fg,lj_fg; \ 00508 R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \ 00509 R ip_w; \ 00510 INT ip_u; \ 00511 INT ip_s = ths->K/(ths->m+2); \ 00512 INT lg_offset[ths->d]; /* offset in g according to u */ \ 00513 INT count_lg[ths->d]; /* count summands (2m+2) */ \ 00514 \ 00515 f = (R*)ths->f; g = (R*)ths->g; \ 00516 \ 00517 MACRO_B_init_result_ ## which_one \ 00518 \ 00519 if (ths->flags & PRE_FULL_PSI) \ 00520 { \ 00521 for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00522 { \ 00523 for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \ 00524 { \ 00525 MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \ 00526 } \ 00527 } \ 00528 return; \ 00529 } \ 00530 \ 00531 phi_prod[0] = K(1.0); \ 00532 ll_plain[0] = 0; \ 00533 \ 00534 for (t = 0, lprod = 1; t < ths->d; t++) \ 00535 lprod *= (2 * ths->m + 2); \ 00536 \ 00537 if (ths->flags & PRE_PSI) \ 00538 { \ 00539 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00540 { \ 00541 MACRO_init_uo_l_lj_t; \ 00542 \ 00543 for (l_L = 0; l_L < lprod; l_L++) \ 00544 { \ 00545 MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \ 00546 \ 00547 MACRO_B_compute_ ## which_one; \ 00548 \ 00549 MACRO_count_uo_l_lj_t; \ 00550 } /* for(l_L) */ \ 00551 } /* for(j) */ \ 00552 return; \ 00553 } /* if(PRE_PSI) */ \ 00554 \ 00555 if (ths->flags & PRE_FG_PSI) \ 00556 { \ 00557 for (t = 0; t < ths->d; t++) \ 00558 { \ 00559 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \ 00560 tmpEXP2sq = tmpEXP2 * tmpEXP2; \ 00561 tmp2 = K(1.0); \ 00562 tmp3 = K(1.0); \ 00563 fg_exp_l[t][0] = K(1.0); \ 00564 \ 00565 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \ 00566 { \ 00567 tmp3 = tmp2 * tmpEXP2; \ 00568 tmp2 *= tmpEXP2sq; \ 00569 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \ 00570 } \ 00571 } \ 00572 \ 00573 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00574 { \ 00575 MACRO_init_uo_l_lj_t; \ 00576 \ 00577 for (t = 0; t < ths->d; t++) \ 00578 { \ 00579 fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \ 00580 tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \ 00581 tmp1 = K(1.0); \ 00582 \ 00583 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \ 00584 { \ 00585 tmp1 *= tmpEXP1; \ 00586 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \ 00587 } \ 00588 } \ 00589 \ 00590 for (l_L= 0; l_L < lprod; l_L++) \ 00591 { \ 00592 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \ 00593 \ 00594 MACRO_B_compute_ ## which_one; \ 00595 \ 00596 MACRO_count_uo_l_lj_t; \ 00597 } \ 00598 } \ 00599 return; \ 00600 } \ 00601 \ 00602 if (ths->flags & FG_PSI) \ 00603 { \ 00604 for (t = 0; t < ths->d; t++) \ 00605 { \ 00606 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \ 00607 tmpEXP2sq = tmpEXP2 * tmpEXP2; \ 00608 tmp2 = K(1.0); \ 00609 tmp3 = K(1.0); \ 00610 fg_exp_l[t][0] = K(1.0); \ 00611 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \ 00612 { \ 00613 tmp3 = tmp2 * tmpEXP2; \ 00614 tmp2 *= tmpEXP2sq; \ 00615 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \ 00616 } \ 00617 } \ 00618 \ 00619 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00620 { \ 00621 MACRO_init_uo_l_lj_t; \ 00622 \ 00623 for (t = 0; t < ths->d; t++) \ 00624 { \ 00625 fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\ 00626 \ 00627 tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \ 00628 tmp1 = K(1.0); \ 00629 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \ 00630 { \ 00631 tmp1 *= tmpEXP1; \ 00632 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \ 00633 } \ 00634 } \ 00635 \ 00636 for (l_L = 0; l_L < lprod; l_L++) \ 00637 { \ 00638 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \ 00639 \ 00640 MACRO_B_compute_ ## which_one; \ 00641 \ 00642 MACRO_count_uo_l_lj_t; \ 00643 } \ 00644 } \ 00645 return; \ 00646 } \ 00647 \ 00648 if (ths->flags & PRE_LIN_PSI) \ 00649 { \ 00650 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \ 00651 { \ 00652 MACRO_init_uo_l_lj_t; \ 00653 \ 00654 for (t = 0; t < ths->d; t++) \ 00655 { \ 00656 y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \ 00657 * ((R)ths->K))/(ths->m + 2); \ 00658 ip_u = LRINT(FLOOR(y[t])); \ 00659 ip_w = y[t]-ip_u; \ 00660 for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \ 00661 { \ 00662 fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \ 00663 * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \ 00664 * (ip_w); \ 00665 } \ 00666 } \ 00667 \ 00668 for (l_L = 0; l_L < lprod; l_L++) \ 00669 { \ 00670 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \ 00671 \ 00672 MACRO_B_compute_ ## which_one; \ 00673 \ 00674 MACRO_count_uo_l_lj_t; \ 00675 } /* for(l_L) */ \ 00676 } /* for(j) */ \ 00677 return; \ 00678 } /* if(PRE_LIN_PSI) */ \ 00679 \ 00680 /* no precomputed psi at all */ \ 00681 for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \ 00682 { \ 00683 MACRO_init_uo_l_lj_t; \ 00684 \ 00685 for (l_L = 0; l_L < lprod; l_L++) \ 00686 { \ 00687 MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \ 00688 \ 00689 MACRO_B_compute_ ## which_one; \ 00690 \ 00691 MACRO_count_uo_l_lj_t; \ 00692 } /* for (l_L) */ \ 00693 } /* for (j) */ \ 00694 } /* B */ 00695 00696 MACRO_B(A) 00697 MACRO_B(T) 00698 00702 void X(trafo)(X(plan) *ths) 00703 { 00704 switch(ths->d) 00705 { 00706 default: 00707 { 00708 /* use ths->my_fftw_r2r_plan */ 00709 ths->g_hat = ths->g1; 00710 ths->g = ths->g2; 00711 00712 /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for } 00713 * k \in I_N \f$ */ 00714 TIC(0) 00715 D_A(ths); 00716 TOC(0) 00717 00718 /* Compute by d-variate discrete Fourier transform 00719 * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}} 00720 * \text{ for } l \in I_n \f$ */ 00721 TIC_FFTW(1) 00722 FFTW(execute)(ths->my_fftw_r2r_plan); 00723 TOC_FFTW(1) 00724 00725 /*if (ths->flags & PRE_FULL_PSI) 00726 full_psi__A(ths);*/ 00727 00728 /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right) 00729 * \text{ for } j=0,\hdots,M-1 \f$ */ 00730 TIC(2) 00731 B_A(ths); 00732 TOC(2) 00733 00734 /*if (ths->flags & PRE_FULL_PSI) 00735 { 00736 Y(free)(ths->psi_index_g); 00737 Y(free)(ths->psi_index_f); 00738 }*/ 00739 } 00740 } 00741 } /* trafo */ 00742 00743 void X(adjoint)(X(plan) *ths) 00744 { 00745 switch(ths->d) 00746 { 00747 default: 00748 { 00749 /* use ths->my_fftw_plan */ 00750 ths->g_hat = ths->g2; 00751 ths->g = ths->g1; 00752 00753 /*if (ths->flags & PRE_FULL_PSI) 00754 full_psi__T(ths);*/ 00755 00756 /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right) 00757 * \text{ for } l \in I_n,m(x_j) \f$ */ 00758 TIC(2) 00759 B_T(ths); 00760 TOC(2) 00761 00762 /* Compute by d-variate discrete cosine transform 00763 * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}} 00764 * \text{ for } k \in I_N\f$ */ 00765 TIC_FFTW(1) 00766 FFTW(execute)(ths->my_fftw_r2r_plan); 00767 TOC_FFTW(1) 00768 00769 /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for } 00770 * k \in I_N \f$ */ 00771 TIC(0) 00772 D_T(ths); 00773 TOC(0) 00774 } 00775 } 00776 } /* adjoint */ 00777 00780 static inline void precompute_phi_hut(X(plan) *ths) 00781 { 00782 INT ks[ths->d]; /* index over all frequencies */ 00783 INT t; /* index over all dimensions */ 00784 00785 ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*)); 00786 00787 for (t = 0; t < ths->d; t++) 00788 { 00789 ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R)); 00790 00791 for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++) 00792 { 00793 ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t))); 00794 } 00795 } 00796 } /* phi_hut */ 00797 00803 void X(precompute_lin_psi)(X(plan) *ths) 00804 { 00805 INT t; 00806 INT j; 00807 R step; 00809 for (t = 0; t < ths->d; t++) 00810 { 00811 step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t]))); 00812 00813 for (j = 0; j <= ths->K; j++) 00814 { 00815 ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t); 00816 } /* for(j) */ 00817 } /* for(t) */ 00818 } 00819 00820 void X(precompute_fg_psi)(X(plan) *ths) 00821 { 00822 INT t; /* index over all dimensions */ 00823 INT u, o; /* depends on x_j */ 00824 00825 // sort(ths); 00826 00827 for (t = 0; t < ths->d; t++) 00828 { 00829 INT j; 00830 // #pragma omp parallel for default(shared) private(j,u,o) 00831 for (j = 0; j < ths->M_total; j++) 00832 { 00833 uo(ths, j, &u, &o, t); 00834 00835 ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t))); 00836 ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]); 00837 } /* for(j) */ 00838 } 00839 /* for(t) */ 00840 } /* nfft_precompute_fg_psi */ 00841 00842 void X(precompute_psi)(X(plan) *ths) 00843 { 00844 INT t; /* index over all dimensions */ 00845 INT lj; /* index 0<=lj<u+o+1 */ 00846 INT u, o; /* depends on x_j */ 00847 00848 //sort(ths); 00849 00850 for (t = 0; t < ths->d; t++) 00851 { 00852 INT j; 00853 00854 for (j = 0; j < ths->M_total; j++) 00855 { 00856 uo(ths, j, &u, &o, t); 00857 00858 for(lj = 0; lj < (2 * ths->m + 2); lj++) 00859 ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] = 00860 (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t)); 00861 } /* for (j) */ 00862 } /* for (t) */ 00863 } /* precompute_psi */ 00864 00865 void X(precompute_full_psi)(X(plan) *ths) 00866 { 00867 //#ifdef _OPENMP 00868 // sort(ths); 00869 // 00870 // nfft_precompute_full_psi_omp(ths); 00871 //#else 00872 INT t, t2; /* index over all dimensions */ 00873 INT j; /* index over all nodes */ 00874 INT l_L; /* plain index 0 <= l_L < lprod */ 00875 INT l[ths->d]; /* multi index u<=l<=o */ 00876 INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */ 00877 INT ll_plain[ths->d+1]; /* postfix plain index */ 00878 INT lprod; /* 'bandwidth' of matrix B */ 00879 INT u[ths->d], o[ths->d]; /* depends on x_j */ 00880 INT count_lg[ths->d]; 00881 INT lg_offset[ths->d]; 00882 00883 R phi_prod[ths->d+1]; 00884 00885 INT ix, ix_old; 00886 00887 //sort(ths); 00888 00889 phi_prod[0] = K(1.0); 00890 ll_plain[0] = 0; 00891 00892 for (t = 0, lprod = 1; t < ths->d; t++) 00893 lprod *= 2 * ths->m + 2; 00894 00895 for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++) 00896 { 00897 MACRO_init_uo_l_lj_t; 00898 00899 for (l_L = 0; l_L < lprod; l_L++, ix++) 00900 { 00901 MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI); 00902 00903 ths->psi_index_g[ix] = ll_plain[ths->d]; 00904 ths->psi[ix] = phi_prod[ths->d]; 00905 00906 MACRO_count_uo_l_lj_t; 00907 } /* for (l_L) */ 00908 00909 ths->psi_index_f[j] = ix - ix_old; 00910 ix_old = ix; 00911 } /* for(j) */ 00912 //#endif 00913 } 00914 00915 void X(precompute_one_psi)(X(plan) *ths) 00916 { 00917 if(ths->flags & PRE_PSI) 00918 X(precompute_psi)(ths); 00919 if(ths->flags & PRE_FULL_PSI) 00920 X(precompute_full_psi)(ths); 00921 if(ths->flags & PRE_FG_PSI) 00922 X(precompute_fg_psi)(ths); 00923 if(ths->flags & PRE_LIN_PSI) 00924 X(precompute_lin_psi)(ths); 00925 } 00926 00927 static inline void init_help(X(plan) *ths) 00928 { 00929 INT t; /* index over all dimensions */ 00930 INT lprod; /* 'bandwidth' of matrix B */ 00931 00932 if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT) 00933 ths->flags |= NFFT_SORT_NODES; 00934 00935 ths->N_total = intprod(ths->N, OFFSET, ths->d); 00936 ths->n_total = intprod(ths->n, 0, ths->d); 00937 00938 ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R)); 00939 00940 for (t = 0; t < ths->d; t++) 00941 ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t]; 00942 00943 /* Assign r2r transform kinds for each dimension */ 00944 ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind))); 00945 for (t = 0; t < ths->d; t++) 00946 ths->r2r_kind[t] = FOURIER_TRAFO; 00947 00948 WINDOW_HELP_INIT; 00949 00950 if (ths->flags & MALLOC_X) 00951 ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R)); 00952 00953 if (ths->flags & MALLOC_F_HAT) 00954 ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R)); 00955 00956 if (ths->flags & MALLOC_F) 00957 ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R)); 00958 00959 if (ths->flags & PRE_PHI_HUT) 00960 precompute_phi_hut(ths); 00961 00962 if(ths->flags & PRE_LIN_PSI) 00963 { 00964 ths->K = (1U<< 10) * (ths->m+2); 00965 ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R)); 00966 } 00967 00968 if(ths->flags & PRE_FG_PSI) 00969 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R)); 00970 00971 if (ths->flags & PRE_PSI) 00972 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R)); 00973 00974 if(ths->flags & PRE_FULL_PSI) 00975 { 00976 for (t = 0, lprod = 1; t < ths->d; t++) 00977 lprod *= 2 * ths->m + 2; 00978 00979 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R)); 00980 00981 ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT)); 00982 ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT)); 00983 } 00984 00985 if (ths->flags & FFTW_INIT) 00986 { 00987 ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R)); 00988 00989 if (ths->flags & FFT_OUT_OF_PLACE) 00990 ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R)); 00991 else 00992 ths->g2 = ths->g1; 00993 00994 { 00995 int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int)); 00996 00997 for (t = 0; t < ths->d; t++) 00998 _n[t] = (int)(ths->n[t]); 00999 01000 ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags); 01001 Y(free)(_n); 01002 } 01003 } 01004 01005 // if(ths->flags & NFFT_SORT_NODES) 01006 // ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total); 01007 // else 01008 // ths->index_x = NULL; 01009 01010 ths->mv_trafo = (void (*) (void* ))X(trafo); 01011 ths->mv_adjoint = (void (*) (void* ))X(adjoint); 01012 } 01013 01014 void X(init)(X(plan) *ths, int d, int *N, int M_total) 01015 { 01016 int t; /* index over all dimensions */ 01017 01018 ths->d = (INT)d; 01019 01020 ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT)); 01021 01022 for (t = 0; t < d; t++) 01023 ths->N[t] = (INT)N[t]; 01024 01025 ths->M_total = (INT)M_total; 01026 01027 ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT)); 01028 01029 for (t = 0; t < d; t++) 01030 ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET; 01031 01032 ths->m = WINDOW_HELP_ESTIMATE_m; 01033 01034 if (d > 1) 01035 { 01036 //#ifdef _OPENMP 01037 // ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F | 01038 // FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES | 01039 // NFFT_OMP_BLOCKWISE_ADJOINT; 01040 //#else 01041 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F | 01042 FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES; 01043 //#endif 01044 } 01045 else 01046 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F | 01047 FFTW_INIT | FFT_OUT_OF_PLACE; 01048 01049 ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT; 01050 01051 init_help(ths); 01052 } 01053 01054 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m, 01055 unsigned flags, unsigned fftw_flags) 01056 { 01057 INT t; /* index over all dimensions */ 01058 01059 ths->d = (INT)d; 01060 ths->M_total = (INT)M_total; 01061 ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT)); 01062 01063 for (t = 0; t < d; t++) 01064 ths->N[t] = (INT)N[t]; 01065 01066 ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT)); 01067 01068 for (t = 0; t < d; t++) 01069 ths->n[t] = (INT)n[t]; 01070 01071 ths->m = (INT)m; 01072 01073 ths->flags = flags; 01074 ths->fftw_flags = fftw_flags; 01075 01076 init_help(ths); 01077 } 01078 01079 void X(init_1d)(X(plan) *ths, int N1, int M_total) 01080 { 01081 int N[1]; 01082 01083 N[0] = N1; 01084 01085 X(init)(ths, 1, N, M_total); 01086 } 01087 01088 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total) 01089 { 01090 int N[2]; 01091 01092 N[0] = N1; 01093 N[1] = N2; 01094 01095 X(init)(ths, 2, N, M_total); 01096 } 01097 01098 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total) 01099 { 01100 int N[3]; 01101 01102 N[0] = N1; 01103 N[1] = N2; 01104 N[2] = N3; 01105 01106 X(init)(ths, 3, N, M_total); 01107 } 01108 01109 const char* X(check)(X(plan) *ths) 01110 { 01111 INT j; 01112 01113 if (!ths->f) 01114 return "Member f not initialized."; 01115 01116 if (!ths->x) 01117 return "Member x not initialized."; 01118 01119 if (!ths->f_hat) 01120 return "Member f_hat not initialized."; 01121 01122 for (j = 0; j < ths->M_total * ths->d; j++) 01123 { 01124 if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5))) 01125 { 01126 return "ths->x out of range [0.0,0.5)"; 01127 } 01128 } 01129 01130 for (j = 0; j < ths->d; j++) 01131 { 01132 if (ths->sigma[j] <= 1) 01133 return "Oversampling factor too small"; 01134 01135 if(ths->N[j] - 1 <= ths->m) 01136 return "Polynomial degree N is smaller than cut-off m"; 01137 01138 if(ths->N[j]%2 == 1) 01139 return "polynomial degree N has to be even"; 01140 } 01141 return 0; 01142 } 01143 01144 void X(finalize)(X(plan) *ths) 01145 { 01146 INT t; /* index over dimensions */ 01147 01148 // if(ths->flags & NFFT_SORT_NODES) 01149 // Y(free)(ths->index_x); 01150 01151 if (ths->flags & FFTW_INIT) 01152 { 01153 #ifdef _OPENMP 01154 #pragma omp critical (nfft_omp_critical_fftw_plan) 01155 #endif 01156 FFTW(destroy_plan)(ths->my_fftw_r2r_plan); 01157 01158 if (ths->flags & FFT_OUT_OF_PLACE) 01159 Y(free)(ths->g2); 01160 01161 Y(free)(ths->g1); 01162 } 01163 01164 if(ths->flags & PRE_FULL_PSI) 01165 { 01166 Y(free)(ths->psi_index_g); 01167 Y(free)(ths->psi_index_f); 01168 Y(free)(ths->psi); 01169 } 01170 01171 if (ths->flags & PRE_PSI) 01172 Y(free)(ths->psi); 01173 01174 if(ths->flags & PRE_FG_PSI) 01175 Y(free)(ths->psi); 01176 01177 if(ths->flags & PRE_LIN_PSI) 01178 Y(free)(ths->psi); 01179 01180 if (ths->flags & PRE_PHI_HUT) 01181 { 01182 for (t = 0; t < ths->d; t++) 01183 Y(free)(ths->c_phi_inv[t]); 01184 Y(free)(ths->c_phi_inv); 01185 } 01186 01187 if (ths->flags & MALLOC_F) 01188 Y(free)(ths->f); 01189 01190 if(ths->flags & MALLOC_F_HAT) 01191 Y(free)(ths->f_hat); 01192 01193 if (ths->flags & MALLOC_X) 01194 Y(free)(ths->x); 01195 01196 WINDOW_HELP_FINALIZE; 01197 01198 Y(free)(ths->N); 01199 Y(free)(ths->n); 01200 Y(free)(ths->sigma); 01201 01202 Y(free)(ths->r2r_kind); 01203 } /* finalize */