![]() |
NFFT
3.3.2
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00025 #include "config.h" 00026 00027 #include <stdlib.h> 00028 #include <string.h> 00029 #include <stdbool.h> 00030 #include <math.h> 00031 #ifdef HAVE_COMPLEX_H 00032 #include <complex.h> 00033 #endif 00034 00035 #include "nfft3.h" 00036 #include "infft.h" 00037 00038 /* Macros for index calculation. */ 00039 00041 #define K_START_TILDE(x,y) (MAX(MIN(x,y-2),0)) 00042 00044 #define K_END_TILDE(x,y) MIN(x,y-1) 00045 00047 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y))) 00048 00050 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1) 00051 00052 #define N_TILDE(y) (y-1) 00053 00054 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z)) 00055 00056 #define FPT_BREAK_EVEN 4 00057 00061 typedef struct fpt_step_ 00062 { 00063 bool stable; 00066 int Ns; 00067 int ts; 00068 double **a11,**a12,**a21,**a22; 00069 double *g; 00070 } fpt_step; 00071 00075 typedef struct fpt_data_ 00076 { 00077 fpt_step **steps; 00078 int k_start; 00079 double *alphaN; 00080 double *betaN; 00081 double *gammaN; 00082 double alpha_0; 00083 double beta_0; 00084 double gamma_m1; 00085 /* Data for direct transform. */ 00086 double *_alpha; 00087 double *_beta; 00088 double *_gamma; 00089 } fpt_data; 00090 00094 typedef struct fpt_set_s_ 00095 { 00096 int flags; 00097 int M; 00098 int N; 00100 int t; 00101 fpt_data *dpt; 00102 double **xcvecs; 00105 double *xc; 00106 double _Complex *temp; 00107 double _Complex *work; 00108 double _Complex *result; 00109 double _Complex *vec3; 00110 double _Complex *vec4; 00111 double _Complex *z; 00112 fftw_plan *plans_dct3; 00114 fftw_plan *plans_dct2; 00116 fftw_r2r_kind *kinds; 00118 fftw_r2r_kind *kindsr; 00121 int *lengths; 00123 /* Data for slow transforms. */ 00124 double *xc_slow; 00125 } fpt_set_s; 00126 00127 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x, 00128 double* v, double _Complex* y, double* w, int n) 00129 { 00130 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; 00131 double *v_ptr = v, *w_ptr = w; 00132 for (l = 0; l < n; l++) 00133 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); 00134 } 00135 00136 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \ 00137 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \ 00138 double* v, double _Complex* y, double* w, int n) \ 00139 { \ 00140 const int n2 = n>>1; \ 00141 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \ 00142 double *v_ptr = v, *w_ptr = w; \ 00143 for (l = 0; l < n2; l++) \ 00144 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \ 00145 v_ptr--; w_ptr--; \ 00146 for (l = 0; l < n2; l++) \ 00147 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \ 00148 } 00149 00150 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0) 00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0) 00152 00153 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \ 00154 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \ 00155 double* v, double _Complex* y, int n, double *xx) \ 00156 { \ 00157 const int n2 = n>>1; \ 00158 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \ 00159 double *v_ptr = v, *xx_ptr = xx; \ 00160 for (l = 0; l < n2; l++, v_ptr++) \ 00161 *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \ 00162 v_ptr--; \ 00163 for (l = 0; l < n2; l++, v_ptr--) \ 00164 *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \ 00165 } 00166 00167 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0) 00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0) 00169 00170 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \ 00171 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \ 00172 double _Complex* y, double* w, int n, double *xx) \ 00173 { \ 00174 const int n2 = n>>1; \ 00175 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \ 00176 double *w_ptr = w, *xx_ptr = xx; \ 00177 for (l = 0; l < n2; l++, w_ptr++) \ 00178 *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \ 00179 w_ptr--; \ 00180 for (l = 0; l < n2; l++, w_ptr--) \ 00181 *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \ 00182 } 00183 00184 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0) 00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0) 00186 00187 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v, 00188 double _Complex* y, double* w, int n) 00189 { 00190 int l; 00191 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; 00192 double *v_ptr = v, *w_ptr = w; 00193 for (l = n; l > 0; l--) 00194 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); 00195 } 00196 00197 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x, 00198 double* v, double _Complex* y, double* w, int n) 00199 { 00200 const int n2 = n>>1; \ 00201 int l; 00202 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; 00203 double *v_ptr = v, *w_ptr = w; 00204 for (l = n2; l > 0; l--) 00205 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); 00206 v_ptr--; w_ptr--; 00207 for (l = n2; l > 0; l--) 00208 *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++)); 00209 } 00210 00211 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x, 00212 double* v, double _Complex* y, double* w, int n, double *xx) 00213 { 00214 const int n2 = n>>1; \ 00215 int l; 00216 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; 00217 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx; 00218 for (l = n2; l > 0; l--, xx_ptr++) 00219 *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++)); 00220 v_ptr--; w_ptr--; 00221 for (l = n2; l > 0; l--, xx_ptr++) 00222 *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++)); 00223 } 00224 00225 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x, 00226 double* v, double _Complex* y, double* w, int n, double *xx) 00227 { 00228 const int n2 = n>>1; \ 00229 int l; 00230 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; 00231 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx; 00232 for (l = n2; l > 0; l--, xx_ptr++) 00233 *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++)); 00234 v_ptr--; w_ptr--; 00235 for (l = n2; l > 0; l--, xx_ptr++) 00236 *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++)); 00237 } 00238 00239 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \ 00240 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \ 00241 double *a21, double *a22, double g, int tau, fpt_set set) \ 00242 { \ 00243 \ 00244 int length = 1<<(tau+1); \ 00245 \ 00246 double norm = 1.0/(length<<1); \ 00247 \ 00248 /* Compensate for factors introduced by a raw DCT-III. */ \ 00249 a[0] *= 2.0; \ 00250 b[0] *= 2.0; \ 00251 \ 00252 /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \ 00253 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \ 00254 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \ 00255 \ 00256 /*for (k = 0; k < length; k++)*/ \ 00257 /*{*/ \ 00258 /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \ 00259 /* a11[k],a12[k],a21[k],a22[k]);*/ \ 00260 /*}*/ \ 00261 \ 00262 /* Check, if gamma is zero. */ \ 00263 if (g == 0.0) \ 00264 { \ 00265 /*fprintf(stderr,"gamma == 0!\n");*/ \ 00266 /* Perform multiplication only for second row. */ \ 00267 M2_FUNCTION(norm,b,b,a22,a,a21,length); \ 00268 } \ 00269 else \ 00270 { \ 00271 /*fprintf(stderr,"gamma != 0!\n");*/ \ 00272 /* Perform multiplication for both rows. */ \ 00273 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \ 00274 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \ 00275 memcpy(b,set->z,length*sizeof(double _Complex)); \ 00276 /* Compute Chebyshev-coefficients using a DCT-II. */ \ 00277 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \ 00278 /* Compensate for factors introduced by a raw DCT-II. */ \ 00279 a[0] *= 0.5; \ 00280 } \ 00281 \ 00282 /* Compute Chebyshev-coefficients using a DCT-II. */ \ 00283 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \ 00284 /* Compensate for factors introduced by a raw DCT-II. */ \ 00285 b[0] *= 0.5; \ 00286 } 00287 00288 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy) 00289 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric) 00290 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy) 00291 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/ 00292 00293 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b, 00294 double *a11, double *a12, double *a21, double *a22, double *x, 00295 double gam, int tau, fpt_set set) 00296 { 00298 int length = 1<<(tau+1); 00300 double norm = 1.0/(length<<1); 00301 00302 UNUSED(a21); UNUSED(a22); 00303 00304 /* Compensate for factors introduced by a raw DCT-III. */ 00305 a[0] *= 2.0; 00306 b[0] *= 2.0; 00307 00308 /* Compute function values from Chebyshev-coefficients using a DCT-III. */ 00309 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); 00310 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); 00311 00312 /*for (k = 0; k < length; k++)*/ 00313 /*{*/ 00314 /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ 00315 /* a11[k],a12[k],a21[k],a22[k]);*/ 00316 /*}*/ 00317 00318 /* Check, if gamma is zero. */ 00319 if (gam == 0.0) 00320 { 00321 /*fprintf(stderr,"gamma == 0!\n");*/ 00322 /* Perform multiplication only for second row. */ 00323 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x); 00324 } 00325 else 00326 { 00327 /*fprintf(stderr,"gamma != 0!\n");*/ 00328 /* Perform multiplication for both rows. */ 00329 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x); 00330 auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length); 00331 memcpy(b,set->z,length*sizeof(double _Complex)); 00332 /* Compute Chebyshev-coefficients using a DCT-II. */ 00333 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); 00334 /* Compensate for factors introduced by a raw DCT-II. */ 00335 a[0] *= 0.5; 00336 } 00337 00338 /* Compute Chebyshev-coefficients using a DCT-II. */ 00339 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); 00340 /* Compensate for factors introduced by a raw DCT-II. */ 00341 b[0] *= 0.5; 00342 } 00343 00344 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b, 00345 double *a11, double *a12, double *a21, double *a22, double *x, double gam, int tau, fpt_set set) 00346 { 00348 int length = 1<<(tau+1); 00350 double norm = 1.0/(length<<1); 00351 00352 /* Compensate for factors introduced by a raw DCT-III. */ 00353 a[0] *= 2.0; 00354 b[0] *= 2.0; 00355 00356 UNUSED(a11); UNUSED(a12); 00357 00358 /* Compute function values from Chebyshev-coefficients using a DCT-III. */ 00359 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); 00360 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); 00361 00362 /*for (k = 0; k < length; k++)*/ 00363 /*{*/ 00364 /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ 00365 /* a11[k],a12[k],a21[k],a22[k]);*/ 00366 /*}*/ 00367 00368 /* Check, if gamma is zero. */ 00369 if (gam == 0.0) 00370 { 00371 /*fprintf(stderr,"gamma == 0!\n");*/ 00372 /* Perform multiplication only for second row. */ 00373 auvxpwy_symmetric(norm,b,b,a22,a,a21,length); 00374 } 00375 else 00376 { 00377 /*fprintf(stderr,"gamma != 0!\n");*/ 00378 /* Perform multiplication for both rows. */ 00379 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length); 00380 auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x); 00381 memcpy(b,set->z,length*sizeof(double _Complex)); 00382 /* Compute Chebyshev-coefficients using a DCT-II. */ 00383 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); 00384 /* Compensate for factors introduced by a raw DCT-II. */ 00385 a[0] *= 0.5; 00386 } 00387 00388 /* Compute Chebyshev-coefficients using a DCT-II. */ 00389 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); 00390 /* Compensate for factors introduced by a raw DCT-II. */ 00391 b[0] *= 0.5; 00392 } 00393 00394 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \ 00395 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \ 00396 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \ 00397 { \ 00398 \ 00399 int length = 1<<(tau+1); \ 00400 \ 00401 double norm = 1.0/(length<<1); \ 00402 \ 00403 /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \ 00404 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \ 00405 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \ 00406 \ 00407 /* Perform matrix multiplication. */ \ 00408 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \ 00409 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \ 00410 memcpy(a,set->z,length*sizeof(double _Complex)); \ 00411 \ 00412 /* Compute Chebyshev-coefficients using a DCT-II. */ \ 00413 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \ 00414 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \ 00415 } 00416 00417 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy) 00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2) 00419 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/ 00420 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/ 00421 00422 00423 static inline void fpt_do_step_t_symmetric_u(double _Complex *a, 00424 double _Complex *b, double *a11, double *a12, double *x, 00425 double gam, int tau, fpt_set set) 00426 { 00428 int length = 1<<(tau+1); 00430 double norm = 1.0/(length<<1); 00431 00432 /* Compute function values from Chebyshev-coefficients using a DCT-III. */ 00433 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); 00434 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); 00435 00436 /* Perform matrix multiplication. */ 00437 abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x); 00438 abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x); 00439 memcpy(a,set->z,length*sizeof(double _Complex)); 00440 00441 /* Compute Chebyshev-coefficients using a DCT-II. */ 00442 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); 00443 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); 00444 } 00445 00446 static inline void fpt_do_step_t_symmetric_l(double _Complex *a, 00447 double _Complex *b, double *a21, double *a22, 00448 double *x, double gam, int tau, fpt_set set) 00449 { 00451 int length = 1<<(tau+1); 00453 double norm = 1.0/(length<<1); 00454 00455 /* Compute function values from Chebyshev-coefficients using a DCT-III. */ 00456 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); 00457 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); 00458 00459 /* Perform matrix multiplication. */ 00460 abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x); 00461 abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x); 00462 memcpy(a,set->z,length*sizeof(double _Complex)); 00463 00464 /* Compute Chebyshev-coefficients using a DCT-II. */ 00465 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); 00466 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); 00467 } 00468 00469 00470 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha, 00471 const double *beta, const double *gam) 00472 { 00473 /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector 00474 * of knots x[0], ..., x[size-1] by the Clenshaw algorithm 00475 */ 00476 int i,j; 00477 double a,b,x_val_act,a_old; 00478 const double *x_act; 00479 double *y_act; 00480 const double *alpha_act, *beta_act, *gamma_act; 00481 00482 /* Traverse all nodes. */ 00483 x_act = x; 00484 y_act = y; 00485 for (i = 0; i < size; i++) 00486 { 00487 a = 1.0; 00488 b = 0.0; 00489 x_val_act = *x_act; 00490 00491 if (k == 0) 00492 { 00493 *y_act = 1.0; 00494 } 00495 else 00496 { 00497 alpha_act = &(alpha[k]); 00498 beta_act = &(beta[k]); 00499 gamma_act = &(gam[k]); 00500 for (j = k; j > 1; j--) 00501 { 00502 a_old = a; 00503 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act)); 00504 b = a_old*(*gamma_act); 00505 alpha_act--; 00506 beta_act--; 00507 gamma_act--; 00508 } 00509 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b); 00510 } 00511 x_act++; 00512 y_act++; 00513 } 00514 } 00515 00516 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha, 00517 const double *beta, const double *gam) 00518 { 00519 /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector 00520 * of knots x[0], ..., x[size-1] by the Clenshaw algorithm 00521 */ 00522 int i,j; 00523 double a,b,x_val_act,a_old; 00524 const double *x_act; 00525 double *y_act, *z_act; 00526 const double *alpha_act, *beta_act, *gamma_act; 00527 00528 /* Traverse all nodes. */ 00529 x_act = x; 00530 y_act = y; 00531 z_act = z; 00532 for (i = 0; i < size; i++) 00533 { 00534 a = 1.0; 00535 b = 0.0; 00536 x_val_act = *x_act; 00537 00538 if (k == 0) 00539 { 00540 *y_act = 1.0; 00541 *z_act = 0.0; 00542 } 00543 else 00544 { 00545 alpha_act = &(alpha[k]); 00546 beta_act = &(beta[k]); 00547 gamma_act = &(gam[k]); 00548 for (j = k; j > 1; j--) 00549 { 00550 a_old = a; 00551 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act)); 00552 b = a_old*(*gamma_act); 00553 alpha_act--; 00554 beta_act--; 00555 gamma_act--; 00556 } 00557 if (i < size1) 00558 *z_act = a; 00559 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b); 00560 } 00561 00562 x_act++; 00563 y_act++; 00564 z_act++; 00565 } 00566 /*gamma_act++; 00567 FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a"); 00568 fprintf(f,"size1: %10d, size: %10d\n",size1,size); 00569 fclose(f);*/ 00570 } 00571 00572 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k, 00573 const double *alpha, const double *beta, const double *gam, const 00574 double threshold) 00575 { 00576 /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector 00577 * of knots x[0], ..., x[size-1] by the Clenshaw algorithm 00578 */ 00579 int i,j; 00580 double a,b,x_val_act,a_old; 00581 const double *x_act; 00582 double *y_act, *z_act; 00583 const double *alpha_act, *beta_act, *gamma_act; 00584 R max = -nfft_float_property(NFFT_R_MAX); 00585 const R t = LOG10(FABS(threshold)); 00586 00587 /* Traverse all nodes. */ 00588 x_act = x; 00589 y_act = y; 00590 z_act = z; 00591 for (i = 0; i < size; i++) 00592 { 00593 a = 1.0; 00594 b = 0.0; 00595 x_val_act = *x_act; 00596 00597 if (k == 0) 00598 { 00599 *y_act = 1.0; 00600 *z_act = 0.0; 00601 } 00602 else 00603 { 00604 alpha_act = &(alpha[k]); 00605 beta_act = &(beta[k]); 00606 gamma_act = &(gam[k]); 00607 for (j = k; j > 1; j--) 00608 { 00609 a_old = a; 00610 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act)); 00611 b = a_old*(*gamma_act); 00612 alpha_act--; 00613 beta_act--; 00614 gamma_act--; 00615 } 00616 *z_act = a; 00617 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b); 00618 max = FMAX(max,LOG10(FABS(*y_act))); 00619 if (max > t) 00620 return 1; 00621 } 00622 x_act++; 00623 y_act++; 00624 z_act++; 00625 } 00626 return 0; 00627 } 00628 00629 static inline void eval_sum_clenshaw_fast(const int N, const int M, 00630 const double _Complex *a, const double *x, double _Complex *y, 00631 const double *alpha, const double *beta, const double *gam, 00632 const double lambda) 00633 { 00634 int j,k; 00635 double _Complex tmp1, tmp2, tmp3; 00636 double xc; 00637 00638 /*fprintf(stderr, "Executing eval_sum_clenshaw_fast.\n"); 00639 fprintf(stderr, "Before transform:\n"); 00640 for (j = 0; j < N; j++) 00641 fprintf(stderr, "a[%4d] = %e.\n", j, a[j]); 00642 for (j = 0; j <= M; j++) 00643 fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);*/ 00644 00645 if (N == 0) 00646 for (j = 0; j <= M; j++) 00647 y[j] = a[0]; 00648 else 00649 { 00650 for (j = 0; j <= M; j++) 00651 { 00652 #if 0 00653 xc = x[j]; 00654 tmp2 = a[N]; 00655 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2; 00656 for (k = N-1; k > 0; k--) 00657 { 00658 tmp3 = a[k-1] 00659 + (alpha[k-1] * xc + beta[k-1]) * tmp1 00660 + gam[k] * tmp2; 00661 tmp2 = tmp1; 00662 tmp1 = tmp3; 00663 } 00664 y[j] = lambda * tmp1; 00665 #else 00666 xc = x[j]; 00667 tmp1 = a[N-1]; 00668 tmp2 = a[N]; 00669 for (k = N-1; k > 0; k--) 00670 { 00671 tmp3 = a[k-1] + tmp2 * gam[k]; 00672 tmp2 *= (alpha[k] * xc + beta[k]); 00673 tmp2 += tmp1; 00674 tmp1 = tmp3; 00675 /*if (j == 1515) 00676 { 00677 fprintf(stderr, "k = %d, tmp1 = %e, tmp2 = %e.\n", k, tmp1, tmp2); 00678 }*/ 00679 } 00680 tmp2 *= (alpha[0] * xc + beta[0]); 00681 //fprintf(stderr, "alpha[0] = %e, beta[0] = %e.\n", alpha[0], beta[0]); 00682 y[j] = lambda * (tmp2 + tmp1); 00683 //fprintf(stderr, "lambda = %e.\n", lambda); 00684 #endif 00685 } 00686 } 00687 /*fprintf(stderr, "Before transform:\n"); 00688 for (j = 0; j < N; j++) 00689 fprintf(stderr, "a[%4d] = %e.\n", j, a[j]); 00690 for (j = 0; j <= M; j++) 00691 fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]); */ 00692 } 00693 00712 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x, 00713 double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam, 00714 double lambda) 00715 { 00716 int j,k; 00717 double _Complex* it1 = temp; 00718 double _Complex* it2 = y; 00719 double _Complex aux; 00720 00721 /* Compute final result by multiplying with the constant lambda */ 00722 a[0] = 0.0; 00723 for (j = 0; j <= M; j++) 00724 { 00725 it2[j] = lambda * y[j]; 00726 a[0] += it2[j]; 00727 } 00728 00729 if (N > 0) 00730 { 00731 /* Compute final step. */ 00732 a[1] = 0.0; 00733 for (j = 0; j <= M; j++) 00734 { 00735 it1[j] = it2[j]; 00736 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]); 00737 a[1] += it2[j]; 00738 } 00739 00740 for (k = 2; k <= N; k++) 00741 { 00742 a[k] = 0.0; 00743 for (j = 0; j <= M; j++) 00744 { 00745 aux = it1[j]; 00746 it1[j] = it2[j]; 00747 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux; 00748 a[k] += it2[j]; 00749 } 00750 } 00751 } 00752 } 00753 00754 00755 fpt_set fpt_init(const int M, const int t, const unsigned int flags) 00756 { 00758 int plength; 00760 int tau; 00762 int m; 00763 int k; 00764 #ifdef _OPENMP 00765 int nthreads = X(get_num_threads)(); 00766 #endif 00767 00768 /* Allocate memory for new DPT set. */ 00769 fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s)); 00770 00771 /* Save parameters in structure. */ 00772 set->flags = flags; 00773 00774 set->M = M; 00775 set->t = t; 00776 set->N = 1<<t; 00777 00778 /* Allocate memory for M transforms. */ 00779 set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data)); 00780 00781 /* Initialize with NULL pointer. */ 00782 for (m = 0; m < set->M; m++) 00783 set->dpt[m].steps = 0; 00784 00785 /* Create arrays with Chebyshev nodes. */ 00786 00787 /* Initialize array with Chebyshev coefficients for the polynomial x. This 00788 * would be trivially an array containing a 1 as second entry with all other 00789 * coefficients set to zero. In order to compensate for the multiplicative 00790 * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */ 00791 00792 /* Allocate memory for array of pointers to node arrays. */ 00793 set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*)); 00794 /* For each polynomial length starting with 4, compute the Chebyshev nodes 00795 * using a DCT-III. */ 00796 plength = 4; 00797 for (tau = 1; tau < t+1; tau++) 00798 { 00799 /* Allocate memory for current array. */ 00800 set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double)); 00801 for (k = 0; k < plength; k++) 00802 { 00803 set->xcvecs[tau-1][k] = cos(((k+0.5)*KPI)/plength); 00804 } 00805 plength = plength << 1; 00806 } 00807 00809 set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex)); 00810 set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex)); 00811 00812 set->lengths = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int)); 00813 set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/)); 00814 set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind)); 00815 set->kindsr[0] = FFTW_REDFT10; 00816 set->kindsr[1] = FFTW_REDFT10; 00817 for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1) 00818 { 00819 set->lengths[tau] = plength; 00820 #ifdef _OPENMP 00821 #pragma omp critical (nfft_omp_critical_fftw_plan) 00822 { 00823 fftw_plan_with_nthreads(nthreads); 00824 #endif 00825 set->plans_dct2[tau] = 00826 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL, 00827 2, 1, (double*)set->result, NULL, 2, 1,set->kindsr, 00828 0); 00829 #ifdef _OPENMP 00830 } 00831 #endif 00832 } 00833 00834 /* Check if fast transform is activated. */ 00835 if (!(set->flags & FPT_NO_FAST_ALGORITHM)) 00836 { 00838 set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex)); 00839 set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex)); 00840 set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex)); 00841 00843 set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/)); 00844 set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind)); 00845 set->kinds[0] = FFTW_REDFT01; 00846 set->kinds[1] = FFTW_REDFT01; 00847 for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1) 00848 { 00849 set->lengths[tau] = plength; 00850 #ifdef _OPENMP 00851 #pragma omp critical (nfft_omp_critical_fftw_plan) 00852 { 00853 fftw_plan_with_nthreads(nthreads); 00854 #endif 00855 set->plans_dct3[tau] = 00856 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL, 00857 2, 1, (double*)set->result, NULL, 2, 1, set->kinds, 00858 0); 00859 #ifdef _OPENMP 00860 } 00861 #endif 00862 } 00863 nfft_free(set->lengths); 00864 nfft_free(set->kinds); 00865 nfft_free(set->kindsr); 00866 set->lengths = NULL; 00867 set->kinds = NULL; 00868 set->kindsr = NULL; 00869 } 00870 00871 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM)) 00872 { 00873 set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double)); 00874 set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex)); 00875 } 00876 00877 /* Return the newly created DPT set. */ 00878 return set; 00879 } 00880 00881 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, 00882 double *gam, int k_start, const double threshold) 00883 { 00884 00885 int tau; 00886 int l; 00887 int plength; 00889 int degree; 00891 int firstl; 00892 int lastl; 00893 int plength_stab; 00895 int degree_stab; 00897 double *a11; 00899 double *a12; 00901 double *a21; 00903 double *a22; 00905 const double *calpha; 00906 const double *cbeta; 00907 const double *cgamma; 00908 int needstab = 0; 00909 int k_start_tilde; 00910 int N_tilde; 00911 int clength; 00912 int clength_1; 00913 int clength_2; 00914 int t_stab, N_stab; 00915 fpt_data *data; 00916 00917 /* Get pointer to DPT data. */ 00918 data = &(set->dpt[m]); 00919 00920 /* Check, if already precomputed. */ 00921 if (data->steps != NULL) 00922 return; 00923 00924 /* Save k_start. */ 00925 data->k_start = k_start; 00926 00927 data->gamma_m1 = gam[0]; 00928 00929 /* Check if fast transform is activated. */ 00930 if (!(set->flags & FPT_NO_FAST_ALGORITHM)) 00931 { 00932 /* Save recursion coefficients. */ 00933 data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex)); 00934 data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex)); 00935 data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex)); 00936 00937 for (tau = 2; tau <= set->t; tau++) 00938 { 00939 00940 data->alphaN[tau-2] = alpha[1<<tau]; 00941 data->betaN[tau-2] = beta[1<<tau]; 00942 data->gammaN[tau-2] = gam[1<<tau]; 00943 } 00944 00945 data->alpha_0 = alpha[1]; 00946 data->beta_0 = beta[1]; 00947 00948 k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start) 00949 /*set->N*/); 00950 N_tilde = N_TILDE(set->N); 00951 00952 /* Allocate memory for the cascade with t = log_2(N) many levels. */ 00953 data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t); 00954 00955 /* For tau = 1,...t compute the matrices U_{n,tau,l}. */ 00956 plength = 4; 00957 for (tau = 1; tau < set->t; tau++) 00958 { 00959 /* Compute auxilliary values. */ 00960 degree = plength>>1; 00961 /* Compute first l. */ 00962 firstl = FIRST_L(k_start_tilde,plength); 00963 /* Compute last l. */ 00964 lastl = LAST_L(N_tilde,plength); 00965 00966 /* Allocate memory for current level. This level will contain 2^{t-tau-1} 00967 * many matrices. */ 00968 data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step) 00969 * (lastl+1)); 00970 00971 /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */ 00972 for (l = firstl; l <= lastl; l++) 00973 { 00974 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength)) 00975 { 00976 //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m); 00977 //fflush(stderr); 00978 clength = plength/2; 00979 } 00980 else 00981 { 00982 clength = plength; 00983 } 00984 00985 /* Allocate memory for the components of U_{n,tau,l}. */ 00986 a11 = (double*) nfft_malloc(sizeof(double)*clength); 00987 a12 = (double*) nfft_malloc(sizeof(double)*clength); 00988 a21 = (double*) nfft_malloc(sizeof(double)*clength); 00989 a22 = (double*) nfft_malloc(sizeof(double)*clength); 00990 00991 /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev 00992 * nodes. */ 00993 00994 /* Get the pointers to the three-term recurrence coeffcients. */ 00995 calpha = &(alpha[plength*l+1+1]); 00996 cbeta = &(beta[plength*l+1+1]); 00997 cgamma = &(gam[plength*l+1+1]); 00998 00999 if (set->flags & FPT_NO_STABILIZATION) 01000 { 01001 /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */ 01002 calpha--; 01003 cbeta--; 01004 cgamma--; 01005 eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta, 01006 cgamma); 01007 eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta, 01008 cgamma); 01009 needstab = 0; 01010 } 01011 else 01012 { 01013 calpha--; 01014 cbeta--; 01015 cgamma--; 01016 /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */ 01017 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength, 01018 degree-1, calpha, cbeta, cgamma, threshold); 01019 if (needstab == 0) 01020 { 01021 /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */ 01022 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength, 01023 degree, calpha, cbeta, cgamma, threshold); 01024 } 01025 } 01026 01027 01028 /* Check if stabilization needed. */ 01029 if (needstab == 0) 01030 { 01031 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*)); 01032 data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*)); 01033 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*)); 01034 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*)); 01035 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double)); 01036 /* No stabilization needed. */ 01037 data->steps[tau][l].a11[0] = a11; 01038 data->steps[tau][l].a12[0] = a12; 01039 data->steps[tau][l].a21[0] = a21; 01040 data->steps[tau][l].a22[0] = a22; 01041 data->steps[tau][l].g[0] = gam[plength*l+1+1]; 01042 data->steps[tau][l].stable = true; 01043 } 01044 else 01045 { 01046 /* Stabilize. */ 01047 degree_stab = degree*(2*l+1); 01048 X(next_power_of_2_exp_int)((l+1)*(1<<(tau+1)),&N_stab,&t_stab); 01049 01050 /* Old arrays are to small. */ 01051 nfft_free(a11); 01052 nfft_free(a12); 01053 nfft_free(a21); 01054 nfft_free(a22); 01055 01056 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*)); 01057 data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*)); 01058 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*)); 01059 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*)); 01060 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double)); 01061 01062 plength_stab = N_stab; 01063 01064 if (set->flags & FPT_AL_SYMMETRY) 01065 { 01066 if (m <= 1) 01067 { 01068 /* This should never be executed */ 01069 clength_1 = plength_stab; 01070 clength_2 = plength_stab; 01071 /* Allocate memory for arrays. */ 01072 a11 = (double*) nfft_malloc(sizeof(double)*clength_1); 01073 a12 = (double*) nfft_malloc(sizeof(double)*clength_1); 01074 a21 = (double*) nfft_malloc(sizeof(double)*clength_2); 01075 a22 = (double*) nfft_malloc(sizeof(double)*clength_2); 01076 /* Get the pointers to the three-term recurrence coeffcients. */ 01077 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]); 01078 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, 01079 clength_2, degree_stab-1, calpha, cbeta, cgamma); 01080 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, 01081 clength_2, degree_stab+0, calpha, cbeta, cgamma); 01082 } 01083 else 01084 { 01085 clength = plength_stab/2; 01086 if (m%2 == 0) 01087 { 01088 a11 = (double*) nfft_malloc(sizeof(double)*clength); 01089 a12 = (double*) nfft_malloc(sizeof(double)*clength); 01090 a21 = 0; 01091 a22 = 0; 01092 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]); 01093 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength, 01094 degree_stab-2, calpha, cbeta, cgamma); 01095 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength, 01096 degree_stab-1, calpha, cbeta, cgamma); 01097 } 01098 else 01099 { 01100 a11 = 0; 01101 a12 = 0; 01102 a21 = (double*) nfft_malloc(sizeof(double)*clength); 01103 a22 = (double*) nfft_malloc(sizeof(double)*clength); 01104 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]); 01105 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength, 01106 degree_stab-1,calpha, cbeta, cgamma); 01107 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength, 01108 degree_stab+0, calpha, cbeta, cgamma); 01109 } 01110 } 01111 } 01112 else 01113 { 01114 clength_1 = plength_stab; 01115 clength_2 = plength_stab; 01116 a11 = (double*) nfft_malloc(sizeof(double)*clength_1); 01117 a12 = (double*) nfft_malloc(sizeof(double)*clength_1); 01118 a21 = (double*) nfft_malloc(sizeof(double)*clength_2); 01119 a22 = (double*) nfft_malloc(sizeof(double)*clength_2); 01120 calpha = &(alpha[2]); 01121 cbeta = &(beta[2]); 01122 cgamma = &(gam[2]); 01123 calpha--; 01124 cbeta--; 01125 cgamma--; 01126 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1, 01127 calpha, cbeta, cgamma); 01128 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0, 01129 calpha, cbeta, cgamma); 01130 01131 } 01132 data->steps[tau][l].a11[0] = a11; 01133 data->steps[tau][l].a12[0] = a12; 01134 data->steps[tau][l].a21[0] = a21; 01135 data->steps[tau][l].a22[0] = a22; 01136 01137 data->steps[tau][l].g[0] = gam[1+1]; 01138 data->steps[tau][l].stable = false; 01139 data->steps[tau][l].ts = t_stab; 01140 data->steps[tau][l].Ns = N_stab; 01141 } 01142 } 01144 plength = plength << 1; 01145 } 01146 } 01147 01148 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM)) 01149 { 01150 /* Check, if recurrence coefficients must be copied. */ 01151 if (set->flags & FPT_PERSISTENT_DATA) 01152 { 01153 data->_alpha = (double*) alpha; 01154 data->_beta = (double*) beta; 01155 data->_gamma = (double*) gam; 01156 } 01157 else 01158 { 01159 data->_alpha = (double*) nfft_malloc((set->N+1)*sizeof(double)); 01160 data->_beta = (double*) nfft_malloc((set->N+1)*sizeof(double)); 01161 data->_gamma = (double*) nfft_malloc((set->N+1)*sizeof(double)); 01162 memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double)); 01163 memcpy(data->_beta,beta,(set->N+1)*sizeof(double)); 01164 memcpy(data->_gamma,gam,(set->N+1)*sizeof(double)); 01165 } 01166 } 01167 } 01168 01169 void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x, double _Complex *y, 01170 const int k_end, const unsigned int flags) 01171 { 01172 int j; 01173 fpt_data *data = &(set->dpt[m]); 01174 int Nk; 01175 int tk; 01176 double norm; 01177 01178 //fprintf(stderr, "Executing dpt.\n"); 01179 01180 X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk); 01181 norm = 2.0/(Nk<<1); 01182 01183 //fprintf(stderr, "Norm = %e.\n", norm); 01184 01185 if (set->flags & FPT_NO_DIRECT_ALGORITHM) 01186 { 01187 return; 01188 } 01189 01190 if (flags & FPT_FUNCTION_VALUES) 01191 { 01192 /* Fill array with Chebyshev nodes. */ 01193 for (j = 0; j <= k_end; j++) 01194 { 01195 set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1)); 01196 //fprintf(stderr, "x[%4d] = %e.\n", j, set->xc_slow[j]); 01197 } 01198 01199 memset(set->result,0U,data->k_start*sizeof(double _Complex)); 01200 memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex)); 01201 01202 /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow, 01203 y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1], 01204 data->gamma_m1);*/ 01205 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow, 01206 y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1); 01207 } 01208 else 01209 { 01210 memset(set->temp,0U,data->k_start*sizeof(double _Complex)); 01211 memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex)); 01212 01213 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2], 01214 set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], 01215 data->gamma_m1); 01216 01217 fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result, 01218 (double*)set->result); 01219 01220 set->result[0] *= 0.5; 01221 for (j = 0; j < Nk; j++) 01222 { 01223 set->result[j] *= norm; 01224 } 01225 01226 memcpy(y,set->result,(k_end+1)*sizeof(double _Complex)); 01227 } 01228 } 01229 01230 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y, 01231 const int k_end, const unsigned int flags) 01232 { 01233 /* Get transformation data. */ 01234 fpt_data *data = &(set->dpt[m]); 01236 int Nk; 01238 int tk; 01240 int k_start_tilde; 01242 int k_end_tilde; 01243 01245 int tau; 01247 int firstl; 01249 int lastl; 01251 int l; 01253 int plength; 01255 int plength_stab; 01256 int t_stab; 01258 fpt_step *step; 01260 fftw_plan plan = 0; 01261 int length = k_end+1; 01262 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01}; 01263 01265 int k; 01266 01267 double _Complex *work_ptr; 01268 const double _Complex *x_ptr; 01269 01270 /* Check, if slow transformation should be used due to small bandwidth. */ 01271 if (k_end < FPT_BREAK_EVEN) 01272 { 01273 /* Use NDSFT. */ 01274 fpt_trafo_direct(set, m, x, y, k_end, flags); 01275 return; 01276 } 01277 01278 X(next_power_of_2_exp_int)(k_end,&Nk,&tk); 01279 k_start_tilde = K_START_TILDE(data->k_start,Nk); 01280 k_end_tilde = K_END_TILDE(k_end,Nk); 01281 01282 /* Check if fast transform is activated. */ 01283 if (set->flags & FPT_NO_FAST_ALGORITHM) 01284 return; 01285 01286 if (flags & FPT_FUNCTION_VALUES) 01287 { 01288 #ifdef _OPENMP 01289 int nthreads = X(get_num_threads)(); 01290 #pragma omp critical (nfft_omp_critical_fftw_plan) 01291 { 01292 fftw_plan_with_nthreads(nthreads); 01293 #endif 01294 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1, 01295 (double*)set->work, NULL, 2, 1, kinds, 0U); 01296 #ifdef _OPENMP 01297 } 01298 #endif 01299 } 01300 01301 /* Initialize working arrays. */ 01302 memset(set->result,0U,2*Nk*sizeof(double _Complex)); 01303 01304 /* The first step. */ 01305 01306 /* Set the first 2*data->k_start coefficients to zero. */ 01307 memset(set->work,0U,2*data->k_start*sizeof(double _Complex)); 01308 01309 work_ptr = &set->work[2*data->k_start]; 01310 x_ptr = x; 01311 01312 for (k = 0; k <= k_end_tilde-data->k_start; k++) 01313 { 01314 *work_ptr++ = *x_ptr++; 01315 *work_ptr++ = K(0.0); 01316 } 01317 01318 /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */ 01319 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex)); 01320 01321 /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to 01322 * x_{Nk-1} and x_{Nk-2}. */ 01323 if (k_end == Nk) 01324 { 01325 set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start]; 01326 set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start]; 01327 set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start]; 01328 } 01329 01330 /* Compute the remaining steps. */ 01331 plength = 4; 01332 for (tau = 1; tau < tk; tau++) 01333 { 01334 /* Compute first l. */ 01335 firstl = FIRST_L(k_start_tilde,plength); 01336 /* Compute last l. */ 01337 lastl = LAST_L(k_end_tilde,plength); 01338 01339 /* Compute the multiplication steps. */ 01340 for (l = firstl; l <= lastl; l++) 01341 { 01342 /* Copy vectors to multiply into working arrays zero-padded to twice the length. */ 01343 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex)); 01344 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex)); 01345 memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex)); 01346 memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex)); 01347 01348 /* Copy coefficients into first half. */ 01349 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex)); 01350 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex)); 01351 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex)); 01352 01353 /* Get matrix U_{n,tau,l} */ 01354 step = &(data->steps[tau][l]); 01355 01356 /* Check if step is stable. */ 01357 if (step->stable) 01358 { 01359 /* Check, if we should do a symmetrizised step. */ 01360 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength)) 01361 { 01362 /*for (k = 0; k < plength; k++) 01363 { 01364 fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n", 01365 step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]); 01366 }*/ 01367 /* Multiply third and fourth polynomial with matrix U. */ 01368 //fprintf(stderr,"\nhallo\n"); 01369 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], 01370 step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set); 01371 } 01372 else 01373 { 01374 /* Multiply third and fourth polynomial with matrix U. */ 01375 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0], 01376 step->a21[0], step->a22[0], step->g[0], tau, set); 01377 } 01378 01379 if (step->g[0] != 0.0) 01380 { 01381 for (k = 0; k < plength; k++) 01382 { 01383 set->work[plength*2*l+k] += set->vec3[k]; 01384 } 01385 } 01386 for (k = 0; k < plength; k++) 01387 { 01388 set->work[plength*(2*l+1)+k] += set->vec4[k]; 01389 } 01390 } 01391 else 01392 { 01393 /* Stabilize. */ 01394 01395 /* The lengh of the polynomials */ 01396 plength_stab = step->Ns; 01397 t_stab = step->ts; 01398 01399 /*---------*/ 01400 /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l); 01401 fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab); 01402 fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk); 01403 fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/ 01404 /*---------*/ 01405 01406 /* Set rest of vectors explicitely to zero */ 01407 /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n", 01408 plength, plength_stab);*/ 01409 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex)); 01410 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex)); 01411 01412 /* Multiply third and fourth polynomial with matrix U. */ 01413 /* Check for symmetry. */ 01414 if (set->flags & FPT_AL_SYMMETRY) 01415 { 01416 if (m <= 1) 01417 { 01418 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0], 01419 step->a21[0], step->a22[0], step->g[0], t_stab-1, set); 01420 } 01421 else if (m%2 == 0) 01422 { 01423 /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0], 01424 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/ 01425 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0], 01426 step->a21[0], step->a22[0], 01427 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set); 01428 /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0], 01429 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/ 01430 } 01431 else 01432 { 01433 /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0], 01434 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/ 01435 fpt_do_step_symmetric_l(set->vec3, set->vec4, 01436 step->a11[0], step->a12[0], 01437 step->a21[0], 01438 step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set); 01439 /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0], 01440 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/ 01441 } 01442 } 01443 else 01444 { 01445 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0], 01446 step->a21[0], step->a22[0], step->g[0], t_stab-1, set); 01447 } 01448 01449 if (step->g[0] != 0.0) 01450 { 01451 for (k = 0; k < plength_stab; k++) 01452 { 01453 set->result[k] += set->vec3[k]; 01454 } 01455 } 01456 01457 for (k = 0; k < plength_stab; k++) 01458 { 01459 set->result[Nk+k] += set->vec4[k]; 01460 } 01461 } 01462 } 01463 /* Double length of polynomials. */ 01464 plength = plength<<1; 01465 01466 /*--------*/ 01467 /*for (k = 0; k < 2*Nk; k++) 01468 { 01469 fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n", 01470 k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]), 01471 cimag(set->result[k])); 01472 }*/ 01473 /*--------*/ 01474 } 01475 01476 /* Add the resulting cascade coeffcients to the coeffcients accumulated from 01477 * the stabilization steps. */ 01478 for (k = 0; k < 2*Nk; k++) 01479 { 01480 set->result[k] += set->work[k]; 01481 } 01482 01483 /* The last step. Compute the Chebyshev coeffcients c_k^n from the 01484 * polynomials in front of P_0^n and P_1^n. */ 01485 y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] + 01486 data->alpha_0*set->result[Nk+1]*0.5); 01487 y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+ 01488 data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5)); 01489 y[k_end-1] = data->gamma_m1*(set->result[k_end-1] + 01490 data->beta_0*set->result[Nk+k_end-1] + 01491 data->alpha_0*set->result[Nk+k_end-2]*0.5); 01492 y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]); 01493 for (k = 2; k <= k_end-2; k++) 01494 { 01495 y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] + 01496 data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1])); 01497 } 01498 01499 if (flags & FPT_FUNCTION_VALUES) 01500 { 01501 y[0] *= 2.0; 01502 fftw_execute_r2r(plan,(double*)y,(double*)y); 01503 #ifdef _OPENMP 01504 #pragma omp critical (nfft_omp_critical_fftw_plan) 01505 #endif 01506 fftw_destroy_plan(plan); 01507 for (k = 0; k <= k_end; k++) 01508 { 01509 y[k] *= 0.5; 01510 } 01511 } 01512 } 01513 01514 void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x, 01515 double _Complex *y, const int k_end, const unsigned int flags) 01516 { 01517 int j; 01518 fpt_data *data = &(set->dpt[m]); 01519 int Nk; 01520 int tk; 01521 double norm; 01522 01523 X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk); 01524 norm = 2.0/(Nk<<1); 01525 01526 if (set->flags & FPT_NO_DIRECT_ALGORITHM) 01527 { 01528 return; 01529 } 01530 01531 if (flags & FPT_FUNCTION_VALUES) 01532 { 01533 for (j = 0; j <= k_end; j++) 01534 { 01535 set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1)); 01536 } 01537 01538 eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow, 01539 y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], 01540 data->gamma_m1); 01541 01542 memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)* 01543 sizeof(double _Complex)); 01544 } 01545 else 01546 { 01547 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex)); 01548 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex)); 01549 01550 for (j = 0; j < Nk; j++) 01551 { 01552 set->result[j] *= norm; 01553 } 01554 01555 fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result, 01556 (double*)set->result); 01557 01558 eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2], 01559 set->result, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], 01560 data->gamma_m1); 01561 01562 memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex)); 01563 } 01564 } 01565 01566 void fpt_transposed(fpt_set set, const int m, double _Complex *x, 01567 double _Complex *y, const int k_end, const unsigned int flags) 01568 { 01569 /* Get transformation data. */ 01570 fpt_data *data = &(set->dpt[m]); 01572 int Nk; 01574 int tk; 01576 int k_start_tilde; 01578 int k_end_tilde; 01579 01581 int tau; 01583 int firstl; 01585 int lastl; 01587 int l; 01589 int plength; 01591 int plength_stab; 01593 fpt_step *step; 01595 fftw_plan plan; 01596 int length = k_end+1; 01597 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10}; 01599 int k; 01600 int t_stab; 01601 01602 /* Check, if slow transformation should be used due to small bandwidth. */ 01603 if (k_end < FPT_BREAK_EVEN) 01604 { 01605 /* Use NDSFT. */ 01606 fpt_transposed_direct(set, m, x, y, k_end, flags); 01607 return; 01608 } 01609 01610 X(next_power_of_2_exp_int)(k_end,&Nk,&tk); 01611 k_start_tilde = K_START_TILDE(data->k_start,Nk); 01612 k_end_tilde = K_END_TILDE(k_end,Nk); 01613 01614 /* Check if fast transform is activated. */ 01615 if (set->flags & FPT_NO_FAST_ALGORITHM) 01616 { 01617 return; 01618 } 01619 01620 if (flags & FPT_FUNCTION_VALUES) 01621 { 01622 #ifdef _OPENMP 01623 int nthreads = X(get_num_threads)(); 01624 #pragma omp critical (nfft_omp_critical_fftw_plan) 01625 { 01626 fftw_plan_with_nthreads(nthreads); 01627 #endif 01628 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1, 01629 (double*)set->work, NULL, 2, 1, kinds, 0U); 01630 #ifdef _OPENMP 01631 } 01632 #endif 01633 fftw_execute_r2r(plan,(double*)y,(double*)set->result); 01634 #ifdef _OPENMP 01635 #pragma omp critical (nfft_omp_critical_fftw_plan) 01636 #endif 01637 fftw_destroy_plan(plan); 01638 for (k = 0; k <= k_end; k++) 01639 { 01640 set->result[k] *= 0.5; 01641 } 01642 } 01643 else 01644 { 01645 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex)); 01646 } 01647 01648 /* Initialize working arrays. */ 01649 memset(set->work,0U,2*Nk*sizeof(double _Complex)); 01650 01651 /* The last step is now the first step. */ 01652 for (k = 0; k <= k_end; k++) 01653 { 01654 set->work[k] = data->gamma_m1*set->result[k]; 01655 } 01656 //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex)); 01657 01658 set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] + 01659 data->alpha_0*set->result[1]); 01660 for (k = 1; k < k_end; k++) 01661 { 01662 set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] + 01663 data->alpha_0*0.5*(set->result[k-1]+set->result[k+1])); 01664 } 01665 if (k_end<Nk) 01666 { 01667 memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex)); 01668 } 01669 01671 memcpy(set->result,set->work,2*Nk*sizeof(double _Complex)); 01672 01673 /* Compute the remaining steps. */ 01674 plength = Nk; 01675 for (tau = tk-1; tau >= 1; tau--) 01676 { 01677 /* Compute first l. */ 01678 firstl = FIRST_L(k_start_tilde,plength); 01679 /* Compute last l. */ 01680 lastl = LAST_L(k_end_tilde,plength); 01681 01682 /* Compute the multiplication steps. */ 01683 for (l = firstl; l <= lastl; l++) 01684 { 01685 /* Initialize second half of coefficient arrays with zeros. */ 01686 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex)); 01687 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex)); 01688 01689 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]), 01690 (plength/2)*sizeof(double _Complex)); 01691 01692 /* Get matrix U_{n,tau,l} */ 01693 step = &(data->steps[tau][l]); 01694 01695 /* Check if step is stable. */ 01696 if (step->stable) 01697 { 01698 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength)) 01699 { 01700 /* Multiply third and fourth polynomial with matrix U. */ 01701 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0], 01702 step->a21[0], step->a22[0], step->g[0], tau, set); 01703 } 01704 else 01705 { 01706 /* Multiply third and fourth polynomial with matrix U. */ 01707 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0], 01708 step->a21[0], step->a22[0], step->g[0], tau, set); 01709 } 01710 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex)); 01711 01712 for (k = 0; k < plength; k++) 01713 { 01714 set->work[plength*(4*l+2)/2+k] = set->vec3[k]; 01715 } 01716 } 01717 else 01718 { 01719 /* Stabilize. */ 01720 plength_stab = step->Ns; 01721 t_stab = step->ts; 01722 01723 memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex)); 01724 memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex)); 01725 01726 /* Multiply third and fourth polynomial with matrix U. */ 01727 if (set->flags & FPT_AL_SYMMETRY) 01728 { 01729 if (m <= 1) 01730 { 01731 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0], 01732 step->a21[0], step->a22[0], step->g[0], t_stab-1, set); 01733 } 01734 else if (m%2 == 0) 01735 { 01736 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0], 01737 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set); 01738 } 01739 else 01740 { 01741 fpt_do_step_t_symmetric_l(set->vec3, set->vec4, 01742 step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set); 01743 } 01744 } 01745 else 01746 { 01747 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0], 01748 step->a21[0], step->a22[0], step->g[0], t_stab-1, set); 01749 } 01750 01751 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex)); 01752 01753 for (k = 0; k < plength; k++) 01754 { 01755 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k]; 01756 } 01757 } 01758 } 01759 /* Half the length of polynomial arrays. */ 01760 plength = plength>>1; 01761 } 01762 01763 /* First step */ 01764 for (k = 0; k <= k_end_tilde-data->k_start; k++) 01765 { 01766 x[k] = set->work[2*(data->k_start+k)]; 01767 } 01768 if (k_end == Nk) 01769 { 01770 x[Nk-data->k_start] = 01771 data->gammaN[tk-2]*set->work[2*(Nk-2)] 01772 + data->betaN[tk-2] *set->work[2*(Nk-1)] 01773 + data->alphaN[tk-2]*set->work[2*(Nk-1)+1]; 01774 } 01775 } 01776 01777 void fpt_finalize(fpt_set set) 01778 { 01779 int tau; 01780 int l; 01781 int m; 01782 fpt_data *data; 01783 int k_start_tilde; 01784 int N_tilde; 01785 int firstl, lastl; 01786 int plength; 01787 const int M = set->M; 01788 01789 /* TODO Clean up DPT transform data structures. */ 01790 for (m = 0; m < M; m++) 01791 { 01792 /* Check if precomputed. */ 01793 data = &set->dpt[m]; 01794 if (data->steps != (fpt_step**)NULL) 01795 { 01796 nfft_free(data->alphaN); 01797 nfft_free(data->betaN); 01798 nfft_free(data->gammaN); 01799 data->alphaN = NULL; 01800 data->betaN = NULL; 01801 data->gammaN = NULL; 01802 01803 /* Free precomputed data. */ 01804 k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start) 01805 /*set->N*/); 01806 N_tilde = N_TILDE(set->N); 01807 plength = 4; 01808 for (tau = 1; tau < set->t; tau++) 01809 { 01810 /* Compute first l. */ 01811 firstl = FIRST_L(k_start_tilde,plength); 01812 /* Compute last l. */ 01813 lastl = LAST_L(N_tilde,plength); 01814 01815 /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */ 01816 for (l = firstl; l <= lastl; l++) 01817 { 01818 /* Free components. */ 01819 nfft_free(data->steps[tau][l].a11[0]); 01820 nfft_free(data->steps[tau][l].a12[0]); 01821 nfft_free(data->steps[tau][l].a21[0]); 01822 nfft_free(data->steps[tau][l].a22[0]); 01823 data->steps[tau][l].a11[0] = NULL; 01824 data->steps[tau][l].a12[0] = NULL; 01825 data->steps[tau][l].a21[0] = NULL; 01826 data->steps[tau][l].a22[0] = NULL; 01827 /* Free components. */ 01828 nfft_free(data->steps[tau][l].a11); 01829 nfft_free(data->steps[tau][l].a12); 01830 nfft_free(data->steps[tau][l].a21); 01831 nfft_free(data->steps[tau][l].a22); 01832 nfft_free(data->steps[tau][l].g); 01833 data->steps[tau][l].a11 = NULL; 01834 data->steps[tau][l].a12 = NULL; 01835 data->steps[tau][l].a21 = NULL; 01836 data->steps[tau][l].a22 = NULL; 01837 data->steps[tau][l].g = NULL; 01838 } 01839 /* Free pointers for current level. */ 01840 nfft_free(data->steps[tau]); 01841 data->steps[tau] = NULL; 01842 /* Double length of polynomials. */ 01843 plength = plength<<1; 01844 } 01845 /* Free steps. */ 01846 nfft_free(data->steps); 01847 data->steps = NULL; 01848 } 01849 01850 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM)) 01851 { 01852 /* Check, if recurrence coefficients must be copied. */ 01853 //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA); 01854 if (!(set->flags & FPT_PERSISTENT_DATA)) 01855 { 01856 nfft_free(data->_alpha); 01857 nfft_free(data->_beta); 01858 nfft_free(data->_gamma); 01859 } 01860 data->_alpha = NULL; 01861 data->_beta = NULL; 01862 data->_gamma = NULL; 01863 } 01864 } 01865 01866 /* Delete array of DPT transform data. */ 01867 nfft_free(set->dpt); 01868 set->dpt = NULL; 01869 01870 for (tau = 1; tau < set->t+1; tau++) 01871 { 01872 nfft_free(set->xcvecs[tau-1]); 01873 set->xcvecs[tau-1] = NULL; 01874 } 01875 nfft_free(set->xcvecs); 01876 set->xcvecs = NULL; 01877 01878 /* Free auxilliary arrays. */ 01879 nfft_free(set->work); 01880 nfft_free(set->result); 01881 01882 /* Check if fast transform is activated. */ 01883 if (!(set->flags & FPT_NO_FAST_ALGORITHM)) 01884 { 01885 /* Free auxilliary arrays. */ 01886 nfft_free(set->vec3); 01887 nfft_free(set->vec4); 01888 nfft_free(set->z); 01889 set->work = NULL; 01890 set->result = NULL; 01891 set->vec3 = NULL; 01892 set->vec4 = NULL; 01893 set->z = NULL; 01894 01895 /* Free FFTW plans. */ 01896 for(tau = 0; tau < set->t/*-1*/; tau++) 01897 { 01898 #ifdef _OPENMP 01899 #pragma omp critical (nfft_omp_critical_fftw_plan) 01900 #endif 01901 { 01902 fftw_destroy_plan(set->plans_dct3[tau]); 01903 fftw_destroy_plan(set->plans_dct2[tau]); 01904 } 01905 set->plans_dct3[tau] = NULL; 01906 set->plans_dct2[tau] = NULL; 01907 } 01908 01909 nfft_free(set->plans_dct3); 01910 nfft_free(set->plans_dct2); 01911 set->plans_dct3 = NULL; 01912 set->plans_dct2 = NULL; 01913 } 01914 01915 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM)) 01916 { 01917 /* Delete arrays of Chebyshev nodes. */ 01918 nfft_free(set->xc_slow); 01919 set->xc_slow = NULL; 01920 nfft_free(set->temp); 01921 set->temp = NULL; 01922 } 01923 01924 /* Free DPT set structure. */ 01925 nfft_free(set); 01926 }