00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "config.h"
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #include <complex.h>
00034
00035 #include "nfft3.h"
00036 #include "nfft3util.h"
00037 #include "infft.h"
00038
00039
00040
00042 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00043
00045 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00046
00048 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00049
00051 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00052
00053 #define N_TILDE(y) (y-1)
00054
00055 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00056
00057 #define FPT_BREAK_EVEN 4
00058
00062 typedef struct fpt_step_
00063 {
00064 bool stable;
00067 int Ns;
00068 int ts;
00069 double **a11,**a12,**a21,**a22;
00070 double *g;
00071 } fpt_step;
00072
00076 typedef struct fpt_data_
00077 {
00078 fpt_step **steps;
00079 int k_start;
00080 double *alphaN;
00081 double *betaN;
00082 double *gammaN;
00083 double alpha_0;
00084 double beta_0;
00085 double gamma_m1;
00086
00087 double *alpha;
00088 double *beta;
00089 double *gamma;
00090 } fpt_data;
00091
00095 typedef struct fpt_set_s_
00096 {
00097 int flags;
00098 int M;
00099 int N;
00101 int t;
00102 fpt_data *dpt;
00103 double **xcvecs;
00106 double *xc;
00107 double _Complex *temp;
00108 double _Complex *work;
00109 double _Complex *result;
00110 double _Complex *vec3;
00111 double _Complex *vec4;
00112 double _Complex *z;
00113 fftw_plan *plans_dct3;
00115 fftw_plan *plans_dct2;
00117 fftw_r2r_kind *kinds;
00119 fftw_r2r_kind *kindsr;
00122 int *lengths;
00124
00125 double *xc_slow;
00126 } fpt_set_s;
00127
00128 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00129 double* v, double _Complex* y, double* w, int n)
00130 {
00131 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00132 double *v_ptr = v, *w_ptr = w;
00133 for (l = 0; l < n; l++)
00134 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00135 }
00136
00137 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00138 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00139 double* v, double _Complex* y, double* w, int n) \
00140 { \
00141 const int n2 = n>>1; \
00142 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00143 double *v_ptr = v, *w_ptr = w; \
00144 for (l = 0; l < n2; l++) \
00145 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00146 v_ptr--; w_ptr--; \
00147 for (l = 0; l < n2; l++) \
00148 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00149 }
00150
00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00153
00154 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00155 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00156 double* v, double _Complex* y, int n, double *xx) \
00157 { \
00158 const int n2 = n>>1; \
00159 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00160 double *v_ptr = v, *xx_ptr = xx; \
00161 for (l = 0; l < n2; l++, v_ptr++) \
00162 *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00163 v_ptr--; \
00164 for (l = 0; l < n2; l++, v_ptr--) \
00165 *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00166 }
00167
00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00170
00171 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00172 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00173 double _Complex* y, double* w, int n, double *xx) \
00174 { \
00175 const int n2 = n>>1; \
00176 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00177 double *w_ptr = w, *xx_ptr = xx; \
00178 for (l = 0; l < n2; l++, w_ptr++) \
00179 *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00180 w_ptr--; \
00181 for (l = 0; l < n2; l++, w_ptr--) \
00182 *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00183 }
00184
00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00187
00188 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00189 double _Complex* y, double* w, int n)
00190 {
00191 int l;
00192 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00193 double *v_ptr = v, *w_ptr = w;
00194 for (l = n; l > 0; l--)
00195 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00196 }
00197
00198 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00199 double* v, double _Complex* y, double* w, int n)
00200 {
00201 const int n2 = n>>1; \
00202 int l;
00203 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00204 double *v_ptr = v, *w_ptr = w;
00205 for (l = n2; l > 0; l--)
00206 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00207 v_ptr--; w_ptr--;
00208 for (l = n2; l > 0; l--)
00209 *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00210 }
00211
00212 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00213 double* v, double _Complex* y, double* w, int n, double *xx)
00214 {
00215 const int n2 = n>>1; \
00216 int l;
00217 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00218 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00219 for (l = n2; l > 0; l--, xx_ptr++)
00220 *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00221 v_ptr--; w_ptr--;
00222 for (l = n2; l > 0; l--, xx_ptr++)
00223 *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00224 }
00225
00226 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00227 double* v, double _Complex* y, double* w, int n, double *xx)
00228 {
00229 const int n2 = n>>1; \
00230 int l;
00231 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00232 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00233 for (l = n2; l > 0; l--, xx_ptr++)
00234 *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00235 v_ptr--; w_ptr--;
00236 for (l = n2; l > 0; l--, xx_ptr++)
00237 *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00238 }
00239
00240 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00241 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
00242 double *a21, double *a22, double g, int tau, fpt_set set) \
00243 { \
00244 \
00245 int length = 1<<(tau+1); \
00246 \
00247 double norm = 1.0/(length<<1); \
00248 \
00249 \
00250 a[0] *= 2.0; \
00251 b[0] *= 2.0; \
00252 \
00253 \
00254 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00255 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00256 \
00257 \
00258 \
00259 \
00260 \
00261 \
00262 \
00263 \
00264 if (g == 0.0) \
00265 { \
00266 \
00267 \
00268 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00269 } \
00270 else \
00271 { \
00272 \
00273 \
00274 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00275 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00276 memcpy(b,set->z,length*sizeof(double _Complex)); \
00277 \
00278 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00279 \
00280 a[0] *= 0.5; \
00281 } \
00282 \
00283 \
00284 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00285 \
00286 b[0] *= 0.5; \
00287 }
00288
00289 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00290 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00291
00292
00293
00294 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00295 double *a11, double *a12, double *a21, double *a22, double *x,
00296 double gamma, int tau, fpt_set set)
00297 {
00299 int length = 1<<(tau+1);
00301 double norm = 1.0/(length<<1);
00302
00303 UNUSED(a21); UNUSED(a22);
00304
00305
00306 a[0] *= 2.0;
00307 b[0] *= 2.0;
00308
00309
00310 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00311 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00312
00313
00314
00315
00316
00317
00318
00319
00320 if (gamma == 0.0)
00321 {
00322
00323
00324 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00325 }
00326 else
00327 {
00328
00329
00330 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00331 auvxpwy_symmetric(norm*gamma,a,a,a11,b,a12,length);
00332 memcpy(b,set->z,length*sizeof(double _Complex));
00333
00334 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00335
00336 a[0] *= 0.5;
00337 }
00338
00339
00340 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00341
00342 b[0] *= 0.5;
00343 }
00344
00345 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
00346 double *a11, double *a12, double *a21, double *a22, double *x, double gamma, int tau, fpt_set set)
00347 {
00349 int length = 1<<(tau+1);
00351 double norm = 1.0/(length<<1);
00352
00353
00354 a[0] *= 2.0;
00355 b[0] *= 2.0;
00356
00357 UNUSED(a11); UNUSED(a12);
00358
00359
00360 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00361 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00362
00363
00364
00365
00366
00367
00368
00369
00370 if (gamma == 0.0)
00371 {
00372
00373
00374 auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00375 }
00376 else
00377 {
00378
00379
00380 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00381 auvxpwy_symmetric_2(norm*gamma,a,a,a21,b,a22,length,x);
00382 memcpy(b,set->z,length*sizeof(double _Complex));
00383
00384 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00385
00386 a[0] *= 0.5;
00387 }
00388
00389
00390 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00391
00392 b[0] *= 0.5;
00393 }
00394
00395 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00396 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
00397 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00398 { \
00399 \
00400 int length = 1<<(tau+1); \
00401 \
00402 double norm = 1.0/(length<<1); \
00403 \
00404 \
00405 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00406 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00407 \
00408 \
00409 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00410 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00411 memcpy(a,set->z,length*sizeof(double _Complex)); \
00412 \
00413 \
00414 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00415 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00416 }
00417
00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00420
00421
00422
00423
00424 static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
00425 double _Complex *b, double *a11, double *a12, double *x,
00426 double gamma, int tau, fpt_set set)
00427 {
00429 int length = 1<<(tau+1);
00431 double norm = 1.0/(length<<1);
00432
00433
00434 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00435 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00436
00437
00438 abuvxpwy_symmetric1_1(norm,gamma,set->z,a,a11,b,length,x);
00439 abuvxpwy_symmetric1_2(norm,gamma,b,a,a12,b,length,x);
00440 memcpy(a,set->z,length*sizeof(double _Complex));
00441
00442
00443 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00444 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00445 }
00446
00447 static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
00448 double _Complex *b, double *a21, double *a22,
00449 double *x, double gamma, int tau, fpt_set set)
00450 {
00452 int length = 1<<(tau+1);
00454 double norm = 1.0/(length<<1);
00455
00456
00457 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00458 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00459
00460
00461 abuvxpwy_symmetric2_2(norm,gamma,set->z,a,b,a21,length,x);
00462 abuvxpwy_symmetric2_1(norm,gamma,b,a,b,a22,length,x);
00463 memcpy(a,set->z,length*sizeof(double _Complex));
00464
00465
00466 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00467 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00468 }
00469
00470
00471 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00472 const double *beta, const double *gamma)
00473 {
00474
00475
00476
00477 int i,j;
00478 double a,b,x_val_act,a_old;
00479 const double *x_act;
00480 double *y_act;
00481 const double *alpha_act, *beta_act, *gamma_act;
00482
00483
00484 x_act = x;
00485 y_act = y;
00486 for (i = 0; i < size; i++)
00487 {
00488 a = 1.0;
00489 b = 0.0;
00490 x_val_act = *x_act;
00491
00492 if (k == 0)
00493 {
00494 *y_act = 1.0;
00495 }
00496 else
00497 {
00498 alpha_act = &(alpha[k]);
00499 beta_act = &(beta[k]);
00500 gamma_act = &(gamma[k]);
00501 for (j = k; j > 1; j--)
00502 {
00503 a_old = a;
00504 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00505 b = a_old*(*gamma_act);
00506 alpha_act--;
00507 beta_act--;
00508 gamma_act--;
00509 }
00510 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00511 }
00512 x_act++;
00513 y_act++;
00514 }
00515 }
00516
00517 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00518 const double *beta, const double *gamma)
00519 {
00520
00521
00522
00523 int i,j;
00524 double a,b,x_val_act,a_old;
00525 const double *x_act;
00526 double *y_act, *z_act;
00527 const double *alpha_act, *beta_act, *gamma_act;
00528
00529
00530 x_act = x;
00531 y_act = y;
00532 z_act = z;
00533 for (i = 0; i < size; i++)
00534 {
00535 a = 1.0;
00536 b = 0.0;
00537 x_val_act = *x_act;
00538
00539 if (k == 0)
00540 {
00541 *y_act = 1.0;
00542 *z_act = 0.0;
00543 }
00544 else
00545 {
00546 alpha_act = &(alpha[k]);
00547 beta_act = &(beta[k]);
00548 gamma_act = &(gamma[k]);
00549 for (j = k; j > 1; j--)
00550 {
00551 a_old = a;
00552 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00553 b = a_old*(*gamma_act);
00554 alpha_act--;
00555 beta_act--;
00556 gamma_act--;
00557 }
00558 if (i < size1)
00559 *z_act = a;
00560 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00561 }
00562
00563 x_act++;
00564 y_act++;
00565 z_act++;
00566 }
00567
00568
00569
00570
00571 }
00572
00573 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00574 const double *alpha, const double *beta, const double *gamma, const
00575 double threshold)
00576 {
00577
00578
00579
00580 int i,j;
00581 double a,b,x_val_act,a_old;
00582 const double *x_act;
00583 double *y_act, *z_act;
00584 const double *alpha_act, *beta_act, *gamma_act;
00585 R min = INFINITY, max = -INFINITY;
00586 const R t = + LOG10(FABS(threshold));
00587
00588
00589 x_act = x;
00590 y_act = y;
00591 z_act = z;
00592 for (i = 0; i < size; i++)
00593 {
00594 a = 1.0;
00595 b = 0.0;
00596 x_val_act = *x_act;
00597
00598 if (k == 0)
00599 {
00600 *y_act = 1.0;
00601 *z_act = 0.0;
00602 }
00603 else
00604 {
00605 alpha_act = &(alpha[k]);
00606 beta_act = &(beta[k]);
00607 gamma_act = &(gamma[k]);
00608 for (j = k; j > 1; j--)
00609 {
00610 a_old = a;
00611 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00612 b = a_old*(*gamma_act);
00613 alpha_act--;
00614 beta_act--;
00615 gamma_act--;
00616 }
00617 *z_act = a;
00618 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00619 min = FMIN(min,LOG10(FABS(*y_act)));
00620 max = FMAX(max,LOG10(FABS(*y_act)));
00621
00622 if ((max - min) > t)
00623 return 1;
00624 }
00625 x_act++;
00626 y_act++;
00627 z_act++;
00628 }
00629 return 0;
00630 }
00631
00632 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00633 const double _Complex *a, const double *x, double _Complex *y,
00634 const double *alpha, const double *beta, const double *gamma,
00635 const double lambda)
00636 {
00637 int j,k;
00638 double _Complex tmp1, tmp2, tmp3;
00639 double xc;
00640
00641 if (N == 0)
00642 for (j = 0; j <= M; j++)
00643 y[j] = a[0];
00644 else
00645 {
00646 for (j = 0; j <= M; j++)
00647 {
00648 #if 0
00649 xc = x[j];
00650 tmp2 = a[N];
00651 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00652 for (k = N-1; k > 0; k--)
00653 {
00654 tmp3 = a[k-1]
00655 + (alpha[k-1] * xc + beta[k-1]) * tmp1
00656 + gamma[k] * tmp2;
00657 tmp2 = tmp1;
00658 tmp1 = tmp3;
00659 }
00660 y[j] = lambda * tmp1;
00661 #else
00662 xc = x[j];
00663 tmp1 = a[N-1];
00664 tmp2 = a[N];
00665 for (k = N-1; k > 0; k--)
00666 {
00667 tmp3 = a[k-1] + tmp2 * gamma[k];
00668 tmp2 *= (alpha[k] * xc + beta[k]);
00669 tmp2 += tmp1;
00670 tmp1 = tmp3;
00671 }
00672 tmp2 *= (alpha[0] * xc + beta[0]);
00673 y[j] = lambda * (tmp2 + tmp1);
00674 #endif
00675 }
00676 }
00677 }
00678
00697 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00698 double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gamma,
00699 double lambda)
00700 {
00701 int j,k;
00702 double _Complex* it1 = temp;
00703 double _Complex* it2 = y;
00704 double _Complex aux;
00705
00706
00707 a[0] = 0.0;
00708 for (j = 0; j <= M; j++)
00709 {
00710 it2[j] = lambda * y[j];
00711 a[0] += it2[j];
00712 }
00713
00714 if (N > 0)
00715 {
00716
00717 a[1] = 0.0;
00718 for (j = 0; j <= M; j++)
00719 {
00720 it1[j] = it2[j];
00721 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00722 a[1] += it2[j];
00723 }
00724
00725 for (k = 2; k <= N; k++)
00726 {
00727 a[k] = 0.0;
00728 for (j = 0; j <= M; j++)
00729 {
00730 aux = it1[j];
00731 it1[j] = it2[j];
00732 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00733 a[k] += it2[j];
00734 }
00735 }
00736 }
00737 }
00738
00739
00740 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00741 {
00743 int plength;
00745 int tau;
00747 int m;
00748 int k;
00749
00750
00751 fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00752
00753
00754 set->flags = flags;
00755
00756 set->M = M;
00757 set->t = t;
00758 set->N = 1<<t;
00759
00760
00761 set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00762
00763
00764 for (m = 0; m < set->M; m++)
00765 set->dpt[m].steps = 0;
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775 set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00776
00777
00778 plength = 4;
00779 for (tau = 1; tau < t+1; tau++)
00780 {
00781
00782 set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00783 for (k = 0; k < plength; k++)
00784 {
00785 set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00786 }
00787 plength = plength << 1;
00788 }
00789
00791 set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00792 set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00793
00794
00795 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00796 {
00798 set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00799 set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00800 set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00801
00803 set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00804 set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00805 set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00806 set->kinds[0] = FFTW_REDFT01;
00807 set->kinds[1] = FFTW_REDFT01;
00808 set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00809 set->kindsr[0] = FFTW_REDFT10;
00810 set->kindsr[1] = FFTW_REDFT10;
00811 set->lengths = (int*) nfft_malloc((set->t)*sizeof(int));
00812 for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
00813 {
00814 set->lengths[tau] = plength;
00815 set->plans_dct3[tau] =
00816 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00817 2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00818 0);
00819 set->plans_dct2[tau] =
00820 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00821 2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00822 0);
00823 }
00824 nfft_free(set->lengths);
00825 nfft_free(set->kinds);
00826 nfft_free(set->kindsr);
00827 set->lengths = NULL;
00828 set->kinds = NULL;
00829 set->kindsr = NULL;
00830 }
00831
00832 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00833 {
00834 set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00835 set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00836 }
00837
00838
00839 return set;
00840 }
00841
00842 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00843 double *gam, int k_start, const double threshold)
00844 {
00845
00846 int tau;
00847 int l;
00848 int plength;
00850 int degree;
00852 int firstl;
00853 int lastl;
00854 int plength_stab;
00856 int degree_stab;
00858 double *a11;
00860 double *a12;
00862 double *a21;
00864 double *a22;
00866 const double *calpha;
00867 const double *cbeta;
00868 const double *cgamma;
00869 int needstab = 0;
00870 int k_start_tilde;
00871 int N_tilde;
00872 int clength;
00873 int clength_1;
00874 int clength_2;
00875 int t_stab, N_stab;
00876 fpt_data *data;
00877
00878
00879 data = &(set->dpt[m]);
00880
00881
00882 if (data->steps != NULL)
00883 return;
00884
00885
00886 data->k_start = k_start;
00887
00888
00889 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00890 {
00891
00892 data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00893 data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00894 data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00895
00896 for (tau = 2; tau <= set->t; tau++)
00897 {
00898
00899 data->alphaN[tau-2] = alpha[1<<tau];
00900 data->betaN[tau-2] = beta[1<<tau];
00901 data->gammaN[tau-2] = gam[1<<tau];
00902 }
00903
00904 data->alpha_0 = alpha[1];
00905 data->beta_0 = beta[1];
00906 data->gamma_m1 = gam[0];
00907
00908 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00909 );
00910 N_tilde = N_TILDE(set->N);
00911
00912
00913 data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00914
00915
00916 plength = 4;
00917 for (tau = 1; tau < set->t; tau++)
00918 {
00919
00920 degree = plength>>1;
00921
00922 firstl = FIRST_L(k_start_tilde,plength);
00923
00924 lastl = LAST_L(N_tilde,plength);
00925
00926
00927
00928 data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00929 * (lastl+1));
00930
00931
00932 for (l = firstl; l <= lastl; l++)
00933 {
00934 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00935 {
00936
00937
00938 clength = plength/2;
00939 }
00940 else
00941 {
00942 clength = plength;
00943 }
00944
00945
00946 a11 = (double*) nfft_malloc(sizeof(double)*clength);
00947 a12 = (double*) nfft_malloc(sizeof(double)*clength);
00948 a21 = (double*) nfft_malloc(sizeof(double)*clength);
00949 a22 = (double*) nfft_malloc(sizeof(double)*clength);
00950
00951
00952
00953
00954
00955 calpha = &(alpha[plength*l+1+1]);
00956 cbeta = &(beta[plength*l+1+1]);
00957 cgamma = &(gam[plength*l+1+1]);
00958
00959 if (set->flags & FPT_NO_STABILIZATION)
00960 {
00961
00962 calpha--;
00963 cbeta--;
00964 cgamma--;
00965 eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
00966 cgamma);
00967 eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
00968 cgamma);
00969 needstab = 0;
00970 }
00971 else
00972 {
00973 calpha--;
00974 cbeta--;
00975 cgamma--;
00976
00977 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
00978 degree-1, calpha, cbeta, cgamma, threshold);
00979 if (needstab == 0)
00980 {
00981
00982 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
00983 degree, calpha, cbeta, cgamma, threshold);
00984 }
00985 }
00986
00987
00988 if (needstab == 0)
00989 {
00990 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
00991 data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
00992 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
00993 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
00994 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
00995
00996 data->steps[tau][l].a11[0] = a11;
00997 data->steps[tau][l].a12[0] = a12;
00998 data->steps[tau][l].a21[0] = a21;
00999 data->steps[tau][l].a22[0] = a22;
01000 data->steps[tau][l].g[0] = gam[plength*l+1+1];
01001 data->steps[tau][l].stable = true;
01002 }
01003 else
01004 {
01005
01006 degree_stab = degree*(2*l+1);
01007 nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01008
01009
01010 nfft_free(a11);
01011 nfft_free(a12);
01012 nfft_free(a21);
01013 nfft_free(a22);
01014
01015 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01016 data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01017 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01018 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01019 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01020
01021 plength_stab = N_stab;
01022
01023 if (set->flags & FPT_AL_SYMMETRY)
01024 {
01025 if (m <= 1)
01026 {
01027
01028 clength_1 = plength_stab;
01029 clength_2 = plength_stab;
01030
01031 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01032 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01033 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01034 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01035
01036 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01037 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01038 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01039 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01040 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01041 }
01042 else
01043 {
01044 clength = plength_stab/2;
01045 if (m%2 == 0)
01046 {
01047 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01048 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01049 a21 = 0;
01050 a22 = 0;
01051 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01052 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01053 degree_stab-2, calpha, cbeta, cgamma);
01054 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01055 degree_stab-1, calpha, cbeta, cgamma);
01056 }
01057 else
01058 {
01059 a11 = 0;
01060 a12 = 0;
01061 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01062 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01063 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01064 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01065 degree_stab-1,calpha, cbeta, cgamma);
01066 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01067 degree_stab+0, calpha, cbeta, cgamma);
01068 }
01069 }
01070 }
01071 else
01072 {
01073 clength_1 = plength_stab;
01074 clength_2 = plength_stab;
01075 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01076 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01077 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01078 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01079 calpha = &(alpha[2]);
01080 cbeta = &(beta[2]);
01081 cgamma = &(gam[2]);
01082 calpha--;
01083 cbeta--;
01084 cgamma--;
01085 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01086 calpha, cbeta, cgamma);
01087 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01088 calpha, cbeta, cgamma);
01089
01090 }
01091 data->steps[tau][l].a11[0] = a11;
01092 data->steps[tau][l].a12[0] = a12;
01093 data->steps[tau][l].a21[0] = a21;
01094 data->steps[tau][l].a22[0] = a22;
01095
01096 data->steps[tau][l].g[0] = gam[1+1];
01097 data->steps[tau][l].stable = false;
01098 data->steps[tau][l].ts = t_stab;
01099 data->steps[tau][l].Ns = N_stab;
01100 }
01101 }
01103 plength = plength << 1;
01104 }
01105 }
01106
01107 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01108 {
01109
01110 if (set->flags & FPT_PERSISTENT_DATA)
01111 {
01112 data->alpha = (double*) alpha;
01113 data->beta = (double*) beta;
01114 data->gamma = (double*) gam;
01115 }
01116 else
01117 {
01118 data->alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01119 data->beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01120 data->gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01121 memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
01122 memcpy(data->beta,beta,(set->N+1)*sizeof(double));
01123 memcpy(data->gamma,gam,(set->N+1)*sizeof(double));
01124 }
01125 }
01126 }
01127
01128 void dpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01129 const int k_end, const unsigned int flags)
01130 {
01131 int j;
01132 fpt_data *data = &(set->dpt[m]);
01133 int Nk;
01134 int tk;
01135 double norm;
01136
01137 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01138 norm = 2.0/(Nk<<1);
01139
01140 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01141 {
01142 return;
01143 }
01144
01145 if (flags & FPT_FUNCTION_VALUES)
01146 {
01147
01148 for (j = 0; j <= k_end; j++)
01149 {
01150 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01151 }
01152
01153 memset(set->result,0U,data->k_start*sizeof(double _Complex));
01154 memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01155
01156
01157
01158
01159 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01160 y, &data->alpha[1], &data->beta[1], &data->gamma[1], data->gamma_m1);
01161 }
01162 else
01163 {
01164 memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01165 memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01166
01167 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01168 set->result, &data->alpha[1], &data->beta[1], &data->gamma[1],
01169 data->gamma_m1);
01170
01171 fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01172 (double*)set->result);
01173
01174 set->result[0] *= 0.5;
01175 for (j = 0; j < Nk; j++)
01176 {
01177 set->result[j] *= norm;
01178 }
01179
01180 memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01181 }
01182 }
01183
01184 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01185 const int k_end, const unsigned int flags)
01186 {
01187
01188 fpt_data *data = &(set->dpt[m]);
01190 int Nk;
01192 int tk;
01194 int k_start_tilde;
01196 int k_end_tilde;
01197
01199 int tau;
01201 int firstl;
01203 int lastl;
01205 int l;
01207 int plength;
01209 int plength_stab;
01210 int t_stab;
01212 fpt_step *step;
01214 fftw_plan plan = 0;
01215 int length = k_end+1;
01216 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01217
01219 int k;
01220
01221 double _Complex *work_ptr;
01222 const double _Complex *x_ptr;
01223
01224
01225 if (k_end < FPT_BREAK_EVEN)
01226 {
01227
01228 dpt_trafo(set, m, x, y, k_end, flags);
01229 return;
01230 }
01231
01232 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01233 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01234 k_end_tilde = K_END_TILDE(k_end,Nk);
01235
01236
01237 if (set->flags & FPT_NO_FAST_ALGORITHM)
01238 return;
01239
01240 if (flags & FPT_FUNCTION_VALUES)
01241 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01242 (double*)set->work, NULL, 2, 1, kinds, 0U);
01243
01244
01245 memset(set->result,0U,2*Nk*sizeof(double _Complex));
01246
01247
01248
01249
01250 memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01251
01252 work_ptr = &set->work[2*data->k_start];
01253 x_ptr = x;
01254
01255 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01256 {
01257 *work_ptr++ = *x_ptr++;
01258 *work_ptr++ = K(0.0);
01259 }
01260
01261
01262 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01263
01264
01265
01266 if (k_end == Nk)
01267 {
01268 set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01269 set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01270 set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01271 }
01272
01273
01274 plength = 4;
01275 for (tau = 1; tau < tk; tau++)
01276 {
01277
01278 firstl = FIRST_L(k_start_tilde,plength);
01279
01280 lastl = LAST_L(k_end_tilde,plength);
01281
01282
01283 for (l = firstl; l <= lastl; l++)
01284 {
01285
01286 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01287 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01288 memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01289 memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01290
01291
01292 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01293 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01294 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01295
01296
01297 step = &(data->steps[tau][l]);
01298
01299
01300 if (step->stable)
01301 {
01302
01303 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01304 {
01305
01306
01307
01308
01309
01310
01311
01312 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01313 step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01314 }
01315 else
01316 {
01317
01318 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01319 step->a21[0], step->a22[0], step->g[0], tau, set);
01320 }
01321
01322 if (step->g[0] != 0.0)
01323 {
01324 for (k = 0; k < plength; k++)
01325 {
01326 set->work[plength*2*l+k] += set->vec3[k];
01327 }
01328 }
01329 for (k = 0; k < plength; k++)
01330 {
01331 set->work[plength*(2*l+1)+k] += set->vec4[k];
01332 }
01333 }
01334 else
01335 {
01336
01337
01338
01339 plength_stab = step->Ns;
01340 t_stab = step->ts;
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01353 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01354
01355
01356
01357 if (set->flags & FPT_AL_SYMMETRY)
01358 {
01359 if (m <= 1)
01360 {
01361 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01362 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01363 }
01364 else if (m%2 == 0)
01365 {
01366
01367
01368 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01369 step->a21[0], step->a22[0],
01370 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01371
01372
01373 }
01374 else
01375 {
01376
01377
01378 fpt_do_step_symmetric_l(set->vec3, set->vec4,
01379 step->a11[0], step->a12[0],
01380 step->a21[0],
01381 step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01382
01383
01384 }
01385 }
01386 else
01387 {
01388 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01389 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01390 }
01391
01392 if (step->g[0] != 0.0)
01393 {
01394 for (k = 0; k < plength_stab; k++)
01395 {
01396 set->result[k] += set->vec3[k];
01397 }
01398 }
01399
01400 for (k = 0; k < plength_stab; k++)
01401 {
01402 set->result[Nk+k] += set->vec4[k];
01403 }
01404 }
01405 }
01406
01407 plength = plength<<1;
01408
01409
01410
01411
01412
01413
01414
01415
01416
01417 }
01418
01419
01420
01421 for (k = 0; k < 2*Nk; k++)
01422 {
01423 set->result[k] += set->work[k];
01424 }
01425
01426
01427
01428 y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01429 data->alpha_0*set->result[Nk+1]*0.5);
01430 y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01431 data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01432 y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01433 data->beta_0*set->result[Nk+k_end-1] +
01434 data->alpha_0*set->result[Nk+k_end-2]*0.5);
01435 y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01436 for (k = 2; k <= k_end-2; k++)
01437 {
01438 y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01439 data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01440 }
01441
01442 if (flags & FPT_FUNCTION_VALUES)
01443 {
01444 y[0] *= 2.0;
01445 fftw_execute_r2r(plan,(double*)y,(double*)y);
01446 fftw_destroy_plan(plan);
01447 for (k = 0; k <= k_end; k++)
01448 {
01449 y[k] *= 0.5;
01450 }
01451 }
01452 }
01453
01454 void dpt_transposed(fpt_set set, const int m, double _Complex *x,
01455 double _Complex *y, const int k_end, const unsigned int flags)
01456 {
01457 int j;
01458 fpt_data *data = &(set->dpt[m]);
01459 int Nk;
01460 int tk;
01461 double norm;
01462
01463 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01464 norm = 2.0/(Nk<<1);
01465
01466 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01467 {
01468 return;
01469 }
01470
01471 if (flags & FPT_FUNCTION_VALUES)
01472 {
01473 for (j = 0; j <= k_end; j++)
01474 {
01475 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01476 }
01477
01478 eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01479 y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01480 data->gamma_m1);
01481
01482 memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01483 sizeof(double _Complex));
01484 }
01485 else
01486 {
01487 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01488 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01489
01490 for (j = 0; j < Nk; j++)
01491 {
01492 set->result[j] *= norm;
01493 }
01494
01495 fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01496 (double*)set->result);
01497
01498 eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01499 set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01500 data->gamma_m1);
01501
01502 memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01503 }
01504 }
01505
01506 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01507 double _Complex *y, const int k_end, const unsigned int flags)
01508 {
01509
01510 fpt_data *data = &(set->dpt[m]);
01512 int Nk;
01514 int tk;
01516 int k_start_tilde;
01518 int k_end_tilde;
01519
01521 int tau;
01523 int firstl;
01525 int lastl;
01527 int l;
01529 int plength;
01531 int plength_stab;
01533 fpt_step *step;
01535 fftw_plan plan;
01536 int length = k_end+1;
01537 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01539 int k;
01540 int t_stab;
01541
01542
01543 if (k_end < FPT_BREAK_EVEN)
01544 {
01545
01546 dpt_transposed(set, m, x, y, k_end, flags);
01547 return;
01548 }
01549
01550 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01551 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01552 k_end_tilde = K_END_TILDE(k_end,Nk);
01553
01554
01555 if (set->flags & FPT_NO_FAST_ALGORITHM)
01556 {
01557 return;
01558 }
01559
01560 if (flags & FPT_FUNCTION_VALUES)
01561 {
01562 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01563 (double*)set->work, NULL, 2, 1, kinds, 0U);
01564 fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01565 fftw_destroy_plan(plan);
01566 for (k = 0; k <= k_end; k++)
01567 {
01568 set->result[k] *= 0.5;
01569 }
01570 }
01571 else
01572 {
01573 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01574 }
01575
01576
01577 memset(set->work,0U,2*Nk*sizeof(double _Complex));
01578
01579
01580 for (k = 0; k <= k_end; k++)
01581 {
01582 set->work[k] = data->gamma_m1*set->result[k];
01583 }
01584
01585
01586 set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01587 data->alpha_0*set->result[1]);
01588 for (k = 1; k < k_end; k++)
01589 {
01590 set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01591 data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01592 }
01593 if (k_end<Nk)
01594 {
01595 memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01596 }
01597
01599 memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01600
01601
01602 plength = Nk;
01603 for (tau = tk-1; tau >= 1; tau--)
01604 {
01605
01606 firstl = FIRST_L(k_start_tilde,plength);
01607
01608 lastl = LAST_L(k_end_tilde,plength);
01609
01610
01611 for (l = firstl; l <= lastl; l++)
01612 {
01613
01614 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01615 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01616
01617 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01618 (plength/2)*sizeof(double _Complex));
01619
01620
01621 step = &(data->steps[tau][l]);
01622
01623
01624 if (step->stable)
01625 {
01626 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01627 {
01628
01629 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01630 step->a21[0], step->a22[0], step->g[0], tau, set);
01631 }
01632 else
01633 {
01634
01635 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01636 step->a21[0], step->a22[0], step->g[0], tau, set);
01637 }
01638 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01639
01640 for (k = 0; k < plength; k++)
01641 {
01642 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01643 }
01644 }
01645 else
01646 {
01647
01648 plength_stab = step->Ns;
01649 t_stab = step->ts;
01650
01651 memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01652 memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01653
01654
01655 if (set->flags & FPT_AL_SYMMETRY)
01656 {
01657 if (m <= 1)
01658 {
01659 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01660 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01661 }
01662 else if (m%2 == 0)
01663 {
01664 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01665 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01666 }
01667 else
01668 {
01669 fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01670 step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01671 }
01672 }
01673 else
01674 {
01675 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01676 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01677 }
01678
01679 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01680
01681 for (k = 0; k < plength; k++)
01682 {
01683 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01684 }
01685 }
01686 }
01687
01688 plength = plength>>1;
01689 }
01690
01691
01692 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01693 {
01694 x[k] = set->work[2*(data->k_start+k)];
01695 }
01696 if (k_end == Nk)
01697 {
01698 x[Nk-data->k_start] =
01699 data->gammaN[tk-2]*set->work[2*(Nk-2)]
01700 + data->betaN[tk-2] *set->work[2*(Nk-1)]
01701 + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01702 }
01703 }
01704
01705 void fpt_finalize(fpt_set set)
01706 {
01707 int tau;
01708 int l;
01709 int m;
01710 fpt_data *data;
01711 int k_start_tilde;
01712 int N_tilde;
01713 int firstl, lastl;
01714 int plength;
01715 const int M = set->M;
01716
01717
01718 for (m = 0; m < M; m++)
01719 {
01720
01721 data = &set->dpt[m];
01722 if (data->steps != (fpt_step**)NULL)
01723 {
01724 nfft_free(data->alphaN);
01725 nfft_free(data->betaN);
01726 nfft_free(data->gammaN);
01727 data->alphaN = NULL;
01728 data->betaN = NULL;
01729 data->gammaN = NULL;
01730
01731
01732 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01733 );
01734 N_tilde = N_TILDE(set->N);
01735 plength = 4;
01736 for (tau = 1; tau < set->t; tau++)
01737 {
01738
01739 firstl = FIRST_L(k_start_tilde,plength);
01740
01741 lastl = LAST_L(N_tilde,plength);
01742
01743
01744 for (l = firstl; l <= lastl; l++)
01745 {
01746
01747 nfft_free(data->steps[tau][l].a11[0]);
01748 nfft_free(data->steps[tau][l].a12[0]);
01749 nfft_free(data->steps[tau][l].a21[0]);
01750 nfft_free(data->steps[tau][l].a22[0]);
01751 data->steps[tau][l].a11[0] = NULL;
01752 data->steps[tau][l].a12[0] = NULL;
01753 data->steps[tau][l].a21[0] = NULL;
01754 data->steps[tau][l].a22[0] = NULL;
01755
01756 nfft_free(data->steps[tau][l].a11);
01757 nfft_free(data->steps[tau][l].a12);
01758 nfft_free(data->steps[tau][l].a21);
01759 nfft_free(data->steps[tau][l].a22);
01760 nfft_free(data->steps[tau][l].g);
01761 data->steps[tau][l].a11 = NULL;
01762 data->steps[tau][l].a12 = NULL;
01763 data->steps[tau][l].a21 = NULL;
01764 data->steps[tau][l].a22 = NULL;
01765 data->steps[tau][l].g = NULL;
01766 }
01767
01768 nfft_free(data->steps[tau]);
01769 data->steps[tau] = NULL;
01770
01771 plength = plength<<1;
01772 }
01773
01774 nfft_free(data->steps);
01775 data->steps = NULL;
01776 }
01777
01778 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01779 {
01780
01781
01782 if (!(set->flags & FPT_PERSISTENT_DATA))
01783 {
01784 nfft_free(data->alpha);
01785 nfft_free(data->beta);
01786 nfft_free(data->gamma);
01787 }
01788 data->alpha = NULL;
01789 data->beta = NULL;
01790 data->gamma = NULL;
01791 }
01792 }
01793
01794
01795 nfft_free(set->dpt);
01796 set->dpt = NULL;
01797
01798 for (tau = 1; tau < set->t+1; tau++)
01799 {
01800 nfft_free(set->xcvecs[tau-1]);
01801 set->xcvecs[tau-1] = NULL;
01802 }
01803 nfft_free(set->xcvecs);
01804 set->xcvecs = NULL;
01805
01806
01807 nfft_free(set->work);
01808 nfft_free(set->result);
01809
01810
01811 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01812 {
01813
01814 nfft_free(set->vec3);
01815 nfft_free(set->vec4);
01816 nfft_free(set->z);
01817 set->work = NULL;
01818 set->result = NULL;
01819 set->vec3 = NULL;
01820 set->vec4 = NULL;
01821 set->z = NULL;
01822
01823
01824 for(tau = 0; tau < set->t; tau++)
01825 {
01826 fftw_destroy_plan(set->plans_dct3[tau]);
01827 fftw_destroy_plan(set->plans_dct2[tau]);
01828 set->plans_dct3[tau] = NULL;
01829 set->plans_dct2[tau] = NULL;
01830 }
01831
01832 nfft_free(set->plans_dct3);
01833 nfft_free(set->plans_dct2);
01834 set->plans_dct3 = NULL;
01835 set->plans_dct2 = NULL;
01836 }
01837
01838 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01839 {
01840
01841 nfft_free(set->xc_slow);
01842 set->xc_slow = NULL;
01843 nfft_free(set->temp);
01844 set->temp = NULL;
01845 }
01846
01847
01848 nfft_free(set);
01849 }