00001
00007
00008 #include <math.h>
00009 #include <string.h>
00010 #include <stdlib.h>
00011 #include <stdbool.h>
00012
00013
00014 #include "nfft3.h"
00015
00016
00017 #include "util.h"
00018
00019
00020
00022 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00023
00024 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00025
00029 #define FIRST_L(x,y) ((int)floor((x)/(double)y))
00030
00034 #define LAST_L(x,y) ((int)ceil(((x)+1)/(double)y)-1)
00035
00036 #define N_TILDE(y) (y-1)
00037
00038 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00039
00040
00041 #ifdef TEST_STAB
00042 #define NFFT_MAX(a,b) ((a>b)?(a):(b))
00043 #endif
00044
00045 #define FPT_BREAK_EVEN 4
00046
00050 typedef struct fpt_step_
00051 {
00052 bool stable;
00055 int N_stab;
00056 int t_stab;
00057 double **a11,**a12,**a21,**a22;
00058 double *gamma;
00059 } fpt_step;
00060
00064 typedef struct fpt_data_
00065 {
00066 fpt_step **steps;
00067 int k_start;
00068 double *alphaN;
00069 double *betaN;
00070 double *gammaN;
00071 double alpha_0;
00072 double beta_0;
00073 double gamma_m1;
00074
00075 double *alpha;
00076 double *beta;
00077 double *gamma;
00078 } fpt_data;
00079
00083 typedef struct fpt_set_s_
00084 {
00085 int flags;
00086 int M;
00087 int N;
00089 int t;
00090 fpt_data *dpt;
00091 double **xcvecs;
00094 double *xc;
00095 double complex *temp;
00096 double complex *work;
00097 double complex *result;
00098 double complex *vec3;
00099 double complex *vec4;
00100 double complex *z;
00101 fftw_plan *plans_dct3;
00103 fftw_plan *plans_dct2;
00105 fftw_r2r_kind *kinds;
00107 fftw_r2r_kind *kindsr;
00110 int *lengths;
00112
00113 double *xc_slow;
00114 } fpt_set_s;
00115
00116 inline void abuvxpwy(double a, double b, double complex* u, double complex* x, double* v,
00117 double complex* y, double* w, int n)
00118 {
00119 int l;
00120 double complex *u_ptr, *x_ptr, *y_ptr;
00121 double *v_ptr, *w_ptr;
00122
00123 u_ptr = u;
00124 x_ptr = x;
00125 v_ptr = v;
00126 y_ptr = y;
00127 w_ptr = w;
00128
00129 for (l = 0; l < n; l++)
00130 {
00131 *u++ = a * (b * (*v++) * (*x++) + (*w++) * (*y++));
00132 }
00133 }
00134
00135 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00136 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00137 double complex* y, double* w, int n) \
00138 { \
00139 int l; \
00140 double complex *u_ptr, *x_ptr, *y_ptr; \
00141 double *v_ptr, *w_ptr; \
00142 \
00143 u_ptr = u; \
00144 x_ptr = x; \
00145 v_ptr = v; \
00146 y_ptr = y; \
00147 w_ptr = w; \
00148 \
00149 for (l = 0; l < n/2; l++) \
00150 { \
00151 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00152 } \
00153 v_ptr--; \
00154 w_ptr--; \
00155 for (l = 0; l < n/2; l++) \
00156 { \
00157 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00158 } \
00159 }
00160
00161 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00162 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00163
00164 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00165 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00166 double complex* y, double* w, int n) \
00167 { \
00168 int l; \
00169 double complex *u_ptr, *x_ptr, *y_ptr; \
00170 double *v_ptr, *w_ptr; \
00171 \
00172 u_ptr = u; \
00173 x_ptr = x; \
00174 v_ptr = v; \
00175 y_ptr = y; \
00176 w_ptr = w; \
00177 \
00178 for (l = 0; l < n/2; l++) \
00179 { \
00180 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00181 } \
00182 v_ptr--; \
00183 \
00184 for (l = 0; l < n/2; l++) \
00185 { \
00186 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00187 } \
00188 }
00189
00190 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00191 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00192
00193 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00194 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00195 double complex* y, double* w, int n) \
00196 { \
00197 int l; \
00198 double complex *u_ptr, *x_ptr, *y_ptr; \
00199 double *v_ptr, *w_ptr; \
00200 \
00201 u_ptr = u; \
00202 x_ptr = x; \
00203 v_ptr = v; \
00204 y_ptr = y; \
00205 w_ptr = w; \
00206 \
00207 for (l = 0; l < n/2; l++) \
00208 { \
00209 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00210 } \
00211 \
00212 w_ptr--; \
00213 for (l = 0; l < n/2; l++) \
00214 { \
00215 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + S1 * (*w_ptr--) * (*y_ptr++)); \
00216 } \
00217 }
00218
00219 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00220 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00221
00222 inline void auvxpwy(double a, double complex* u, double complex* x, double* v, double complex* y,
00223 double* w, int n)
00224 {
00225 int l;
00226 double complex *u_ptr, *x_ptr, *y_ptr;
00227 double *v_ptr, *w_ptr;
00228
00229 u_ptr = u;
00230 x_ptr = x;
00231 v_ptr = v;
00232 y_ptr = y;
00233 w_ptr = w;
00234
00235 for (l = 0; l < n; l++)
00236 {
00237
00238 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00239 }
00240
00241 }
00242
00243 #define AUVXPWY_SYMMETRIC(NAME,S1,S2) \
00244 inline void NAME(double a, double complex* u, double complex* x, double* v, double complex* y, \
00245 double* w, int n) \
00246 { \
00247 int l; \
00248 double complex *u_ptr, *x_ptr, *y_ptr; \
00249 double *v_ptr, *w_ptr; \
00250 \
00251 u_ptr = u; \
00252 x_ptr = x; \
00253 v_ptr = v; \
00254 y_ptr = y; \
00255 w_ptr = w; \
00256 \
00257 \
00258 \
00259 \
00260 \
00261 \
00262 \
00263 \
00264 for (l = 0; l < n/2; l++) \
00265 { \
00266 \
00267 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00268 } \
00269 v_ptr--; \
00270 w_ptr--; \
00271 for (l = 0; l < n/2; l++) \
00272 { \
00273 \
00274 *u_ptr++ = a * (S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00275 } \
00276 \
00277 }
00278
00279 AUVXPWY_SYMMETRIC(auvxpwy_symmetric,1.0,-1.0)
00280
00281 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00282 inline void NAME(double complex *a, double complex *b, double *a11, double *a12, \
00283 double *a21, double *a22, double gamma, int tau, fpt_set set) \
00284 { \ \
00286 int length = 1<<(tau+1); \ \
00288 double norm = 1.0/(length<<1); \
00289 \
00290 \
00291 a[0] *= 2.0; \
00292 b[0] *= 2.0; \
00293 \
00294 \
00295 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00296 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00297 \
00298 \
00299 \
00300 \
00301 \
00302 \
00303 \
00304 \
00305 if (gamma == 0.0) \
00306 { \
00307 \
00308 \
00309 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00310 } \
00311 else \
00312 { \
00313 \
00314 \
00315 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00316 M1_FUNCTION(norm*gamma,a,a,a11,b,a12,length); \
00317 memcpy(b,set->z,length*sizeof(double complex)); \
00318 \
00319 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00320 \
00321 a[0] *= 0.5; \
00322 } \
00323 \
00324 \
00325 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00326 \
00327 b[0] *= 0.5; \
00328 }
00329
00330 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00331 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00332 FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00333 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)
00334
00335 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00336 inline void NAME(double complex *a, double complex *b, double *a11, \
00337 double *a12, double *a21, double *a22, double gamma, int tau, fpt_set set) \
00338 { \ \
00340 int length = 1<<(tau+1); \ \
00342 double norm = 1.0/(length<<1); \
00343 \
00344 \
00345 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00346 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00347 \
00348 \
00349 M1_FUNCTION(norm,gamma,set->z,a,a11,b,a21,length); \
00350 M2_FUNCTION(norm,gamma,b,a,a12,b,a22,length); \
00351 memcpy(a,set->z,length*sizeof(double complex)); \
00352 \
00353 \
00354 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00355 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00356 }
00357
00358 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed,abuvxpwy,abuvxpwy)
00359 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00360 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)
00361 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)
00362
00363 void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00364 const double *beta, const double *gamma)
00365 {
00366
00367
00368
00369 int i,j;
00370 double a,b,x_val_act,a_old;
00371 const double *x_act;
00372 double *y_act;
00373 const double *alpha_act, *beta_act, *gamma_act;
00374
00375
00376 x_act = x;
00377 y_act = y;
00378 for (i = 0; i < size; i++)
00379 {
00380 a = 1.0;
00381 b = 0.0;
00382 x_val_act = *x_act;
00383
00384 if (k == 0)
00385 {
00386 *y_act = 1.0;
00387 }
00388 else
00389 {
00390 alpha_act = &(alpha[k]);
00391 beta_act = &(beta[k]);
00392 gamma_act = &(gamma[k]);
00393 for (j = k; j > 1; j--)
00394 {
00395 a_old = a;
00396 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00397 b = a_old*(*gamma_act);
00398 alpha_act--;
00399 beta_act--;
00400 gamma_act--;
00401 }
00402 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00403 }
00404 x_act++;
00405 y_act++;
00406 }
00407 }
00408
00409 int eval_clenshaw_thresh(const double *x, double *y, int size, int k,
00410 const double *alpha, const double *beta, const double *gamma, const
00411 double threshold)
00412 {
00413
00414
00415
00416 int i,j;
00417 double a,b,x_val_act,a_old;
00418 const double *x_act;
00419 double *y_act;
00420 const double *alpha_act, *beta_act, *gamma_act;
00421
00422
00423 x_act = x;
00424 y_act = y;
00425 for (i = 0; i < size; i++)
00426 {
00427 a = 1.0;
00428 b = 0.0;
00429 x_val_act = *x_act;
00430
00431 if (k == 0)
00432 {
00433 *y_act = 1.0;
00434 }
00435 else
00436 {
00437 alpha_act = &(alpha[k]);
00438 beta_act = &(beta[k]);
00439 gamma_act = &(gamma[k]);
00440 for (j = k; j > 1; j--)
00441 {
00442 a_old = a;
00443 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00444 b = a_old*(*gamma_act);
00445 alpha_act--;
00446 beta_act--;
00447 gamma_act--;
00448 }
00449 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00450 if (fabs(*y_act) > threshold)
00451 {
00452 return 1;
00453 }
00454 }
00455 x_act++;
00456 y_act++;
00457 }
00458 return 0;
00459 }
00460
00479 void eval_sum_clenshaw(int N, int M, double complex* a, double *x, double complex *y,
00480 double complex *temp, double *alpha, double *beta, double *gamma, double lambda)
00481 {
00482 int j,k;
00483 double complex* it1 = temp;
00484 double complex* it2 = y;
00485 double complex aux;
00486
00487
00488 for (j = 0; j <= M; j++)
00489 {
00490 it2[j] = a[N];
00491 }
00492
00493 if (N > 0)
00494 {
00495 for (j = 0; j <= M; j++)
00496 {
00497 it1[j] = a[N-1];
00498 }
00499
00500
00501 for (k = N; k > 1; k--)
00502 {
00503
00504 for (j = 0; j <= M; j++)
00505 {
00506 aux = a[k-2] + it2[j] * gamma[k-1];
00507 it2[j] = it1[j] + it2[j] * (alpha[k-1] * x[j] + beta[k-1]);
00508 it1[j] = aux;
00509 }
00510 }
00511
00512
00513
00514 for (j = 0; j <= M; j++)
00515 {
00516 it2[j] = it1[j] + it2[j] * (alpha[0] * x[j] + beta[0]);
00517 }
00518 }
00519
00520
00521 for (j = 0; j <= M; j++)
00522 {
00523 y[j] = lambda * it2[j];
00524 }
00525 }
00526
00545 void eval_sum_clenshaw_transposed(int N, int M, double complex* a, double *x,
00546 double complex *y, double complex *temp, double *alpha, double *beta, double *gamma,
00547 double lambda)
00548 {
00549 int j,k;
00550 double complex* it1 = temp;
00551 double complex* it2 = y;
00552 double complex aux;
00553
00554
00555 a[0] = 0.0;
00556 for (j = 0; j <= M; j++)
00557 {
00558 it2[j] = lambda * y[j];
00559 a[0] += it2[j];
00560 }
00561
00562 if (N > 0)
00563 {
00564
00565 a[1] = 0.0;
00566 for (j = 0; j <= M; j++)
00567 {
00568 it1[j] = it2[j];
00569 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00570 a[1] += it2[j];
00571 }
00572
00573 for (k = 2; k <= N; k++)
00574 {
00575 a[k] = 0.0;
00576 for (j = 0; j <= M; j++)
00577 {
00578 aux = it1[j];
00579 it1[j] = it2[j];
00580 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00581 a[k] += it2[j];
00582 }
00583 }
00584 }
00585 }
00586
00587
00588 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00589 {
00591 int plength;
00593 int tau;
00595 int m;
00596 int k;
00597
00598
00599 fpt_set_s *set = (fpt_set_s*)malloc(sizeof(fpt_set_s));
00600
00601
00602 set->flags = flags;
00603
00604
00605
00606 set->M = M;
00607 set->t = t;
00608 set->N = 1<<t;
00609
00610
00611 set->dpt = (fpt_data*) malloc((M+1)*sizeof(fpt_data));
00612
00613
00614 for (m = 0; m <= set->M; m++)
00615 {
00616 set->dpt[m].steps = (fpt_step**) NULL;
00617 }
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627 set->xcvecs = (double**) malloc((set->t)*sizeof(double*));
00628
00629
00630 plength = 4;
00631 for (tau = 1; tau < t+1; tau++)
00632 {
00633
00634 set->xcvecs[tau-1] = (double*) malloc(plength*sizeof(double));
00635 for (k = 0; k < plength; k++)
00636 {
00637 set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00638 }
00639 plength = plength << 1;
00640 }
00641
00643 set->work = (double complex*) malloc((2*set->N)*sizeof(double complex));
00644 set->result = (double complex*) malloc((2*set->N)*sizeof(double complex));
00645
00646
00647 if (set->flags & FPT_NO_FAST_ALGORITHM)
00648 {
00649 }
00650 else
00651 {
00653 set->vec3 = (double complex*) malloc(set->N*sizeof(double complex));
00654 set->vec4 = (double complex*) malloc(set->N*sizeof(double complex));
00655 set->z = (double complex*) malloc(set->N*sizeof(double complex));
00656
00658 set->plans_dct3 = (fftw_plan*) fftw_malloc(sizeof(fftw_plan)*(set->t));
00659 set->plans_dct2 = (fftw_plan*) fftw_malloc(sizeof(fftw_plan)*(set->t));
00660 set->kinds = (fftw_r2r_kind*) malloc(2*sizeof(fftw_r2r_kind));
00661 set->kinds[0] = FFTW_REDFT01;
00662 set->kinds[1] = FFTW_REDFT01;
00663 set->kindsr = (fftw_r2r_kind*) malloc(2*sizeof(fftw_r2r_kind));
00664 set->kindsr[0] = FFTW_REDFT10;
00665 set->kindsr[1] = FFTW_REDFT10;
00666 set->lengths = (int*) malloc((set->t)*sizeof(int));
00667 for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
00668 {
00669 set->lengths[tau] = plength;
00670 set->plans_dct3[tau] =
00671 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00672 2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00673 0);
00674 set->plans_dct2[tau] =
00675 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00676 2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00677 0);
00678 }
00679 free(set->lengths);
00680 free(set->kinds);
00681 free(set->kindsr);
00682 set->lengths = NULL;
00683 set->kinds = NULL;
00684 set->kindsr = NULL;
00685 }
00686
00687 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
00688 {
00689 }
00690 else
00691 {
00692 set->xc_slow = (double*) malloc((set->N+1)*sizeof(double));
00693 set->temp = (double complex*) calloc((set->N+1),sizeof(double complex));
00694 }
00695
00696
00697 return set;
00698 }
00699
00700 void fpt_precompute(fpt_set set, const int m, const double *alpha,
00701 const double *beta, const double *gamma, int k_start,
00702 const double threshold)
00703 {
00704
00705 int tau;
00706 int l;
00707 int plength;
00709 int degree;
00711 int firstl;
00712 int lastl;
00713 int plength_stab;
00715 int degree_stab;
00717 double *a11;
00719 double *a12;
00721 double *a21;
00723 double *a22;
00725 const double *calpha;
00726 const double *cbeta;
00727 const double *cgamma;
00728 int needstab = 0;
00729 int k_start_tilde;
00730 int N_tilde;
00731 int clength;
00732 int clength_1;
00733 int clength_2;
00734 int t_stab, N_stab;
00735 int ell;
00736
00737
00738
00739
00740 fpt_data *data;
00741
00742
00743
00744
00745
00746 data = &(set->dpt[m]);
00747
00748
00749 if (data->steps != NULL)
00750 {
00751 return;
00752 }
00753
00754
00755 data->k_start = k_start;
00756
00757
00758 if (set->flags & FPT_NO_FAST_ALGORITHM)
00759 {
00760 }
00761 else
00762 {
00763
00764 data->alphaN = (double*) malloc((set->t-1)*sizeof(double complex));
00765 data->betaN = (double*) malloc((set->t-1)*sizeof(double complex));
00766 data->gammaN = (double*) malloc((set->t-1)*sizeof(double complex));
00767
00768 for (tau = 2; tau <= set->t; tau++)
00769 {
00770
00771 data->alphaN[tau-2] = alpha[1<<tau];
00772 data->betaN[tau-2] = beta[1<<tau];
00773 data->gammaN[tau-2] = gamma[1<<tau];
00774 }
00775
00776 data->alpha_0 = alpha[1];
00777 data->beta_0 = beta[1];
00778 data->gamma_m1 = gamma[0];
00779
00780 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00781 );
00782 N_tilde = N_TILDE(set->N);
00783
00784
00785 data->steps = (fpt_step**) malloc(sizeof(fpt_step*)*set->t);
00786
00787
00788 plength = 4;
00789 for (tau = 1; tau < set->t; tau++)
00790 {
00791
00792 degree = plength>>1;
00793
00794 firstl = FIRST_L(k_start_tilde,plength);
00795
00796 lastl = LAST_L(N_tilde,plength);
00797
00798
00799
00800 data->steps[tau] = (fpt_step*) fftw_malloc(sizeof(fpt_step)
00801 * (lastl+1));
00802
00803
00804 for (l = firstl; l <= lastl; l++)
00805 {
00806 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00807 {
00808
00809
00810 clength = plength/2;
00811 }
00812 else
00813 {
00814 clength = plength;
00815 }
00816
00817
00818 a11 = (double*) fftw_malloc(sizeof(double)*clength);
00819 a12 = (double*) fftw_malloc(sizeof(double)*clength);
00820 a21 = (double*) fftw_malloc(sizeof(double)*clength);
00821 a22 = (double*) fftw_malloc(sizeof(double)*clength);
00822
00823
00824
00825
00826
00827 calpha = &(alpha[plength*l+1+1]);
00828 cbeta = &(beta[plength*l+1+1]);
00829 cgamma = &(gamma[plength*l+1+1]);
00830
00831 if (set->flags & FPT_NO_STABILIZATION)
00832 {
00833
00834 eval_clenshaw(set->xcvecs[tau-1], a11, clength, degree-2, calpha, cbeta,
00835 cgamma);
00836 eval_clenshaw(set->xcvecs[tau-1], a12, clength, degree-1, calpha, cbeta,
00837 cgamma);
00838 calpha--;
00839 cbeta--;
00840 cgamma--;
00841 eval_clenshaw(set->xcvecs[tau-1], a21, clength, degree-1, calpha, cbeta,
00842 cgamma);
00843 eval_clenshaw(set->xcvecs[tau-1], a22, clength, degree, calpha, cbeta,
00844 cgamma);
00845 needstab = 0;
00846 }
00847 else
00848 {
00849 needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a11, clength, degree-2,
00850 calpha, cbeta, cgamma, threshold);
00851 if (needstab == 0)
00852 {
00853
00854 needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a12, clength, degree-1,
00855 calpha, cbeta, cgamma, threshold);
00856 if (needstab == 0)
00857 {
00858 calpha--;
00859 cbeta--;
00860 cgamma--;
00861
00862 needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a21, clength,
00863 degree-1, calpha, cbeta, cgamma, threshold);
00864 if (needstab == 0)
00865 {
00866
00867 needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a22, clength,
00868 degree, calpha, cbeta, cgamma, threshold);
00869 }
00870 }
00871 }
00872 }
00873
00874
00875 if (needstab == 0)
00876 {
00877 data->steps[tau][l].a11 = (double**) fftw_malloc(sizeof(double*));
00878 data->steps[tau][l].a12 = (double**) fftw_malloc(sizeof(double*));
00879 data->steps[tau][l].a21 = (double**) fftw_malloc(sizeof(double*));
00880 data->steps[tau][l].a22 = (double**) fftw_malloc(sizeof(double*));
00881 data->steps[tau][l].gamma = (double*) fftw_malloc(sizeof(double));
00882
00883 data->steps[tau][l].a11[0] = a11;
00884 data->steps[tau][l].a12[0] = a12;
00885 data->steps[tau][l].a21[0] = a21;
00886 data->steps[tau][l].a22[0] = a22;
00887 data->steps[tau][l].gamma[0] = gamma[plength*l+1+1];
00888 data->steps[tau][l].stable = true;
00889 }
00890 else
00891 {
00892
00893
00894 degree_stab = degree*(2*l+1);
00895 nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
00896
00897
00898
00899
00900 fftw_free(a11);
00901 fftw_free(a12);
00902 fftw_free(a21);
00903 fftw_free(a22);
00904
00905 data->steps[tau][l].a11 = (double**) fftw_malloc(sizeof(double*));
00906 data->steps[tau][l].a12 = (double**) fftw_malloc(sizeof(double*));
00907 data->steps[tau][l].a21 = (double**) fftw_malloc(sizeof(double*));
00908 data->steps[tau][l].a22 = (double**) fftw_malloc(sizeof(double*));
00909 data->steps[tau][l].gamma = (double*) fftw_malloc(sizeof(double));
00910
00911 plength_stab = N_stab;
00912
00913 if (set->flags & FPT_AL_SYMMETRY)
00914 {
00915 if (m <= 1)
00916 {
00917 clength_1 = plength_stab/2;
00918 clength_2 = plength_stab/2;
00919 }
00920 else if (m%2 == 0)
00921 {
00922 clength_1 = plength_stab/2;
00923 clength_2 = plength_stab;
00924 }
00925 else
00926 {
00927 clength_1 = plength_stab;
00928 clength_2 = plength_stab/2;
00929 }
00930 }
00931 else
00932 {
00933 clength_1 = plength_stab;
00934 clength_2 = plength_stab;
00935 }
00936
00937
00938
00939 a11 = (double*) fftw_malloc(sizeof(double)*clength_1);
00940 a12 = (double*) fftw_malloc(sizeof(double)*clength_1);
00941 a21 = (double*) fftw_malloc(sizeof(double)*clength_2);
00942 a22 = (double*) fftw_malloc(sizeof(double)*clength_2);
00943
00944
00945 calpha = &(alpha[2]);
00946 cbeta = &(beta[2]);
00947 cgamma = &(gamma[2]);
00948
00949 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength_1, degree_stab-2,
00950 calpha, cbeta, cgamma);
00951
00952 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength_1, degree_stab-1,
00953 calpha, cbeta, cgamma);
00954 calpha--;
00955 cbeta--;
00956 cgamma--;
00957
00958 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength_2, degree_stab-1,
00959 calpha, cbeta, cgamma);
00960
00961 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength_2, degree_stab+0,
00962 calpha, cbeta, cgamma);
00963
00964 data->steps[tau][l].a11[0] = a11;
00965 data->steps[tau][l].a12[0] = a12;
00966 data->steps[tau][l].a21[0] = a21;
00967 data->steps[tau][l].a22[0] = a22;
00968 data->steps[tau][l].gamma[0] = gamma[1+1];
00969 data->steps[tau][l].stable = false;
00970 data->steps[tau][l].t_stab = t_stab;
00971 data->steps[tau][l].N_stab = N_stab;
00972 }
00973 }
00975 plength = plength << 1;
00976 }
00977 }
00978
00979 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
00980 {
00981 }
00982 else
00983 {
00984
00985 if (set->flags & FPT_PERSISTENT_DATA)
00986 {
00987 data->alpha = (double*) alpha;
00988 data->beta = (double*) beta;
00989 data->gamma = (double*) gamma;
00990 }
00991 else
00992 {
00993 data->alpha = (double*) malloc((set->N+1)*sizeof(double));
00994 data->beta = (double*) malloc((set->N+1)*sizeof(double));
00995 data->gamma = (double*) malloc((set->N+1)*sizeof(double));
00996 memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
00997 memcpy(data->beta,beta,(set->N+1)*sizeof(double));
00998 memcpy(data->gamma,gamma,(set->N+1)*sizeof(double));
00999 }
01000 }
01001 }
01002
01003 void dpt_trafo(fpt_set set, const int m, const double complex *x, double complex *y,
01004 const int k_end, const unsigned int flags)
01005 {
01006 int j;
01007 fpt_data *data = &(set->dpt[m]);
01008 int Nk;
01009 int tk;
01010 double norm;
01011
01012 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01013 norm = 2.0/(Nk<<1);
01014
01015 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01016 {
01017 return;
01018 }
01019
01020 if (flags & FPT_FUNCTION_VALUES)
01021 {
01022
01023 for (j = 0; j <= k_end; j++)
01024 {
01025 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01026 }
01027
01028 memset(set->result,0U,data->k_start*sizeof(double complex));
01029 memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double complex));
01030
01031 eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01032 y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01033 data->gamma_m1);
01034 }
01035 else
01036 {
01037 memset(set->temp,0U,data->k_start*sizeof(double complex));
01038 memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double complex));
01039
01040 eval_sum_clenshaw(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01041 set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01042 data->gamma_m1);
01043
01044 fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01045 (double*)set->result);
01046
01047 set->result[0] *= 0.5;
01048 for (j = 0; j < Nk; j++)
01049 {
01050 set->result[j] *= norm;
01051 }
01052
01053 memcpy(y,set->result,(k_end+1)*sizeof(double complex));
01054 }
01055 }
01056
01057 void fpt_trafo(fpt_set set, const int m, const double complex *x, double complex *y,
01058 const int k_end, const unsigned int flags)
01059 {
01060
01061 fpt_data *data = &(set->dpt[m]);
01063 int Nk;
01065 int tk;
01067 int k_start_tilde;
01069 int k_end_tilde;
01070
01072 int tau;
01074 int firstl;
01076 int lastl;
01078 int l;
01080 int plength;
01082 int plength_stab;
01083 int t_stab;
01085 fpt_step *step;
01087 fftw_plan plan;
01088 int length = k_end+1;
01089 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01090
01092 int k;
01093
01094 double complex *work_ptr;
01095 const double complex *x_ptr;
01096
01097
01098 if (k_end < FPT_BREAK_EVEN)
01099 {
01100
01101 dpt_trafo(set, m, x, y, k_end, flags);
01102 }
01103
01104 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01105 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01106 k_end_tilde = K_END_TILDE(k_end,Nk);
01107
01108
01109 if (set->flags & FPT_NO_FAST_ALGORITHM)
01110 {
01111 return;
01112 }
01113
01114 if (flags & FPT_FUNCTION_VALUES)
01115 {
01116 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01117 (double*)set->work, NULL, 2, 1, kinds, 0U);
01118 }
01119
01120
01121 memset(set->result,0U,2*Nk*sizeof(double complex));
01122
01123
01124
01125
01126 memset(set->work,0U,2*data->k_start*sizeof(double complex));
01127
01128 work_ptr = &set->work[2*data->k_start];
01129 x_ptr = x;
01130
01131 for (k = 0; k < k_end_tilde-data->k_start+1; k++)
01132 {
01133 *work_ptr++ = *x_ptr++;
01134 *work_ptr++ = 0;
01135 }
01136
01137
01138 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double complex));
01139
01140
01141
01142 if (k_end == Nk)
01143 {
01144 set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01145 set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01146 set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01147 }
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159 plength = 4;
01160 for (tau = 1; tau < tk; tau++)
01161 {
01162
01163 firstl = FIRST_L(k_start_tilde,plength);
01164
01165 lastl = LAST_L(k_end_tilde,plength);
01166
01167
01168 for (l = firstl; l <= lastl; l++)
01169 {
01170
01171 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double complex));
01172 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double complex));
01173 memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double complex));
01174 memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double complex));
01175
01176
01177 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double complex));
01178 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double complex));
01179 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double complex));
01180
01181
01182 step = &(data->steps[tau][l]);
01183
01184
01185 if (step->stable)
01186 {
01187
01188 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01189 {
01190
01191
01192
01193
01194
01195
01196
01197 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01198 step->a12[0], step->a21[0], step->a22[0], step->gamma[0], tau, set);
01199 }
01200 else
01201 {
01202
01203 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01204 step->a21[0], step->a22[0], step->gamma[0], tau, set);
01205 }
01206
01207 if (step->gamma[0] != 0.0)
01208 {
01209 for (k = 0; k < plength; k++)
01210 {
01211 set->work[plength*2*l+k] += set->vec3[k];
01212 }
01213 }
01214 for (k = 0; k < plength; k++)
01215 {
01216 set->work[plength*(2*l+1)+k] += set->vec4[k];
01217 }
01218 }
01219 else
01220 {
01221
01222
01223
01224 plength_stab = step->N_stab;
01225 t_stab = step->t_stab;
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double complex));
01238 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double complex));
01239
01240
01241
01242 if (set->flags & FPT_AL_SYMMETRY)
01243 {
01244 if (m <= 1)
01245 {
01246 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01247 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01248 }
01249 else if (m%2 == 0)
01250 {
01251 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01252 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01253 }
01254 else
01255 {
01256 fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01257 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01258 }
01259 }
01260 else
01261 {
01262 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01263 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01264 }
01265
01266 if (step->gamma[0] != 0.0)
01267 {
01268 for (k = 0; k < plength_stab; k++)
01269 {
01270 set->result[k] += set->vec3[k];
01271 }
01272 }
01273
01274 for (k = 0; k < plength_stab; k++)
01275 {
01276 set->result[Nk+k] += set->vec4[k];
01277 }
01278 }
01279 }
01280
01281 plength = plength<<1;
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291 }
01292
01293
01294
01295 for (k = 0; k < 2*Nk; k++)
01296 {
01297 set->result[k] += set->work[k];
01298 }
01299
01300
01301
01302 y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01303 data->alpha_0*set->result[Nk+1]*0.5);
01304 y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01305 data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01306 y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01307 data->beta_0*set->result[Nk+k_end-1] +
01308 data->alpha_0*set->result[Nk+k_end-2]*0.5);
01309 y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01310 for (k = 2; k <= k_end-2; k++)
01311 {
01312 y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01313 data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01314 }
01315
01316 if (flags & FPT_FUNCTION_VALUES)
01317 {
01318 y[0] *= 2.0;
01319 fftw_execute_r2r(plan,(double*)y,(double*)y);
01320 fftw_destroy_plan(plan);
01321 for (k = 0; k <= k_end; k++)
01322 {
01323 y[k] *= 0.5;
01324 }
01325 }
01326 }
01327
01328 void dpt_transposed(fpt_set set, const int m, double complex *x,
01329 double complex *y, const int k_end, const unsigned int flags)
01330 {
01331 int j;
01332 fpt_data *data = &(set->dpt[m]);
01333 int Nk;
01334 int tk;
01335 double norm;
01336
01337 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01338 norm = 2.0/(Nk<<1);
01339
01340 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01341 {
01342 return;
01343 }
01344
01345 if (flags & FPT_FUNCTION_VALUES)
01346 {
01347 for (j = 0; j <= k_end; j++)
01348 {
01349 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01350 }
01351
01352 eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01353 y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01354 data->gamma_m1);
01355
01356 memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01357 sizeof(double complex));
01358 }
01359 else
01360 {
01361 memcpy(set->result,y,(k_end+1)*sizeof(double complex));
01362 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double complex));
01363
01364 for (j = 0; j < Nk; j++)
01365 {
01366 set->result[j] *= norm;
01367 }
01368
01369 fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01370 (double*)set->result);
01371
01372 eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01373 set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01374 data->gamma_m1);
01375
01376 memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double complex));
01377 }
01378 }
01379
01380 void fpt_transposed(fpt_set set, const int m, double complex *x,
01381 const double complex *y, const int k_end, const unsigned int flags)
01382 {
01383
01384 fpt_data *data = &(set->dpt[m]);
01386 int Nk;
01388 int tk;
01390 int k_start_tilde;
01392 int k_end_tilde;
01393
01395 int tau;
01397 int firstl;
01399 int lastl;
01401 int l;
01403 int plength;
01405 int plength_stab;
01407 fpt_step *step;
01409 fftw_plan plan;
01410 int length = k_end+1;
01411 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01413 int k;
01414 int t_stab;
01415
01416
01417 if (k_end < FPT_BREAK_EVEN)
01418 {
01419
01420 dpt_transposed(set, m, x, y, k_end, flags);
01421 }
01422
01423 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01424 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01425 k_end_tilde = K_END_TILDE(k_end,Nk);
01426
01427
01428 if (set->flags & FPT_NO_FAST_ALGORITHM)
01429 {
01430 return;
01431 }
01432
01433 if (flags & FPT_FUNCTION_VALUES)
01434 {
01435 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01436 (double*)set->work, NULL, 2, 1, kinds, 0U);
01437 fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01438 fftw_destroy_plan(plan);
01439 for (k = 0; k <= k_end; k++)
01440 {
01441 set->result[k] *= 0.5;
01442 }
01443 }
01444 else
01445 {
01446 memcpy(set->result,y,(k_end+1)*sizeof(double complex));
01447 }
01448
01449
01450 memset(set->work,0U,2*Nk*sizeof(double complex));
01451
01452
01453 for (k = 0; k <= k_end; k++)
01454 {
01455 set->work[k] = data->gamma_m1*set->result[k];
01456 }
01457
01458
01459 set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01460 data->alpha_0*set->result[1]);
01461 for (k = 1; k < k_end; k++)
01462 {
01463 set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01464 data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01465 }
01466 if (k_end<Nk)
01467 {
01468 memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double complex));
01469 }
01470
01472 memcpy(set->result,set->work,2*Nk*sizeof(double complex));
01473
01474
01475 plength = Nk;
01476 for (tau = tk-1; tau >= 1; tau--)
01477 {
01478
01479 firstl = FIRST_L(k_start_tilde,plength);
01480
01481 lastl = LAST_L(k_end_tilde,plength);
01482
01483
01484 for (l = firstl; l <= lastl; l++)
01485 {
01486
01487 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double complex));
01488 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double complex));
01489
01490 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01491 (plength/2)*sizeof(double complex));
01492
01493
01494 step = &(data->steps[tau][l]);
01495
01496
01497 if (step->stable)
01498 {
01499 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01500 {
01501
01502 fpt_do_step_transposed_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01503 step->a21[0], step->a22[0], step->gamma[0], tau, set);
01504 }
01505 else
01506 {
01507
01508 fpt_do_step_transposed(set->vec3, set->vec4, step->a11[0], step->a12[0],
01509 step->a21[0], step->a22[0], step->gamma[0], tau, set);
01510 }
01511 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double complex));
01512
01513 for (k = 0; k < plength; k++)
01514 {
01515 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01516 }
01517 }
01518 else
01519 {
01520
01521 plength_stab = step->N_stab;
01522 t_stab = step->t_stab;
01523
01524 memcpy(set->vec3,set->result,plength_stab*sizeof(double complex));
01525 memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double complex));
01526
01527
01528 if (set->flags & FPT_AL_SYMMETRY)
01529 {
01530 if (m <= 1)
01531 {
01532 fpt_do_step_transposed_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01533 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01534 }
01535 else if (m%2 == 0)
01536 {
01537 fpt_do_step_transposed_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01538 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01539 }
01540 else
01541 {
01542 fpt_do_step_transposed_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01543 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01544 }
01545 }
01546 else
01547 {
01548 fpt_do_step_transposed(set->vec3, set->vec4, step->a11[0], step->a12[0],
01549 step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01550 }
01551
01552 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double complex));
01553
01554 for (k = 0; k < plength; k++)
01555 {
01556 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01557 }
01558 }
01559 }
01560
01561 plength = plength>>1;
01562 }
01563
01564
01565 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01566 {
01567 x[k] = set->work[2*(data->k_start+k)];
01568 }
01569 if (k_end == Nk)
01570 {
01571 x[Nk-data->k_start] =
01572 data->gammaN[tk-2]*set->work[2*(Nk-2)]
01573 + data->betaN[tk-2] *set->work[2*(Nk-1)]
01574 + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01575 }
01576 }
01577
01578 void fpt_finalize(fpt_set set)
01579 {
01580 int tau;
01581 int l;
01582 int m;
01583 fpt_data *data;
01584 int k_start_tilde;
01585 int N_tilde;
01586 int firstl, lastl;
01587 int plength;
01588
01589
01590 for (m = 0; m <= set->M; m++)
01591 {
01592
01593 data = &set->dpt[m];
01594 if (data->steps != (fpt_step**)NULL)
01595 {
01596 free(data->alphaN);
01597 free(data->betaN);
01598 free(data->gammaN);
01599 data->alphaN = NULL;
01600 data->betaN = NULL;
01601 data->gammaN = NULL;
01602
01603
01604 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01605 );
01606 N_tilde = N_TILDE(set->N);
01607 plength = 4;
01608 for (tau = 1; tau < set->t; tau++)
01609 {
01610
01611 firstl = FIRST_L(k_start_tilde,plength);
01612
01613 lastl = LAST_L(N_tilde,plength);
01614
01615
01616 for (l = firstl; l <= lastl; l++)
01617 {
01618
01619 free(data->steps[tau][l].a11[0]);
01620 free(data->steps[tau][l].a12[0]);
01621 free(data->steps[tau][l].a21[0]);
01622 free(data->steps[tau][l].a22[0]);
01623 data->steps[tau][l].a11[0] = NULL;
01624 data->steps[tau][l].a12[0] = NULL;
01625 data->steps[tau][l].a21[0] = NULL;
01626 data->steps[tau][l].a22[0] = NULL;
01627
01628 free(data->steps[tau][l].a11);
01629 free(data->steps[tau][l].a12);
01630 free(data->steps[tau][l].a21);
01631 free(data->steps[tau][l].a22);
01632 free(data->steps[tau][l].gamma);
01633 data->steps[tau][l].a11 = NULL;
01634 data->steps[tau][l].a12 = NULL;
01635 data->steps[tau][l].a21 = NULL;
01636 data->steps[tau][l].a22 = NULL;
01637 data->steps[tau][l].gamma = NULL;
01638 }
01639
01640 free(data->steps[tau]);
01641 data->steps[tau] = NULL;
01642
01643 plength = plength<<1;
01644 }
01645
01646 free(data->steps);
01647 data->steps = NULL;
01648 }
01649
01650 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01651 {
01652 }
01653 else
01654 {
01655
01656
01657 if (set->flags & FPT_PERSISTENT_DATA)
01658 {
01659 }
01660 else
01661 {
01662 free(data->alpha);
01663 free(data->beta);
01664 free(data->gamma);
01665 }
01666 data->alpha = NULL;
01667 data->beta = NULL;
01668 data->gamma = NULL;
01669 }
01670 }
01671
01672
01673 free(set->dpt);
01674 set->dpt = NULL;
01675
01676 for (tau = 1; tau < /*set->t*/set->t+1; tau++)
01677 {
01678 free(set->xcvecs[tau-1]);
01679 set->xcvecs[tau-1] = NULL;
01680 }
01681 free(set->xcvecs);
01682 set->xcvecs = NULL;
01683
01684
01685 free(set->work);
01686 free(set->result);
01687
01688
01689 if (set->flags & FPT_NO_FAST_ALGORITHM)
01690 {
01691 }
01692 else
01693 {
01694
01695 free(set->vec3);
01696 free(set->vec4);
01697 free(set->z);
01698 set->work = NULL;
01699 set->result = NULL;
01700 set->vec3 = NULL;
01701 set->vec4 = NULL;
01702 set->z = NULL;
01703
01704
01705 for(tau = 0; tau < set->t; tau++)
01706 {
01707 fftw_destroy_plan(set->plans_dct3[tau]);
01708 fftw_destroy_plan(set->plans_dct2[tau]);
01709 set->plans_dct3[tau] = NULL;
01710 set->plans_dct2[tau] = NULL;
01711 }
01712
01713 free(set->plans_dct3);
01714 free(set->plans_dct2);
01715 set->plans_dct3 = NULL;
01716 set->plans_dct2 = NULL;
01717 }
01718
01719
01720
01721 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01722 {
01723 }
01724 else
01725 {
01726
01727 free(set->xc_slow);
01728 set->xc_slow = NULL;
01729 free(set->temp);
01730 set->temp = NULL;
01731 }
01732
01733
01734 free(set);
01735 }