NFFT Logo 3.0.2 API Reference
Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals

fpt.c

Go to the documentation of this file.
00001 
00007 /* Include standard C headers. */
00008 #include <math.h>
00009 #include <string.h>
00010 #include <stdlib.h>
00011 #include <stdbool.h>
00012 
00013 /* Include FPT module header. */
00014 #include "nfft3.h"
00015 
00016 /* Include NFFT3 utilities header. */
00017 #include "util.h"
00018 
00019 /* Some macros for index calculation. */
00020 
00022 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00023 
00024 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00025 
00029 #define FIRST_L(x,y) ((int)floor((x)/(double)y))
00030 
00034 #define LAST_L(x,y) ((int)ceil(((x)+1)/(double)y)-1)
00035 
00036 #define N_TILDE(y) (y-1)
00037 
00038 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00039 //#define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00040 
00041 #ifdef TEST_STAB
00042   #define NFFT_MAX(a,b) ((a>b)?(a):(b))
00043 #endif
00044 
00045 #define FPT_BREAK_EVEN 4
00046 
00050 typedef struct fpt_step_
00051 {
00052   bool stable;                            
00055   int N_stab;                             
00056   int t_stab;                             
00057   double **a11,**a12,**a21,**a22;         
00058   double *gamma;                          
00059 } fpt_step;
00060 
00064 typedef struct fpt_data_
00065 {
00066   fpt_step **steps;                       
00067   int k_start;
00068   double *alphaN;
00069   double *betaN;
00070   double *gammaN;
00071   double alpha_0;
00072   double beta_0;
00073   double gamma_m1;
00074   /* Data for direct transform. */
00075   double *alpha;
00076   double *beta;
00077   double *gamma;
00078 } fpt_data;
00079 
00083 typedef struct fpt_set_s_
00084 {
00085   int flags;                              
00086   int M;                                  
00087   int N;                                  
00089   int t;                                  
00090   fpt_data *dpt;                          
00091   double **xcvecs;                        
00094   double *xc;                             
00095   double complex *temp;                          
00096   double complex *work;                          
00097   double complex *result;                        
00098   double complex *vec3;
00099   double complex *vec4;
00100   double complex *z;
00101   fftw_plan *plans_dct3;                  
00103   fftw_plan *plans_dct2;                  
00105   fftw_r2r_kind *kinds;                   
00107   fftw_r2r_kind *kindsr;                  
00110   int *lengths; 
00112   /* Data for slow transforms. */
00113   double *xc_slow;
00114 } fpt_set_s;
00115 
00116 inline void abuvxpwy(double a, double b, double complex* u, double complex* x, double* v,
00117   double complex* y, double* w, int n)
00118 {
00119   int l;
00120   double complex *u_ptr, *x_ptr, *y_ptr;
00121   double *v_ptr, *w_ptr;
00122 
00123   u_ptr = u;
00124   x_ptr = x;
00125   v_ptr = v;
00126   y_ptr = y;
00127   w_ptr = w;
00128 
00129   for (l = 0; l < n; l++)
00130   {
00131     *u++ = a * (b * (*v++) * (*x++) + (*w++) * (*y++));
00132   }
00133 }
00134 
00135 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00136 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00137   double complex* y, double* w, int n) \
00138 { \
00139   int l; \
00140   double complex *u_ptr, *x_ptr, *y_ptr; \
00141   double *v_ptr, *w_ptr; \
00142   \
00143   u_ptr = u; \
00144   x_ptr = x; \
00145   v_ptr = v; \
00146   y_ptr = y; \
00147   w_ptr = w; \
00148   \
00149   for (l = 0; l < n/2; l++) \
00150   { \
00151     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00152   } \
00153   v_ptr--; \
00154   w_ptr--; \
00155   for (l = 0; l < n/2; l++) \
00156   { \
00157     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00158   } \
00159 }
00160 
00161 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00162 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00163 
00164 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00165 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00166   double complex* y, double* w, int n) \
00167 { \
00168   int l; \
00169   double complex *u_ptr, *x_ptr, *y_ptr; \
00170   double *v_ptr, *w_ptr; \
00171   \
00172   u_ptr = u; \
00173   x_ptr = x; \
00174   v_ptr = v; \
00175   y_ptr = y; \
00176   w_ptr = w; \
00177   \
00178   for (l = 0; l < n/2; l++) \
00179   { \
00180     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00181   } \
00182   v_ptr--; \
00183   /*w_ptr--;*/ \
00184   for (l = 0; l < n/2; l++) \
00185   { \
00186     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00187   } \
00188 }
00189 
00190 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00191 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00192 
00193 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00194 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00195   double complex* y, double* w, int n) \
00196 { \
00197   int l; \
00198   double complex *u_ptr, *x_ptr, *y_ptr; \
00199   double *v_ptr, *w_ptr; \
00200   \
00201   u_ptr = u; \
00202   x_ptr = x; \
00203   v_ptr = v; \
00204   y_ptr = y; \
00205   w_ptr = w; \
00206   \
00207   for (l = 0; l < n/2; l++) \
00208   { \
00209     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00210   } \
00211   /*v_ptr--;*/ \
00212   w_ptr--; \
00213   for (l = 0; l < n/2; l++) \
00214   { \
00215     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + S1 * (*w_ptr--) * (*y_ptr++)); \
00216   } \
00217 }
00218 
00219 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00220 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00221 
00222 inline void auvxpwy(double a, double complex* u, double complex* x, double* v, double complex* y,
00223   double* w, int n)
00224 {
00225   int l;
00226   double complex *u_ptr, *x_ptr, *y_ptr;
00227   double *v_ptr, *w_ptr;
00228 
00229   u_ptr = u;
00230   x_ptr = x;
00231   v_ptr = v;
00232   y_ptr = y;
00233   w_ptr = w;
00234 
00235   for (l = 0; l < n; l++)
00236   {
00237     /*fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*u_ptr,*v_ptr,*x_ptr,*w_ptr,*y_ptr);*/
00238     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00239   }
00240   /*fprintf(stderr,"\n");*/
00241 }
00242 
00243 #define AUVXPWY_SYMMETRIC(NAME,S1,S2) \
00244 inline void NAME(double a, double complex* u, double complex* x, double* v, double complex* y, \
00245   double* w, int n) \
00246 { \
00247   int l; \
00248   double complex *u_ptr, *x_ptr, *y_ptr; \
00249   double *v_ptr, *w_ptr; \
00250 \
00251   u_ptr = u; \
00252   x_ptr = x; \
00253   v_ptr = v; \
00254   y_ptr = y; \
00255   w_ptr = w; \
00256 \
00257   /*for (l = 0; l < n; l++)*/ \
00258   /*{*/ \
00259     /*fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*/ \
00260     /*  u_ptr[l],v_ptr[l],x_ptr[l],w_ptr[l],y_ptr[l]);*/ \
00261   /*}*/ \
00262   \
00263   \
00264   for (l = 0; l < n/2; l++) \
00265   { \
00266     /*fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*u,*v,*x,*w,*y);*/ \
00267     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00268   } \
00269   v_ptr--; \
00270   w_ptr--; \
00271   for (l = 0; l < n/2; l++) \
00272   { \
00273     /* fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*u,*v,*x,*w,*y);*/ \
00274     *u_ptr++ = a * (S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00275   } \
00276   /*fprintf(stderr,"\n");*/ \
00277 }
00278 
00279 AUVXPWY_SYMMETRIC(auvxpwy_symmetric,1.0,-1.0)
00280 
00281 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00282 inline void NAME(double complex  *a, double complex *b, double *a11, double *a12, \
00283   double *a21, double *a22, double gamma, int tau, fpt_set set) \
00284 { \ \
00286   int length = 1<<(tau+1); \ \
00288   double norm = 1.0/(length<<1); \
00289   \
00290   /* Compensate for factors introduced by a raw DCT-III. */ \
00291   a[0] *= 2.0; \
00292   b[0] *= 2.0; \
00293   \
00294   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00295   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00296   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00297   \
00298   /*for (k = 0; k < length; k++)*/ \
00299   /*{*/ \
00300     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
00301     /*  a11[k],a12[k],a21[k],a22[k]);*/ \
00302   /*}*/ \
00303   \
00304   /* Check, if gamma is zero. */ \
00305   if (gamma == 0.0) \
00306   { \
00307     /*fprintf(stderr,"gamma == 0!\n");*/ \
00308     /* Perform multiplication only for second row. */ \
00309     M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00310   } \
00311   else \
00312   { \
00313     /*fprintf(stderr,"gamma != 0!\n");*/ \
00314     /* Perform multiplication for both rows. */ \
00315     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00316     M1_FUNCTION(norm*gamma,a,a,a11,b,a12,length); \
00317     memcpy(b,set->z,length*sizeof(double complex)); \
00318     /* Compute Chebyshev-coefficients using a DCT-II. */ \
00319     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00320     /* Compensate for factors introduced by a raw DCT-II. */ \
00321     a[0] *= 0.5; \
00322   } \
00323   \
00324   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00325   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00326   /* Compensate for factors introduced by a raw DCT-II. */ \
00327   b[0] *= 0.5; \
00328 }
00329 
00330 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00331 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00332 FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00333 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)
00334 
00335 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00336 inline void NAME(double complex  *a, double complex *b, double *a11, \
00337   double *a12, double *a21, double *a22, double gamma, int tau, fpt_set set) \
00338 { \ \
00340   int length = 1<<(tau+1); \ \
00342   double norm = 1.0/(length<<1); \
00343   \
00344   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00345   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00346   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00347   \
00348   /* Perform matrix multiplication. */ \
00349   M1_FUNCTION(norm,gamma,set->z,a,a11,b,a21,length); \
00350   M2_FUNCTION(norm,gamma,b,a,a12,b,a22,length); \
00351   memcpy(a,set->z,length*sizeof(double complex)); \
00352   \
00353   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00354   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00355   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00356 }
00357 
00358 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed,abuvxpwy,abuvxpwy)
00359 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00360 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)
00361 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)
00362 
00363 void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00364   const double *beta, const double *gamma)
00365 {
00366   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00367    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00368    */
00369   int i,j;
00370   double a,b,x_val_act,a_old;
00371   const double *x_act;
00372   double *y_act;
00373   const double *alpha_act, *beta_act, *gamma_act;
00374 
00375   /* Traverse all nodes. */
00376   x_act = x;
00377   y_act = y;
00378   for (i = 0; i < size; i++)
00379   {
00380     a = 1.0;
00381     b = 0.0;
00382     x_val_act = *x_act;
00383 
00384     if (k == 0)
00385     {
00386       *y_act = 1.0;
00387     }
00388     else
00389     {
00390       alpha_act = &(alpha[k]);
00391       beta_act = &(beta[k]);
00392       gamma_act = &(gamma[k]);
00393       for (j = k; j > 1; j--)
00394       {
00395         a_old = a;
00396         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00397          b = a_old*(*gamma_act);
00398         alpha_act--;
00399         beta_act--;
00400         gamma_act--;
00401       }
00402       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00403     }
00404     x_act++;
00405     y_act++;
00406   }
00407 }
00408 
00409 int eval_clenshaw_thresh(const double *x, double *y, int size, int k,
00410   const double *alpha, const double *beta, const double *gamma, const
00411   double threshold)
00412 {
00413   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00414    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00415    */
00416   int i,j;
00417   double a,b,x_val_act,a_old;
00418   const double *x_act;
00419   double *y_act;
00420   const double *alpha_act, *beta_act, *gamma_act;
00421 
00422   /* Traverse all nodes. */
00423   x_act = x;
00424   y_act = y;
00425   for (i = 0; i < size; i++)
00426   {
00427     a = 1.0;
00428     b = 0.0;
00429     x_val_act = *x_act;
00430 
00431     if (k == 0)
00432     {
00433      *y_act = 1.0;
00434     }
00435     else
00436     {
00437       alpha_act = &(alpha[k]);
00438       beta_act = &(beta[k]);
00439       gamma_act = &(gamma[k]);
00440       for (j = k; j > 1; j--)
00441       {
00442         a_old = a;
00443         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00444          b = a_old*(*gamma_act);
00445         alpha_act--;
00446         beta_act--;
00447         gamma_act--;
00448       }
00449       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00450       if (fabs(*y_act) > threshold)
00451       {
00452         return 1;
00453       }
00454     }
00455     x_act++;
00456     y_act++;
00457   }
00458   return 0;
00459 }
00460 
00479 void eval_sum_clenshaw(int N, int M, double complex* a, double *x, double complex *y,
00480   double complex *temp, double *alpha, double *beta, double *gamma, double lambda)
00481 {
00482   int j,k;
00483   double complex* it1 = temp;
00484   double complex* it2 = y;
00485   double complex aux;
00486 
00487   /* Clenshaw's algorithm */
00488   for (j = 0; j <= M; j++)
00489   {
00490     it2[j] = a[N];
00491   }
00492 
00493   if (N > 0)
00494   {
00495     for (j = 0; j <= M; j++)
00496     {
00497       it1[j] = a[N-1];
00498     }
00499 
00500     //fprintf(stdout,"N = %d\n",N);
00501     for (k = N; k > 1; k--)
00502     {
00503 
00504       for (j = 0; j <= M; j++)
00505       {
00506         aux = a[k-2] + it2[j] * gamma[k-1];
00507         it2[j] = it1[j] + it2[j] * (alpha[k-1] * x[j] + beta[k-1]);
00508         it1[j] = aux;
00509       }
00510     }
00511 
00512 
00513     /* Compute final step. */
00514     for (j = 0; j <= M; j++)
00515     {
00516       it2[j] = it1[j] + it2[j] * (alpha[0] * x[j] + beta[0]);
00517     }
00518   }
00519 
00520   /* Compute final result by multiplying with the constant lambda */
00521   for (j = 0; j <= M; j++)
00522   {
00523     y[j] = lambda * it2[j];
00524   }
00525 }
00526 
00545 void eval_sum_clenshaw_transposed(int N, int M, double complex* a, double *x,
00546   double complex *y, double complex *temp, double *alpha, double *beta, double *gamma,
00547   double lambda)
00548 {
00549   int j,k;
00550   double complex* it1 = temp;
00551   double complex* it2 = y;
00552   double complex aux;
00553 
00554   /* Compute final result by multiplying with the constant lambda */
00555   a[0] = 0.0;
00556   for (j = 0; j <= M; j++)
00557   {
00558     it2[j] = lambda * y[j];
00559     a[0] += it2[j];
00560   }
00561 
00562   if (N > 0)
00563   {
00564     /* Compute final step. */
00565     a[1] = 0.0;
00566     for (j = 0; j <= M; j++)
00567     {
00568       it1[j] = it2[j];
00569       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00570       a[1] += it2[j];
00571     }
00572 
00573     for (k = 2; k <= N; k++)
00574     {
00575       a[k] = 0.0;
00576       for (j = 0; j <= M; j++)
00577       {
00578         aux = it1[j];
00579         it1[j] = it2[j];
00580         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00581         a[k] += it2[j];
00582       }
00583     }
00584   }
00585 }
00586 
00587 
00588 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00589 {
00591   int plength;
00593   int tau;
00595   int m;
00596   int k;
00597 
00598   /* Allocate memory for new DPT set. */
00599   fpt_set_s *set = (fpt_set_s*)malloc(sizeof(fpt_set_s));
00600 
00601   /* Save parameters in structure. */
00602   set->flags = flags;
00603 
00604   //fprintf(stderr,"\nfpt_init: flags = %d \t %d\n",set->flags,flags);
00605 
00606   set->M = M;
00607   set->t = t;
00608   set->N = 1<<t;
00609 
00610   /* Allocate memory for L transforms. */
00611   set->dpt = (fpt_data*) malloc((M+1)*sizeof(fpt_data));
00612 
00613   /* Initialize with NULL pointer. */
00614   for (m = 0; m <= set->M; m++)
00615   {
00616     set->dpt[m].steps = (fpt_step**) NULL;
00617   }
00618 
00619   /* Create arrays with Chebyshev nodes. */
00620 
00621   /* Initialize array with Chebyshev coefficients for the polynomial x. This
00622    * would be trivially an array containing a 1 as second entry with all other
00623    * coefficients set to zero. In order to compensate for the multiplicative
00624    * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
00625 
00626   /* Allocate memory for array of pointers to node arrays. */
00627   set->xcvecs = (double**) malloc((set->t/*-1*/)*sizeof(double*));
00628   /* For each polynomial length starting with 4, compute the Chebyshev nodes
00629    * using a DCT-III. */
00630   plength = 4;
00631   for (tau = 1; tau < /*t*/t+1; tau++)
00632   {
00633     /* Allocate memory for current array. */
00634     set->xcvecs[tau-1] = (double*) malloc(plength*sizeof(double));
00635     for (k = 0; k < plength; k++)
00636     {
00637       set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00638     }
00639     plength = plength << 1;
00640   }
00641 
00643   set->work = (double complex*) malloc((2*set->N)*sizeof(double complex));
00644   set->result = (double complex*) malloc((2*set->N)*sizeof(double complex));
00645 
00646   /* Check if fast transform is activated. */
00647   if (set->flags & FPT_NO_FAST_ALGORITHM)
00648   {
00649   }
00650   else
00651   {
00653     set->vec3 = (double complex*) malloc(set->N*sizeof(double complex));
00654     set->vec4 = (double complex*) malloc(set->N*sizeof(double complex));
00655     set->z = (double complex*) malloc(set->N*sizeof(double complex));
00656 
00658     set->plans_dct3 = (fftw_plan*) fftw_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00659     set->plans_dct2 = (fftw_plan*) fftw_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00660     set->kinds      = (fftw_r2r_kind*) malloc(2*sizeof(fftw_r2r_kind));
00661     set->kinds[0]   = FFTW_REDFT01;
00662     set->kinds[1]   = FFTW_REDFT01;
00663     set->kindsr     = (fftw_r2r_kind*) malloc(2*sizeof(fftw_r2r_kind));
00664     set->kindsr[0]  = FFTW_REDFT10;
00665     set->kindsr[1]  = FFTW_REDFT10;
00666     set->lengths    = (int*) malloc((set->t/*-1*/)*sizeof(int));
00667     for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00668     {
00669       set->lengths[tau] = plength;
00670       set->plans_dct3[tau] =
00671         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00672                            2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00673                            0);
00674       set->plans_dct2[tau] =
00675         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00676                            2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00677                            0);
00678     }
00679     free(set->lengths);
00680     free(set->kinds);
00681     free(set->kindsr);
00682     set->lengths = NULL;
00683     set->kinds = NULL;
00684     set->kindsr = NULL;
00685   }
00686 
00687   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
00688   {
00689   }
00690   else
00691   {
00692     set->xc_slow = (double*) malloc((set->N+1)*sizeof(double));
00693     set->temp = (double complex*) calloc((set->N+1),sizeof(double complex));
00694   }
00695 
00696   /* Return the newly created DPT set. */
00697   return set;
00698 }
00699 
00700 void fpt_precompute(fpt_set set, const int m, const double *alpha,
00701                     const double *beta, const double *gamma, int k_start,
00702                     const double threshold)
00703 {
00704 
00705   int tau;          
00706   int l;            
00707   int plength;      
00709   int degree;       
00711   int firstl;       
00712   int lastl;        
00713   int plength_stab; 
00715   int degree_stab;  
00717   double *a11;      
00719   double *a12;      
00721   double *a21;      
00723   double *a22;      
00725   const double *calpha;
00726   const double *cbeta;
00727   const double *cgamma;
00728   int needstab = 0; 
00729   int k_start_tilde;
00730   int N_tilde;
00731   int clength;
00732   int clength_1;
00733   int clength_2;
00734   int t_stab, N_stab;
00735   int ell;
00736 
00737   //fprintf(stderr,"fpt_precompute: Precomputing for m = %d\n",m);
00738   //fprintf(stderr,"fpt_precompute: k_start = %d\n",k_start);
00739 
00740   fpt_data *data;
00741 
00742   /* Allocate memory for DPT transform data. */
00743   //set->dpt[m] = (fpt_data*) malloc(sizeof(fpt_data));
00744 
00745   /* Get pointer to DPT data. */
00746   data = &(set->dpt[m]);
00747 
00748   /* Check, if already precomputed. */
00749   if (data->steps != NULL)
00750   {
00751     return;
00752   }
00753 
00754   /* Save k_start. */
00755   data->k_start = k_start;
00756 
00757   /* Check if fast transform is activated. */
00758   if (set->flags & FPT_NO_FAST_ALGORITHM)
00759   {
00760   }
00761   else
00762   {
00763     /* Save recursion coefficients. */
00764     data->alphaN = (double*) malloc((set->t-1)*sizeof(double complex));
00765     data->betaN = (double*) malloc((set->t-1)*sizeof(double complex));
00766     data->gammaN = (double*) malloc((set->t-1)*sizeof(double complex));
00767 
00768     for (tau = 2; tau <= set->t; tau++)
00769     {
00770 
00771       data->alphaN[tau-2] = alpha[1<<tau];
00772       data->betaN[tau-2] = beta[1<<tau];
00773       data->gammaN[tau-2] = gamma[1<<tau];
00774     }
00775 
00776     data->alpha_0 = alpha[1];
00777     data->beta_0 = beta[1];
00778     data->gamma_m1 = gamma[0];
00779 
00780     k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00781       /*set->N*/);
00782     N_tilde = N_TILDE(set->N);
00783 
00784     /* Allocate memory for the cascade with t = log_2(N) many levels. */
00785     data->steps = (fpt_step**) malloc(sizeof(fpt_step*)*set->t);
00786 
00787     /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
00788     plength = 4;
00789     for (tau = 1; tau < set->t; tau++)
00790     {
00791       /* Compute auxilliary values. */
00792       degree = plength>>1;
00793       /* Compute first l. */
00794       firstl = FIRST_L(k_start_tilde,plength);
00795       /* Compute last l. */
00796       lastl = LAST_L(N_tilde,plength);
00797 
00798       /* Allocate memory for current level. This level will contain 2^{t-tau-1}
00799        * many matrices. */
00800       data->steps[tau] = (fpt_step*) fftw_malloc(sizeof(fpt_step)
00801                          * (lastl+1));
00802 
00803       /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
00804       for (l = firstl; l <= lastl; l++)
00805       {
00806         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00807         {
00808           //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
00809           //fflush(stderr);
00810           clength = plength/2;
00811         }
00812         else
00813         {
00814           clength = plength;
00815         }
00816 
00817         /* Allocate memory for the components of U_{n,tau,l}. */
00818         a11 = (double*) fftw_malloc(sizeof(double)*clength);
00819         a12 = (double*) fftw_malloc(sizeof(double)*clength);
00820         a21 = (double*) fftw_malloc(sizeof(double)*clength);
00821         a22 = (double*) fftw_malloc(sizeof(double)*clength);
00822 
00823         /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
00824          * nodes. */
00825 
00826         /* Get the pointers to the three-term recurrence coeffcients. */
00827         calpha = &(alpha[plength*l+1+1]);
00828         cbeta = &(beta[plength*l+1+1]);
00829         cgamma = &(gamma[plength*l+1+1]);
00830 
00831         if (set->flags & FPT_NO_STABILIZATION)
00832         {
00833           /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
00834           eval_clenshaw(set->xcvecs[tau-1], a11, clength, degree-2, calpha, cbeta,
00835             cgamma);
00836           eval_clenshaw(set->xcvecs[tau-1], a12, clength, degree-1, calpha, cbeta,
00837             cgamma);
00838           calpha--;
00839           cbeta--;
00840           cgamma--;
00841           eval_clenshaw(set->xcvecs[tau-1], a21, clength, degree-1, calpha, cbeta,
00842             cgamma);
00843           eval_clenshaw(set->xcvecs[tau-1], a22, clength, degree, calpha, cbeta,
00844             cgamma);
00845           needstab = 0;
00846         }
00847         else
00848         {
00849           needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a11, clength, degree-2,
00850             calpha, cbeta, cgamma, threshold);
00851           if (needstab == 0)
00852           {
00853             /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+2). */
00854             needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a12, clength, degree-1,
00855               calpha, cbeta, cgamma, threshold);
00856             if (needstab == 0)
00857             {
00858               calpha--;
00859               cbeta--;
00860               cgamma--;
00861               /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
00862               needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a21, clength,
00863                 degree-1, calpha, cbeta, cgamma, threshold);
00864               if (needstab == 0)
00865               {
00866                 /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
00867                 needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a22, clength,
00868                   degree, calpha, cbeta, cgamma, threshold);
00869               }
00870             }
00871           }
00872         }
00873 
00874         /* Check if stabilization needed. */
00875         if (needstab == 0)
00876         {
00877           data->steps[tau][l].a11 = (double**) fftw_malloc(sizeof(double*));
00878           data->steps[tau][l].a12 = (double**) fftw_malloc(sizeof(double*));
00879           data->steps[tau][l].a21 = (double**) fftw_malloc(sizeof(double*));
00880           data->steps[tau][l].a22 = (double**) fftw_malloc(sizeof(double*));
00881           data->steps[tau][l].gamma = (double*) fftw_malloc(sizeof(double));
00882           /* No stabilization needed. */
00883           data->steps[tau][l].a11[0] = a11;
00884           data->steps[tau][l].a12[0] = a12;
00885           data->steps[tau][l].a21[0] = a21;
00886           data->steps[tau][l].a22[0] = a22;
00887           data->steps[tau][l].gamma[0] = gamma[plength*l+1+1];
00888           data->steps[tau][l].stable = true;
00889         }
00890         else
00891         {
00892           /* Stabilize. */
00893           //fprintf(stderr,"fpt_precompute: Stabilizing for tau = %d, l = %d\n",tau,l);
00894           degree_stab = degree*(2*l+1);
00895           nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
00896           /*fprintf(stderr,"(l+1)*(1<<(tau+2)) = %d, N_stab = %d, t_stab = %d\n",
00897             (l+1)*(1<<(tau+2)),N_stab,t_stab);*/
00898 
00899           /* Old arrays are to small. */
00900           fftw_free(a11);
00901           fftw_free(a12);
00902           fftw_free(a21);
00903           fftw_free(a22);
00904 
00905           data->steps[tau][l].a11 = (double**) fftw_malloc(sizeof(double*));
00906           data->steps[tau][l].a12 = (double**) fftw_malloc(sizeof(double*));
00907           data->steps[tau][l].a21 = (double**) fftw_malloc(sizeof(double*));
00908           data->steps[tau][l].a22 = (double**) fftw_malloc(sizeof(double*));
00909           data->steps[tau][l].gamma = (double*) fftw_malloc(sizeof(double));
00910 
00911           plength_stab = N_stab;
00912 
00913           if (set->flags & FPT_AL_SYMMETRY)
00914           {
00915             if (m <= 1)
00916             {
00917               clength_1 = plength_stab/2;
00918               clength_2 = plength_stab/2;
00919             }
00920             else if (m%2 == 0)
00921             {
00922               clength_1 = plength_stab/2;
00923               clength_2 = plength_stab;
00924             }
00925             else
00926             {
00927               clength_1 = plength_stab;
00928               clength_2 = plength_stab/2;
00929             }
00930           }
00931           else
00932           {
00933             clength_1 = plength_stab;
00934             clength_2 = plength_stab;
00935           }
00936 
00937 
00938           /* Allocate memory for arrays. */
00939           a11 = (double*) fftw_malloc(sizeof(double)*clength_1);
00940           a12 = (double*) fftw_malloc(sizeof(double)*clength_1);
00941           a21 = (double*) fftw_malloc(sizeof(double)*clength_2);
00942           a22 = (double*) fftw_malloc(sizeof(double)*clength_2);
00943 
00944           /* Get the pointers to the three-term recurrence coeffcients. */
00945           calpha = &(alpha[2]);
00946           cbeta = &(beta[2]);
00947           cgamma = &(gamma[2]);
00948           /* Evaluate P_{2^{tau}(2l+1)-2}^n(\cdot,2). */
00949           eval_clenshaw(set->xcvecs[t_stab-2], a11, clength_1, degree_stab-2,
00950             calpha, cbeta, cgamma);
00951           /* Evaluate P_{2^{tau}(2l+1)-1}^n(\cdot,2). */
00952           eval_clenshaw(set->xcvecs[t_stab-2], a12, clength_1, degree_stab-1,
00953             calpha, cbeta, cgamma);
00954           calpha--;
00955           cbeta--;
00956           cgamma--;
00957           /* Evaluate P_{2^{tau}(2l+1)-1}^n(\cdot,1). */
00958           eval_clenshaw(set->xcvecs[t_stab-2], a21, clength_2, degree_stab-1,
00959             calpha, cbeta, cgamma);
00960           /* Evaluate P_{2^{tau}(2l+1)}^n(\cdot,1). */
00961           eval_clenshaw(set->xcvecs[t_stab-2], a22, clength_2, degree_stab+0,
00962             calpha, cbeta, cgamma);
00963 
00964           data->steps[tau][l].a11[0] = a11;
00965           data->steps[tau][l].a12[0] = a12;
00966           data->steps[tau][l].a21[0] = a21;
00967           data->steps[tau][l].a22[0] = a22;
00968           data->steps[tau][l].gamma[0] =  gamma[1+1];
00969           data->steps[tau][l].stable = false;
00970           data->steps[tau][l].t_stab = t_stab;
00971           data->steps[tau][l].N_stab = N_stab;
00972         }
00973       }
00975       plength = plength << 1;
00976     }
00977   }
00978 
00979   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
00980   {
00981   }
00982   else
00983   {
00984     /* Check, if recurrence coefficients must be copied. */
00985     if (set->flags & FPT_PERSISTENT_DATA)
00986     {
00987       data->alpha = (double*) alpha;
00988       data->beta = (double*) beta;
00989       data->gamma = (double*) gamma;
00990     }
00991     else
00992     {
00993       data->alpha = (double*) malloc((set->N+1)*sizeof(double));
00994       data->beta = (double*) malloc((set->N+1)*sizeof(double));
00995       data->gamma = (double*) malloc((set->N+1)*sizeof(double));
00996       memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
00997       memcpy(data->beta,beta,(set->N+1)*sizeof(double));
00998       memcpy(data->gamma,gamma,(set->N+1)*sizeof(double));
00999     }
01000   }
01001 }
01002 
01003 void dpt_trafo(fpt_set set, const int m, const double complex *x, double complex *y,
01004   const int k_end, const unsigned int flags)
01005 {
01006   int j;
01007   fpt_data *data = &(set->dpt[m]);
01008   int Nk;
01009   int tk;
01010   double norm;
01011 
01012   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01013   norm = 2.0/(Nk<<1);
01014 
01015   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01016   {
01017     return;
01018   }
01019 
01020   if (flags & FPT_FUNCTION_VALUES)
01021   {
01022     /* Fill array with Chebyshev nodes. */
01023     for (j = 0; j <= k_end; j++)
01024     {
01025       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01026     }
01027 
01028     memset(set->result,0U,data->k_start*sizeof(double complex));
01029     memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double complex));
01030 
01031     eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01032       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01033       data->gamma_m1);
01034   }
01035   else
01036   {
01037     memset(set->temp,0U,data->k_start*sizeof(double complex));
01038     memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double complex));
01039 
01040     eval_sum_clenshaw(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01041       set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01042       data->gamma_m1);
01043 
01044     fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01045       (double*)set->result);
01046 
01047     set->result[0] *= 0.5;
01048     for (j = 0; j < Nk; j++)
01049     {
01050       set->result[j] *= norm;
01051     }
01052 
01053     memcpy(y,set->result,(k_end+1)*sizeof(double complex));
01054   }
01055 }
01056 
01057 void fpt_trafo(fpt_set set, const int m, const double complex *x, double complex *y,
01058   const int k_end, const unsigned int flags)
01059 {
01060   /* Get transformation data. */
01061   fpt_data *data = &(set->dpt[m]);
01063   int Nk;
01065   int tk;
01067   int k_start_tilde;
01069   int k_end_tilde;
01070 
01072   int tau;
01074   int firstl;
01076   int lastl;
01078   int l;
01080   int plength;
01082   int plength_stab;
01083   int t_stab;
01085   fpt_step *step;
01087   fftw_plan plan;
01088   int length = k_end+1;
01089   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01090 
01092   int k;
01093 
01094   double complex *work_ptr;
01095   const double complex *x_ptr;
01096 
01097   /* Check, if slow transformation should be used due to small bandwidth. */
01098   if (k_end < FPT_BREAK_EVEN)
01099   {
01100     /* Use NDSFT. */
01101     dpt_trafo(set, m, x, y, k_end, flags);
01102   }
01103 
01104   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01105   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01106   k_end_tilde = K_END_TILDE(k_end,Nk);
01107 
01108   /* Check if fast transform is activated. */
01109   if (set->flags & FPT_NO_FAST_ALGORITHM)
01110   {
01111     return;
01112   }
01113 
01114   if (flags & FPT_FUNCTION_VALUES)
01115   {
01116     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01117       (double*)set->work, NULL, 2, 1, kinds, 0U);
01118   }
01119 
01120   /* Initialize working arrays. */
01121   memset(set->result,0U,2*Nk*sizeof(double complex));
01122 
01123   /* The first step. */
01124 
01125   /* Set the first 2*data->k_start coefficients to zero. */
01126   memset(set->work,0U,2*data->k_start*sizeof(double complex));
01127 
01128   work_ptr = &set->work[2*data->k_start];
01129   x_ptr = x;
01130 
01131   for (k = 0; k < k_end_tilde-data->k_start+1; k++)
01132   {
01133     *work_ptr++ = *x_ptr++;
01134     *work_ptr++ = 0;
01135   }
01136 
01137   /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
01138   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double complex));
01139 
01140   /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
01141    * x_{Nk-1} and x_{Nk-2}. */
01142   if (k_end == Nk)
01143   {
01144     set->work[2*(Nk-2)]   += data->gammaN[tk-2]*x[Nk-data->k_start];
01145     set->work[2*(Nk-1)]   += data->betaN[tk-2]*x[Nk-data->k_start];
01146     set->work[2*(Nk-1)+1]  = data->alphaN[tk-2]*x[Nk-data->k_start];
01147   }
01148 
01149   /*--------*/
01150   /*for (k = 0; k < 2*Nk; k++)
01151   {
01152     fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01153       k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01154       cimag(set->result[k]));
01155   }*/
01156   /*--------*/
01157 
01158   /* Compute the remaining steps. */
01159   plength = 4;
01160   for (tau = 1; tau < tk; tau++)
01161   {
01162     /* Compute first l. */
01163     firstl = FIRST_L(k_start_tilde,plength);
01164     /* Compute last l. */
01165     lastl = LAST_L(k_end_tilde,plength);
01166 
01167     /* Compute the multiplication steps. */
01168     for (l = firstl; l <= lastl; l++)
01169     {
01170       /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
01171       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double complex));
01172       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double complex));
01173       memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double complex));
01174       memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double complex));
01175 
01176       /* Copy coefficients into first half. */
01177       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double complex));
01178       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double complex));
01179       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double complex));
01180 
01181       /* Get matrix U_{n,tau,l} */
01182       step = &(data->steps[tau][l]);
01183 
01184       /* Check if step is stable. */
01185       if (step->stable)
01186       {
01187         /* Check, if we should do a symmetrizised step. */
01188         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01189         {
01190           /*for (k = 0; k < plength; k++)
01191           {
01192             fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
01193               step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
01194           }*/
01195           /* Multiply third and fourth polynomial with matrix U. */
01196           //fprintf(stderr,"\nhallo\n");
01197           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01198             step->a12[0], step->a21[0], step->a22[0], step->gamma[0], tau, set);
01199         }
01200         else
01201         {
01202           /* Multiply third and fourth polynomial with matrix U. */
01203           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01204             step->a21[0], step->a22[0], step->gamma[0], tau, set);
01205         }
01206 
01207         if (step->gamma[0] != 0.0)
01208         {
01209           for (k = 0; k < plength; k++)
01210           {
01211             set->work[plength*2*l+k] += set->vec3[k];
01212           }
01213         }
01214         for (k = 0; k < plength; k++)
01215         {
01216           set->work[plength*(2*l+1)+k] += set->vec4[k];
01217         }
01218       }
01219       else
01220       {
01221         /* Stabilize. */
01222 
01223         /* The lengh of the polynomials */
01224         plength_stab = step->N_stab;
01225         t_stab = step->t_stab;
01226 
01227         /*---------*/
01228         /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
01229         fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
01230         fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
01231         fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
01232         /*---------*/
01233 
01234         /* Set rest of vectors explicitely to zero */
01235         /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
01236           plength, plength_stab);*/
01237         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double complex));
01238         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double complex));
01239 
01240         /* Multiply third and fourth polynomial with matrix U. */
01241         /* Check for symmetry. */
01242         if (set->flags & FPT_AL_SYMMETRY)
01243         {
01244           if (m <= 1)
01245           {
01246             fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01247               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01248           }
01249           else if (m%2 == 0)
01250           {
01251             fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01252               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01253           }
01254           else
01255           {
01256             fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01257               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01258           }
01259         }
01260         else
01261         {
01262             fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01263               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01264         }
01265 
01266         if (step->gamma[0] != 0.0)
01267         {
01268           for (k = 0; k < plength_stab; k++)
01269           {
01270             set->result[k] += set->vec3[k];
01271           }
01272         }
01273 
01274         for (k = 0; k < plength_stab; k++)
01275         {
01276           set->result[Nk+k] += set->vec4[k];
01277         }
01278       }
01279     }
01280     /* Double length of polynomials. */
01281     plength = plength<<1;
01282 
01283     /*--------*/
01284     /*for (k = 0; k < 2*Nk; k++)
01285     {
01286       fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01287         k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01288         cimag(set->result[k]));
01289     }*/
01290     /*--------*/
01291   }
01292 
01293   /* Add the resulting cascade coeffcients to the coeffcients accumulated from
01294    * the stabilization steps. */
01295   for (k = 0; k < 2*Nk; k++)
01296   {
01297     set->result[k] += set->work[k];
01298   }
01299 
01300   /* The last step. Compute the Chebyshev coeffcients c_k^n from the
01301    * polynomials in front of P_0^n and P_1^n. */
01302   y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01303     data->alpha_0*set->result[Nk+1]*0.5);
01304   y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01305     data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01306   y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01307     data->beta_0*set->result[Nk+k_end-1] +
01308     data->alpha_0*set->result[Nk+k_end-2]*0.5);
01309   y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01310   for (k = 2; k <= k_end-2; k++)
01311   {
01312     y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01313       data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01314   }
01315 
01316   if (flags & FPT_FUNCTION_VALUES)
01317   {
01318     y[0] *= 2.0;
01319     fftw_execute_r2r(plan,(double*)y,(double*)y);
01320     fftw_destroy_plan(plan);
01321     for (k = 0; k <= k_end; k++)
01322     {
01323       y[k] *= 0.5;
01324     }
01325   }
01326 }
01327 
01328 void dpt_transposed(fpt_set set, const int m, double complex *x,
01329   double complex *y, const int k_end, const unsigned int flags)
01330 {
01331   int j;
01332   fpt_data *data = &(set->dpt[m]);
01333   int Nk;
01334   int tk;
01335   double norm;
01336 
01337   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01338   norm = 2.0/(Nk<<1);
01339 
01340   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01341   {
01342     return;
01343   }
01344 
01345   if (flags & FPT_FUNCTION_VALUES)
01346   {
01347     for (j = 0; j <= k_end; j++)
01348     {
01349       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01350     }
01351 
01352     eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01353       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01354       data->gamma_m1);
01355 
01356     memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01357       sizeof(double complex));
01358   }
01359   else
01360   {
01361     memcpy(set->result,y,(k_end+1)*sizeof(double complex));
01362     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double complex));
01363 
01364     for (j = 0; j < Nk; j++)
01365     {
01366       set->result[j] *= norm;
01367     }
01368 
01369     fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01370       (double*)set->result);
01371 
01372     eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01373       set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01374       data->gamma_m1);
01375 
01376     memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double complex));
01377   }
01378 }
01379 
01380 void fpt_transposed(fpt_set set, const int m, double complex *x,
01381   const double complex *y, const int k_end, const unsigned int flags)
01382 {
01383   /* Get transformation data. */
01384   fpt_data *data = &(set->dpt[m]);
01386   int Nk;
01388   int tk;
01390   int k_start_tilde;
01392   int k_end_tilde;
01393 
01395   int tau;
01397   int firstl;
01399   int lastl;
01401   int l;
01403   int plength;
01405   int plength_stab;
01407   fpt_step *step;
01409   fftw_plan plan;
01410   int length = k_end+1;
01411   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01413   int k;
01414   int t_stab;
01415 
01416   /* Check, if slow transformation should be used due to small bandwidth. */
01417   if (k_end < FPT_BREAK_EVEN)
01418   {
01419     /* Use NDSFT. */
01420     dpt_transposed(set, m, x, y, k_end, flags);
01421   }
01422 
01423   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01424   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01425   k_end_tilde = K_END_TILDE(k_end,Nk);
01426 
01427   /* Check if fast transform is activated. */
01428   if (set->flags & FPT_NO_FAST_ALGORITHM)
01429   {
01430     return;
01431   }
01432 
01433   if (flags & FPT_FUNCTION_VALUES)
01434   {
01435     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01436       (double*)set->work, NULL, 2, 1, kinds, 0U);
01437     fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01438     fftw_destroy_plan(plan);
01439     for (k = 0; k <= k_end; k++)
01440     {
01441       set->result[k] *= 0.5;
01442     }
01443   }
01444   else
01445   {
01446     memcpy(set->result,y,(k_end+1)*sizeof(double complex));
01447   }
01448 
01449   /* Initialize working arrays. */
01450   memset(set->work,0U,2*Nk*sizeof(double complex));
01451 
01452   /* The last step is now the first step. */
01453   for (k = 0; k <= k_end; k++)
01454   {
01455     set->work[k] = data->gamma_m1*set->result[k];
01456   }
01457   //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double complex));
01458 
01459   set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01460     data->alpha_0*set->result[1]);
01461   for (k = 1; k < k_end; k++)
01462   {
01463     set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01464       data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01465   }
01466   if (k_end<Nk)
01467   {
01468     memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double complex));
01469   }
01470 
01472   memcpy(set->result,set->work,2*Nk*sizeof(double complex));
01473 
01474   /* Compute the remaining steps. */
01475   plength = Nk;
01476   for (tau = tk-1; tau >= 1; tau--)
01477   {
01478     /* Compute first l. */
01479     firstl = FIRST_L(k_start_tilde,plength);
01480     /* Compute last l. */
01481     lastl = LAST_L(k_end_tilde,plength);
01482 
01483     /* Compute the multiplication steps. */
01484     for (l = firstl; l <= lastl; l++)
01485     {
01486       /* Initialize second half of coefficient arrays with zeros. */
01487       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double complex));
01488       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double complex));
01489 
01490       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01491         (plength/2)*sizeof(double complex));
01492 
01493       /* Get matrix U_{n,tau,l} */
01494       step = &(data->steps[tau][l]);
01495 
01496       /* Check if step is stable. */
01497       if (step->stable)
01498       {
01499         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01500         {
01501           /* Multiply third and fourth polynomial with matrix U. */
01502           fpt_do_step_transposed_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01503             step->a21[0], step->a22[0], step->gamma[0], tau, set);
01504         }
01505         else
01506         {
01507           /* Multiply third and fourth polynomial with matrix U. */
01508           fpt_do_step_transposed(set->vec3, set->vec4, step->a11[0], step->a12[0],
01509             step->a21[0], step->a22[0], step->gamma[0], tau, set);
01510         }
01511         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double complex));
01512 
01513         for (k = 0; k < plength; k++)
01514         {
01515           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01516         }
01517       }
01518       else
01519       {
01520         /* Stabilize. */
01521         plength_stab = step->N_stab;
01522         t_stab = step->t_stab;
01523 
01524         memcpy(set->vec3,set->result,plength_stab*sizeof(double complex));
01525         memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double complex));
01526 
01527         /* Multiply third and fourth polynomial with matrix U. */
01528         if (set->flags & FPT_AL_SYMMETRY)
01529         {
01530           if (m <= 1)
01531           {
01532             fpt_do_step_transposed_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01533               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01534           }
01535           else if (m%2 == 0)
01536           {
01537             fpt_do_step_transposed_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01538               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01539           }
01540           else
01541           {
01542             fpt_do_step_transposed_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01543               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01544           }
01545         }
01546         else
01547         {
01548             fpt_do_step_transposed(set->vec3, set->vec4, step->a11[0], step->a12[0],
01549               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01550         }
01551 
01552         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double complex));
01553 
01554         for (k = 0; k < plength; k++)
01555         {
01556           set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01557         }
01558        }
01559     }
01560     /* Half the length of polynomial arrays. */
01561     plength = plength>>1;
01562   }
01563 
01564   /* First step */
01565   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01566   {
01567     x[k] = set->work[2*(data->k_start+k)];
01568   }
01569   if (k_end == Nk)
01570   {
01571     x[Nk-data->k_start] =
01572         data->gammaN[tk-2]*set->work[2*(Nk-2)]
01573       + data->betaN[tk-2] *set->work[2*(Nk-1)]
01574       + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01575   }
01576 }
01577 
01578 void fpt_finalize(fpt_set set)
01579 {
01580   int tau;
01581   int l;
01582   int m;
01583   fpt_data *data;
01584   int k_start_tilde;
01585   int N_tilde;
01586   int firstl, lastl;
01587   int plength;
01588 
01589   /* TODO Clean up DPT transform data structures. */
01590   for (m = 0; m <= set->M; m++)
01591   {
01592     /* Check if precomputed. */
01593     data = &set->dpt[m];
01594     if (data->steps != (fpt_step**)NULL)
01595     {
01596       free(data->alphaN);
01597       free(data->betaN);
01598       free(data->gammaN);
01599       data->alphaN = NULL;
01600       data->betaN = NULL;
01601       data->gammaN = NULL;
01602 
01603       /* Free precomputed data. */
01604       k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01605         /*set->N*/);
01606       N_tilde = N_TILDE(set->N);
01607       plength = 4;
01608       for (tau = 1; tau < set->t; tau++)
01609       {
01610         /* Compute first l. */
01611         firstl = FIRST_L(k_start_tilde,plength);
01612         /* Compute last l. */
01613         lastl = LAST_L(N_tilde,plength);
01614 
01615         /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
01616         for (l = firstl; l <= lastl; l++)
01617         {
01618           /* Free components. */
01619           free(data->steps[tau][l].a11[0]);
01620           free(data->steps[tau][l].a12[0]);
01621           free(data->steps[tau][l].a21[0]);
01622           free(data->steps[tau][l].a22[0]);
01623           data->steps[tau][l].a11[0] = NULL;
01624           data->steps[tau][l].a12[0] = NULL;
01625           data->steps[tau][l].a21[0] = NULL;
01626           data->steps[tau][l].a22[0] = NULL;
01627           /* Free components. */
01628           free(data->steps[tau][l].a11);
01629           free(data->steps[tau][l].a12);
01630           free(data->steps[tau][l].a21);
01631           free(data->steps[tau][l].a22);
01632           free(data->steps[tau][l].gamma);
01633           data->steps[tau][l].a11 = NULL;
01634           data->steps[tau][l].a12 = NULL;
01635           data->steps[tau][l].a21 = NULL;
01636           data->steps[tau][l].a22 = NULL;
01637           data->steps[tau][l].gamma = NULL;
01638         }
01639         /* Free pointers for current level. */
01640         free(data->steps[tau]);
01641         data->steps[tau] = NULL;
01642         /* Double length of polynomials. */
01643         plength = plength<<1;
01644       }
01645       /* Free steps. */
01646       free(data->steps);
01647       data->steps = NULL;
01648     }
01649 
01650     if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01651     {
01652     }
01653     else
01654     {
01655       /* Check, if recurrence coefficients must be copied. */
01656       //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
01657       if (set->flags & FPT_PERSISTENT_DATA)
01658       {
01659       }
01660       else
01661       {
01662         free(data->alpha);
01663         free(data->beta);
01664         free(data->gamma);
01665       }
01666       data->alpha = NULL;
01667       data->beta = NULL;
01668       data->gamma = NULL;
01669     }
01670   }
01671 
01672   /* Delete array of DPT transform data. */
01673   free(set->dpt);
01674   set->dpt = NULL;
01675 
01676   for (tau = 1; tau < /*set->t*/set->t+1; tau++)
01677   {
01678     free(set->xcvecs[tau-1]);
01679     set->xcvecs[tau-1] = NULL;
01680   }
01681   free(set->xcvecs);
01682   set->xcvecs = NULL;
01683 
01684   /* Free auxilliary arrays. */
01685   free(set->work);
01686   free(set->result);
01687 
01688   /* Check if fast transform is activated. */
01689   if (set->flags & FPT_NO_FAST_ALGORITHM)
01690   {
01691   }
01692   else
01693   {
01694     /* Free auxilliary arrays. */
01695     free(set->vec3);
01696     free(set->vec4);
01697     free(set->z);
01698     set->work = NULL;
01699     set->result = NULL;
01700     set->vec3 = NULL;
01701     set->vec4 = NULL;
01702     set->z = NULL;
01703 
01704     /* Free FFTW plans. */
01705     for(tau = 0; tau < set->t/*-1*/; tau++)
01706     {
01707       fftw_destroy_plan(set->plans_dct3[tau]);
01708       fftw_destroy_plan(set->plans_dct2[tau]);
01709       set->plans_dct3[tau] = NULL;
01710       set->plans_dct2[tau] = NULL;
01711     }
01712 
01713     free(set->plans_dct3);
01714     free(set->plans_dct2);
01715     set->plans_dct3 = NULL;
01716     set->plans_dct2 = NULL;
01717   }
01718 
01719   //fprintf(stderr,"fpt_finalize: flags = %d\n",set->flags);
01720 
01721   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01722   {
01723   }
01724   else
01725   {
01726     /* Delete arrays of Chebyshev nodes. */
01727     free(set->xc_slow);
01728     set->xc_slow = NULL;
01729     free(set->temp);
01730     set->temp = NULL;
01731   }
01732 
01733   /* Free DPT set structure. */
01734   free(set);
01735 }

Generated on 22 Jan 2007 by Doxygen 1.4.1