NFFT Logo 3.1.1 API Reference

fpt.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002, 2009 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: fpt.c 3258 2009-07-05 09:50:06Z keiner $ */
00020 
00027 #include "config.h"
00028 
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #include <complex.h>
00034 
00035 #include "nfft3.h"
00036 #include "nfft3util.h"
00037 #include "infft.h"
00038 
00039 /* Macros for index calculation. */
00040 
00042 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00043 
00045 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00046 
00048 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00049 
00051 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00052 
00053 #define N_TILDE(y) (y-1)
00054 
00055 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00056 
00057 #define FPT_BREAK_EVEN 4
00058 
00062 typedef struct fpt_step_
00063 {
00064   bool stable;                            
00067   int Ns;                                 
00068   int ts;                                 
00069   double **a11,**a12,**a21,**a22;         
00070   double *g;                              
00071 } fpt_step;
00072 
00076 typedef struct fpt_data_
00077 {
00078   fpt_step **steps;                       
00079   int k_start;                            
00080   double *alphaN;                         
00081   double *betaN;                          
00082   double *gammaN;                         
00083   double alpha_0;                         
00084   double beta_0;                          
00085   double gamma_m1;                        
00086   /* Data for direct transform. */        
00087   double *alpha;                          
00088   double *beta;                           
00089   double *gamma;                          
00090 } fpt_data;
00091 
00095 typedef struct fpt_set_s_
00096 {
00097   int flags;                              
00098   int M;                                  
00099   int N;                                  
00101   int t;                                  
00102   fpt_data *dpt;                          
00103   double **xcvecs;                        
00106   double *xc;                             
00107   double _Complex *temp;                          
00108   double _Complex *work;                          
00109   double _Complex *result;                        
00110   double _Complex *vec3;
00111   double _Complex *vec4;
00112   double _Complex *z;
00113   fftw_plan *plans_dct3;                  
00115   fftw_plan *plans_dct2;                  
00117   fftw_r2r_kind *kinds;                   
00119   fftw_r2r_kind *kindsr;                  
00122   int *lengths; 
00124   /* Data for slow transforms. */
00125   double *xc_slow;
00126 } fpt_set_s;
00127 
00128 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00129   double* v, double _Complex* y, double* w, int n)
00130 {
00131   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00132   double *v_ptr = v, *w_ptr = w;
00133   for (l = 0; l < n; l++)
00134     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00135 }
00136 
00137 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00138 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00139   double* v, double _Complex* y, double* w, int n) \
00140 { \
00141   const int n2 = n>>1; \
00142   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00143   double *v_ptr = v, *w_ptr = w; \
00144   for (l = 0; l < n2; l++) \
00145     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00146   v_ptr--; w_ptr--; \
00147   for (l = 0; l < n2; l++) \
00148     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00149 }
00150 
00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00153 
00154 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00155 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00156   double* v, double _Complex* y, int n, double *xx) \
00157 { \
00158   const int n2 = n>>1; \
00159   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00160   double *v_ptr = v, *xx_ptr = xx; \
00161   for (l = 0; l < n2; l++, v_ptr++) \
00162     *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00163   v_ptr--; \
00164   for (l = 0; l < n2; l++, v_ptr--) \
00165     *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00166 }
00167 
00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00170 
00171 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00172 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00173   double _Complex* y, double* w, int n, double *xx) \
00174 { \
00175   const int n2 = n>>1; \
00176   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00177   double *w_ptr = w, *xx_ptr = xx; \
00178   for (l = 0; l < n2; l++, w_ptr++) \
00179     *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00180   w_ptr--; \
00181   for (l = 0; l < n2; l++, w_ptr--) \
00182     *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00183 }
00184 
00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00187 
00188 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00189   double _Complex* y, double* w, int n)
00190 {
00191   int l;
00192   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00193   double *v_ptr = v, *w_ptr = w;
00194   for (l = n; l > 0; l--)
00195     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00196 }
00197 
00198 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00199   double* v, double _Complex* y, double* w, int n)
00200 {
00201   const int n2 = n>>1; \
00202   int l;
00203   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00204   double *v_ptr = v, *w_ptr = w;
00205   for (l = n2; l > 0; l--)
00206     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00207   v_ptr--; w_ptr--;
00208   for (l = n2; l > 0; l--)
00209     *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00210 }
00211 
00212 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00213   double* v, double _Complex* y, double* w, int n, double *xx)
00214 {
00215   const int n2 = n>>1; \
00216   int l;
00217   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00218   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00219   for (l = n2; l > 0; l--, xx_ptr++)
00220     *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00221   v_ptr--; w_ptr--;
00222   for (l = n2; l > 0; l--, xx_ptr++)
00223     *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00224 }
00225 
00226 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00227   double* v, double _Complex* y, double* w, int n, double *xx)
00228 {
00229   const int n2 = n>>1; \
00230   int l;
00231   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00232   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00233   for (l = n2; l > 0; l--, xx_ptr++)
00234     *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00235   v_ptr--; w_ptr--;
00236   for (l = n2; l > 0; l--, xx_ptr++)
00237     *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00238 }
00239 
00240 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00241 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, double *a12, \
00242   double *a21, double *a22, double g, int tau, fpt_set set) \
00243 { \
00244  \
00245   int length = 1<<(tau+1); \
00246  \
00247   double norm = 1.0/(length<<1); \
00248   \
00249   /* Compensate for factors introduced by a raw DCT-III. */ \
00250   a[0] *= 2.0; \
00251   b[0] *= 2.0; \
00252   \
00253   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00254   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00255   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00256   \
00257   /*for (k = 0; k < length; k++)*/ \
00258   /*{*/ \
00259     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
00260     /*  a11[k],a12[k],a21[k],a22[k]);*/ \
00261   /*}*/ \
00262   \
00263   /* Check, if gamma is zero. */ \
00264   if (g == 0.0) \
00265   { \
00266     /*fprintf(stderr,"gamma == 0!\n");*/ \
00267     /* Perform multiplication only for second row. */ \
00268     M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00269   } \
00270   else \
00271   { \
00272     /*fprintf(stderr,"gamma != 0!\n");*/ \
00273     /* Perform multiplication for both rows. */ \
00274     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00275     M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00276     memcpy(b,set->z,length*sizeof(double _Complex)); \
00277     /* Compute Chebyshev-coefficients using a DCT-II. */ \
00278     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00279     /* Compensate for factors introduced by a raw DCT-II. */ \
00280     a[0] *= 0.5; \
00281   } \
00282   \
00283   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00284   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00285   /* Compensate for factors introduced by a raw DCT-II. */ \
00286   b[0] *= 0.5; \
00287 }
00288 
00289 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00290 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00291 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00292 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
00293 
00294 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00295   double *a11, double *a12, double *a21, double *a22, double *x,
00296   double gamma, int tau, fpt_set set)
00297 {
00299   int length = 1<<(tau+1);
00301   double norm = 1.0/(length<<1);
00302 
00303   UNUSED(a21); UNUSED(a22);
00304 
00305   /* Compensate for factors introduced by a raw DCT-III. */
00306   a[0] *= 2.0;
00307   b[0] *= 2.0;
00308 
00309   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00310   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00311   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00312 
00313   /*for (k = 0; k < length; k++)*/
00314   /*{*/
00315     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00316     /*  a11[k],a12[k],a21[k],a22[k]);*/
00317   /*}*/
00318 
00319   /* Check, if gamma is zero. */
00320   if (gamma == 0.0)
00321   {
00322     /*fprintf(stderr,"gamma == 0!\n");*/
00323     /* Perform multiplication only for second row. */
00324     auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00325   }
00326   else
00327   {
00328     /*fprintf(stderr,"gamma != 0!\n");*/
00329     /* Perform multiplication for both rows. */
00330     auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00331     auvxpwy_symmetric(norm*gamma,a,a,a11,b,a12,length);
00332     memcpy(b,set->z,length*sizeof(double _Complex));
00333     /* Compute Chebyshev-coefficients using a DCT-II. */
00334     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00335     /* Compensate for factors introduced by a raw DCT-II. */
00336     a[0] *= 0.5;
00337   }
00338 
00339   /* Compute Chebyshev-coefficients using a DCT-II. */
00340   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00341   /* Compensate for factors introduced by a raw DCT-II. */
00342   b[0] *= 0.5;
00343 }
00344 
00345 static inline void fpt_do_step_symmetric_l(double _Complex  *a, double _Complex *b,
00346   double *a11, double *a12, double *a21, double *a22, double *x, double gamma, int tau, fpt_set set)
00347 {
00349   int length = 1<<(tau+1);
00351   double norm = 1.0/(length<<1);
00352 
00353   /* Compensate for factors introduced by a raw DCT-III. */
00354   a[0] *= 2.0;
00355   b[0] *= 2.0;
00356 
00357   UNUSED(a11); UNUSED(a12);
00358 
00359   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00360   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00361   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00362 
00363   /*for (k = 0; k < length; k++)*/
00364   /*{*/
00365     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00366     /*  a11[k],a12[k],a21[k],a22[k]);*/
00367   /*}*/
00368 
00369   /* Check, if gamma is zero. */
00370   if (gamma == 0.0)
00371   {
00372     /*fprintf(stderr,"gamma == 0!\n");*/
00373     /* Perform multiplication only for second row. */
00374     auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00375   }
00376   else
00377   {
00378     /*fprintf(stderr,"gamma != 0!\n");*/
00379     /* Perform multiplication for both rows. */
00380     auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00381     auvxpwy_symmetric_2(norm*gamma,a,a,a21,b,a22,length,x);
00382     memcpy(b,set->z,length*sizeof(double _Complex));
00383     /* Compute Chebyshev-coefficients using a DCT-II. */
00384     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00385     /* Compensate for factors introduced by a raw DCT-II. */
00386     a[0] *= 0.5;
00387   }
00388 
00389   /* Compute Chebyshev-coefficients using a DCT-II. */
00390   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00391   /* Compensate for factors introduced by a raw DCT-II. */
00392   b[0] *= 0.5;
00393 }
00394 
00395 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00396 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, \
00397   double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00398 { \
00399  \
00400   int length = 1<<(tau+1); \
00401  \
00402   double norm = 1.0/(length<<1); \
00403   \
00404   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00405   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00406   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00407   \
00408   /* Perform matrix multiplication. */ \
00409   M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00410   M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00411   memcpy(a,set->z,length*sizeof(double _Complex)); \
00412   \
00413   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00414   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00415   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00416 }
00417 
00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00420 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
00421 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
00422 
00423 
00424 static inline void fpt_do_step_t_symmetric_u(double _Complex  *a,
00425   double _Complex *b, double *a11, double *a12, double *x,
00426   double gamma, int tau, fpt_set set)
00427 {
00429   int length = 1<<(tau+1);
00431   double norm = 1.0/(length<<1);
00432 
00433   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00434   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00435   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00436 
00437   /* Perform matrix multiplication. */
00438   abuvxpwy_symmetric1_1(norm,gamma,set->z,a,a11,b,length,x);
00439   abuvxpwy_symmetric1_2(norm,gamma,b,a,a12,b,length,x);
00440   memcpy(a,set->z,length*sizeof(double _Complex));
00441 
00442   /* Compute Chebyshev-coefficients using a DCT-II. */
00443   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00444   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00445 }
00446 
00447 static inline void fpt_do_step_t_symmetric_l(double _Complex  *a,
00448   double _Complex *b, double *a21, double *a22,
00449   double *x, double gamma, int tau, fpt_set set)
00450 {
00452   int length = 1<<(tau+1);
00454   double norm = 1.0/(length<<1);
00455 
00456   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00457   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00458   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00459 
00460   /* Perform matrix multiplication. */
00461   abuvxpwy_symmetric2_2(norm,gamma,set->z,a,b,a21,length,x);
00462   abuvxpwy_symmetric2_1(norm,gamma,b,a,b,a22,length,x);
00463   memcpy(a,set->z,length*sizeof(double _Complex));
00464 
00465   /* Compute Chebyshev-coefficients using a DCT-II. */
00466   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00467   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00468 }
00469 
00470 
00471 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00472   const double *beta, const double *gamma)
00473 {
00474   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00475    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00476    */
00477   int i,j;
00478   double a,b,x_val_act,a_old;
00479   const double *x_act;
00480   double *y_act;
00481   const double *alpha_act, *beta_act, *gamma_act;
00482 
00483   /* Traverse all nodes. */
00484   x_act = x;
00485   y_act = y;
00486   for (i = 0; i < size; i++)
00487   {
00488     a = 1.0;
00489     b = 0.0;
00490     x_val_act = *x_act;
00491 
00492     if (k == 0)
00493     {
00494       *y_act = 1.0;
00495     }
00496     else
00497     {
00498       alpha_act = &(alpha[k]);
00499       beta_act = &(beta[k]);
00500       gamma_act = &(gamma[k]);
00501       for (j = k; j > 1; j--)
00502       {
00503         a_old = a;
00504         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00505          b = a_old*(*gamma_act);
00506         alpha_act--;
00507         beta_act--;
00508         gamma_act--;
00509       }
00510       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00511     }
00512     x_act++;
00513     y_act++;
00514   }
00515 }
00516 
00517 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00518   const double *beta, const double *gamma)
00519 {
00520   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00521    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00522    */
00523   int i,j;
00524   double a,b,x_val_act,a_old;
00525   const double *x_act;
00526   double *y_act, *z_act;
00527   const double *alpha_act, *beta_act, *gamma_act;
00528 
00529   /* Traverse all nodes. */
00530   x_act = x;
00531   y_act = y;
00532   z_act = z;
00533   for (i = 0; i < size; i++)
00534   {
00535     a = 1.0;
00536     b = 0.0;
00537     x_val_act = *x_act;
00538 
00539     if (k == 0)
00540     {
00541       *y_act = 1.0;
00542       *z_act = 0.0;
00543     }
00544     else
00545     {
00546       alpha_act = &(alpha[k]);
00547       beta_act = &(beta[k]);
00548       gamma_act = &(gamma[k]);
00549       for (j = k; j > 1; j--)
00550       {
00551         a_old = a;
00552         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00553         b = a_old*(*gamma_act);
00554         alpha_act--;
00555         beta_act--;
00556         gamma_act--;
00557       }
00558       if (i < size1)
00559         *z_act = a;
00560       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00561     }
00562 
00563     x_act++;
00564     y_act++;
00565     z_act++;
00566   }
00567   /*gamma_act++;
00568   FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
00569   fprintf(f,"size1: %10d, size: %10d\n",size1,size);
00570   fclose(f);*/
00571 }
00572 
00573 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00574   const double *alpha, const double *beta, const double *gamma, const
00575   double threshold)
00576 {
00577   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00578    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00579    */
00580   int i,j;
00581   double a,b,x_val_act,a_old;
00582   const double *x_act;
00583   double *y_act, *z_act;
00584   const double *alpha_act, *beta_act, *gamma_act;
00585   R min = INFINITY, max = -INFINITY;
00586   const R t = /*-LOG10(EPS)*/ + LOG10(FABS(threshold));
00587 
00588   /* Traverse all nodes. */
00589   x_act = x;
00590   y_act = y;
00591   z_act = z;
00592   for (i = 0; i < size; i++)
00593   {
00594     a = 1.0;
00595     b = 0.0;
00596     x_val_act = *x_act;
00597 
00598     if (k == 0)
00599     {
00600      *y_act = 1.0;
00601      *z_act = 0.0;
00602     }
00603     else
00604     {
00605       alpha_act = &(alpha[k]);
00606       beta_act = &(beta[k]);
00607       gamma_act = &(gamma[k]);
00608       for (j = k; j > 1; j--)
00609       {
00610         a_old = a;
00611         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00612          b = a_old*(*gamma_act);
00613         alpha_act--;
00614         beta_act--;
00615         gamma_act--;
00616       }
00617       *z_act = a;
00618       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00619       min = FMIN(min,LOG10(FABS(*y_act)));
00620       max = FMAX(max,LOG10(FABS(*y_act)));
00621 //      if (fabs(*y_act) > threshold)
00622       if ((max - min) > t)
00623         return 1;
00624     }
00625     x_act++;
00626     y_act++;
00627     z_act++;
00628   }
00629   return 0;
00630 }
00631 
00632 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00633   const double _Complex *a, const double *x, double _Complex *y,
00634   const double *alpha, const double *beta, const double *gamma,
00635   const double lambda)
00636 {
00637   int j,k;
00638   double _Complex tmp1, tmp2, tmp3;
00639   double xc;
00640 
00641   if (N == 0)
00642     for (j = 0; j <= M; j++)
00643       y[j] = a[0];
00644   else
00645   {
00646     for (j = 0; j <= M; j++)
00647     {
00648 #if 0
00649       xc = x[j];
00650       tmp2 = a[N];
00651       tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00652       for (k = N-1; k > 0; k--)
00653       {
00654         tmp3 =   a[k-1]
00655                + (alpha[k-1] * xc + beta[k-1]) * tmp1
00656                + gamma[k] * tmp2;
00657         tmp2 = tmp1;
00658         tmp1 = tmp3;
00659       }
00660       y[j] = lambda * tmp1;
00661 #else
00662       xc = x[j];
00663       tmp1 = a[N-1];
00664       tmp2 = a[N];
00665       for (k = N-1; k > 0; k--)
00666       {
00667         tmp3 = a[k-1] + tmp2 * gamma[k];
00668         tmp2 *= (alpha[k] * xc + beta[k]);
00669         tmp2 += tmp1;
00670         tmp1 = tmp3;
00671       }
00672       tmp2 *= (alpha[0] * xc + beta[0]);
00673       y[j] = lambda * (tmp2 + tmp1);
00674 #endif
00675     }
00676   }
00677 }
00678 
00697 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00698   double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gamma,
00699   double lambda)
00700 {
00701   int j,k;
00702   double _Complex* it1 = temp;
00703   double _Complex* it2 = y;
00704   double _Complex aux;
00705 
00706   /* Compute final result by multiplying with the constant lambda */
00707   a[0] = 0.0;
00708   for (j = 0; j <= M; j++)
00709   {
00710     it2[j] = lambda * y[j];
00711     a[0] += it2[j];
00712   }
00713 
00714   if (N > 0)
00715   {
00716     /* Compute final step. */
00717     a[1] = 0.0;
00718     for (j = 0; j <= M; j++)
00719     {
00720       it1[j] = it2[j];
00721       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00722       a[1] += it2[j];
00723     }
00724 
00725     for (k = 2; k <= N; k++)
00726     {
00727       a[k] = 0.0;
00728       for (j = 0; j <= M; j++)
00729       {
00730         aux = it1[j];
00731         it1[j] = it2[j];
00732         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00733         a[k] += it2[j];
00734       }
00735     }
00736   }
00737 }
00738 
00739 
00740 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00741 {
00743   int plength;
00745   int tau;
00747   int m;
00748   int k;
00749 
00750   /* Allocate memory for new DPT set. */
00751   fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00752 
00753   /* Save parameters in structure. */
00754   set->flags = flags;
00755 
00756   set->M = M;
00757   set->t = t;
00758   set->N = 1<<t;
00759 
00760   /* Allocate memory for M transforms. */
00761   set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00762 
00763   /* Initialize with NULL pointer. */
00764   for (m = 0; m < set->M; m++)
00765     set->dpt[m].steps = 0;
00766 
00767   /* Create arrays with Chebyshev nodes. */
00768 
00769   /* Initialize array with Chebyshev coefficients for the polynomial x. This
00770    * would be trivially an array containing a 1 as second entry with all other
00771    * coefficients set to zero. In order to compensate for the multiplicative
00772    * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
00773 
00774   /* Allocate memory for array of pointers to node arrays. */
00775   set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00776   /* For each polynomial length starting with 4, compute the Chebyshev nodes
00777    * using a DCT-III. */
00778   plength = 4;
00779   for (tau = 1; tau < t+1; tau++)
00780   {
00781     /* Allocate memory for current array. */
00782     set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00783     for (k = 0; k < plength; k++)
00784     {
00785       set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00786     }
00787     plength = plength << 1;
00788   }
00789 
00791   set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00792   set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00793 
00794   /* Check if fast transform is activated. */
00795   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00796   {
00798     set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00799     set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00800     set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00801 
00803     set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00804     set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00805     set->kinds      = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00806     set->kinds[0]   = FFTW_REDFT01;
00807     set->kinds[1]   = FFTW_REDFT01;
00808     set->kindsr     = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00809     set->kindsr[0]  = FFTW_REDFT10;
00810     set->kindsr[1]  = FFTW_REDFT10;
00811     set->lengths    = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
00812     for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00813     {
00814       set->lengths[tau] = plength;
00815       set->plans_dct3[tau] =
00816         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00817                            2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00818                            0);
00819       set->plans_dct2[tau] =
00820         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00821                            2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00822                            0);
00823     }
00824     nfft_free(set->lengths);
00825     nfft_free(set->kinds);
00826     nfft_free(set->kindsr);
00827     set->lengths = NULL;
00828     set->kinds = NULL;
00829     set->kindsr = NULL;
00830   }
00831 
00832   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00833   {
00834     set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00835     set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00836   }
00837 
00838   /* Return the newly created DPT set. */
00839   return set;
00840 }
00841 
00842 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00843   double *gam, int k_start, const double threshold)
00844 {
00845 
00846   int tau;          
00847   int l;            
00848   int plength;      
00850   int degree;       
00852   int firstl;       
00853   int lastl;        
00854   int plength_stab; 
00856   int degree_stab;  
00858   double *a11;      
00860   double *a12;      
00862   double *a21;      
00864   double *a22;      
00866   const double *calpha;
00867   const double *cbeta;
00868   const double *cgamma;
00869   int needstab = 0; 
00870   int k_start_tilde;
00871   int N_tilde;
00872   int clength;
00873   int clength_1;
00874   int clength_2;
00875   int t_stab, N_stab;
00876   fpt_data *data;
00877 
00878   /* Get pointer to DPT data. */
00879   data = &(set->dpt[m]);
00880 
00881   /* Check, if already precomputed. */
00882   if (data->steps != NULL)
00883     return;
00884 
00885   /* Save k_start. */
00886   data->k_start = k_start;
00887 
00888   /* Check if fast transform is activated. */
00889   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00890   {
00891     /* Save recursion coefficients. */
00892     data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00893     data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00894     data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00895 
00896     for (tau = 2; tau <= set->t; tau++)
00897     {
00898 
00899       data->alphaN[tau-2] = alpha[1<<tau];
00900       data->betaN[tau-2] = beta[1<<tau];
00901       data->gammaN[tau-2] = gam[1<<tau];
00902     }
00903 
00904     data->alpha_0 = alpha[1];
00905     data->beta_0 = beta[1];
00906     data->gamma_m1 = gam[0];
00907 
00908     k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00909       /*set->N*/);
00910     N_tilde = N_TILDE(set->N);
00911 
00912     /* Allocate memory for the cascade with t = log_2(N) many levels. */
00913     data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00914 
00915     /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
00916     plength = 4;
00917     for (tau = 1; tau < set->t; tau++)
00918     {
00919       /* Compute auxilliary values. */
00920       degree = plength>>1;
00921       /* Compute first l. */
00922       firstl = FIRST_L(k_start_tilde,plength);
00923       /* Compute last l. */
00924       lastl = LAST_L(N_tilde,plength);
00925 
00926       /* Allocate memory for current level. This level will contain 2^{t-tau-1}
00927        * many matrices. */
00928       data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00929                          * (lastl+1));
00930 
00931       /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
00932       for (l = firstl; l <= lastl; l++)
00933       {
00934         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00935         {
00936           //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
00937           //fflush(stderr);
00938           clength = plength/2;
00939         }
00940         else
00941         {
00942           clength = plength;
00943         }
00944 
00945         /* Allocate memory for the components of U_{n,tau,l}. */
00946         a11 = (double*) nfft_malloc(sizeof(double)*clength);
00947         a12 = (double*) nfft_malloc(sizeof(double)*clength);
00948         a21 = (double*) nfft_malloc(sizeof(double)*clength);
00949         a22 = (double*) nfft_malloc(sizeof(double)*clength);
00950 
00951         /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
00952          * nodes. */
00953 
00954         /* Get the pointers to the three-term recurrence coeffcients. */
00955         calpha = &(alpha[plength*l+1+1]);
00956         cbeta = &(beta[plength*l+1+1]);
00957         cgamma = &(gam[plength*l+1+1]);
00958 
00959         if (set->flags & FPT_NO_STABILIZATION)
00960         {
00961           /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
00962           calpha--;
00963           cbeta--;
00964           cgamma--;
00965           eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
00966             cgamma);
00967           eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
00968             cgamma);
00969           needstab = 0;
00970         }
00971         else
00972         {
00973           calpha--;
00974           cbeta--;
00975           cgamma--;
00976           /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
00977           needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
00978             degree-1, calpha, cbeta, cgamma, threshold);
00979           if (needstab == 0)
00980           {
00981             /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
00982             needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
00983               degree, calpha, cbeta, cgamma, threshold);
00984           }
00985         }
00986 
00987         /* Check if stabilization needed. */
00988         if (needstab == 0)
00989         {
00990           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
00991           data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
00992           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
00993           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
00994           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
00995           /* No stabilization needed. */
00996           data->steps[tau][l].a11[0] = a11;
00997           data->steps[tau][l].a12[0] = a12;
00998           data->steps[tau][l].a21[0] = a21;
00999           data->steps[tau][l].a22[0] = a22;
01000           data->steps[tau][l].g[0] = gam[plength*l+1+1];
01001           data->steps[tau][l].stable = true;
01002         }
01003         else
01004         {
01005           /* Stabilize. */
01006           degree_stab = degree*(2*l+1);
01007           nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01008 
01009           /* Old arrays are to small. */
01010           nfft_free(a11);
01011           nfft_free(a12);
01012           nfft_free(a21);
01013           nfft_free(a22);
01014 
01015           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01016           data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01017           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01018           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01019           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01020 
01021           plength_stab = N_stab;
01022 
01023           if (set->flags & FPT_AL_SYMMETRY)
01024           {
01025             if (m <= 1)
01026             {
01027               /* This should never be executed */
01028               clength_1 = plength_stab;
01029               clength_2 = plength_stab;
01030               /* Allocate memory for arrays. */
01031               a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01032               a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01033               a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01034               a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01035               /* Get the pointers to the three-term recurrence coeffcients. */
01036               calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01037               eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01038                 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01039               eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01040                 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01041             }
01042             else
01043             {
01044               clength = plength_stab/2;
01045               if (m%2 == 0)
01046               {
01047                 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01048                 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01049                 a21 = 0;
01050                 a22 = 0;
01051                 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01052                 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01053                   degree_stab-2, calpha, cbeta, cgamma);
01054                 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01055                   degree_stab-1, calpha, cbeta, cgamma);
01056               }
01057               else
01058               {
01059                 a11 = 0;
01060                 a12 = 0;
01061                 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01062                 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01063                 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01064                 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01065                   degree_stab-1,calpha, cbeta, cgamma);
01066                 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01067                   degree_stab+0, calpha, cbeta, cgamma);
01068               }
01069             }
01070           }
01071           else
01072           {
01073             clength_1 = plength_stab;
01074             clength_2 = plength_stab;
01075             a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01076             a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01077             a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01078             a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01079             calpha = &(alpha[2]);
01080             cbeta = &(beta[2]);
01081             cgamma = &(gam[2]);
01082             calpha--;
01083             cbeta--;
01084             cgamma--;
01085             eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01086               calpha, cbeta, cgamma);
01087             eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01088               calpha, cbeta, cgamma);
01089 
01090           }
01091           data->steps[tau][l].a11[0] = a11;
01092           data->steps[tau][l].a12[0] = a12;
01093           data->steps[tau][l].a21[0] = a21;
01094           data->steps[tau][l].a22[0] = a22;
01095 
01096           data->steps[tau][l].g[0] =  gam[1+1];
01097           data->steps[tau][l].stable = false;
01098           data->steps[tau][l].ts = t_stab;
01099           data->steps[tau][l].Ns = N_stab;
01100         }
01101       }
01103       plength = plength << 1;
01104     }
01105   }
01106 
01107   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01108   {
01109     /* Check, if recurrence coefficients must be copied. */
01110     if (set->flags & FPT_PERSISTENT_DATA)
01111     {
01112       data->alpha = (double*) alpha;
01113       data->beta = (double*) beta;
01114       data->gamma = (double*) gam;
01115     }
01116     else
01117     {
01118       data->alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01119       data->beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01120       data->gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01121       memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
01122       memcpy(data->beta,beta,(set->N+1)*sizeof(double));
01123       memcpy(data->gamma,gam,(set->N+1)*sizeof(double));
01124     }
01125   }
01126 }
01127 
01128 void dpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01129   const int k_end, const unsigned int flags)
01130 {
01131   int j;
01132   fpt_data *data = &(set->dpt[m]);
01133   int Nk;
01134   int tk;
01135   double norm;
01136 
01137   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01138   norm = 2.0/(Nk<<1);
01139 
01140   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01141   {
01142     return;
01143   }
01144 
01145   if (flags & FPT_FUNCTION_VALUES)
01146   {
01147     /* Fill array with Chebyshev nodes. */
01148     for (j = 0; j <= k_end; j++)
01149     {
01150       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01151     }
01152 
01153     memset(set->result,0U,data->k_start*sizeof(double _Complex));
01154     memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01155 
01156     /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01157       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01158       data->gamma_m1);*/
01159     eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01160       y, &data->alpha[1], &data->beta[1], &data->gamma[1], data->gamma_m1);
01161   }
01162   else
01163   {
01164     memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01165     memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01166 
01167     eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01168       set->result, &data->alpha[1], &data->beta[1], &data->gamma[1],
01169       data->gamma_m1);
01170 
01171     fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01172       (double*)set->result);
01173 
01174     set->result[0] *= 0.5;
01175     for (j = 0; j < Nk; j++)
01176     {
01177       set->result[j] *= norm;
01178     }
01179 
01180     memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01181   }
01182 }
01183 
01184 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01185   const int k_end, const unsigned int flags)
01186 {
01187   /* Get transformation data. */
01188   fpt_data *data = &(set->dpt[m]);
01190   int Nk;
01192   int tk;
01194   int k_start_tilde;
01196   int k_end_tilde;
01197 
01199   int tau;
01201   int firstl;
01203   int lastl;
01205   int l;
01207   int plength;
01209   int plength_stab;
01210   int t_stab;
01212   fpt_step *step;
01214   fftw_plan plan = 0;
01215   int length = k_end+1;
01216   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01217 
01219   int k;
01220 
01221   double _Complex *work_ptr;
01222   const double _Complex *x_ptr;
01223 
01224   /* Check, if slow transformation should be used due to small bandwidth. */
01225   if (k_end < FPT_BREAK_EVEN)
01226   {
01227     /* Use NDSFT. */
01228     dpt_trafo(set, m, x, y, k_end, flags);
01229     return;
01230   }
01231 
01232   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01233   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01234   k_end_tilde = K_END_TILDE(k_end,Nk);
01235 
01236   /* Check if fast transform is activated. */
01237   if (set->flags & FPT_NO_FAST_ALGORITHM)
01238     return;
01239 
01240   if (flags & FPT_FUNCTION_VALUES)
01241     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01242       (double*)set->work, NULL, 2, 1, kinds, 0U);
01243 
01244   /* Initialize working arrays. */
01245   memset(set->result,0U,2*Nk*sizeof(double _Complex));
01246 
01247   /* The first step. */
01248 
01249   /* Set the first 2*data->k_start coefficients to zero. */
01250   memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01251 
01252   work_ptr = &set->work[2*data->k_start];
01253   x_ptr = x;
01254 
01255   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01256   {
01257     *work_ptr++ = *x_ptr++;
01258     *work_ptr++ = K(0.0);
01259   }
01260 
01261   /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
01262   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01263 
01264   /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
01265    * x_{Nk-1} and x_{Nk-2}. */
01266   if (k_end == Nk)
01267   {
01268     set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01269     set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01270     set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01271   }
01272 
01273   /* Compute the remaining steps. */
01274   plength = 4;
01275   for (tau = 1; tau < tk; tau++)
01276   {
01277     /* Compute first l. */
01278     firstl = FIRST_L(k_start_tilde,plength);
01279     /* Compute last l. */
01280     lastl = LAST_L(k_end_tilde,plength);
01281 
01282     /* Compute the multiplication steps. */
01283     for (l = firstl; l <= lastl; l++)
01284     {
01285       /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
01286       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01287       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01288       memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01289       memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01290 
01291       /* Copy coefficients into first half. */
01292       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01293       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01294       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01295 
01296       /* Get matrix U_{n,tau,l} */
01297       step = &(data->steps[tau][l]);
01298 
01299       /* Check if step is stable. */
01300       if (step->stable)
01301       {
01302         /* Check, if we should do a symmetrizised step. */
01303         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01304         {
01305           /*for (k = 0; k < plength; k++)
01306           {
01307             fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
01308               step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
01309           }*/
01310           /* Multiply third and fourth polynomial with matrix U. */
01311           //fprintf(stderr,"\nhallo\n");
01312           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01313             step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01314         }
01315         else
01316         {
01317           /* Multiply third and fourth polynomial with matrix U. */
01318           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01319             step->a21[0], step->a22[0], step->g[0], tau, set);
01320         }
01321 
01322         if (step->g[0] != 0.0)
01323         {
01324           for (k = 0; k < plength; k++)
01325           {
01326             set->work[plength*2*l+k] += set->vec3[k];
01327           }
01328         }
01329         for (k = 0; k < plength; k++)
01330         {
01331           set->work[plength*(2*l+1)+k] += set->vec4[k];
01332         }
01333       }
01334       else
01335       {
01336         /* Stabilize. */
01337 
01338         /* The lengh of the polynomials */
01339         plength_stab = step->Ns;
01340         t_stab = step->ts;
01341 
01342         /*---------*/
01343         /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
01344         fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
01345         fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
01346         fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
01347         /*---------*/
01348 
01349         /* Set rest of vectors explicitely to zero */
01350         /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
01351           plength, plength_stab);*/
01352         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01353         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01354 
01355         /* Multiply third and fourth polynomial with matrix U. */
01356         /* Check for symmetry. */
01357         if (set->flags & FPT_AL_SYMMETRY)
01358         {
01359           if (m <= 1)
01360           {
01361             fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01362               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01363           }
01364           else if (m%2 == 0)
01365           {
01366             /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01367               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01368             fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01369               step->a21[0], step->a22[0],
01370               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01371             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01372               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01373           }
01374           else
01375           {
01376             /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01377               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01378             fpt_do_step_symmetric_l(set->vec3, set->vec4,
01379               step->a11[0], step->a12[0],
01380               step->a21[0],
01381               step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01382             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01383               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01384           }
01385         }
01386         else
01387         {
01388             fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01389               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01390         }
01391 
01392         if (step->g[0] != 0.0)
01393         {
01394           for (k = 0; k < plength_stab; k++)
01395           {
01396             set->result[k] += set->vec3[k];
01397           }
01398         }
01399 
01400         for (k = 0; k < plength_stab; k++)
01401         {
01402           set->result[Nk+k] += set->vec4[k];
01403         }
01404       }
01405     }
01406     /* Double length of polynomials. */
01407     plength = plength<<1;
01408 
01409     /*--------*/
01410     /*for (k = 0; k < 2*Nk; k++)
01411     {
01412       fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01413         k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01414         cimag(set->result[k]));
01415     }*/
01416     /*--------*/
01417   }
01418 
01419   /* Add the resulting cascade coeffcients to the coeffcients accumulated from
01420    * the stabilization steps. */
01421   for (k = 0; k < 2*Nk; k++)
01422   {
01423     set->result[k] += set->work[k];
01424   }
01425 
01426   /* The last step. Compute the Chebyshev coeffcients c_k^n from the
01427    * polynomials in front of P_0^n and P_1^n. */
01428   y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01429     data->alpha_0*set->result[Nk+1]*0.5);
01430   y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01431     data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01432   y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01433     data->beta_0*set->result[Nk+k_end-1] +
01434     data->alpha_0*set->result[Nk+k_end-2]*0.5);
01435   y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01436   for (k = 2; k <= k_end-2; k++)
01437   {
01438     y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01439       data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01440   }
01441 
01442   if (flags & FPT_FUNCTION_VALUES)
01443   {
01444     y[0] *= 2.0;
01445     fftw_execute_r2r(plan,(double*)y,(double*)y);
01446     fftw_destroy_plan(plan);
01447     for (k = 0; k <= k_end; k++)
01448     {
01449       y[k] *= 0.5;
01450     }
01451   }
01452 }
01453 
01454 void dpt_transposed(fpt_set set, const int m, double _Complex *x,
01455   double _Complex *y, const int k_end, const unsigned int flags)
01456 {
01457   int j;
01458   fpt_data *data = &(set->dpt[m]);
01459   int Nk;
01460   int tk;
01461   double norm;
01462 
01463   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01464   norm = 2.0/(Nk<<1);
01465 
01466   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01467   {
01468     return;
01469   }
01470 
01471   if (flags & FPT_FUNCTION_VALUES)
01472   {
01473     for (j = 0; j <= k_end; j++)
01474     {
01475       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01476     }
01477 
01478     eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01479       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01480       data->gamma_m1);
01481 
01482     memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01483       sizeof(double _Complex));
01484   }
01485   else
01486   {
01487     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01488     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01489 
01490     for (j = 0; j < Nk; j++)
01491     {
01492       set->result[j] *= norm;
01493     }
01494 
01495     fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01496       (double*)set->result);
01497 
01498     eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01499       set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01500       data->gamma_m1);
01501 
01502     memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01503   }
01504 }
01505 
01506 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01507   double _Complex *y, const int k_end, const unsigned int flags)
01508 {
01509   /* Get transformation data. */
01510   fpt_data *data = &(set->dpt[m]);
01512   int Nk;
01514   int tk;
01516   int k_start_tilde;
01518   int k_end_tilde;
01519 
01521   int tau;
01523   int firstl;
01525   int lastl;
01527   int l;
01529   int plength;
01531   int plength_stab;
01533   fpt_step *step;
01535   fftw_plan plan;
01536   int length = k_end+1;
01537   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01539   int k;
01540   int t_stab;
01541 
01542   /* Check, if slow transformation should be used due to small bandwidth. */
01543   if (k_end < FPT_BREAK_EVEN)
01544   {
01545     /* Use NDSFT. */
01546     dpt_transposed(set, m, x, y, k_end, flags);
01547     return;
01548   }
01549 
01550   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01551   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01552   k_end_tilde = K_END_TILDE(k_end,Nk);
01553 
01554   /* Check if fast transform is activated. */
01555   if (set->flags & FPT_NO_FAST_ALGORITHM)
01556   {
01557     return;
01558   }
01559 
01560   if (flags & FPT_FUNCTION_VALUES)
01561   {
01562     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01563       (double*)set->work, NULL, 2, 1, kinds, 0U);
01564     fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01565     fftw_destroy_plan(plan);
01566     for (k = 0; k <= k_end; k++)
01567     {
01568       set->result[k] *= 0.5;
01569     }
01570   }
01571   else
01572   {
01573     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01574   }
01575 
01576   /* Initialize working arrays. */
01577   memset(set->work,0U,2*Nk*sizeof(double _Complex));
01578 
01579   /* The last step is now the first step. */
01580   for (k = 0; k <= k_end; k++)
01581   {
01582     set->work[k] = data->gamma_m1*set->result[k];
01583   }
01584   //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
01585 
01586   set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01587     data->alpha_0*set->result[1]);
01588   for (k = 1; k < k_end; k++)
01589   {
01590     set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01591       data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01592   }
01593   if (k_end<Nk)
01594   {
01595     memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01596   }
01597 
01599   memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01600 
01601   /* Compute the remaining steps. */
01602   plength = Nk;
01603   for (tau = tk-1; tau >= 1; tau--)
01604   {
01605     /* Compute first l. */
01606     firstl = FIRST_L(k_start_tilde,plength);
01607     /* Compute last l. */
01608     lastl = LAST_L(k_end_tilde,plength);
01609 
01610     /* Compute the multiplication steps. */
01611     for (l = firstl; l <= lastl; l++)
01612     {
01613       /* Initialize second half of coefficient arrays with zeros. */
01614       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01615       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01616 
01617       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01618         (plength/2)*sizeof(double _Complex));
01619 
01620       /* Get matrix U_{n,tau,l} */
01621       step = &(data->steps[tau][l]);
01622 
01623       /* Check if step is stable. */
01624       if (step->stable)
01625       {
01626         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01627         {
01628           /* Multiply third and fourth polynomial with matrix U. */
01629           fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01630             step->a21[0], step->a22[0], step->g[0], tau, set);
01631         }
01632         else
01633         {
01634           /* Multiply third and fourth polynomial with matrix U. */
01635           fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01636             step->a21[0], step->a22[0], step->g[0], tau, set);
01637         }
01638         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01639 
01640         for (k = 0; k < plength; k++)
01641         {
01642           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01643         }
01644       }
01645       else
01646       {
01647         /* Stabilize. */
01648         plength_stab = step->Ns;
01649         t_stab = step->ts;
01650 
01651         memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01652         memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01653 
01654         /* Multiply third and fourth polynomial with matrix U. */
01655         if (set->flags & FPT_AL_SYMMETRY)
01656         {
01657           if (m <= 1)
01658           {
01659             fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01660               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01661           }
01662           else if (m%2 == 0)
01663           {
01664             fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01665               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01666           }
01667           else
01668           {
01669             fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01670               step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01671           }
01672         }
01673         else
01674         {
01675             fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01676               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01677         }
01678 
01679         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01680 
01681         for (k = 0; k < plength; k++)
01682         {
01683           set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01684         }
01685        }
01686     }
01687     /* Half the length of polynomial arrays. */
01688     plength = plength>>1;
01689   }
01690 
01691   /* First step */
01692   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01693   {
01694     x[k] = set->work[2*(data->k_start+k)];
01695   }
01696   if (k_end == Nk)
01697   {
01698     x[Nk-data->k_start] =
01699         data->gammaN[tk-2]*set->work[2*(Nk-2)]
01700       + data->betaN[tk-2] *set->work[2*(Nk-1)]
01701       + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01702   }
01703 }
01704 
01705 void fpt_finalize(fpt_set set)
01706 {
01707   int tau;
01708   int l;
01709   int m;
01710   fpt_data *data;
01711   int k_start_tilde;
01712   int N_tilde;
01713   int firstl, lastl;
01714   int plength;
01715   const int M = set->M;
01716 
01717   /* TODO Clean up DPT transform data structures. */
01718   for (m = 0; m < M; m++)
01719   {
01720     /* Check if precomputed. */
01721     data = &set->dpt[m];
01722     if (data->steps != (fpt_step**)NULL)
01723     {
01724       nfft_free(data->alphaN);
01725       nfft_free(data->betaN);
01726       nfft_free(data->gammaN);
01727       data->alphaN = NULL;
01728       data->betaN = NULL;
01729       data->gammaN = NULL;
01730 
01731       /* Free precomputed data. */
01732       k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01733         /*set->N*/);
01734       N_tilde = N_TILDE(set->N);
01735       plength = 4;
01736       for (tau = 1; tau < set->t; tau++)
01737       {
01738         /* Compute first l. */
01739         firstl = FIRST_L(k_start_tilde,plength);
01740         /* Compute last l. */
01741         lastl = LAST_L(N_tilde,plength);
01742 
01743         /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
01744         for (l = firstl; l <= lastl; l++)
01745         {
01746           /* Free components. */
01747           nfft_free(data->steps[tau][l].a11[0]);
01748           nfft_free(data->steps[tau][l].a12[0]);
01749           nfft_free(data->steps[tau][l].a21[0]);
01750           nfft_free(data->steps[tau][l].a22[0]);
01751           data->steps[tau][l].a11[0] = NULL;
01752           data->steps[tau][l].a12[0] = NULL;
01753           data->steps[tau][l].a21[0] = NULL;
01754           data->steps[tau][l].a22[0] = NULL;
01755           /* Free components. */
01756           nfft_free(data->steps[tau][l].a11);
01757           nfft_free(data->steps[tau][l].a12);
01758           nfft_free(data->steps[tau][l].a21);
01759           nfft_free(data->steps[tau][l].a22);
01760           nfft_free(data->steps[tau][l].g);
01761           data->steps[tau][l].a11 = NULL;
01762           data->steps[tau][l].a12 = NULL;
01763           data->steps[tau][l].a21 = NULL;
01764           data->steps[tau][l].a22 = NULL;
01765           data->steps[tau][l].g = NULL;
01766         }
01767         /* Free pointers for current level. */
01768         nfft_free(data->steps[tau]);
01769         data->steps[tau] = NULL;
01770         /* Double length of polynomials. */
01771         plength = plength<<1;
01772       }
01773       /* Free steps. */
01774       nfft_free(data->steps);
01775       data->steps = NULL;
01776     }
01777 
01778     if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01779     {
01780       /* Check, if recurrence coefficients must be copied. */
01781       //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
01782       if (!(set->flags & FPT_PERSISTENT_DATA))
01783       {
01784         nfft_free(data->alpha);
01785         nfft_free(data->beta);
01786         nfft_free(data->gamma);
01787       }
01788       data->alpha = NULL;
01789       data->beta = NULL;
01790       data->gamma = NULL;
01791     }
01792   }
01793 
01794   /* Delete array of DPT transform data. */
01795   nfft_free(set->dpt);
01796   set->dpt = NULL;
01797 
01798   for (tau = 1; tau < set->t+1; tau++)
01799   {
01800     nfft_free(set->xcvecs[tau-1]);
01801     set->xcvecs[tau-1] = NULL;
01802   }
01803   nfft_free(set->xcvecs);
01804   set->xcvecs = NULL;
01805 
01806   /* Free auxilliary arrays. */
01807   nfft_free(set->work);
01808   nfft_free(set->result);
01809 
01810   /* Check if fast transform is activated. */
01811   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01812   {
01813     /* Free auxilliary arrays. */
01814     nfft_free(set->vec3);
01815     nfft_free(set->vec4);
01816     nfft_free(set->z);
01817     set->work = NULL;
01818     set->result = NULL;
01819     set->vec3 = NULL;
01820     set->vec4 = NULL;
01821     set->z = NULL;
01822 
01823     /* Free FFTW plans. */
01824     for(tau = 0; tau < set->t/*-1*/; tau++)
01825     {
01826       fftw_destroy_plan(set->plans_dct3[tau]);
01827       fftw_destroy_plan(set->plans_dct2[tau]);
01828       set->plans_dct3[tau] = NULL;
01829       set->plans_dct2[tau] = NULL;
01830     }
01831 
01832     nfft_free(set->plans_dct3);
01833     nfft_free(set->plans_dct2);
01834     set->plans_dct3 = NULL;
01835     set->plans_dct2 = NULL;
01836   }
01837 
01838   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01839   {
01840     /* Delete arrays of Chebyshev nodes. */
01841     nfft_free(set->xc_slow);
01842     set->xc_slow = NULL;
01843     nfft_free(set->temp);
01844     set->temp = NULL;
01845   }
01846 
01847   /* Free DPT set structure. */
01848   nfft_free(set);
01849 }

Generated on 17 Aug 2009 by Doxygen 1.5.3