NFFT Logo 3.1.2 API Reference

fpt.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002, 2009 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: fpt.c 3333 2009-09-14 21:54:14Z keiner $ */
00020 
00027 #include "config.h"
00028 
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #include <complex.h>
00034 
00035 #include "nfft3.h"
00036 #include "nfft3util.h"
00037 #include "infft.h"
00038 
00039 /* Macros for index calculation. */
00040 
00042 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00043 
00045 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00046 
00048 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00049 
00051 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00052 
00053 #define N_TILDE(y) (y-1)
00054 
00055 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00056 
00057 #define FPT_BREAK_EVEN 4
00058 
00062 typedef struct fpt_step_
00063 {
00064   bool stable;                            
00067   int Ns;                                 
00068   int ts;                                 
00069   double **a11,**a12,**a21,**a22;         
00070   double *g;                              
00071 } fpt_step;
00072 
00076 typedef struct fpt_data_
00077 {
00078   fpt_step **steps;                       
00079   int k_start;                            
00080   double *alphaN;                         
00081   double *betaN;                          
00082   double *gammaN;                         
00083   double alpha_0;                         
00084   double beta_0;                          
00085   double gamma_m1;                        
00086   /* Data for direct transform. */        
00087   double *alpha;                          
00088   double *beta;                           
00089   double *gamma;                          
00090 } fpt_data;
00091 
00095 typedef struct fpt_set_s_
00096 {
00097   int flags;                              
00098   int M;                                  
00099   int N;                                  
00101   int t;                                  
00102   fpt_data *dpt;                          
00103   double **xcvecs;                        
00106   double *xc;                             
00107   double _Complex *temp;                          
00108   double _Complex *work;                          
00109   double _Complex *result;                        
00110   double _Complex *vec3;
00111   double _Complex *vec4;
00112   double _Complex *z;
00113   fftw_plan *plans_dct3;                  
00115   fftw_plan *plans_dct2;                  
00117   fftw_r2r_kind *kinds;                   
00119   fftw_r2r_kind *kindsr;                  
00122   int *lengths; 
00124   /* Data for slow transforms. */
00125   double *xc_slow;
00126 } fpt_set_s;
00127 
00128 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00129   double* v, double _Complex* y, double* w, int n)
00130 {
00131   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00132   double *v_ptr = v, *w_ptr = w;
00133   for (l = 0; l < n; l++)
00134     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00135 }
00136 
00137 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00138 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00139   double* v, double _Complex* y, double* w, int n) \
00140 { \
00141   const int n2 = n>>1; \
00142   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00143   double *v_ptr = v, *w_ptr = w; \
00144   for (l = 0; l < n2; l++) \
00145     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00146   v_ptr--; w_ptr--; \
00147   for (l = 0; l < n2; l++) \
00148     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00149 }
00150 
00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00153 
00154 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00155 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00156   double* v, double _Complex* y, int n, double *xx) \
00157 { \
00158   const int n2 = n>>1; \
00159   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00160   double *v_ptr = v, *xx_ptr = xx; \
00161   for (l = 0; l < n2; l++, v_ptr++) \
00162     *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00163   v_ptr--; \
00164   for (l = 0; l < n2; l++, v_ptr--) \
00165     *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00166 }
00167 
00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00170 
00171 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00172 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00173   double _Complex* y, double* w, int n, double *xx) \
00174 { \
00175   const int n2 = n>>1; \
00176   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00177   double *w_ptr = w, *xx_ptr = xx; \
00178   for (l = 0; l < n2; l++, w_ptr++) \
00179     *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00180   w_ptr--; \
00181   for (l = 0; l < n2; l++, w_ptr--) \
00182     *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00183 }
00184 
00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00187 
00188 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00189   double _Complex* y, double* w, int n)
00190 {
00191   int l;
00192   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00193   double *v_ptr = v, *w_ptr = w;
00194   for (l = n; l > 0; l--)
00195     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00196 }
00197 
00198 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00199   double* v, double _Complex* y, double* w, int n)
00200 {
00201   const int n2 = n>>1; \
00202   int l;
00203   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00204   double *v_ptr = v, *w_ptr = w;
00205   for (l = n2; l > 0; l--)
00206     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00207   v_ptr--; w_ptr--;
00208   for (l = n2; l > 0; l--)
00209     *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00210 }
00211 
00212 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00213   double* v, double _Complex* y, double* w, int n, double *xx)
00214 {
00215   const int n2 = n>>1; \
00216   int l;
00217   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00218   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00219   for (l = n2; l > 0; l--, xx_ptr++)
00220     *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00221   v_ptr--; w_ptr--;
00222   for (l = n2; l > 0; l--, xx_ptr++)
00223     *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00224 }
00225 
00226 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00227   double* v, double _Complex* y, double* w, int n, double *xx)
00228 {
00229   const int n2 = n>>1; \
00230   int l;
00231   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00232   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00233   for (l = n2; l > 0; l--, xx_ptr++)
00234     *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00235   v_ptr--; w_ptr--;
00236   for (l = n2; l > 0; l--, xx_ptr++)
00237     *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00238 }
00239 
00240 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00241 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, double *a12, \
00242   double *a21, double *a22, double g, int tau, fpt_set set) \
00243 { \
00244  \
00245   int length = 1<<(tau+1); \
00246  \
00247   double norm = 1.0/(length<<1); \
00248   \
00249   /* Compensate for factors introduced by a raw DCT-III. */ \
00250   a[0] *= 2.0; \
00251   b[0] *= 2.0; \
00252   \
00253   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00254   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00255   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00256   \
00257   /*for (k = 0; k < length; k++)*/ \
00258   /*{*/ \
00259     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
00260     /*  a11[k],a12[k],a21[k],a22[k]);*/ \
00261   /*}*/ \
00262   \
00263   /* Check, if gamma is zero. */ \
00264   if (g == 0.0) \
00265   { \
00266     /*fprintf(stderr,"gamma == 0!\n");*/ \
00267     /* Perform multiplication only for second row. */ \
00268     M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00269   } \
00270   else \
00271   { \
00272     /*fprintf(stderr,"gamma != 0!\n");*/ \
00273     /* Perform multiplication for both rows. */ \
00274     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00275     M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00276     memcpy(b,set->z,length*sizeof(double _Complex)); \
00277     /* Compute Chebyshev-coefficients using a DCT-II. */ \
00278     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00279     /* Compensate for factors introduced by a raw DCT-II. */ \
00280     a[0] *= 0.5; \
00281   } \
00282   \
00283   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00284   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00285   /* Compensate for factors introduced by a raw DCT-II. */ \
00286   b[0] *= 0.5; \
00287 }
00288 
00289 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00290 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00291 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00292 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
00293 
00294 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00295   double *a11, double *a12, double *a21, double *a22, double *x,
00296   double gamma, int tau, fpt_set set)
00297 {
00299   int length = 1<<(tau+1);
00301   double norm = 1.0/(length<<1);
00302 
00303   UNUSED(a21); UNUSED(a22);
00304 
00305   /* Compensate for factors introduced by a raw DCT-III. */
00306   a[0] *= 2.0;
00307   b[0] *= 2.0;
00308 
00309   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00310   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00311   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00312 
00313   /*for (k = 0; k < length; k++)*/
00314   /*{*/
00315     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00316     /*  a11[k],a12[k],a21[k],a22[k]);*/
00317   /*}*/
00318 
00319   /* Check, if gamma is zero. */
00320   if (gamma == 0.0)
00321   {
00322     /*fprintf(stderr,"gamma == 0!\n");*/
00323     /* Perform multiplication only for second row. */
00324     auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00325   }
00326   else
00327   {
00328     /*fprintf(stderr,"gamma != 0!\n");*/
00329     /* Perform multiplication for both rows. */
00330     auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00331     auvxpwy_symmetric(norm*gamma,a,a,a11,b,a12,length);
00332     memcpy(b,set->z,length*sizeof(double _Complex));
00333     /* Compute Chebyshev-coefficients using a DCT-II. */
00334     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00335     /* Compensate for factors introduced by a raw DCT-II. */
00336     a[0] *= 0.5;
00337   }
00338 
00339   /* Compute Chebyshev-coefficients using a DCT-II. */
00340   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00341   /* Compensate for factors introduced by a raw DCT-II. */
00342   b[0] *= 0.5;
00343 }
00344 
00345 static inline void fpt_do_step_symmetric_l(double _Complex  *a, double _Complex *b,
00346   double *a11, double *a12, double *a21, double *a22, double *x, double gamma, int tau, fpt_set set)
00347 {
00349   int length = 1<<(tau+1);
00351   double norm = 1.0/(length<<1);
00352 
00353   /* Compensate for factors introduced by a raw DCT-III. */
00354   a[0] *= 2.0;
00355   b[0] *= 2.0;
00356 
00357   UNUSED(a11); UNUSED(a12);
00358 
00359   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00360   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00361   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00362 
00363   /*for (k = 0; k < length; k++)*/
00364   /*{*/
00365     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00366     /*  a11[k],a12[k],a21[k],a22[k]);*/
00367   /*}*/
00368 
00369   /* Check, if gamma is zero. */
00370   if (gamma == 0.0)
00371   {
00372     /*fprintf(stderr,"gamma == 0!\n");*/
00373     /* Perform multiplication only for second row. */
00374     auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00375   }
00376   else
00377   {
00378     /*fprintf(stderr,"gamma != 0!\n");*/
00379     /* Perform multiplication for both rows. */
00380     auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00381     auvxpwy_symmetric_2(norm*gamma,a,a,a21,b,a22,length,x);
00382     memcpy(b,set->z,length*sizeof(double _Complex));
00383     /* Compute Chebyshev-coefficients using a DCT-II. */
00384     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00385     /* Compensate for factors introduced by a raw DCT-II. */
00386     a[0] *= 0.5;
00387   }
00388 
00389   /* Compute Chebyshev-coefficients using a DCT-II. */
00390   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00391   /* Compensate for factors introduced by a raw DCT-II. */
00392   b[0] *= 0.5;
00393 }
00394 
00395 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00396 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, \
00397   double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00398 { \
00399  \
00400   int length = 1<<(tau+1); \
00401  \
00402   double norm = 1.0/(length<<1); \
00403   \
00404   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00405   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00406   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00407   \
00408   /* Perform matrix multiplication. */ \
00409   M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00410   M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00411   memcpy(a,set->z,length*sizeof(double _Complex)); \
00412   \
00413   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00414   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00415   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00416 }
00417 
00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00420 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
00421 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
00422 
00423 
00424 static inline void fpt_do_step_t_symmetric_u(double _Complex  *a,
00425   double _Complex *b, double *a11, double *a12, double *x,
00426   double gamma, int tau, fpt_set set)
00427 {
00429   int length = 1<<(tau+1);
00431   double norm = 1.0/(length<<1);
00432 
00433   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00434   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00435   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00436 
00437   /* Perform matrix multiplication. */
00438   abuvxpwy_symmetric1_1(norm,gamma,set->z,a,a11,b,length,x);
00439   abuvxpwy_symmetric1_2(norm,gamma,b,a,a12,b,length,x);
00440   memcpy(a,set->z,length*sizeof(double _Complex));
00441 
00442   /* Compute Chebyshev-coefficients using a DCT-II. */
00443   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00444   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00445 }
00446 
00447 static inline void fpt_do_step_t_symmetric_l(double _Complex  *a,
00448   double _Complex *b, double *a21, double *a22,
00449   double *x, double gamma, int tau, fpt_set set)
00450 {
00452   int length = 1<<(tau+1);
00454   double norm = 1.0/(length<<1);
00455 
00456   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00457   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00458   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00459 
00460   /* Perform matrix multiplication. */
00461   abuvxpwy_symmetric2_2(norm,gamma,set->z,a,b,a21,length,x);
00462   abuvxpwy_symmetric2_1(norm,gamma,b,a,b,a22,length,x);
00463   memcpy(a,set->z,length*sizeof(double _Complex));
00464 
00465   /* Compute Chebyshev-coefficients using a DCT-II. */
00466   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00467   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00468 }
00469 
00470 
00471 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00472   const double *beta, const double *gamma)
00473 {
00474   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00475    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00476    */
00477   int i,j;
00478   double a,b,x_val_act,a_old;
00479   const double *x_act;
00480   double *y_act;
00481   const double *alpha_act, *beta_act, *gamma_act;
00482 
00483   /* Traverse all nodes. */
00484   x_act = x;
00485   y_act = y;
00486   for (i = 0; i < size; i++)
00487   {
00488     a = 1.0;
00489     b = 0.0;
00490     x_val_act = *x_act;
00491 
00492     if (k == 0)
00493     {
00494       *y_act = 1.0;
00495     }
00496     else
00497     {
00498       alpha_act = &(alpha[k]);
00499       beta_act = &(beta[k]);
00500       gamma_act = &(gamma[k]);
00501       for (j = k; j > 1; j--)
00502       {
00503         a_old = a;
00504         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00505          b = a_old*(*gamma_act);
00506         alpha_act--;
00507         beta_act--;
00508         gamma_act--;
00509       }
00510       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00511     }
00512     x_act++;
00513     y_act++;
00514   }
00515 }
00516 
00517 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00518   const double *beta, const double *gamma)
00519 {
00520   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00521    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00522    */
00523   int i,j;
00524   double a,b,x_val_act,a_old;
00525   const double *x_act;
00526   double *y_act, *z_act;
00527   const double *alpha_act, *beta_act, *gamma_act;
00528 
00529   /* Traverse all nodes. */
00530   x_act = x;
00531   y_act = y;
00532   z_act = z;
00533   for (i = 0; i < size; i++)
00534   {
00535     a = 1.0;
00536     b = 0.0;
00537     x_val_act = *x_act;
00538 
00539     if (k == 0)
00540     {
00541       *y_act = 1.0;
00542       *z_act = 0.0;
00543     }
00544     else
00545     {
00546       alpha_act = &(alpha[k]);
00547       beta_act = &(beta[k]);
00548       gamma_act = &(gamma[k]);
00549       for (j = k; j > 1; j--)
00550       {
00551         a_old = a;
00552         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00553         b = a_old*(*gamma_act);
00554         alpha_act--;
00555         beta_act--;
00556         gamma_act--;
00557       }
00558       if (i < size1)
00559         *z_act = a;
00560       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00561     }
00562 
00563     x_act++;
00564     y_act++;
00565     z_act++;
00566   }
00567   /*gamma_act++;
00568   FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
00569   fprintf(f,"size1: %10d, size: %10d\n",size1,size);
00570   fclose(f);*/
00571 }
00572 
00573 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00574   const double *alpha, const double *beta, const double *gamma, const
00575   double threshold)
00576 {
00577   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00578    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00579    */
00580   int i,j;
00581   double a,b,x_val_act,a_old;
00582   const double *x_act;
00583   double *y_act, *z_act;
00584   const double *alpha_act, *beta_act, *gamma_act;
00585   R max = -R_MAX;
00586   const R t = LOG10(FABS(threshold));
00587 
00588   /* Traverse all nodes. */
00589   x_act = x;
00590   y_act = y;
00591   z_act = z;
00592   for (i = 0; i < size; i++)
00593   {
00594     a = 1.0;
00595     b = 0.0;
00596     x_val_act = *x_act;
00597 
00598     if (k == 0)
00599     {
00600      *y_act = 1.0;
00601      *z_act = 0.0;
00602     }
00603     else
00604     {
00605       alpha_act = &(alpha[k]);
00606       beta_act = &(beta[k]);
00607       gamma_act = &(gamma[k]);
00608       for (j = k; j > 1; j--)
00609       {
00610         a_old = a;
00611         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00612          b = a_old*(*gamma_act);
00613         alpha_act--;
00614         beta_act--;
00615         gamma_act--;
00616       }
00617       *z_act = a;
00618       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00619       max = FMAX(max,LOG10(FABS(*y_act)));
00620       if (max > t)
00621         return 1;
00622     }
00623     x_act++;
00624     y_act++;
00625     z_act++;
00626   }
00627   return 0;
00628 }
00629 
00630 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00631   const double _Complex *a, const double *x, double _Complex *y,
00632   const double *alpha, const double *beta, const double *gamma,
00633   const double lambda)
00634 {
00635   int j,k;
00636   double _Complex tmp1, tmp2, tmp3;
00637   double xc;
00638 
00639   if (N == 0)
00640     for (j = 0; j <= M; j++)
00641       y[j] = a[0];
00642   else
00643   {
00644     for (j = 0; j <= M; j++)
00645     {
00646 #if 0
00647       xc = x[j];
00648       tmp2 = a[N];
00649       tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00650       for (k = N-1; k > 0; k--)
00651       {
00652         tmp3 =   a[k-1]
00653                + (alpha[k-1] * xc + beta[k-1]) * tmp1
00654                + gamma[k] * tmp2;
00655         tmp2 = tmp1;
00656         tmp1 = tmp3;
00657       }
00658       y[j] = lambda * tmp1;
00659 #else
00660       xc = x[j];
00661       tmp1 = a[N-1];
00662       tmp2 = a[N];
00663       for (k = N-1; k > 0; k--)
00664       {
00665         tmp3 = a[k-1] + tmp2 * gamma[k];
00666         tmp2 *= (alpha[k] * xc + beta[k]);
00667         tmp2 += tmp1;
00668         tmp1 = tmp3;
00669       }
00670       tmp2 *= (alpha[0] * xc + beta[0]);
00671       y[j] = lambda * (tmp2 + tmp1);
00672 #endif
00673     }
00674   }
00675 }
00676 
00695 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00696   double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gamma,
00697   double lambda)
00698 {
00699   int j,k;
00700   double _Complex* it1 = temp;
00701   double _Complex* it2 = y;
00702   double _Complex aux;
00703 
00704   /* Compute final result by multiplying with the constant lambda */
00705   a[0] = 0.0;
00706   for (j = 0; j <= M; j++)
00707   {
00708     it2[j] = lambda * y[j];
00709     a[0] += it2[j];
00710   }
00711 
00712   if (N > 0)
00713   {
00714     /* Compute final step. */
00715     a[1] = 0.0;
00716     for (j = 0; j <= M; j++)
00717     {
00718       it1[j] = it2[j];
00719       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00720       a[1] += it2[j];
00721     }
00722 
00723     for (k = 2; k <= N; k++)
00724     {
00725       a[k] = 0.0;
00726       for (j = 0; j <= M; j++)
00727       {
00728         aux = it1[j];
00729         it1[j] = it2[j];
00730         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00731         a[k] += it2[j];
00732       }
00733     }
00734   }
00735 }
00736 
00737 
00738 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00739 {
00741   int plength;
00743   int tau;
00745   int m;
00746   int k;
00747 
00748   /* Allocate memory for new DPT set. */
00749   fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00750 
00751   /* Save parameters in structure. */
00752   set->flags = flags;
00753 
00754   set->M = M;
00755   set->t = t;
00756   set->N = 1<<t;
00757 
00758   /* Allocate memory for M transforms. */
00759   set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00760 
00761   /* Initialize with NULL pointer. */
00762   for (m = 0; m < set->M; m++)
00763     set->dpt[m].steps = 0;
00764 
00765   /* Create arrays with Chebyshev nodes. */
00766 
00767   /* Initialize array with Chebyshev coefficients for the polynomial x. This
00768    * would be trivially an array containing a 1 as second entry with all other
00769    * coefficients set to zero. In order to compensate for the multiplicative
00770    * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
00771 
00772   /* Allocate memory for array of pointers to node arrays. */
00773   set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00774   /* For each polynomial length starting with 4, compute the Chebyshev nodes
00775    * using a DCT-III. */
00776   plength = 4;
00777   for (tau = 1; tau < t+1; tau++)
00778   {
00779     /* Allocate memory for current array. */
00780     set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00781     for (k = 0; k < plength; k++)
00782     {
00783       set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00784     }
00785     plength = plength << 1;
00786   }
00787 
00789   set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00790   set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00791 
00792   /* Check if fast transform is activated. */
00793   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00794   {
00796     set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00797     set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00798     set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00799 
00801     set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00802     set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00803     set->kinds      = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00804     set->kinds[0]   = FFTW_REDFT01;
00805     set->kinds[1]   = FFTW_REDFT01;
00806     set->kindsr     = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00807     set->kindsr[0]  = FFTW_REDFT10;
00808     set->kindsr[1]  = FFTW_REDFT10;
00809     set->lengths    = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
00810     for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00811     {
00812       set->lengths[tau] = plength;
00813       set->plans_dct3[tau] =
00814         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00815                            2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00816                            0);
00817       set->plans_dct2[tau] =
00818         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00819                            2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00820                            0);
00821     }
00822     nfft_free(set->lengths);
00823     nfft_free(set->kinds);
00824     nfft_free(set->kindsr);
00825     set->lengths = NULL;
00826     set->kinds = NULL;
00827     set->kindsr = NULL;
00828   }
00829 
00830   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00831   {
00832     set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00833     set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00834   }
00835 
00836   /* Return the newly created DPT set. */
00837   return set;
00838 }
00839 
00840 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00841   double *gam, int k_start, const double threshold)
00842 {
00843 
00844   int tau;          
00845   int l;            
00846   int plength;      
00848   int degree;       
00850   int firstl;       
00851   int lastl;        
00852   int plength_stab; 
00854   int degree_stab;  
00856   double *a11;      
00858   double *a12;      
00860   double *a21;      
00862   double *a22;      
00864   const double *calpha;
00865   const double *cbeta;
00866   const double *cgamma;
00867   int needstab = 0; 
00868   int k_start_tilde;
00869   int N_tilde;
00870   int clength;
00871   int clength_1;
00872   int clength_2;
00873   int t_stab, N_stab;
00874   fpt_data *data;
00875 
00876   /* Get pointer to DPT data. */
00877   data = &(set->dpt[m]);
00878 
00879   /* Check, if already precomputed. */
00880   if (data->steps != NULL)
00881     return;
00882 
00883   /* Save k_start. */
00884   data->k_start = k_start;
00885 
00886   /* Check if fast transform is activated. */
00887   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00888   {
00889     /* Save recursion coefficients. */
00890     data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00891     data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00892     data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00893 
00894     for (tau = 2; tau <= set->t; tau++)
00895     {
00896 
00897       data->alphaN[tau-2] = alpha[1<<tau];
00898       data->betaN[tau-2] = beta[1<<tau];
00899       data->gammaN[tau-2] = gam[1<<tau];
00900     }
00901 
00902     data->alpha_0 = alpha[1];
00903     data->beta_0 = beta[1];
00904     data->gamma_m1 = gam[0];
00905 
00906     k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00907       /*set->N*/);
00908     N_tilde = N_TILDE(set->N);
00909 
00910     /* Allocate memory for the cascade with t = log_2(N) many levels. */
00911     data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00912 
00913     /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
00914     plength = 4;
00915     for (tau = 1; tau < set->t; tau++)
00916     {
00917       /* Compute auxilliary values. */
00918       degree = plength>>1;
00919       /* Compute first l. */
00920       firstl = FIRST_L(k_start_tilde,plength);
00921       /* Compute last l. */
00922       lastl = LAST_L(N_tilde,plength);
00923 
00924       /* Allocate memory for current level. This level will contain 2^{t-tau-1}
00925        * many matrices. */
00926       data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00927                          * (lastl+1));
00928 
00929       /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
00930       for (l = firstl; l <= lastl; l++)
00931       {
00932         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00933         {
00934           //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
00935           //fflush(stderr);
00936           clength = plength/2;
00937         }
00938         else
00939         {
00940           clength = plength;
00941         }
00942 
00943         /* Allocate memory for the components of U_{n,tau,l}. */
00944         a11 = (double*) nfft_malloc(sizeof(double)*clength);
00945         a12 = (double*) nfft_malloc(sizeof(double)*clength);
00946         a21 = (double*) nfft_malloc(sizeof(double)*clength);
00947         a22 = (double*) nfft_malloc(sizeof(double)*clength);
00948 
00949         /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
00950          * nodes. */
00951 
00952         /* Get the pointers to the three-term recurrence coeffcients. */
00953         calpha = &(alpha[plength*l+1+1]);
00954         cbeta = &(beta[plength*l+1+1]);
00955         cgamma = &(gam[plength*l+1+1]);
00956 
00957         if (set->flags & FPT_NO_STABILIZATION)
00958         {
00959           /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
00960           calpha--;
00961           cbeta--;
00962           cgamma--;
00963           eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
00964             cgamma);
00965           eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
00966             cgamma);
00967           needstab = 0;
00968         }
00969         else
00970         {
00971           calpha--;
00972           cbeta--;
00973           cgamma--;
00974           /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
00975           needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
00976             degree-1, calpha, cbeta, cgamma, threshold);
00977           if (needstab == 0)
00978           {
00979             /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
00980             needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
00981               degree, calpha, cbeta, cgamma, threshold);
00982           }
00983         }
00984 
00985         /* Check if stabilization needed. */
00986         if (needstab == 0)
00987         {
00988           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
00989           data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
00990           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
00991           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
00992           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
00993           /* No stabilization needed. */
00994           data->steps[tau][l].a11[0] = a11;
00995           data->steps[tau][l].a12[0] = a12;
00996           data->steps[tau][l].a21[0] = a21;
00997           data->steps[tau][l].a22[0] = a22;
00998           data->steps[tau][l].g[0] = gam[plength*l+1+1];
00999           data->steps[tau][l].stable = true;
01000         }
01001         else
01002         {
01003           /* Stabilize. */
01004           degree_stab = degree*(2*l+1);
01005           nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01006 
01007           /* Old arrays are to small. */
01008           nfft_free(a11);
01009           nfft_free(a12);
01010           nfft_free(a21);
01011           nfft_free(a22);
01012 
01013           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01014           data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01015           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01016           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01017           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01018 
01019           plength_stab = N_stab;
01020 
01021           if (set->flags & FPT_AL_SYMMETRY)
01022           {
01023             if (m <= 1)
01024             {
01025               /* This should never be executed */
01026               clength_1 = plength_stab;
01027               clength_2 = plength_stab;
01028               /* Allocate memory for arrays. */
01029               a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01030               a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01031               a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01032               a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01033               /* Get the pointers to the three-term recurrence coeffcients. */
01034               calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01035               eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01036                 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01037               eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01038                 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01039             }
01040             else
01041             {
01042               clength = plength_stab/2;
01043               if (m%2 == 0)
01044               {
01045                 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01046                 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01047                 a21 = 0;
01048                 a22 = 0;
01049                 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01050                 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01051                   degree_stab-2, calpha, cbeta, cgamma);
01052                 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01053                   degree_stab-1, calpha, cbeta, cgamma);
01054               }
01055               else
01056               {
01057                 a11 = 0;
01058                 a12 = 0;
01059                 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01060                 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01061                 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01062                 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01063                   degree_stab-1,calpha, cbeta, cgamma);
01064                 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01065                   degree_stab+0, calpha, cbeta, cgamma);
01066               }
01067             }
01068           }
01069           else
01070           {
01071             clength_1 = plength_stab;
01072             clength_2 = plength_stab;
01073             a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01074             a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01075             a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01076             a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01077             calpha = &(alpha[2]);
01078             cbeta = &(beta[2]);
01079             cgamma = &(gam[2]);
01080             calpha--;
01081             cbeta--;
01082             cgamma--;
01083             eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01084               calpha, cbeta, cgamma);
01085             eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01086               calpha, cbeta, cgamma);
01087 
01088           }
01089           data->steps[tau][l].a11[0] = a11;
01090           data->steps[tau][l].a12[0] = a12;
01091           data->steps[tau][l].a21[0] = a21;
01092           data->steps[tau][l].a22[0] = a22;
01093 
01094           data->steps[tau][l].g[0] =  gam[1+1];
01095           data->steps[tau][l].stable = false;
01096           data->steps[tau][l].ts = t_stab;
01097           data->steps[tau][l].Ns = N_stab;
01098         }
01099       }
01101       plength = plength << 1;
01102     }
01103   }
01104 
01105   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01106   {
01107     /* Check, if recurrence coefficients must be copied. */
01108     if (set->flags & FPT_PERSISTENT_DATA)
01109     {
01110       data->alpha = (double*) alpha;
01111       data->beta = (double*) beta;
01112       data->gamma = (double*) gam;
01113     }
01114     else
01115     {
01116       data->alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01117       data->beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01118       data->gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01119       memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
01120       memcpy(data->beta,beta,(set->N+1)*sizeof(double));
01121       memcpy(data->gamma,gam,(set->N+1)*sizeof(double));
01122     }
01123   }
01124 }
01125 
01126 void dpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01127   const int k_end, const unsigned int flags)
01128 {
01129   int j;
01130   fpt_data *data = &(set->dpt[m]);
01131   int Nk;
01132   int tk;
01133   double norm;
01134 
01135   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01136   norm = 2.0/(Nk<<1);
01137 
01138   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01139   {
01140     return;
01141   }
01142 
01143   if (flags & FPT_FUNCTION_VALUES)
01144   {
01145     /* Fill array with Chebyshev nodes. */
01146     for (j = 0; j <= k_end; j++)
01147     {
01148       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01149     }
01150 
01151     memset(set->result,0U,data->k_start*sizeof(double _Complex));
01152     memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01153 
01154     /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01155       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01156       data->gamma_m1);*/
01157     eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01158       y, &data->alpha[1], &data->beta[1], &data->gamma[1], data->gamma_m1);
01159   }
01160   else
01161   {
01162     memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01163     memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01164 
01165     eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01166       set->result, &data->alpha[1], &data->beta[1], &data->gamma[1],
01167       data->gamma_m1);
01168 
01169     fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01170       (double*)set->result);
01171 
01172     set->result[0] *= 0.5;
01173     for (j = 0; j < Nk; j++)
01174     {
01175       set->result[j] *= norm;
01176     }
01177 
01178     memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01179   }
01180 }
01181 
01182 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01183   const int k_end, const unsigned int flags)
01184 {
01185   /* Get transformation data. */
01186   fpt_data *data = &(set->dpt[m]);
01188   int Nk;
01190   int tk;
01192   int k_start_tilde;
01194   int k_end_tilde;
01195 
01197   int tau;
01199   int firstl;
01201   int lastl;
01203   int l;
01205   int plength;
01207   int plength_stab;
01208   int t_stab;
01210   fpt_step *step;
01212   fftw_plan plan = 0;
01213   int length = k_end+1;
01214   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01215 
01217   int k;
01218 
01219   double _Complex *work_ptr;
01220   const double _Complex *x_ptr;
01221 
01222   /* Check, if slow transformation should be used due to small bandwidth. */
01223   if (k_end < FPT_BREAK_EVEN)
01224   {
01225     /* Use NDSFT. */
01226     dpt_trafo(set, m, x, y, k_end, flags);
01227     return;
01228   }
01229 
01230   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01231   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01232   k_end_tilde = K_END_TILDE(k_end,Nk);
01233 
01234   /* Check if fast transform is activated. */
01235   if (set->flags & FPT_NO_FAST_ALGORITHM)
01236     return;
01237 
01238   if (flags & FPT_FUNCTION_VALUES)
01239     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01240       (double*)set->work, NULL, 2, 1, kinds, 0U);
01241 
01242   /* Initialize working arrays. */
01243   memset(set->result,0U,2*Nk*sizeof(double _Complex));
01244 
01245   /* The first step. */
01246 
01247   /* Set the first 2*data->k_start coefficients to zero. */
01248   memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01249 
01250   work_ptr = &set->work[2*data->k_start];
01251   x_ptr = x;
01252 
01253   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01254   {
01255     *work_ptr++ = *x_ptr++;
01256     *work_ptr++ = K(0.0);
01257   }
01258 
01259   /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
01260   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01261 
01262   /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
01263    * x_{Nk-1} and x_{Nk-2}. */
01264   if (k_end == Nk)
01265   {
01266     set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01267     set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01268     set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01269   }
01270 
01271   /* Compute the remaining steps. */
01272   plength = 4;
01273   for (tau = 1; tau < tk; tau++)
01274   {
01275     /* Compute first l. */
01276     firstl = FIRST_L(k_start_tilde,plength);
01277     /* Compute last l. */
01278     lastl = LAST_L(k_end_tilde,plength);
01279 
01280     /* Compute the multiplication steps. */
01281     for (l = firstl; l <= lastl; l++)
01282     {
01283       /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
01284       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01285       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01286       memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01287       memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01288 
01289       /* Copy coefficients into first half. */
01290       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01291       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01292       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01293 
01294       /* Get matrix U_{n,tau,l} */
01295       step = &(data->steps[tau][l]);
01296 
01297       /* Check if step is stable. */
01298       if (step->stable)
01299       {
01300         /* Check, if we should do a symmetrizised step. */
01301         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01302         {
01303           /*for (k = 0; k < plength; k++)
01304           {
01305             fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
01306               step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
01307           }*/
01308           /* Multiply third and fourth polynomial with matrix U. */
01309           //fprintf(stderr,"\nhallo\n");
01310           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01311             step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01312         }
01313         else
01314         {
01315           /* Multiply third and fourth polynomial with matrix U. */
01316           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01317             step->a21[0], step->a22[0], step->g[0], tau, set);
01318         }
01319 
01320         if (step->g[0] != 0.0)
01321         {
01322           for (k = 0; k < plength; k++)
01323           {
01324             set->work[plength*2*l+k] += set->vec3[k];
01325           }
01326         }
01327         for (k = 0; k < plength; k++)
01328         {
01329           set->work[plength*(2*l+1)+k] += set->vec4[k];
01330         }
01331       }
01332       else
01333       {
01334         /* Stabilize. */
01335 
01336         /* The lengh of the polynomials */
01337         plength_stab = step->Ns;
01338         t_stab = step->ts;
01339 
01340         /*---------*/
01341         /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
01342         fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
01343         fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
01344         fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
01345         /*---------*/
01346 
01347         /* Set rest of vectors explicitely to zero */
01348         /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
01349           plength, plength_stab);*/
01350         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01351         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01352 
01353         /* Multiply third and fourth polynomial with matrix U. */
01354         /* Check for symmetry. */
01355         if (set->flags & FPT_AL_SYMMETRY)
01356         {
01357           if (m <= 1)
01358           {
01359             fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01360               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01361           }
01362           else if (m%2 == 0)
01363           {
01364             /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01365               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01366             fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01367               step->a21[0], step->a22[0],
01368               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01369             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01370               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01371           }
01372           else
01373           {
01374             /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01375               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01376             fpt_do_step_symmetric_l(set->vec3, set->vec4,
01377               step->a11[0], step->a12[0],
01378               step->a21[0],
01379               step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01380             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01381               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01382           }
01383         }
01384         else
01385         {
01386             fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01387               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01388         }
01389 
01390         if (step->g[0] != 0.0)
01391         {
01392           for (k = 0; k < plength_stab; k++)
01393           {
01394             set->result[k] += set->vec3[k];
01395           }
01396         }
01397 
01398         for (k = 0; k < plength_stab; k++)
01399         {
01400           set->result[Nk+k] += set->vec4[k];
01401         }
01402       }
01403     }
01404     /* Double length of polynomials. */
01405     plength = plength<<1;
01406 
01407     /*--------*/
01408     /*for (k = 0; k < 2*Nk; k++)
01409     {
01410       fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01411         k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01412         cimag(set->result[k]));
01413     }*/
01414     /*--------*/
01415   }
01416 
01417   /* Add the resulting cascade coeffcients to the coeffcients accumulated from
01418    * the stabilization steps. */
01419   for (k = 0; k < 2*Nk; k++)
01420   {
01421     set->result[k] += set->work[k];
01422   }
01423 
01424   /* The last step. Compute the Chebyshev coeffcients c_k^n from the
01425    * polynomials in front of P_0^n and P_1^n. */
01426   y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01427     data->alpha_0*set->result[Nk+1]*0.5);
01428   y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01429     data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01430   y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01431     data->beta_0*set->result[Nk+k_end-1] +
01432     data->alpha_0*set->result[Nk+k_end-2]*0.5);
01433   y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01434   for (k = 2; k <= k_end-2; k++)
01435   {
01436     y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01437       data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01438   }
01439 
01440   if (flags & FPT_FUNCTION_VALUES)
01441   {
01442     y[0] *= 2.0;
01443     fftw_execute_r2r(plan,(double*)y,(double*)y);
01444     fftw_destroy_plan(plan);
01445     for (k = 0; k <= k_end; k++)
01446     {
01447       y[k] *= 0.5;
01448     }
01449   }
01450 }
01451 
01452 void dpt_transposed(fpt_set set, const int m, double _Complex *x,
01453   double _Complex *y, const int k_end, const unsigned int flags)
01454 {
01455   int j;
01456   fpt_data *data = &(set->dpt[m]);
01457   int Nk;
01458   int tk;
01459   double norm;
01460 
01461   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01462   norm = 2.0/(Nk<<1);
01463 
01464   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01465   {
01466     return;
01467   }
01468 
01469   if (flags & FPT_FUNCTION_VALUES)
01470   {
01471     for (j = 0; j <= k_end; j++)
01472     {
01473       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01474     }
01475 
01476     eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01477       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01478       data->gamma_m1);
01479 
01480     memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01481       sizeof(double _Complex));
01482   }
01483   else
01484   {
01485     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01486     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01487 
01488     for (j = 0; j < Nk; j++)
01489     {
01490       set->result[j] *= norm;
01491     }
01492 
01493     fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01494       (double*)set->result);
01495 
01496     eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01497       set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01498       data->gamma_m1);
01499 
01500     memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01501   }
01502 }
01503 
01504 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01505   double _Complex *y, const int k_end, const unsigned int flags)
01506 {
01507   /* Get transformation data. */
01508   fpt_data *data = &(set->dpt[m]);
01510   int Nk;
01512   int tk;
01514   int k_start_tilde;
01516   int k_end_tilde;
01517 
01519   int tau;
01521   int firstl;
01523   int lastl;
01525   int l;
01527   int plength;
01529   int plength_stab;
01531   fpt_step *step;
01533   fftw_plan plan;
01534   int length = k_end+1;
01535   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01537   int k;
01538   int t_stab;
01539 
01540   /* Check, if slow transformation should be used due to small bandwidth. */
01541   if (k_end < FPT_BREAK_EVEN)
01542   {
01543     /* Use NDSFT. */
01544     dpt_transposed(set, m, x, y, k_end, flags);
01545     return;
01546   }
01547 
01548   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01549   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01550   k_end_tilde = K_END_TILDE(k_end,Nk);
01551 
01552   /* Check if fast transform is activated. */
01553   if (set->flags & FPT_NO_FAST_ALGORITHM)
01554   {
01555     return;
01556   }
01557 
01558   if (flags & FPT_FUNCTION_VALUES)
01559   {
01560     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01561       (double*)set->work, NULL, 2, 1, kinds, 0U);
01562     fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01563     fftw_destroy_plan(plan);
01564     for (k = 0; k <= k_end; k++)
01565     {
01566       set->result[k] *= 0.5;
01567     }
01568   }
01569   else
01570   {
01571     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01572   }
01573 
01574   /* Initialize working arrays. */
01575   memset(set->work,0U,2*Nk*sizeof(double _Complex));
01576 
01577   /* The last step is now the first step. */
01578   for (k = 0; k <= k_end; k++)
01579   {
01580     set->work[k] = data->gamma_m1*set->result[k];
01581   }
01582   //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
01583 
01584   set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01585     data->alpha_0*set->result[1]);
01586   for (k = 1; k < k_end; k++)
01587   {
01588     set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01589       data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01590   }
01591   if (k_end<Nk)
01592   {
01593     memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01594   }
01595 
01597   memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01598 
01599   /* Compute the remaining steps. */
01600   plength = Nk;
01601   for (tau = tk-1; tau >= 1; tau--)
01602   {
01603     /* Compute first l. */
01604     firstl = FIRST_L(k_start_tilde,plength);
01605     /* Compute last l. */
01606     lastl = LAST_L(k_end_tilde,plength);
01607 
01608     /* Compute the multiplication steps. */
01609     for (l = firstl; l <= lastl; l++)
01610     {
01611       /* Initialize second half of coefficient arrays with zeros. */
01612       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01613       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01614 
01615       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01616         (plength/2)*sizeof(double _Complex));
01617 
01618       /* Get matrix U_{n,tau,l} */
01619       step = &(data->steps[tau][l]);
01620 
01621       /* Check if step is stable. */
01622       if (step->stable)
01623       {
01624         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01625         {
01626           /* Multiply third and fourth polynomial with matrix U. */
01627           fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01628             step->a21[0], step->a22[0], step->g[0], tau, set);
01629         }
01630         else
01631         {
01632           /* Multiply third and fourth polynomial with matrix U. */
01633           fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01634             step->a21[0], step->a22[0], step->g[0], tau, set);
01635         }
01636         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01637 
01638         for (k = 0; k < plength; k++)
01639         {
01640           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01641         }
01642       }
01643       else
01644       {
01645         /* Stabilize. */
01646         plength_stab = step->Ns;
01647         t_stab = step->ts;
01648 
01649         memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01650         memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01651 
01652         /* Multiply third and fourth polynomial with matrix U. */
01653         if (set->flags & FPT_AL_SYMMETRY)
01654         {
01655           if (m <= 1)
01656           {
01657             fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01658               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01659           }
01660           else if (m%2 == 0)
01661           {
01662             fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01663               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01664           }
01665           else
01666           {
01667             fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01668               step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01669           }
01670         }
01671         else
01672         {
01673             fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01674               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01675         }
01676 
01677         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01678 
01679         for (k = 0; k < plength; k++)
01680         {
01681           set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01682         }
01683        }
01684     }
01685     /* Half the length of polynomial arrays. */
01686     plength = plength>>1;
01687   }
01688 
01689   /* First step */
01690   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01691   {
01692     x[k] = set->work[2*(data->k_start+k)];
01693   }
01694   if (k_end == Nk)
01695   {
01696     x[Nk-data->k_start] =
01697         data->gammaN[tk-2]*set->work[2*(Nk-2)]
01698       + data->betaN[tk-2] *set->work[2*(Nk-1)]
01699       + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01700   }
01701 }
01702 
01703 void fpt_finalize(fpt_set set)
01704 {
01705   int tau;
01706   int l;
01707   int m;
01708   fpt_data *data;
01709   int k_start_tilde;
01710   int N_tilde;
01711   int firstl, lastl;
01712   int plength;
01713   const int M = set->M;
01714 
01715   /* TODO Clean up DPT transform data structures. */
01716   for (m = 0; m < M; m++)
01717   {
01718     /* Check if precomputed. */
01719     data = &set->dpt[m];
01720     if (data->steps != (fpt_step**)NULL)
01721     {
01722       nfft_free(data->alphaN);
01723       nfft_free(data->betaN);
01724       nfft_free(data->gammaN);
01725       data->alphaN = NULL;
01726       data->betaN = NULL;
01727       data->gammaN = NULL;
01728 
01729       /* Free precomputed data. */
01730       k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01731         /*set->N*/);
01732       N_tilde = N_TILDE(set->N);
01733       plength = 4;
01734       for (tau = 1; tau < set->t; tau++)
01735       {
01736         /* Compute first l. */
01737         firstl = FIRST_L(k_start_tilde,plength);
01738         /* Compute last l. */
01739         lastl = LAST_L(N_tilde,plength);
01740 
01741         /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
01742         for (l = firstl; l <= lastl; l++)
01743         {
01744           /* Free components. */
01745           nfft_free(data->steps[tau][l].a11[0]);
01746           nfft_free(data->steps[tau][l].a12[0]);
01747           nfft_free(data->steps[tau][l].a21[0]);
01748           nfft_free(data->steps[tau][l].a22[0]);
01749           data->steps[tau][l].a11[0] = NULL;
01750           data->steps[tau][l].a12[0] = NULL;
01751           data->steps[tau][l].a21[0] = NULL;
01752           data->steps[tau][l].a22[0] = NULL;
01753           /* Free components. */
01754           nfft_free(data->steps[tau][l].a11);
01755           nfft_free(data->steps[tau][l].a12);
01756           nfft_free(data->steps[tau][l].a21);
01757           nfft_free(data->steps[tau][l].a22);
01758           nfft_free(data->steps[tau][l].g);
01759           data->steps[tau][l].a11 = NULL;
01760           data->steps[tau][l].a12 = NULL;
01761           data->steps[tau][l].a21 = NULL;
01762           data->steps[tau][l].a22 = NULL;
01763           data->steps[tau][l].g = NULL;
01764         }
01765         /* Free pointers for current level. */
01766         nfft_free(data->steps[tau]);
01767         data->steps[tau] = NULL;
01768         /* Double length of polynomials. */
01769         plength = plength<<1;
01770       }
01771       /* Free steps. */
01772       nfft_free(data->steps);
01773       data->steps = NULL;
01774     }
01775 
01776     if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01777     {
01778       /* Check, if recurrence coefficients must be copied. */
01779       //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
01780       if (!(set->flags & FPT_PERSISTENT_DATA))
01781       {
01782         nfft_free(data->alpha);
01783         nfft_free(data->beta);
01784         nfft_free(data->gamma);
01785       }
01786       data->alpha = NULL;
01787       data->beta = NULL;
01788       data->gamma = NULL;
01789     }
01790   }
01791 
01792   /* Delete array of DPT transform data. */
01793   nfft_free(set->dpt);
01794   set->dpt = NULL;
01795 
01796   for (tau = 1; tau < set->t+1; tau++)
01797   {
01798     nfft_free(set->xcvecs[tau-1]);
01799     set->xcvecs[tau-1] = NULL;
01800   }
01801   nfft_free(set->xcvecs);
01802   set->xcvecs = NULL;
01803 
01804   /* Free auxilliary arrays. */
01805   nfft_free(set->work);
01806   nfft_free(set->result);
01807 
01808   /* Check if fast transform is activated. */
01809   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01810   {
01811     /* Free auxilliary arrays. */
01812     nfft_free(set->vec3);
01813     nfft_free(set->vec4);
01814     nfft_free(set->z);
01815     set->work = NULL;
01816     set->result = NULL;
01817     set->vec3 = NULL;
01818     set->vec4 = NULL;
01819     set->z = NULL;
01820 
01821     /* Free FFTW plans. */
01822     for(tau = 0; tau < set->t/*-1*/; tau++)
01823     {
01824       fftw_destroy_plan(set->plans_dct3[tau]);
01825       fftw_destroy_plan(set->plans_dct2[tau]);
01826       set->plans_dct3[tau] = NULL;
01827       set->plans_dct2[tau] = NULL;
01828     }
01829 
01830     nfft_free(set->plans_dct3);
01831     nfft_free(set->plans_dct2);
01832     set->plans_dct3 = NULL;
01833     set->plans_dct2 = NULL;
01834   }
01835 
01836   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01837   {
01838     /* Delete arrays of Chebyshev nodes. */
01839     nfft_free(set->xc_slow);
01840     set->xc_slow = NULL;
01841     nfft_free(set->temp);
01842     set->temp = NULL;
01843   }
01844 
01845   /* Free DPT set structure. */
01846   nfft_free(set);
01847 }

Generated on 16 Sep 2009 by Doxygen 1.5.3