NFFT Logo 3.1.1 API Reference

fastgauss.c

00001 /*
00002  * Copyright (c) 2002, 2009 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: fastgauss.c 3198 2009-05-27 14:16:50Z keiner $ */
00020 
00027 #include <stdio.h>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 #include <sys/resource.h>
00031 #include <time.h>
00032 #include <complex.h>
00033 
00034 #include "nfft3.h"
00035 #include "nfft3util.h"
00036 
00045 #define DGT_PRE_CEXP     (1U<< 0)
00046 
00056 #define FGT_NDFT         (1U<< 1)
00057 
00066 #define FGT_APPROX_B     (1U<< 2)
00067 
00069 typedef struct
00070 {
00071   int N;                                
00072   int M;                                
00074   double _Complex *alpha;                
00075   double _Complex *f;                    
00077   unsigned flags;                       
00080   double _Complex sigma;                 
00082   double *x;                            
00083   double *y;                            
00085   double _Complex *pre_cexp;             
00087   int n;                                
00088   double p;                             
00090   double _Complex *b;                    
00092   nfft_plan *nplan1;                    
00093   nfft_plan *nplan2;                    
00095 } fgt_plan;
00096 
00104 void dgt_trafo(fgt_plan *ths)
00105 {
00106   int j,k,l;
00107 
00108   for(j=0; j<ths->M; j++)
00109     ths->f[j] = 0;
00110 
00111   if(ths->flags & DGT_PRE_CEXP)
00112     for(j=0,l=0; j<ths->M; j++)
00113       for(k=0; k<ths->N; k++,l++)
00114         ths->f[j] += ths->alpha[k]*ths->pre_cexp[l];
00115   else
00116     for(j=0; j<ths->M; j++)
00117       for(k=0; k<ths->N; k++)
00118         ths->f[j] += ths->alpha[k]*cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00119           (ths->y[j]-ths->x[k]));
00120 }
00121 
00129 void fgt_trafo(fgt_plan *ths)
00130 {
00131   int l;
00132 
00133   if(ths->flags & FGT_NDFT)
00134     {
00135       ndft_adjoint(ths->nplan1);
00136 
00137       for(l=0; l<ths->n; l++)
00138         ths->nplan1->f_hat[l] *= ths->b[l];
00139 
00140       ndft_trafo(ths->nplan2);
00141     }
00142   else
00143     {
00144       nfft_adjoint(ths->nplan1);
00145 
00146       for(l=0; l<ths->n; l++)
00147         ths->nplan1->f_hat[l] *= ths->b[l];
00148 
00149       nfft_trafo(ths->nplan2);
00150     }
00151 }
00152 
00167 void fgt_init_guru(fgt_plan *ths, int N, int M, double _Complex sigma, int n,
00168        double p, int m, unsigned flags)
00169 {
00170   int j,n_fftw;
00171   fftw_plan fplan;
00172 
00173   ths->M = M;
00174   ths->N = N;
00175   ths->sigma = sigma;
00176   ths->flags = flags;
00177 
00178   ths->x = (double*)nfft_malloc(ths->N*sizeof(double));
00179   ths->y = (double*)nfft_malloc(ths->M*sizeof(double));
00180   ths->alpha = (double _Complex*)nfft_malloc(ths->N*sizeof(double _Complex));
00181   ths->f = (double _Complex*)nfft_malloc(ths->M*sizeof(double _Complex));
00182 
00183   ths->n = n;
00184   ths->p = p;
00185 
00186   ths->b = (double _Complex*)nfft_malloc(ths->n*sizeof(double _Complex));
00187 
00188   ths->nplan1 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00189   ths->nplan2 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00190 
00191   n_fftw=nfft_next_power_of_2(2*ths->n);
00192 
00193   nfft_init_guru(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT|
00194                  PRE_PSI| MALLOC_X| MALLOC_F_HAT| FFTW_INIT, FFTW_MEASURE);
00195   nfft_init_guru(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT|
00196                  PRE_PSI| MALLOC_X| FFTW_INIT, FFTW_MEASURE);
00197 
00198   ths->nplan1->f = ths->alpha;
00199   ths->nplan2->f_hat = ths->nplan1->f_hat;
00200   ths->nplan2->f = ths->f;
00201 
00202   if(ths->flags & FGT_APPROX_B)
00203     {
00204       fplan = fftw_plan_dft_1d(ths->n, ths->b, ths->b, FFTW_FORWARD,
00205                                FFTW_MEASURE);
00206 
00207       for(j=0; j<ths->n; j++)
00208   ths->b[j] = cexp(-ths->p*ths->p*ths->sigma*(j-ths->n/2)*(j-ths->n/2)/
00209                           ((double)ths->n*ths->n)) / ths->n;
00210 
00211       nfft_fftshift_complex(ths->b, 1, &ths->n);
00212       fftw_execute(fplan);
00213       nfft_fftshift_complex(ths->b, 1, &ths->n);
00214 
00215       fftw_destroy_plan(fplan);
00216     }
00217   else
00218     {
00219       for(j=0; j<ths->n; j++)
00220   ths->b[j] = 1.0/ths->p * csqrt(PI/ths->sigma)*
00221     cexp(-PI*PI*(j-ths->n/2)*(j-ths->n/2)/
00222          (ths->p*ths->p*ths->sigma));
00223     }
00224 }
00225 
00237 void fgt_init(fgt_plan *ths, int N, int M, double _Complex sigma, double eps)
00238 {
00239   double p;
00240   int n;
00241 
00242   p=0.5+sqrt(-log(eps)/creal(sigma));
00243   if(p<1)
00244     p=1;
00245 
00246   n=2*((int)ceil(p*cabs(sigma)/PI * sqrt(-log(eps)/creal(sigma))));
00247 
00248   if(N*M<=((int)(1U<<20)))
00249     fgt_init_guru(ths, N, M, sigma, n, p, 7, DGT_PRE_CEXP);
00250   else
00251     fgt_init_guru(ths, N, M, sigma, n, p, 7, 0);
00252 }
00253 
00260 void fgt_init_node_dependent(fgt_plan *ths)
00261 {
00262   int j,k,l;
00263 
00264   if(ths->flags & DGT_PRE_CEXP)
00265    {
00266      ths->pre_cexp=(double _Complex*)nfft_malloc(ths->M*ths->N*
00267             sizeof(double _Complex));
00268 
00269      for(j=0,l=0; j<ths->M; j++)
00270        for(k=0; k<ths->N; k++,l++)
00271          ths->pre_cexp[l]=cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00272                                 (ths->y[j]-ths->x[k]));
00273    }
00274 
00275   for(j=0; j<ths->nplan1->M_total; j++)
00276     ths->nplan1->x[j] = ths->x[j]/ths->p;
00277   for(j=0; j<ths->nplan2->M_total; j++)
00278     ths->nplan2->x[j] = ths->y[j]/ths->p;
00279 
00280   if(ths->nplan1->nfft_flags & PRE_PSI)
00281     nfft_precompute_psi(ths->nplan1);
00282   if(ths->nplan2->nfft_flags & PRE_PSI)
00283     nfft_precompute_psi(ths->nplan2);
00284 }
00285 
00292 void fgt_finalize(fgt_plan *ths)
00293 {
00294   nfft_finalize(ths->nplan2);
00295   nfft_finalize(ths->nplan1);
00296 
00297   nfft_free(ths->nplan2);
00298   nfft_free(ths->nplan1);
00299 
00300   nfft_free(ths->b);
00301 
00302   nfft_free(ths->f);
00303   nfft_free(ths->y);
00304 
00305   nfft_free(ths->alpha);
00306   nfft_free(ths->x);
00307 }
00308 
00315 void fgt_test_init_rand(fgt_plan *ths)
00316 {
00317   int j,k;
00318 
00319   for(k=0; k<ths->N; k++)
00320     ths->x[k] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00321 
00322   for(j=0; j<ths->M; j++)
00323     ths->y[j] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00324 
00325   for(k=0; k<ths->N; k++)
00326     ths->alpha[k] =   (double)rand()/(RAND_MAX)-1.0/2.0
00327             + _Complex_I*(double)rand()/(RAND_MAX)-I/2.0;
00328 }
00329 
00338 double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
00339 {
00340   int r;
00341   double t_out,t;
00342   double tau=0.01;
00343 
00344   t_out=0;
00345   r=0;
00346   while(t_out<tau)
00347     {
00348       r++;
00349       if(dgt)
00350         {
00351           t=nfft_second();
00352           dgt_trafo(ths);
00353         }
00354       else
00355         {
00356           t=nfft_second();
00357           fgt_trafo(ths);
00358         }
00359       t_out+=nfft_second()-t;
00360     }
00361   t_out/=r;
00362 
00363   return t_out;
00364 }
00365 
00375 void fgt_test_simple(int N, int M, double _Complex sigma, double eps)
00376 {
00377   fgt_plan my_plan;
00378   double _Complex *swap_dgt;
00379 
00380   fgt_init(&my_plan, N, M, sigma, eps);
00381   swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*sizeof(double _Complex));
00382 
00383   fgt_test_init_rand(&my_plan);
00384   fgt_init_node_dependent(&my_plan);
00385 
00386   NFFT_SWAP_complex(swap_dgt,my_plan.f);
00387   dgt_trafo(&my_plan);
00388   nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform");
00389   NFFT_SWAP_complex(swap_dgt,my_plan.f);
00390 
00391   fgt_trafo(&my_plan);
00392   nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform");
00393 
00394   printf("\n relative error: %1.3e\n", nfft_error_l_infty_1_complex(swap_dgt,
00395          my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00396 
00397   nfft_free(swap_dgt);
00398   fgt_finalize(&my_plan);
00399 }
00400 
00410 void fgt_test_andersson(void)
00411 {
00412   fgt_plan my_plan;
00413   double _Complex *swap_dgt;
00414   int N;
00415 
00416   double _Complex sigma=4*(138+ _Complex_I*100);
00417   int n=128;
00418   int N_dgt_pre_exp=(int)(1U<<11);
00419   int N_dgt=(int)(1U<<19);
00420 
00421   printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma));
00422 
00423   for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1)
00424     {
00425       printf("$%d$\t & ",N);
00426 
00427       if(N<N_dgt_pre_exp)
00428         fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP);
00429       else
00430         fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0);
00431 
00432       swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*
00433                 sizeof(double _Complex));
00434 
00435       fgt_test_init_rand(&my_plan);
00436 
00437       fgt_init_node_dependent(&my_plan);
00438 
00439       if(N<N_dgt)
00440   {
00441           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00442           if(N<N_dgt_pre_exp)
00443             my_plan.flags^=DGT_PRE_CEXP;
00444 
00445     printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00446           if(N<N_dgt_pre_exp)
00447             my_plan.flags^=DGT_PRE_CEXP;
00448           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00449   }
00450       else
00451   printf("\t\t & ");
00452 
00453       if(N<N_dgt_pre_exp)
00454   printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00455       else
00456   printf("\t\t & ");
00457 
00458       my_plan.flags^=FGT_NDFT;
00459       printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00460       my_plan.flags^=FGT_NDFT;
00461 
00462       printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00463 
00464       printf("$%1.1e$\t \\\\ \n",
00465        nfft_error_l_infty_1_complex(swap_dgt, my_plan.f, my_plan.M,
00466             my_plan.alpha, my_plan.N));
00467       fflush(stdout);
00468 
00469       nfft_free(swap_dgt);
00470       fgt_finalize(&my_plan);
00471       fftw_cleanup();
00472     }
00473 }
00474 
00481 void fgt_test_error(void)
00482 {
00483   fgt_plan my_plan;
00484   double _Complex *swap_dgt;
00485   int n,mi;
00486 
00487   double _Complex sigma=4*(138+ _Complex_I*100);
00488   int N=1000;
00489   int M=1000;
00490   int m[2]={7,3};
00491 
00492   printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00493   printf("error=[\n");
00494 
00495   swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00496 
00497   for(n=8; n<=128; n+=4)
00498     {
00499       printf("%d\t",n);
00500       for(mi=0;mi<2;mi++)
00501         {
00502           fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
00503           fgt_test_init_rand(&my_plan);
00504           fgt_init_node_dependent(&my_plan);
00505 
00506           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00507           dgt_trafo(&my_plan);
00508           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00509 
00510           fgt_trafo(&my_plan);
00511 
00512           printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00513                  my_plan.M, my_plan.alpha, my_plan.N));
00514           fflush(stdout);
00515 
00516           fgt_finalize(&my_plan);
00517           fftw_cleanup();
00518         }
00519       printf("\n");
00520     }
00521   printf("];\n");
00522 
00523   nfft_free(swap_dgt);
00524 }
00525 
00532 void fgt_test_error_p(void)
00533 {
00534   fgt_plan my_plan;
00535   double _Complex *swap_dgt;
00536   int n,pi;
00537 
00538   double _Complex sigma=20+40*_Complex_I;
00539   int N=1000;
00540   int M=1000;
00541   double p[3]={1,1.5,2};
00542 
00543   printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00544   printf("error=[\n");
00545 
00546   swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00547 
00548   for(n=8; n<=128; n+=4)
00549     {
00550       printf("%d\t",n);
00551       for(pi=0;pi<3;pi++)
00552         {
00553           fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
00554           fgt_test_init_rand(&my_plan);
00555           fgt_init_node_dependent(&my_plan);
00556 
00557           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00558           dgt_trafo(&my_plan);
00559           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00560 
00561           fgt_trafo(&my_plan);
00562 
00563           printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00564                  my_plan.M, my_plan.alpha, my_plan.N));
00565           fflush(stdout);
00566 
00567           fgt_finalize(&my_plan);
00568           fftw_cleanup();
00569         }
00570       printf("\n");
00571     }
00572   printf("];\n");
00573 }
00574 
00580 int main(int argc,char **argv)
00581 {
00582   if(argc!=2)
00583     {
00584       fprintf(stderr,"fastgauss type\n");
00585       fprintf(stderr," type\n");
00586       fprintf(stderr,"  0 - Simple test.\n");
00587       fprintf(stderr,"  1 - Compares accuracy and execution time.\n");
00588       fprintf(stderr,"      Pipe to output_andersson.tex\n");
00589       fprintf(stderr,"  2 - Compares accuracy.\n");
00590       fprintf(stderr,"      Pipe to output_error.m\n");
00591       fprintf(stderr,"  3 - Compares accuracy.\n");
00592       fprintf(stderr,"      Pipe to output_error_p.m\n");
00593       return -1;
00594     }
00595 
00596   if(atoi(argv[1])==0)
00597     fgt_test_simple(10, 10, 5+3*_Complex_I, 0.001);
00598 
00599   if(atoi(argv[1])==1)
00600     fgt_test_andersson();
00601 
00602   if(atoi(argv[1])==2)
00603     fgt_test_error();
00604 
00605   if(atoi(argv[1])==3)
00606     fgt_test_error_p();
00607 
00608   return 1;
00609 }
00610 /* \} */

Generated on 17 Aug 2009 by Doxygen 1.5.3