NFFT Logo 3.1.1 API Reference

nfft.c

00001 /*
00002  * Copyright (c) 2002, 2009 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: nfft.c 3198 2009-05-27 14:16:50Z keiner $ */
00020 
00026 #include <stdio.h>
00027 #include <math.h>
00028 #include <string.h>
00029 #include <stdlib.h>
00030 
00031 #include <complex.h>
00032 
00033 #include "nfft3util.h"
00034 #include "nfft3.h"
00035 #include "infft.h"
00036 
00059 #define MACRO_ndft_init_result_trafo memset(f,0,ths->M_total*                 \
00060                                             sizeof(double _Complex));
00061 #define MACRO_ndft_init_result_conjugated MACRO_ndft_init_result_trafo
00062 #define MACRO_ndft_init_result_adjoint memset(f_hat,0,ths->N_total*           \
00063                 sizeof(double _Complex));
00064 #define MACRO_ndft_init_result_transposed MACRO_ndft_init_result_adjoint
00065 
00066 #define MACRO_ndft_sign_trafo      +2*PI*ths->x[j*ths->d+t]
00067 #define MACRO_ndft_sign_conjugated -2*PI*ths->x[j*ths->d+t]
00068 #define MACRO_ndft_sign_adjoint    +2*PI*ths->x[j*ths->d+t]
00069 #define MACRO_ndft_sign_transposed -2*PI*ths->x[j*ths->d+t]
00070 
00071 #define MACRO_init_k_N_Omega_x(which_one) {                                   \
00072 for(t=0; t<ths->d; t++)                                                       \
00073   {                                                                           \
00074     k[t]=-ths->N[t]/2;                                                        \
00075     x[t]= MACRO_ndft_sign_ ## which_one;                                      \
00076     Omega[t+1]=k[t]*x[t]+Omega[t];                                            \
00077   }                                                                           \
00078 omega=Omega[ths->d];                                                          \
00079 }                                                                             \
00080 
00081 #define MACRO_count_k_N_Omega {                                               \
00082 for(t = ths->d-1; (t >= 1) && (k[t] == ths->N[t]/2-1); t--)                   \
00083   k[t]-= ths->N[t]-1;                                                         \
00084                                                                               \
00085 k[t]++;                                                                       \
00086                                                                               \
00087 for(t2 = t; t2<ths->d; t2++)                                                  \
00088   Omega[t2+1]=k[t2]*x[t2]+Omega[t2];                                          \
00089                                                                               \
00090 omega=Omega[ths->d];                                                          \
00091 }
00092 
00093 #define MACRO_ndft_compute_trafo (*fj) += (*f_hat_k)*cexp(-_Complex_I*omega);
00094 
00095 #define MACRO_ndft_compute_conjugated MACRO_ndft_compute_trafo
00096 
00097 #define MACRO_ndft_compute_adjoint (*f_hat_k) += (*fj)*cexp(+ _Complex_I*omega);
00098 
00099 #define MACRO_ndft_compute_transposed MACRO_ndft_compute_adjoint
00100 
00101 #define MACRO_ndft(which_one)                                                 \
00102 void ndft_ ## which_one (nfft_plan *ths)                                      \
00103 {                                                                             \
00104   int j;                                \
00105   int t,t2;                             \
00106   int k_L;                              \
00107   double _Complex *f_hat, *f;           \
00108   double _Complex *f_hat_k;             \
00109   double _Complex *fj;                  \
00110   double x[ths->d];                     \
00111   int k[ths->d];                        \
00112   double omega, Omega[ths->d+1];        \
00113                                                                               \
00114   f_hat=ths->f_hat; f=ths->f;                                                 \
00115                                                                               \
00116   MACRO_ndft_init_result_ ## which_one                                        \
00117                                                                               \
00118   if(ths->d==1) /* univariate case (due to performance) */                    \
00119     {                                                                         \
00120       t=0;                                                                    \
00121       for(j=0, fj = f; j<ths->M_total; j++, fj++)                             \
00122         {                                                                     \
00123     for(k_L=0, f_hat_k = f_hat; k_L<ths->N_total; k_L++, f_hat_k++)     \
00124       {                                                                 \
00125         omega=(k_L-ths->N_total/2)* MACRO_ndft_sign_ ## which_one;      \
00126               MACRO_ndft_compute_ ## which_one;                               \
00127       }                                                                 \
00128         }                                                                     \
00129     }                                                                         \
00130   else /* multivariate case */                        \
00131     {                                                                         \
00132       Omega[0]=0;                                                             \
00133       for(j=0, fj=f; j<ths->M_total; j++, fj++)                               \
00134         {                                                                     \
00135           MACRO_init_k_N_Omega_x(which_one);                                  \
00136           for(k_L=0, f_hat_k=f_hat; k_L<ths->N_total; k_L++, f_hat_k++)       \
00137       {                                                                 \
00138               MACRO_ndft_compute_ ## which_one;                               \
00139         MACRO_count_k_N_Omega;                                          \
00140       } /* for(k_L) */                                                  \
00141         } /* for(j) */                                                        \
00142     } /* else */                                                              \
00143 } /* ndft_trafo */
00144 
00145 
00148 MACRO_ndft(trafo)
00149 MACRO_ndft(adjoint)
00150 
00176 static void nfft_uo(const nfft_plan *ths,const int j,int *up,int *op,const int act_dim)
00177 {
00178   double xj=ths->x[j*ths->d+act_dim];
00179   int c = LRINT(xj * ths->n[act_dim]);
00180 
00181   if(xj < 0)
00182     {
00183       (*up) = c-1-(ths->m);
00184       (*op) = c  +(ths->m);
00185     }
00186   else
00187     {
00188       (*up) = c  -(ths->m);
00189       (*op) = c+1+(ths->m);
00190     }
00191 }
00192 
00193 static void nfft_uo2(int *u, int *o, const double x, const int n, const int m)
00194 {
00195   int c = LRINT(x * n);
00196 
00197   if(x < 0)
00198     {
00199       *u=(c-1-m+n)%n;
00200       *o=(c+  m+n)%n;
00201     }
00202   else
00203     {
00204       *u=(c  -m+n)%n;
00205       *o=(c+1+m+n)%n;
00206     }
00207 }
00208 
00209 
00210 #define MACRO_nfft_D_compute_A {                                              \
00211  g_hat[k_plain[ths->d]] = f_hat[ks_plain[ths->d]] * c_phi_inv_k[ths->d];      \
00212 }
00213 
00214 #define MACRO_nfft_D_compute_T {                                              \
00215  f_hat[ks_plain[ths->d]] = g_hat[k_plain[ths->d]] * c_phi_inv_k[ths->d];      \
00216 }
00217 
00218 #define MACRO_nfft_D_init_result_A  memset(g_hat,0,ths->n_total*              \
00219              sizeof(double _Complex));
00220 #define MACRO_nfft_D_init_result_T memset(f_hat,0,ths->N_total*               \
00221                                            sizeof(double _Complex));
00222 
00223 #define MACRO_with_PRE_PHI_HUT * ths->c_phi_inv[t2][ks[t2]];
00224 #define MACRO_without_PRE_PHI_HUT / (PHI_HUT(ks[t2]-ths->N[t2]/2,t2));
00225 
00226 #define MACRO_init_k_ks {                                                     \
00227   for(t = ths->d-1; t>=0; t--)                                                \
00228     {                                                                         \
00229       kp[t]= 0;                                                               \
00230       k[t] = 0;                                                               \
00231       ks[t] = ths->N[t]/2;                                                    \
00232     }                                                                         \
00233   t++;                                                                        \
00234 }
00235 
00236 #define MACRO_update_c_phi_inv_k(which_one) {                                 \
00237   for(t2=t; t2<ths->d; t2++)                                                  \
00238     {                                                                         \
00239       c_phi_inv_k[t2+1]= c_phi_inv_k[t2] MACRO_ ##which_one;                  \
00240       ks_plain[t2+1]= ks_plain[t2]*ths->N[t2]+ks[t2];                         \
00241       k_plain[t2+1]= k_plain[t2]*ths->n[t2]+k[t2];                            \
00242     }                                                                         \
00243 }
00244 
00245 #define MACRO_count_k_ks {                                                    \
00246   for(t=ths->d-1; (t>0)&& (kp[t]==ths->N[t]-1); t--)                          \
00247     {                                                                         \
00248       kp[t]= 0;                                                               \
00249       k[t]= 0;                                                                \
00250       ks[t]= ths->N[t]/2;                                                     \
00251     }                                                                         \
00252                                                                               \
00253   kp[t]++; k[t]++; ks[t]++;                                                   \
00254   if(kp[t]==ths->N[t]/2)                                                      \
00255     {                                                                         \
00256       k[t]= ths->n[t]-ths->N[t]/2;                                            \
00257       ks[t]= 0;                                                               \
00258     }                                                                         \
00259 }                                                                             \
00260 
00261 
00265 #define MACRO_nfft_D(which_one)                                               \
00266 static inline void nfft_D_ ## which_one (nfft_plan *ths)                      \
00267 {                                                                             \
00268   int t, t2;                            \
00269   int k_L;                              \
00270   int kp[ths->d];                       \
00271   int k[ths->d];                        \
00272   int ks[ths->d];                       \
00273   double c_phi_inv_k[ths->d+1];         \
00274   int k_plain[ths->d+1];                \
00275   int ks_plain[ths->d+1];               \
00276   double _Complex *f_hat, *g_hat;       \
00277                                                                               \
00278   f_hat=ths->f_hat; g_hat=ths->g_hat;                                         \
00279   MACRO_nfft_D_init_result_ ## which_one;                                     \
00280                                                                               \
00281   c_phi_inv_k[0]=1;                                                           \
00282   k_plain[0]=0;                                                               \
00283   ks_plain[0]=0;                                                              \
00284                                                                               \
00285   if(ths->nfft_flags & PRE_PHI_HUT)                                           \
00286     {                                                                         \
00287       MACRO_init_k_ks;                                                        \
00288                                                                               \
00289       for(k_L=0; k_L<ths->N_total; k_L++)                                     \
00290   {                                                                     \
00291           MACRO_update_c_phi_inv_k(with_PRE_PHI_HUT);                         \
00292                                                                               \
00293     MACRO_nfft_D_compute_ ## which_one;                                 \
00294                                                                         \
00295     MACRO_count_k_ks;                                                   \
00296   } /* for(k_L) */                                                      \
00297     } /* if(PRE_PHI_HUT) */                                                   \
00298   else                                                                        \
00299     {                                                                         \
00300       MACRO_init_k_ks;                                                        \
00301                                                                               \
00302       for(k_L=0; k_L<ths->N_total; k_L++)                                     \
00303     {                                                                   \
00304         MACRO_update_c_phi_inv_k(without_PRE_PHI_HUT);                        \
00305                                                                               \
00306       MACRO_nfft_D_compute_ ## which_one;                               \
00307                                                                         \
00308       MACRO_count_k_ks;                                                 \
00309     } /* for(k_L) */                                                    \
00310     } /* else(PRE_PHI_HUT) */                                                 \
00311 } /* nfft_D */
00312 
00313 MACRO_nfft_D(A)
00314 MACRO_nfft_D(T)
00315 
00319 #define MACRO_nfft_B_init_result_A  memset(f,0,ths->M_total*                  \
00320                                            sizeof(double _Complex));
00321 #define MACRO_nfft_B_init_result_T memset(g,0,ths->n_total*                   \
00322                                           sizeof(double _Complex));
00323 
00324 #define MACRO_nfft_B_PRE_FULL_PSI_compute_A {                                 \
00325   (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]];            \
00326 }
00327 
00328 #define MACRO_nfft_B_PRE_FULL_PSI_compute_T {                                 \
00329   g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj);                            \
00330 }
00331 
00332 #define MACRO_nfft_B_compute_A {                                              \
00333   (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]];                            \
00334 }
00335 
00336 #define MACRO_nfft_B_compute_T {                                              \
00337   g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj);                            \
00338 }
00339 
00340 #define MACRO_with_FG_PSI fg_psi[t2][lj[t2]]
00341 
00342 #define MACRO_with_PRE_PSI     ths->psi[(j*ths->d+t2)*(2*ths->m+2)+lj[t2]]
00343 
00344 #define MACRO_without_PRE_PSI  PHI(ths->x[j*ths->d+t2]-                       \
00345                                    ((double)l[t2])/ths->n[t2], t2)
00346 
00347 #define MACRO_init_uo_l_lj_t {                                                \
00348   for(t = ths->d-1; t>=0; t--)                                                \
00349     {                                                                         \
00350       nfft_uo(ths,j,&u[t],&o[t],t);                                           \
00351       l[t] = u[t];                                                            \
00352       lj[t] = 0;                                                              \
00353     } /* for(t) */                                                            \
00354   t++;                                                                        \
00355 }
00356 
00357 #define MACRO_update_phi_prod_ll_plain(which_one) {                           \
00358   for(t2=t; t2<ths->d; t2++)                                                  \
00359     {                                                                         \
00360       phi_prod[t2+1]=phi_prod[t2]* MACRO_ ## which_one;                       \
00361       ll_plain[t2+1]=ll_plain[t2]*ths->n[t2] +(l[t2]+ths->n[t2])%ths->n[t2];  \
00362     } /* for(t2) */                                                           \
00363 }
00364 
00365 #define MACRO_count_uo_l_lj_t {                                               \
00366   for(t = ths->d-1; (t>0)&&(l[t]==o[t]); t--)                                 \
00367     {                                                                         \
00368       l[t] = u[t];                                                            \
00369       lj[t] = 0;                                                              \
00370     } /* for(t) */                                                            \
00371                                                                               \
00372   l[t]++;                                                                     \
00373   lj[t]++;                                                                    \
00374 }
00375 
00376 #define MACRO_nfft_B(which_one)                                               \
00377 static inline void nfft_B_ ## which_one (nfft_plan *ths)                      \
00378 {                                                                             \
00379   int lprod;                            \
00380   int u[ths->d], o[ths->d];             \
00381   int t, t2;                            \
00382   int j;                                \
00383   int l_L, ix;                          \
00384   int l[ths->d];                        \
00385   int lj[ths->d];                       \
00386   int ll_plain[ths->d+1];               \
00387   double phi_prod[ths->d+1];            \
00388   double _Complex *f, *g;               \
00389   double _Complex *fj;                  \
00390   double y[ths->d];                                                           \
00391   double fg_psi[ths->d][2*ths->m+2];                                          \
00392   double fg_exp_l[ths->d][2*ths->m+2];                                        \
00393   int l_fg,lj_fg;                                                             \
00394   double tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3;                       \
00395   double ip_w;                                                                \
00396   int ip_u;                                                                   \
00397   int ip_s=ths->K/(ths->m+1);                                                 \
00398                                                                               \
00399   f=ths->f; g=ths->g;                                                         \
00400                                                                               \
00401   MACRO_nfft_B_init_result_ ## which_one;                                     \
00402                                                                               \
00403   if(ths->nfft_flags & PRE_FULL_PSI)                                          \
00404     {                                                                         \
00405       for(ix=0, j=0, fj=f; j<ths->M_total; j++, fj++)                         \
00406         for(l_L=0; l_L<ths->psi_index_f[j]; l_L++, ix++)                      \
00407     MACRO_nfft_B_PRE_FULL_PSI_compute_ ## which_one;                    \
00408       return;                                                                 \
00409     }                                                                         \
00410                                                                               \
00411   phi_prod[0]=1;                                                              \
00412   ll_plain[0]=0;                                                              \
00413                                                                               \
00414   for(t=0,lprod = 1; t<ths->d; t++)                                           \
00415     lprod *= (2*ths->m+2);                                                    \
00416                                                                               \
00417   if(ths->nfft_flags & PRE_PSI)                                               \
00418     {                                                                         \
00419       for(j=0, fj=f; j<ths->M_total; j++, fj++)                               \
00420   {                                                                     \
00421           MACRO_init_uo_l_lj_t;                                               \
00422                                                                               \
00423     for(l_L=0; l_L<lprod; l_L++)                                        \
00424       {                                                                 \
00425               MACRO_update_phi_prod_ll_plain(with_PRE_PSI);                   \
00426                                                                               \
00427         MACRO_nfft_B_compute_ ## which_one;                             \
00428                                                                   \
00429         MACRO_count_uo_l_lj_t;                                          \
00430             } /* for(l_L) */                                                  \
00431   } /* for(j) */                                                        \
00432       return;                                                                 \
00433     } /* if(PRE_PSI) */                                                       \
00434                                                                               \
00435   if(ths->nfft_flags & PRE_FG_PSI)                                            \
00436     {                                                                         \
00437       for(t2=0; t2<ths->d; t2++)                                              \
00438         {                                                                     \
00439           tmpEXP2 = exp(-1.0/ths->b[t2]);                                     \
00440           tmpEXP2sq = tmpEXP2*tmpEXP2;                                        \
00441           tmp2 = 1.0;                                                         \
00442           tmp3 = 1.0;                                                         \
00443           fg_exp_l[t2][0] = 1.0;                                              \
00444           for(lj_fg=1; lj_fg <= (2*ths->m+2); lj_fg++)                        \
00445             {                                                                 \
00446               tmp3 = tmp2*tmpEXP2;                                            \
00447               tmp2 *= tmpEXP2sq;                                              \
00448               fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;               \
00449             }                                                                 \
00450         }                                                                     \
00451       for(j=0, fj=f; j<ths->M_total; j++, fj++)                               \
00452   {                                                                     \
00453           MACRO_init_uo_l_lj_t;                                               \
00454                                                                               \
00455           for(t2=0; t2<ths->d; t2++)                                          \
00456             {                                                                 \
00457               fg_psi[t2][0] = ths->psi[2*(j*ths->d+t2)];                      \
00458               tmpEXP1 = ths->psi[2*(j*ths->d+t2)+1];                          \
00459               tmp1 = 1.0;                                                     \
00460               for(l_fg=u[t2]+1, lj_fg=1; l_fg <= o[t2]; l_fg++, lj_fg++)      \
00461                 {                                                             \
00462                   tmp1 *= tmpEXP1;                                            \
00463                   fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg]; \
00464                 }                                                             \
00465             }                                                                 \
00466                                                                               \
00467     for(l_L=0; l_L<lprod; l_L++)                                        \
00468       {                                                                 \
00469               MACRO_update_phi_prod_ll_plain(with_FG_PSI);                    \
00470                                                                               \
00471         MACRO_nfft_B_compute_ ## which_one;                             \
00472                                                                   \
00473         MACRO_count_uo_l_lj_t;                                          \
00474             } /* for(l_L) */                                                  \
00475   } /* for(j) */                                                        \
00476       return;                                                                 \
00477     } /* if(PRE_FG_PSI) */                                                    \
00478                                                                               \
00479   if(ths->nfft_flags & FG_PSI)                                                \
00480     {                                                                         \
00481       for(t2=0; t2<ths->d; t2++)                                              \
00482         {                                                                     \
00483           tmpEXP2 = exp(-1.0/ths->b[t2]);                                     \
00484           tmpEXP2sq = tmpEXP2*tmpEXP2;                                        \
00485           tmp2 = 1.0;                                                         \
00486           tmp3 = 1.0;                                                         \
00487           fg_exp_l[t2][0] = 1.0;                                              \
00488           for(lj_fg=1; lj_fg <= (2*ths->m+2); lj_fg++)                        \
00489             {                                                                 \
00490               tmp3 = tmp2*tmpEXP2;                                            \
00491               tmp2 *= tmpEXP2sq;                                              \
00492               fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;               \
00493             }                                                                 \
00494         }                                                                     \
00495       for(j=0, fj=f; j<ths->M_total; j++, fj++)                               \
00496   {                                                                     \
00497           MACRO_init_uo_l_lj_t;                                               \
00498                                                                               \
00499           for(t2=0; t2<ths->d; t2++)                                          \
00500             {                                                                 \
00501               fg_psi[t2][0] =                                                 \
00502                 (PHI((ths->x[j*ths->d+t2]-((double)u[t2])/ths->n[t2]),t2));   \
00503                                                                               \
00504               tmpEXP1 = exp(2.0*(ths->n[t2]*ths->x[j*ths->d+t2] - u[t2])      \
00505                       / ths->b[t2]);                                          \
00506               tmp1 = 1.0;                                                     \
00507               for(l_fg=u[t2]+1, lj_fg=1; l_fg <= o[t2]; l_fg++, lj_fg++)      \
00508                 {                                                             \
00509                   tmp1 *= tmpEXP1;                                            \
00510                   fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg]; \
00511                 }                                                             \
00512             }                                                                 \
00513                                                                               \
00514     for(l_L=0; l_L<lprod; l_L++)                                        \
00515       {                                                                 \
00516               MACRO_update_phi_prod_ll_plain(with_FG_PSI);                    \
00517                                                                               \
00518         MACRO_nfft_B_compute_ ## which_one;                             \
00519                                                                   \
00520         MACRO_count_uo_l_lj_t;                                          \
00521             } /* for(l_L) */                                                  \
00522   } /* for(j) */                                                        \
00523       return;                                                                 \
00524     } /* if(FG_PSI) */                                                        \
00525                                                                               \
00526                                                                               \
00527   if(ths->nfft_flags & PRE_LIN_PSI)                                           \
00528     {                                                                         \
00529       for(j=0, fj=f; j<ths->M_total; j++, fj++)                               \
00530   {                                                                     \
00531           MACRO_init_uo_l_lj_t;                                               \
00532                                                                               \
00533           for(t2=0; t2<ths->d; t2++)                                          \
00534             {                                                                 \
00535               y[t2] = ((ths->n[t2]*ths->x[j*ths->d+t2]-                       \
00536                           (double)u[t2]) * ((double)ths->K))/(ths->m+1);      \
00537               ip_u  = LRINT(floor(y[t2]));                                    \
00538               ip_w  = y[t2]-ip_u;                                             \
00539               for(l_fg=u[t2], lj_fg=0; l_fg <= o[t2]; l_fg++, lj_fg++)        \
00540                 {                                                             \
00541                   fg_psi[t2][lj_fg] = ths->psi[(ths->K+1)*t2+                 \
00542                  abs(ip_u-lj_fg*ip_s)]*         \
00543                                       (1-ip_w) +                              \
00544                                       ths->psi[(ths->K+1)*t2+                 \
00545                  abs(ip_u-lj_fg*ip_s+1)]*       \
00546                                        (ip_w);                                \
00547               }                                                               \
00548             }                                                                 \
00549                                                                               \
00550     for(l_L=0; l_L<lprod; l_L++)                                        \
00551       {                                                                 \
00552               MACRO_update_phi_prod_ll_plain(with_FG_PSI);                    \
00553                                                                               \
00554         MACRO_nfft_B_compute_ ## which_one;                             \
00555                                                                   \
00556         MACRO_count_uo_l_lj_t;                                          \
00557             } /* for(l_L) */                                                  \
00558   } /* for(j) */                                                        \
00559       return;                                                                 \
00560     } /* if(PRE_LIN_PSI) */                                                   \
00561                                                                               \
00562   /* no precomputed psi at all */                                             \
00563   for(j=0, fj=f; j<ths->M_total; j++, fj++)                                   \
00564     {                                                                         \
00565       MACRO_init_uo_l_lj_t;                                                   \
00566                                                                         \
00567       for(l_L=0; l_L<lprod; l_L++)                                            \
00568       {                                                                     \
00569           MACRO_update_phi_prod_ll_plain(without_PRE_PSI);                    \
00570                                                                               \
00571           MACRO_nfft_B_compute_ ## which_one;                                 \
00572                                                                   \
00573           MACRO_count_uo_l_lj_t;                                              \
00574   } /* for(l_L) */                                                      \
00575     } /* for(j) */                                                            \
00576 } /* nfft_B */                                                                \
00577 
00578 MACRO_nfft_B(A)
00579 MACRO_nfft_B(T)
00580 
00581 /* ############################################################ SPECIFIC VERSIONS FOR d=1 */
00582 
00583 static void nfft_1d_init_fg_exp_l(double *fg_exp_l, const int m, const double b)
00584 {
00585   int l;
00586   double fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
00587 
00588   fg_exp_b0 = exp(-1.0/b);
00589   fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
00590   fg_exp_b1 = 1.0;
00591   fg_exp_b2 = 1.0;
00592   fg_exp_l[0] = 1.0;
00593   for(l=1; l <= 2*m+1; l++)
00594     {
00595       fg_exp_b2 = fg_exp_b1*fg_exp_b0;
00596       fg_exp_b1 *= fg_exp_b0_sq;
00597       fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
00598     }
00599 }
00600 
00601 static void nfft_trafo_1d_compute(double _Complex *fj, const double _Complex *g,const double *psij_const, const double *xj, const int n, const int m)
00602 {
00603   int u,o,l;
00604   const double _Complex *gj;
00605   const double *psij;
00606   psij=psij_const;
00607 
00608   nfft_uo2(&u,&o,*xj, n, m);
00609 
00610   if(u<o)
00611     for(l=1,gj=g+u,(*fj)=(*psij++) * (*gj++); l<=2*m+1; l++)
00612       (*fj) += (*psij++) * (*gj++);
00613   else
00614     {
00615       for(l=1,gj=g+u,(*fj)=(*psij++) * (*gj++); l<2*m+1-o; l++)
00616   (*fj) += (*psij++) * (*gj++);
00617       for(l=0,gj=g; l<=o; l++)
00618   (*fj) += (*psij++) * (*gj++);
00619     }
00620 }
00621 
00622 static void nfft_adjoint_1d_compute(const double _Complex *fj, double _Complex *g,const double *psij_const, const double *xj, const int n, const int m)
00623 {
00624   int u,o,l;
00625   double _Complex *gj;
00626   const double *psij;
00627   psij=psij_const;
00628 
00629   nfft_uo2(&u,&o,*xj, n, m);
00630 
00631   if(u<o)
00632     for(l=0,gj=g+u; l<=2*m+1; l++)
00633       (*gj++) += (*psij++) * (*fj);
00634   else
00635     {
00636       for(l=0,gj=g+u; l<2*m+1-o; l++)
00637   (*gj++) += (*psij++) * (*fj);
00638       for(l=0,gj=g; l<=o; l++)
00639   (*gj++) += (*psij++) * (*fj);
00640     }
00641 }
00642 
00643 static void nfft_trafo_1d_B(nfft_plan *ths)
00644 {
00645   int n,N,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
00646   double _Complex *fj,*g;
00647   double *psij, *psij_const, *xj, ip_y,ip_w;
00648 
00649   double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
00650 
00651   N=ths->N[0];
00652   n=ths->n[0];
00653   M=ths->M_total;
00654   m=ths->m;
00655 
00656   g=ths->g;
00657 
00658 
00659   if(ths->nfft_flags & PRE_FULL_PSI)
00660     {
00661       psi_index_g=ths->psi_index_g;
00662       for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
00663         for(l=1, (*fj)=(*psij++) * g[(*psi_index_g++)]; l<=2*m+1; l++)
00664     (*fj) += (*psij++) * g[(*psi_index_g++)];
00665       return;
00666     } /* if(PRE_FULL_PSI) */
00667 
00668   if(ths->nfft_flags & PRE_PSI)
00669     {
00670       for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj++)
00671   nfft_trafo_1d_compute(fj, g, ths->psi+j*(2*m+2), xj, n, m);
00672       return;
00673     } /* if(PRE_PSI) */
00674 
00675   if(ths->nfft_flags & PRE_FG_PSI)
00676     {
00677       psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00678       fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00679 
00680       nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00681 
00682       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00683   {
00684     fg_psij0 = ths->psi[2*j];
00685     fg_psij1 = ths->psi[2*j+1];
00686     fg_psij2 = 1.0;
00687     psij_const[0] = fg_psij0;
00688     for(l=1; l<=2*m+1; l++)
00689       {
00690         fg_psij2 *= fg_psij1;
00691         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00692       }
00693 
00694     nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00695   }
00696       nfft_free(fg_exp_l);
00697       nfft_free(psij_const);
00698       return;
00699     } /* if(PRE_FG_PSI) */
00700 
00701   if(ths->nfft_flags & FG_PSI)
00702     {
00703       psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00704       fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00705 
00706       nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00707 
00708       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00709   {
00710     nfft_uo(ths,j,&u,&o,0);
00711     fg_psij0 = (PHI(*xj-((double)u)/n,0));
00712     fg_psij1 = exp(2.0*(n*(*xj) - u)/ths->b[0]);
00713     fg_psij2 = 1.0;
00714     psij_const[0] = fg_psij0;
00715     for(l=1; l<=2*m+1; l++)
00716       {
00717         fg_psij2 *= fg_psij1;
00718         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00719       }
00720 
00721     nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00722   }
00723       nfft_free(fg_exp_l);
00724       nfft_free(psij_const);
00725       return;
00726     } /* if(FG_PSI) */
00727 
00728   if(ths->nfft_flags & PRE_LIN_PSI)
00729     {
00730       psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00731       K=ths->K;
00732       ip_s=K/(m+1);
00733 
00734       psij_const[2*m+1]=0;
00735 
00736       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00737   {
00738     nfft_uo(ths,j,&u,&o,0);
00739 
00740     ip_y = fabs(n*(*xj) - u)*((double)K)/(m+1);
00741     ip_u = LRINT(floor(ip_y));
00742     ip_w = ip_y-ip_u;
00743     for(l=0; l < 2*m+2; l++)
00744       psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
00745         ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
00746 
00747     nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00748   }
00749       nfft_free(psij_const);
00750       return;
00751     } /* if(PRE_LIN_PSI) */
00752 
00753   /* no precomputed psi at all */
00754   psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00755   for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00756     {
00757       nfft_uo(ths,j,&u,&o,0);
00758 
00759       for(l=0;l<=2*m+1;l++)
00760   psij_const[l]=(PHI(*xj-((double)((u+l)))/n,0));
00761 
00762       nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00763     }
00764   nfft_free(psij_const);
00765 }
00766 
00767 static void nfft_adjoint_1d_B(nfft_plan *ths)
00768 {
00769   int n,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
00770   double _Complex *fj,*g;
00771   double *psij, *psij_const, *xj, ip_y, ip_w;
00772   double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
00773 
00774   n=ths->n[0];
00775   M=ths->M_total;
00776   m=ths->m;
00777 
00778   g=ths->g;
00779   memset(g,0,ths->n_total*sizeof(double _Complex));
00780 
00781   if(ths->nfft_flags & PRE_FULL_PSI)
00782     {
00783       psi_index_g=ths->psi_index_g;
00784       for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
00785         for(l=0; l<=2*m+1; l++)
00786     g[*psi_index_g++] += (*psij++) * (*fj);
00787       return;
00788     } /* if(PRE_FULL_PSI) */
00789 
00790   if(ths->nfft_flags & PRE_PSI)
00791     {
00792       for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj++)
00793   nfft_adjoint_1d_compute(fj, g, ths->psi+j*(2*m+2), xj, n, m);
00794       return;
00795     } /* if(PRE_PSI) */
00796 
00797   if(ths->nfft_flags & PRE_FG_PSI)
00798     {
00799       psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00800       fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00801 
00802       nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00803 
00804       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00805   {
00806     fg_psij0 = ths->psi[2*j];
00807     fg_psij1 = ths->psi[2*j+1];
00808     fg_psij2 = 1.0;
00809     psij_const[0] = fg_psij0;
00810     for(l=1; l<=2*m+1; l++)
00811       {
00812         fg_psij2 *= fg_psij1;
00813         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00814       }
00815 
00816     nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00817   }
00818       nfft_free(fg_exp_l);
00819       nfft_free(psij_const);
00820       return;
00821     } /* if(PRE_FG_PSI) */
00822 
00823   if(ths->nfft_flags & FG_PSI)
00824     {
00825       psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00826       fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00827 
00828       nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00829 
00830       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00831   {
00832     nfft_uo(ths,j,&u,&o,0);
00833     fg_psij0 = (PHI(*xj-((double)u)/n,0));
00834     fg_psij1 = exp(2.0*(n*(*xj) - u)/ths->b[0]);
00835     fg_psij2 = 1.0;
00836     psij_const[0] = fg_psij0;
00837     for(l=1; l<=2*m+1; l++)
00838       {
00839         fg_psij2 *= fg_psij1;
00840         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00841       }
00842 
00843     nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00844   }
00845       nfft_free(fg_exp_l);
00846       nfft_free(psij_const);
00847       return;
00848     } /* if(FG_PSI) */
00849 
00850   if(ths->nfft_flags & PRE_LIN_PSI)
00851     {
00852       psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00853       K=ths->K;
00854       ip_s=K/(m+1);
00855 
00856       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00857   {
00858     nfft_uo(ths,j,&u,&o,0);
00859 
00860     ip_y = fabs(n*(*xj) - u)*((double)K)/(m+1);
00861     ip_u = LRINT(floor(ip_y));
00862     ip_w = ip_y-ip_u;
00863     for(l=0; l < 2*m+2; l++)
00864       psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
00865         ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
00866 
00867     nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00868   }
00869       nfft_free(psij_const);
00870       return;
00871     } /* if(PRE_LIN_PSI) */
00872 
00873   /* no precomputed psi at all */
00874   psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00875   for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00876     {
00877       nfft_uo(ths,j,&u,&o,0);
00878 
00879       for(l=0;l<=2*m+1;l++)
00880   psij_const[l]=(PHI(*xj-((double)((u+l)))/n,0));
00881 
00882       nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00883     }
00884   nfft_free(psij_const);
00885 }
00886 
00887 void nfft_trafo_1d(nfft_plan *ths)
00888 {
00889   int k,n,N;
00890   double _Complex *g_hat1,*g_hat2,*f_hat1,*f_hat2;
00891   double *c_phi_inv1, *c_phi_inv2;
00892 
00893   ths->g_hat=ths->g1;
00894   ths->g=ths->g2;
00895 
00896   N=ths->N[0];
00897   n=ths->n[0];
00898 
00899   f_hat1=ths->f_hat;
00900   f_hat2=&ths->f_hat[N/2];
00901   g_hat1=&ths->g_hat[n-N/2];
00902   g_hat2=ths->g_hat;
00903 
00904   TIC(0)
00905   memset(ths->g_hat,0,ths->n_total*sizeof(double _Complex));
00906   if(ths->nfft_flags & PRE_PHI_HUT)
00907     {
00908       c_phi_inv1=ths->c_phi_inv[0];
00909       c_phi_inv2=&ths->c_phi_inv[0][N/2];
00910       for(k=0;k<N/2;k++)
00911   {
00912     (*g_hat1++) = (*f_hat1++) * (*c_phi_inv1++);
00913     (*g_hat2++) = (*f_hat2++) * (*c_phi_inv2++);
00914   }
00915     }
00916   else
00917     for(k=0;k<N/2;k++)
00918       {
00919   (*g_hat1++) = (*f_hat1++) / (PHI_HUT(k-N/2,0));
00920   (*g_hat2++) = (*f_hat2++) / (PHI_HUT(k,0));
00921       }
00922 
00923   TOC(0)
00924 
00925   TIC_FFTW(1)
00926   fftw_execute(ths->my_fftw_plan1);
00927   TOC_FFTW(1);
00928 
00929   TIC(2);
00930   nfft_trafo_1d_B(ths);
00931   TOC(2);
00932 }
00933 
00934 void nfft_adjoint_1d(nfft_plan *ths)
00935 {
00936   int k,n,N;
00937   double _Complex *g_hat1,*g_hat2,*f_hat1,*f_hat2;
00938   double *c_phi_inv1, *c_phi_inv2;
00939 
00940   N=ths->N[0];
00941   n=ths->n[0];
00942 
00943   ths->g_hat=ths->g1;
00944   ths->g=ths->g2;
00945 
00946   f_hat1=ths->f_hat;
00947   f_hat2=&ths->f_hat[N/2];
00948   g_hat1=&ths->g_hat[n-N/2];
00949   g_hat2=ths->g_hat;
00950 
00951   TIC(2)
00952   nfft_adjoint_1d_B(ths);
00953   TOC(0)
00954 
00955   TIC_FFTW(1)
00956   fftw_execute(ths->my_fftw_plan2);
00957   TOC_FFTW(1);
00958 
00959   TIC(0)
00960   if(ths->nfft_flags & PRE_PHI_HUT)
00961     {
00962       c_phi_inv1=ths->c_phi_inv[0];
00963       c_phi_inv2=&ths->c_phi_inv[0][N/2];
00964       for(k=0;k<N/2;k++)
00965   {
00966     (*f_hat1++) = (*g_hat1++) * (*c_phi_inv1++);
00967     (*f_hat2++) = (*g_hat2++) * (*c_phi_inv2++);
00968   }
00969     }
00970   else
00971     for(k=0;k<N/2;k++)
00972       {
00973   (*f_hat1++) = (*g_hat1++) / (PHI_HUT(k-N/2,0));
00974   (*f_hat2++) = (*g_hat2++) / (PHI_HUT(k,0));
00975       }
00976   TOC(0)
00977 }
00978 
00979 
00980 /* ############################################################ SPECIFIC VERSIONS FOR d=2 */
00981 
00982 static void nfft_2d_init_fg_exp_l(double *fg_exp_l, const int m, const double b)
00983 {
00984   int l;
00985   double fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
00986 
00987   fg_exp_b0 = exp(-1.0/b);
00988   fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
00989   fg_exp_b1 = 1.0;
00990   fg_exp_b2 = 1.0;
00991   fg_exp_l[0] = 1.0;
00992   for(l=1; l <= 2*m+1; l++)
00993     {
00994       fg_exp_b2 = fg_exp_b1*fg_exp_b0;
00995       fg_exp_b1 *= fg_exp_b0_sq;
00996       fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
00997     }
00998 }
00999 
01000 static void nfft_trafo_2d_compute(double _Complex *fj, const double _Complex *g,
01001           const double *psij_const0, const double *psij_const1,
01002           const double *xj0, const double *xj1,
01003           const int n0, const int n1, const int m)
01004 {
01005   int u0,o0,l0,u1,o1,l1;
01006   const double _Complex *gj;
01007   const double *psij0,*psij1;
01008 
01009   psij0=psij_const0;
01010   psij1=psij_const1;
01011 
01012   nfft_uo2(&u0,&o0,*xj0, n0, m);
01013   nfft_uo2(&u1,&o1,*xj1, n1, m);
01014 
01015   *fj=0;
01016 
01017   if(u0<o0)
01018       if(u1<o1)
01019     for(l0=0; l0<=2*m+1; l0++,psij0++)
01020     {
01021         psij1=psij_const1;
01022         gj=g+(u0+l0)*n1+u1;
01023         for(l1=0; l1<=2*m+1; l1++)
01024       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01025     }
01026       else
01027     for(l0=0; l0<=2*m+1; l0++,psij0++)
01028     {
01029         psij1=psij_const1;
01030         gj=g+(u0+l0)*n1+u1;
01031         for(l1=0; l1<2*m+1-o1; l1++)
01032       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01033         gj=g+(u0+l0)*n1;
01034         for(l1=0; l1<=o1; l1++)
01035       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01036     }
01037   else
01038       if(u1<o1)
01039       {
01040     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01041     {
01042         psij1=psij_const1;
01043         gj=g+(u0+l0)*n1+u1;
01044         for(l1=0; l1<=2*m+1; l1++)
01045       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01046     }
01047     for(l0=0; l0<=o0; l0++,psij0++)
01048     {
01049         psij1=psij_const1;
01050         gj=g+l0*n1+u1;
01051         for(l1=0; l1<=2*m+1; l1++)
01052       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01053     }
01054       }
01055       else
01056       {
01057     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01058     {
01059         psij1=psij_const1;
01060         gj=g+(u0+l0)*n1+u1;
01061         for(l1=0; l1<2*m+1-o1; l1++)
01062       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01063         gj=g+(u0+l0)*n1;
01064         for(l1=0; l1<=o1; l1++)
01065       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01066     }
01067     for(l0=0; l0<=o0; l0++,psij0++)
01068     {
01069         psij1=psij_const1;
01070         gj=g+l0*n1+u1;
01071         for(l1=0; l1<2*m+1-o1; l1++)
01072       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01073         gj=g+l0*n1;
01074         for(l1=0; l1<=o1; l1++)
01075       (*fj) += (*psij0) * (*psij1++) * (*gj++);
01076     }
01077       }
01078 }
01079 
01080 static void nfft_adjoint_2d_compute(const double _Complex *fj, double _Complex *g,
01081             const double *psij_const0, const double *psij_const1,
01082             const double *xj0, const double *xj1,
01083             const int n0, const int n1, const int m)
01084 {
01085   int u0,o0,l0,u1,o1,l1;
01086   double _Complex *gj;
01087   const double *psij0,*psij1;
01088 
01089   psij0=psij_const0;
01090   psij1=psij_const1;
01091 
01092   nfft_uo2(&u0,&o0,*xj0, n0, m);
01093   nfft_uo2(&u1,&o1,*xj1, n1, m);
01094 
01095   if(u0<o0)
01096       if(u1<o1)
01097     for(l0=0; l0<=2*m+1; l0++,psij0++)
01098     {
01099         psij1=psij_const1;
01100         gj=g+(u0+l0)*n1+u1;
01101         for(l1=0; l1<=2*m+1; l1++)
01102     (*gj++) += (*psij0) * (*psij1++) * (*fj);
01103     }
01104       else
01105     for(l0=0; l0<=2*m+1; l0++,psij0++)
01106     {
01107         psij1=psij_const1;
01108         gj=g+(u0+l0)*n1+u1;
01109         for(l1=0; l1<2*m+1-o1; l1++)
01110       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01111         gj=g+(u0+l0)*n1;
01112         for(l1=0; l1<=o1; l1++)
01113       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01114     }
01115   else
01116       if(u1<o1)
01117       {
01118     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01119     {
01120         psij1=psij_const1;
01121         gj=g+(u0+l0)*n1+u1;
01122         for(l1=0; l1<=2*m+1; l1++)
01123       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01124     }
01125     for(l0=0; l0<=o0; l0++,psij0++)
01126     {
01127         psij1=psij_const1;
01128         gj=g+l0*n1+u1;
01129         for(l1=0; l1<=2*m+1; l1++)
01130       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01131     }
01132       }
01133       else
01134       {
01135     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01136     {
01137         psij1=psij_const1;
01138         gj=g+(u0+l0)*n1+u1;
01139         for(l1=0; l1<2*m+1-o1; l1++)
01140       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01141         gj=g+(u0+l0)*n1;
01142         for(l1=0; l1<=o1; l1++)
01143       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01144     }
01145     for(l0=0; l0<=o0; l0++,psij0++)
01146     {
01147         psij1=psij_const1;
01148         gj=g+l0*n1+u1;
01149         for(l1=0; l1<2*m+1-o1; l1++)
01150       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01151         gj=g+l0*n1;
01152         for(l1=0; l1<=o1; l1++)
01153       (*gj++) += (*psij0) * (*psij1++) * (*fj);
01154     }
01155       }
01156 }
01157 
01158 static void nfft_trafo_2d_B(nfft_plan *ths)
01159 {
01160   int n0,N0,n1,N1,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
01161   double _Complex *fj,*g;
01162   double *psij, *psij_const, *xj, ip_y, ip_w;
01163 
01164   double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
01165 
01166   N0=ths->N[0];
01167   n0=ths->n[0];
01168   N1=ths->N[1];
01169   n1=ths->n[1];
01170   M=ths->M_total;
01171   m=ths->m;
01172 
01173   g=ths->g;
01174 
01175   if(ths->nfft_flags & PRE_FULL_PSI)
01176     {
01177       psi_index_g=ths->psi_index_g;
01178       for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
01179         for(l=1, (*fj)=(*psij++) * g[(*psi_index_g++)]; l<(2*m+2)*(2*m+2); l++)
01180     (*fj) += (*psij++) * g[(*psi_index_g++)];
01181       return;
01182     } /* if(PRE_FULL_PSI) */
01183 
01184   if(ths->nfft_flags & PRE_PSI)
01185     {
01186       for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=2)
01187   nfft_trafo_2d_compute(fj, g, ths->psi+j*2*(2*m+2), ths->psi+(j*2+1)*(2*m+2), xj, xj+1, n0, n1, m);
01188       return;
01189     } /* if(PRE_PSI) */
01190 
01191   if(ths->nfft_flags & PRE_FG_PSI)
01192     {
01193       psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01194       fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01195 
01196       nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01197       nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01198 
01199       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01200   {
01201     fg_psij0 = ths->psi[2*j*2];
01202     fg_psij1 = ths->psi[2*j*2+1];
01203     fg_psij2 = 1.0;
01204     psij_const[0] = fg_psij0;
01205     for(l=1; l<=2*m+1; l++)
01206       {
01207         fg_psij2 *= fg_psij1;
01208         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01209       }
01210 
01211     fg_psij0 = ths->psi[2*(j*2+1)];
01212     fg_psij1 = ths->psi[2*(j*2+1)+1];
01213     fg_psij2 = 1.0;
01214     psij_const[2*m+2] = fg_psij0;
01215     for(l=1; l<=2*m+1; l++)
01216       {
01217         fg_psij2 *= fg_psij1;
01218         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01219       }
01220 
01221     nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01222   }
01223       nfft_free(fg_exp_l);
01224       nfft_free(psij_const);
01225       return;
01226     } /* if(PRE_FG_PSI) */
01227 
01228   if(ths->nfft_flags & FG_PSI)
01229     {
01230       psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01231       fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01232 
01233       nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01234       nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01235 
01236       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01237   {
01238     nfft_uo(ths,j,&u,&o,0);
01239     fg_psij0 = (PHI(*xj-((double)u)/n0,0));
01240     fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
01241     fg_psij2 = 1.0;
01242     psij_const[0] = fg_psij0;
01243     for(l=1; l<=2*m+1; l++)
01244       {
01245         fg_psij2 *= fg_psij1;
01246         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01247       }
01248 
01249     nfft_uo(ths,j,&u,&o,1);
01250     fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
01251     fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
01252     fg_psij2 = 1.0;
01253     psij_const[2*m+2] = fg_psij0;
01254     for(l=1; l<=2*m+1; l++)
01255       {
01256         fg_psij2 *= fg_psij1;
01257         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01258       }
01259 
01260     nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01261   }
01262       nfft_free(fg_exp_l);
01263       nfft_free(psij_const);
01264       return;
01265     } /* if(FG_PSI) */
01266 
01267   if(ths->nfft_flags & PRE_LIN_PSI)
01268     {
01269       psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01270       K=ths->K;
01271       ip_s=K/(m+1);
01272 
01273       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01274   {
01275     nfft_uo(ths,j,&u,&o,0);
01276     ip_y = fabs(n0*(*(xj+0)) - u)*ip_s;
01277     ip_u = LRINT(floor(ip_y));
01278     ip_w = ip_y-ip_u;
01279     for(l=0; l < 2*m+2; l++)
01280       psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) + ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
01281 
01282     nfft_uo(ths,j,&u,&o,1);
01283     ip_y = fabs(n1*(*(xj+1)) - u)*ip_s;
01284     ip_u = LRINT(floor(ip_y));
01285     ip_w = ip_y-ip_u;
01286     for(l=0; l < 2*m+2; l++)
01287       psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) + ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
01288 
01289     nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01290   }
01291       nfft_free(psij_const);
01292       return;
01293     } /* if(PRE_LIN_PSI) */
01294 
01295   /* no precomputed psi at all */
01296   psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01297   for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01298     {
01299       nfft_uo(ths,j,&u,&o,0);
01300       for(l=0;l<=2*m+1;l++)
01301   psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
01302 
01303       nfft_uo(ths,j,&u,&o,1);
01304       for(l=0;l<=2*m+1;l++)
01305   psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
01306 
01307       nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01308     }
01309   nfft_free(psij_const);
01310 }
01311 
01312 static void nfft_adjoint_2d_B(nfft_plan *ths)
01313 {
01314   int n0,N0,n1,N1,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
01315   double _Complex *fj,*g;
01316   double *psij, *psij_const, *xj ,ip_y, ip_w;
01317 
01318   double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
01319 
01320   N0=ths->N[0];
01321   n0=ths->n[0];
01322   N1=ths->N[1];
01323   n1=ths->n[1];
01324   M=ths->M_total;
01325   m=ths->m;
01326 
01327   g=ths->g;
01328   memset(g,0,ths->n_total*sizeof(double _Complex));
01329 
01330   if(ths->nfft_flags & PRE_FULL_PSI)
01331     {
01332       psi_index_g=ths->psi_index_g;
01333       for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
01334         for(l=1, g[(*psi_index_g++)]=(*psij++) * (*fj); l<(2*m+2)*(2*m+2); l++)
01335     g[(*psi_index_g++)] += (*psij++) * (*fj);
01336       return;
01337     } /* if(PRE_FULL_PSI) */
01338 
01339   if(ths->nfft_flags & PRE_PSI)
01340     {
01341       for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=2)
01342   nfft_adjoint_2d_compute(fj, g, ths->psi+j*2*(2*m+2), ths->psi+(j*2+1)*(2*m+2), xj, xj+1, n0, n1, m);
01343       return;
01344     } /* if(PRE_PSI) */
01345 
01346   if(ths->nfft_flags & PRE_FG_PSI)
01347     {
01348       psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01349       fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01350 
01351       nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01352       nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01353 
01354       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01355   {
01356     fg_psij0 = ths->psi[2*j*2];
01357     fg_psij1 = ths->psi[2*j*2+1];
01358     fg_psij2 = 1.0;
01359     psij_const[0] = fg_psij0;
01360     for(l=1; l<=2*m+1; l++)
01361       {
01362         fg_psij2 *= fg_psij1;
01363         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01364       }
01365 
01366     fg_psij0 = ths->psi[2*(j*2+1)];
01367     fg_psij1 = ths->psi[2*(j*2+1)+1];
01368     fg_psij2 = 1.0;
01369     psij_const[2*m+2] = fg_psij0;
01370     for(l=1; l<=2*m+1; l++)
01371       {
01372         fg_psij2 *= fg_psij1;
01373         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01374       }
01375 
01376     nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01377   }
01378       nfft_free(fg_exp_l);
01379       nfft_free(psij_const);
01380       return;
01381     } /* if(PRE_FG_PSI) */
01382 
01383   if(ths->nfft_flags & FG_PSI)
01384     {
01385       psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01386       fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01387 
01388       nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01389       nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01390 
01391       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01392   {
01393     nfft_uo(ths,j,&u,&o,0);
01394     fg_psij0 = (PHI(*xj-((double)u)/n0,0));
01395     fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
01396     fg_psij2 = 1.0;
01397     psij_const[0] = fg_psij0;
01398     for(l=1; l<=2*m+1; l++)
01399       {
01400         fg_psij2 *= fg_psij1;
01401         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01402       }
01403 
01404     nfft_uo(ths,j,&u,&o,1);
01405     fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
01406     fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
01407     fg_psij2 = 1.0;
01408     psij_const[2*m+2] = fg_psij0;
01409     for(l=1; l<=2*m+1; l++)
01410       {
01411         fg_psij2 *= fg_psij1;
01412         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01413       }
01414 
01415     nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01416   }
01417       nfft_free(fg_exp_l);
01418       nfft_free(psij_const);
01419       return;
01420     } /* if(FG_PSI) */
01421 
01422   if(ths->nfft_flags & PRE_LIN_PSI)
01423     {
01424       psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01425       K=ths->K;
01426       ip_s=K/(m+1);
01427 
01428       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01429   {
01430     nfft_uo(ths,j,&u,&o,0);
01431     ip_y = fabs(n0*(*(xj+0)) - u)*((double)K)/(m+1);
01432     ip_u = LRINT(floor(ip_y));
01433     ip_w = ip_y-ip_u;
01434     for(l=0; l < 2*m+2; l++)
01435       psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
01436         ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
01437 
01438     nfft_uo(ths,j,&u,&o,1);
01439     ip_y = fabs(n1*(*(xj+1)) - u)*((double)K)/(m+1);
01440     ip_u = LRINT(floor(ip_y));
01441     ip_w = ip_y-ip_u;
01442     for(l=0; l < 2*m+2; l++)
01443       psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
01444         ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
01445 
01446     nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01447   }
01448       nfft_free(psij_const);
01449       return;
01450     } /* if(PRE_LIN_PSI) */
01451 
01452   /* no precomputed psi at all */
01453   psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01454   for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01455     {
01456       nfft_uo(ths,j,&u,&o,0);
01457       for(l=0;l<=2*m+1;l++)
01458   psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
01459 
01460       nfft_uo(ths,j,&u,&o,1);
01461       for(l=0;l<=2*m+1;l++)
01462   psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
01463 
01464       nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01465     }
01466   nfft_free(psij_const);
01467 }
01468 
01469 
01470 void nfft_trafo_2d(nfft_plan *ths)
01471 {
01472   int k0,k1,n0,n1,N0,N1;
01473   double _Complex *g_hat,*f_hat;
01474   double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12;
01475   double ck01, ck02, ck11, ck12;
01476   double _Complex *g_hat11,*f_hat11,*g_hat21,*f_hat21,*g_hat12,*f_hat12,*g_hat22,*f_hat22;
01477 
01478   ths->g_hat=ths->g1;
01479   ths->g=ths->g2;
01480 
01481   N0=ths->N[0];
01482   N1=ths->N[1];
01483   n0=ths->n[0];
01484   n1=ths->n[1];
01485 
01486   f_hat=ths->f_hat;
01487   g_hat=ths->g_hat;
01488 
01489   TIC(0)
01490   memset(ths->g_hat,0,ths->n_total*sizeof(double _Complex));
01491   if(ths->nfft_flags & PRE_PHI_HUT)
01492     {
01493       c_phi_inv01=ths->c_phi_inv[0];
01494       c_phi_inv02=&ths->c_phi_inv[0][N0/2];
01495 
01496       for(k0=0;k0<N0/2;k0++)
01497   {
01498     ck01=(*c_phi_inv01++);
01499     ck02=(*c_phi_inv02++);
01500 
01501     c_phi_inv11=ths->c_phi_inv[1];
01502     c_phi_inv12=&ths->c_phi_inv[1][N1/2];
01503 
01504     g_hat11=g_hat + (n0-N0/2+k0)*n1+n1-N1/2;
01505     f_hat11=f_hat + k0*N1;
01506           g_hat21=g_hat + k0*n1+n1-N1/2;
01507           f_hat21=f_hat + (N0/2+k0)*N1;
01508           g_hat12=g_hat + (n0-N0/2+k0)*n1;
01509           f_hat12=f_hat + k0*N1+N1/2;
01510     g_hat22=g_hat + k0*n1;
01511     f_hat22=f_hat + (N0/2+k0)*N1+N1/2;
01512     for(k1=0;k1<N1/2;k1++)
01513       {
01514         ck11=(*c_phi_inv11++);
01515         ck12=(*c_phi_inv12++);
01516 
01517         (*g_hat11++) = (*f_hat11++) * ck01 * ck11;
01518         (*g_hat21++) = (*f_hat21++) * ck02 * ck11;
01519         (*g_hat12++) = (*f_hat12++) * ck01 * ck12;
01520         (*g_hat22++) = (*f_hat22++) * ck02 * ck12;
01521       }
01522   }
01523     }
01524   else
01525     for(k0=0;k0<N0/2;k0++)
01526       {
01527   ck01=1./(PHI_HUT(k0-N0/2,0));
01528   ck02=1./(PHI_HUT(k0,0));
01529   for(k1=0;k1<N1/2;k1++)
01530     {
01531       ck11=1./(PHI_HUT(k1-N1/2,1));
01532       ck12=1./(PHI_HUT(k1,1));
01533       g_hat[(n0-N0/2+k0)*n1+n1-N1/2+k1] = f_hat[k0*N1+k1]             * ck01 * ck11;
01534       g_hat[k0*n1+n1-N1/2+k1]           = f_hat[(N0/2+k0)*N1+k1]      * ck02 * ck11;
01535       g_hat[(n0-N0/2+k0)*n1+k1]         = f_hat[k0*N1+N1/2+k1]        * ck01 * ck12;
01536       g_hat[k0*n1+k1]                   = f_hat[(N0/2+k0)*N1+N1/2+k1] * ck02 * ck12;
01537     }
01538       }
01539 
01540   TOC(0)
01541 
01542   TIC_FFTW(1)
01543   fftw_execute(ths->my_fftw_plan1);
01544   TOC_FFTW(1);
01545 
01546   TIC(2);
01547   nfft_trafo_2d_B(ths);
01548   TOC(2);
01549 }
01550 
01551 void nfft_adjoint_2d(nfft_plan *ths)
01552 {
01553   int k0,k1,n0,n1,N0,N1;
01554   double _Complex *g_hat,*f_hat;
01555   double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12;
01556   double ck01, ck02, ck11, ck12;
01557   double _Complex *g_hat11,*f_hat11,*g_hat21,*f_hat21,*g_hat12,*f_hat12,*g_hat22,*f_hat22;
01558 
01559   ths->g_hat=ths->g1;
01560   ths->g=ths->g2;
01561 
01562   N0=ths->N[0];
01563   N1=ths->N[1];
01564   n0=ths->n[0];
01565   n1=ths->n[1];
01566 
01567   f_hat=ths->f_hat;
01568   g_hat=ths->g_hat;
01569 
01570   TIC(2);
01571   nfft_adjoint_2d_B(ths);
01572   TOC(2);
01573 
01574   TIC_FFTW(1)
01575   fftw_execute(ths->my_fftw_plan2);
01576   TOC_FFTW(1);
01577 
01578   TIC(0)
01579   if(ths->nfft_flags & PRE_PHI_HUT)
01580     {
01581       c_phi_inv01=ths->c_phi_inv[0];
01582       c_phi_inv02=&ths->c_phi_inv[0][N0/2];
01583   
01584       for(k0=0;k0<N0/2;k0++)
01585   {
01586     ck01=(*c_phi_inv01++);
01587     ck02=(*c_phi_inv02++);
01588 
01589     c_phi_inv11=ths->c_phi_inv[1];
01590     c_phi_inv12=&ths->c_phi_inv[1][N1/2];
01591     g_hat11=g_hat + (n0-N0/2+k0)*n1+n1-N1/2;
01592     f_hat11=f_hat + k0*N1;
01593           g_hat21=g_hat + k0*n1+n1-N1/2;
01594           f_hat21=f_hat + (N0/2+k0)*N1;
01595           g_hat12=g_hat + (n0-N0/2+k0)*n1;
01596           f_hat12=f_hat + k0*N1+N1/2;
01597     g_hat22=g_hat + k0*n1;
01598     f_hat22=f_hat + (N0/2+k0)*N1+N1/2;
01599     for(k1=0;k1<N1/2;k1++)
01600       {
01601         ck11=(*c_phi_inv11++);
01602         ck12=(*c_phi_inv12++);
01603 
01604         (*f_hat11++) = (*g_hat11++) * ck01 * ck11;
01605         (*f_hat21++) = (*g_hat21++) * ck02 * ck11;
01606         (*f_hat12++) = (*g_hat12++) * ck01 * ck12;
01607         (*f_hat22++) = (*g_hat22++) * ck02 * ck12;
01608       }
01609   }
01610     }
01611   else
01612     for(k0=0;k0<N0/2;k0++)
01613       {
01614   ck01=1./(PHI_HUT(k0-N0/2,0));
01615   ck02=1./(PHI_HUT(k0,0));
01616   for(k1=0;k1<N1/2;k1++)
01617     {
01618       ck11=1./(PHI_HUT(k1-N1/2,1));
01619       ck12=1./(PHI_HUT(k1,1));
01620       f_hat[k0*N1+k1]             = g_hat[(n0-N0/2+k0)*n1+n1-N1/2+k1] * ck01 * ck11;
01621       f_hat[(N0/2+k0)*N1+k1]      = g_hat[k0*n1+n1-N1/2+k1]           * ck02 * ck11;
01622       f_hat[k0*N1+N1/2+k1]        = g_hat[(n0-N0/2+k0)*n1+k1]         * ck01 * ck12;
01623       f_hat[(N0/2+k0)*N1+N1/2+k1] = g_hat[k0*n1+k1]                   * ck02 * ck12;
01624     }
01625       }
01626   TOC(0)
01627 }
01628 
01629 /* ############################################################ SPECIFIC VERSIONS FOR d=3 */
01630 
01631 static void nfft_3d_init_fg_exp_l(double *fg_exp_l, const int m, const double b)
01632 {
01633   int l;
01634   double fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
01635 
01636   fg_exp_b0 = exp(-1.0/b);
01637   fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
01638   fg_exp_b1 = 1.0;
01639   fg_exp_b2 = 1.0;
01640   fg_exp_l[0] = 1.0;
01641   for(l=1; l <= 2*m+1; l++)
01642     {
01643       fg_exp_b2 = fg_exp_b1*fg_exp_b0;
01644       fg_exp_b1 *= fg_exp_b0_sq;
01645       fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
01646     }
01647 }
01648 
01649 static void nfft_trafo_3d_compute(double _Complex *fj, const double _Complex *g,
01650           const double *psij_const0, const double *psij_const1, const double *psij_const2,
01651           const double *xj0, const double *xj1, const double *xj2,
01652           const int n0, const int n1, const int n2, const int m)
01653 {
01654   int u0,o0,l0,u1,o1,l1,u2,o2,l2;
01655   const double _Complex *gj;
01656   const double *psij0,*psij1,*psij2;
01657 
01658   psij0=psij_const0;
01659   psij1=psij_const1;
01660   psij2=psij_const2;
01661 
01662   nfft_uo2(&u0,&o0,*xj0, n0, m);
01663   nfft_uo2(&u1,&o1,*xj1, n1, m);
01664   nfft_uo2(&u2,&o2,*xj2, n2, m);
01665 
01666   *fj=0;
01667 
01668   if(u0<o0)
01669     if(u1<o1)
01670       if(u2<o2)
01671   for(l0=0; l0<=2*m+1; l0++,psij0++)
01672     {
01673       psij1=psij_const1;
01674       for(l1=0; l1<=2*m+1; l1++,psij1++)
01675         {
01676     psij2=psij_const2;
01677     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01678     for(l2=0; l2<=2*m+1; l2++)
01679       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01680         }
01681     }
01682       else/* asserts (u2>o2)*/
01683   for(l0=0; l0<=2*m+1; l0++,psij0++)
01684     {
01685       psij1=psij_const1;
01686       for(l1=0; l1<=2*m+1; l1++,psij1++)
01687         {
01688     psij2=psij_const2;
01689     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01690     for(l2=0; l2<2*m+1-o2; l2++)
01691       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01692     gj=g+((u0+l0)*n1+(u1+l1))*n2;
01693     for(l2=0; l2<=o2; l2++)
01694       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01695         }
01696     }
01697     else/* asserts (u1>o1)*/
01698       if(u2<o2)
01699   for(l0=0; l0<=2*m+1; l0++,psij0++)
01700     {
01701       psij1=psij_const1;
01702       for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01703         {
01704     psij2=psij_const2;
01705     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01706     for(l2=0; l2<=2*m+1; l2++)
01707       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01708         }
01709       for(l1=0; l1<=o1; l1++,psij1++)
01710         {
01711     psij2=psij_const2;
01712     gj=g+((u0+l0)*n1+l1)*n2+u2;
01713     for(l2=0; l2<=2*m+1; l2++)
01714       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01715         }
01716     }
01717       else/* asserts (u2>o2) */
01718   {
01719     for(l0=0; l0<=2*m+1; l0++,psij0++)
01720       {
01721         psij1=psij_const1;
01722         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01723     {
01724       psij2=psij_const2;
01725       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01726       for(l2=0; l2<2*m+1-o2; l2++)
01727         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01728       gj=g+((u0+l0)*n1+(u1+l1))*n2;
01729       for(l2=0; l2<=o2; l2++)
01730         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01731     }
01732         for(l1=0; l1<=o1; l1++,psij1++)
01733     {
01734       psij2=psij_const2;
01735       gj=g+((u0+l0)*n1+l1)*n2+u2;
01736       for(l2=0; l2<2*m+1-o2; l2++)
01737         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01738       gj=g+((u0+l0)*n1+l1)*n2;
01739       for(l2=0; l2<=o2; l2++)
01740         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01741     }
01742       }
01743   }
01744   else/* asserts (u0>o0) */
01745     if(u1<o1)
01746       if(u2<o2)
01747   {
01748     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01749       {
01750         psij1=psij_const1;
01751         for(l1=0; l1<=2*m+1; l1++,psij1++)
01752     {
01753       psij2=psij_const2;
01754       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01755       for(l2=0; l2<=2*m+1; l2++)
01756         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01757     }
01758       }
01759 
01760     for(l0=0; l0<=o0; l0++,psij0++)
01761       {
01762         psij1=psij_const1;
01763         for(l1=0; l1<=2*m+1; l1++,psij1++)
01764     {
01765       psij2=psij_const2;
01766       gj=g+(l0*n1+(u1+l1))*n2+u2;
01767       for(l2=0; l2<=2*m+1; l2++)
01768         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01769     }
01770       }
01771   }
01772       else/* asserts (u2>o2) */
01773   {
01774     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01775       {
01776         psij1=psij_const1;
01777         for(l1=0; l1<=2*m+1; l1++,psij1++)
01778     {
01779       psij2=psij_const2;
01780       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01781       for(l2=0; l2<2*m+1-o2; l2++)
01782         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01783       gj=g+((u0+l0)*n1+(u1+l1))*n2;
01784       for(l2=0; l2<=o2; l2++)
01785         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01786     }
01787       }
01788 
01789     for(l0=0; l0<=o0; l0++,psij0++)
01790       {
01791         psij1=psij_const1;
01792         for(l1=0; l1<=2*m+1; l1++,psij1++)
01793     {
01794       psij2=psij_const2;
01795       gj=g+(l0*n1+(u1+l1))*n2+u2;
01796       for(l2=0; l2<2*m+1-o2; l2++)
01797         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01798       gj=g+(l0*n1+(u1+l1))*n2;
01799       for(l2=0; l2<=o2; l2++)
01800         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01801     }
01802       }
01803   }
01804     else/* asserts (u1>o1) */
01805       if(u2<o2)
01806   {
01807     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01808       {
01809         psij1=psij_const1;
01810         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01811     {
01812       psij2=psij_const2;
01813       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01814       for(l2=0; l2<=2*m+1; l2++)
01815         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01816     }
01817         for(l1=0; l1<=o1; l1++,psij1++)
01818     {
01819       psij2=psij_const2;
01820       gj=g+((u0+l0)*n1+l1)*n2+u2;
01821       for(l2=0; l2<=2*m+1; l2++)
01822         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01823     }
01824       }
01825     for(l0=0; l0<=o0; l0++,psij0++)
01826       {
01827         psij1=psij_const1;
01828         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01829     {
01830       psij2=psij_const2;
01831       gj=g+(l0*n1+(u1+l1))*n2+u2;
01832       for(l2=0; l2<=2*m+1; l2++)
01833         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01834     }
01835         for(l1=0; l1<=o1; l1++,psij1++)
01836     {
01837       psij2=psij_const2;
01838       gj=g+(l0*n1+l1)*n2+u2;
01839       for(l2=0; l2<=2*m+1; l2++)
01840         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01841     }
01842       }
01843   }
01844       else/* asserts (u2>o2) */
01845   {
01846     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01847       {
01848         psij1=psij_const1;
01849         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01850     {
01851       psij2=psij_const2;
01852       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01853       for(l2=0; l2<2*m+1-o2; l2++)
01854         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01855       gj=g+((u0+l0)*n1+(u1+l1))*n2;
01856       for(l2=0; l2<=o2; l2++)
01857         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01858     }
01859         for(l1=0; l1<=o1; l1++,psij1++)
01860     {
01861       psij2=psij_const2;
01862       gj=g+((u0+l0)*n1+l1)*n2+u2;
01863       for(l2=0; l2<2*m+1-o2; l2++)
01864         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01865       gj=g+((u0+l0)*n1+l1)*n2;
01866       for(l2=0; l2<=o2; l2++)
01867         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01868     }
01869       }
01870 
01871     for(l0=0; l0<=o0; l0++,psij0++)
01872       {
01873         psij1=psij_const1;
01874         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01875     {
01876       psij2=psij_const2;
01877       gj=g+(l0*n1+(u1+l1))*n2+u2;
01878       for(l2=0; l2<2*m+1-o2; l2++)
01879         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01880       gj=g+(l0*n1+(u1+l1))*n2;
01881       for(l2=0; l2<=o2; l2++)
01882         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01883     }
01884         for(l1=0; l1<=o1; l1++,psij1++)
01885     {
01886       psij2=psij_const2;
01887       gj=g+(l0*n1+l1)*n2+u2;
01888       for(l2=0; l2<2*m+1-o2; l2++)
01889         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01890       gj=g+(l0*n1+l1)*n2;
01891       for(l2=0; l2<=o2; l2++)
01892         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01893     }
01894       }
01895   }
01896 }
01897 
01898 static void nfft_adjoint_3d_compute(const double _Complex *fj, double _Complex *g,
01899             const double *psij_const0, const double *psij_const1, const double *psij_const2,
01900             const double *xj0, const double *xj1, const double *xj2,
01901             const int n0, const int n1, const int n2, const int m)
01902 {
01903   int u0,o0,l0,u1,o1,l1,u2,o2,l2;
01904   double _Complex *gj;
01905   const double *psij0,*psij1,*psij2;
01906 
01907   psij0=psij_const0;
01908   psij1=psij_const1;
01909   psij2=psij_const2;
01910 
01911   nfft_uo2(&u0,&o0,*xj0, n0, m);
01912   nfft_uo2(&u1,&o1,*xj1, n1, m);
01913   nfft_uo2(&u2,&o2,*xj2, n2, m);
01914 
01915   if(u0<o0)
01916     if(u1<o1)
01917       if(u2<o2)
01918   for(l0=0; l0<=2*m+1; l0++,psij0++)
01919     {
01920       psij1=psij_const1;
01921       for(l1=0; l1<=2*m+1; l1++,psij1++)
01922         {
01923     psij2=psij_const2;
01924     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01925     for(l2=0; l2<=2*m+1; l2++)
01926       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01927         }
01928     }
01929       else/* asserts (u2>o2)*/
01930   for(l0=0; l0<=2*m+1; l0++,psij0++)
01931     {
01932       psij1=psij_const1;
01933       for(l1=0; l1<=2*m+1; l1++,psij1++)
01934         {
01935     psij2=psij_const2;
01936     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01937     for(l2=0; l2<2*m+1-o2; l2++)
01938       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01939     gj=g+((u0+l0)*n1+(u1+l1))*n2;
01940     for(l2=0; l2<=o2; l2++)
01941       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01942         }
01943     }
01944     else/* asserts (u1>o1)*/
01945       if(u2<o2)
01946   for(l0=0; l0<=2*m+1; l0++,psij0++)
01947     {
01948       psij1=psij_const1;
01949       for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01950         {
01951     psij2=psij_const2;
01952     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01953     for(l2=0; l2<=2*m+1; l2++)
01954       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01955         }
01956       for(l1=0; l1<=o1; l1++,psij1++)
01957         {
01958     psij2=psij_const2;
01959     gj=g+((u0+l0)*n1+l1)*n2+u2;
01960     for(l2=0; l2<=2*m+1; l2++)
01961       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01962         }
01963     }
01964       else/* asserts (u2>o2) */
01965   {
01966     for(l0=0; l0<=2*m+1; l0++,psij0++)
01967       {
01968         psij1=psij_const1;
01969         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01970     {
01971       psij2=psij_const2;
01972       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01973       for(l2=0; l2<2*m+1-o2; l2++)
01974         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01975       gj=g+((u0+l0)*n1+(u1+l1))*n2;
01976       for(l2=0; l2<=o2; l2++)
01977         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01978     }
01979         for(l1=0; l1<=o1; l1++,psij1++)
01980     {
01981       psij2=psij_const2;
01982       gj=g+((u0+l0)*n1+l1)*n2+u2;
01983       for(l2=0; l2<2*m+1-o2; l2++)
01984         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01985       gj=g+((u0+l0)*n1+l1)*n2;
01986       for(l2=0; l2<=o2; l2++)
01987         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01988     }
01989       }
01990   }
01991   else/* asserts (u0>o0) */
01992     if(u1<o1)
01993       if(u2<o2)
01994   {
01995     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01996       {
01997         psij1=psij_const1;
01998         for(l1=0; l1<=2*m+1; l1++,psij1++)
01999     {
02000       psij2=psij_const2;
02001       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02002       for(l2=0; l2<=2*m+1; l2++)
02003         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02004     }
02005       }
02006 
02007     for(l0=0; l0<=o0; l0++,psij0++)
02008       {
02009         psij1=psij_const1;
02010         for(l1=0; l1<=2*m+1; l1++,psij1++)
02011     {
02012       psij2=psij_const2;
02013       gj=g+(l0*n1+(u1+l1))*n2+u2;
02014       for(l2=0; l2<=2*m+1; l2++)
02015         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02016     }
02017       }
02018   }
02019       else/* asserts (u2>o2) */
02020   {
02021     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
02022       {
02023         psij1=psij_const1;
02024         for(l1=0; l1<=2*m+1; l1++,psij1++)
02025     {
02026       psij2=psij_const2;
02027       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02028       for(l2=0; l2<2*m+1-o2; l2++)
02029         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02030       gj=g+((u0+l0)*n1+(u1+l1))*n2;
02031       for(l2=0; l2<=o2; l2++)
02032         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02033     }
02034       }
02035 
02036     for(l0=0; l0<=o0; l0++,psij0++)
02037       {
02038         psij1=psij_const1;
02039         for(l1=0; l1<=2*m+1; l1++,psij1++)
02040     {
02041       psij2=psij_const2;
02042       gj=g+(l0*n1+(u1+l1))*n2+u2;
02043       for(l2=0; l2<2*m+1-o2; l2++)
02044         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02045       gj=g+(l0*n1+(u1+l1))*n2;
02046       for(l2=0; l2<=o2; l2++)
02047         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02048     }
02049       }
02050   }
02051     else/* asserts (u1>o1) */
02052       if(u2<o2)
02053   {
02054     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
02055       {
02056         psij1=psij_const1;
02057         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02058     {
02059       psij2=psij_const2;
02060       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02061       for(l2=0; l2<=2*m+1; l2++)
02062         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02063     }
02064         for(l1=0; l1<=o1; l1++,psij1++)
02065     {
02066       psij2=psij_const2;
02067       gj=g+((u0+l0)*n1+l1)*n2+u2;
02068       for(l2=0; l2<=2*m+1; l2++)
02069         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02070     }
02071       }
02072     for(l0=0; l0<=o0; l0++,psij0++)
02073       {
02074         psij1=psij_const1;
02075         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02076     {
02077       psij2=psij_const2;
02078       gj=g+(l0*n1+(u1+l1))*n2+u2;
02079       for(l2=0; l2<=2*m+1; l2++)
02080         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02081     }
02082         for(l1=0; l1<=o1; l1++,psij1++)
02083     {
02084       psij2=psij_const2;
02085       gj=g+(l0*n1+l1)*n2+u2;
02086       for(l2=0; l2<=2*m+1; l2++)
02087         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02088     }
02089       }
02090   }
02091       else/* asserts (u2>o2) */
02092   {
02093     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
02094       {
02095         psij1=psij_const1;
02096         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02097     {
02098       psij2=psij_const2;
02099       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02100       for(l2=0; l2<2*m+1-o2; l2++)
02101         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02102       gj=g+((u0+l0)*n1+(u1+l1))*n2;
02103       for(l2=0; l2<=o2; l2++)
02104         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02105     }
02106         for(l1=0; l1<=o1; l1++,psij1++)
02107     {
02108       psij2=psij_const2;
02109       gj=g+((u0+l0)*n1+l1)*n2+u2;
02110       for(l2=0; l2<2*m+1-o2; l2++)
02111         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02112       gj=g+((u0+l0)*n1+l1)*n2;
02113       for(l2=0; l2<=o2; l2++)
02114         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02115     }
02116       }
02117 
02118     for(l0=0; l0<=o0; l0++,psij0++)
02119       {
02120         psij1=psij_const1;
02121         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02122     {
02123       psij2=psij_const2;
02124       gj=g+(l0*n1+(u1+l1))*n2+u2;
02125       for(l2=0; l2<2*m+1-o2; l2++)
02126         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02127       gj=g+(l0*n1+(u1+l1))*n2;
02128       for(l2=0; l2<=o2; l2++)
02129         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02130     }
02131         for(l1=0; l1<=o1; l1++,psij1++)
02132     {
02133       psij2=psij_const2;
02134       gj=g+(l0*n1+l1)*n2+u2;
02135       for(l2=0; l2<2*m+1-o2; l2++)
02136         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02137       gj=g+(l0*n1+l1)*n2;
02138       for(l2=0; l2<=o2; l2++)
02139         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02140     }
02141       }
02142   }
02143 }
02144 
02145 
02146 static void nfft_trafo_3d_B(nfft_plan *ths)
02147 {
02148   int n0,N0,n1,N1,n2,N2,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
02149   double _Complex *fj,*g;
02150   double *psij, *psij_const, *xj, ip_y, ip_w;
02151 
02152   double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
02153 
02154   N0=ths->N[0];
02155   n0=ths->n[0];
02156   N1=ths->N[1];
02157   n1=ths->n[1];
02158   N2=ths->N[2];
02159   n2=ths->n[2];
02160   M=ths->M_total;
02161   m=ths->m;
02162 
02163   g=ths->g;
02164 
02165   if(ths->nfft_flags & PRE_FULL_PSI)
02166     {
02167       psi_index_g=ths->psi_index_g;
02168       for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
02169         for(l=1, (*fj)=(*psij++) * g[(*psi_index_g++)]; l<(2*m+2)*(2*m+2)*(2*m+2); l++)
02170     (*fj) += (*psij++) * g[(*psi_index_g++)];
02171       return;
02172     } /* if(PRE_FULL_PSI) */
02173 
02174   if(ths->nfft_flags & PRE_PSI)
02175     {
02176       for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=3)
02177   nfft_trafo_3d_compute(fj, g, ths->psi+j*3*(2*m+2), ths->psi+(j*3+1)*(2*m+2), ths->psi+(j*3+2)*(2*m+2), xj, xj+1, xj+2, n0, n1, n2, m);
02178       return;
02179     } /* if(PRE_PSI) */
02180 
02181   if(ths->nfft_flags & PRE_FG_PSI)
02182     {
02183       psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02184       fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02185 
02186       nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02187       nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02188       nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02189 
02190       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02191   {
02192     fg_psij0 = ths->psi[2*j*3];
02193     fg_psij1 = ths->psi[2*j*3+1];
02194     fg_psij2 = 1.0;
02195     psij_const[0] = fg_psij0;
02196     for(l=1; l<=2*m+1; l++)
02197       {
02198         fg_psij2 *= fg_psij1;
02199         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02200       }
02201 
02202     fg_psij0 = ths->psi[2*(j*3+1)];
02203     fg_psij1 = ths->psi[2*(j*3+1)+1];
02204     fg_psij2 = 1.0;
02205     psij_const[2*m+2] = fg_psij0;
02206     for(l=1; l<=2*m+1; l++)
02207       {
02208         fg_psij2 *= fg_psij1;
02209         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02210       }
02211 
02212     fg_psij0 = ths->psi[2*(j*3+2)];
02213     fg_psij1 = ths->psi[2*(j*3+2)+1];
02214     fg_psij2 = 1.0;
02215     psij_const[2*(2*m+2)] = fg_psij0;
02216     for(l=1; l<=2*m+1; l++)
02217       {
02218         fg_psij2 *= fg_psij1;
02219         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02220       }
02221 
02222     nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02223   }
02224       nfft_free(fg_exp_l);
02225       nfft_free(psij_const);
02226       return;
02227     } /* if(PRE_FG_PSI) */
02228 
02229   if(ths->nfft_flags & FG_PSI)
02230     {
02231       psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02232       fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02233 
02234       nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02235       nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02236       nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02237 
02238       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02239   {
02240     nfft_uo(ths,j,&u,&o,0);
02241     fg_psij0 = (PHI(*xj-((double)u)/n0,0));
02242     fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
02243     fg_psij2 = 1.0;
02244     psij_const[0] = fg_psij0;
02245     for(l=1; l<=2*m+1; l++)
02246       {
02247         fg_psij2 *= fg_psij1;
02248         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02249       }
02250 
02251     nfft_uo(ths,j,&u,&o,1);
02252     fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
02253     fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
02254     fg_psij2 = 1.0;
02255     psij_const[2*m+2] = fg_psij0;
02256     for(l=1; l<=2*m+1; l++)
02257       {
02258         fg_psij2 *= fg_psij1;
02259         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02260       }
02261 
02262     nfft_uo(ths,j,&u,&o,2);
02263     fg_psij0 = (PHI(*(xj+2)-((double)u)/n2,2));
02264     fg_psij1 = exp(2.0*(n2*(*(xj+2)) - u)/ths->b[2]);
02265     fg_psij2 = 1.0;
02266     psij_const[2*(2*m+2)] = fg_psij0;
02267     for(l=1; l<=2*m+1; l++)
02268       {
02269         fg_psij2 *= fg_psij1;
02270         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02271       }
02272 
02273     nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02274   }
02275       nfft_free(fg_exp_l);
02276       nfft_free(psij_const);
02277       return;
02278     } /* if(FG_PSI) */
02279 
02280   if(ths->nfft_flags & PRE_LIN_PSI)
02281     {
02282       psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02283       K=ths->K;
02284       ip_s=K/(m+1);
02285 
02286       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02287   {
02288     nfft_uo(ths,j,&u,&o,0);
02289     ip_y = fabs(n0*(*(xj+0)) - u)*((double)K)/(m+1);
02290     ip_u = LRINT(floor(ip_y));
02291     ip_w = ip_y-ip_u;
02292     for(l=0; l < 2*m+2; l++)
02293       psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02294         ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
02295 
02296     nfft_uo(ths,j,&u,&o,1);
02297     ip_y = fabs(n1*(*(xj+1)) - u)*((double)K)/(m+1);
02298     ip_u = LRINT(floor(ip_y));
02299     ip_w = ip_y-ip_u;
02300     for(l=0; l < 2*m+2; l++)
02301       psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02302         ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02303 
02304     nfft_uo(ths,j,&u,&o,2);
02305     ip_y = fabs(n2*(*(xj+2)) - u)*((double)K)/(m+1);
02306     ip_u = LRINT(floor(ip_y));
02307     ip_w = ip_y-ip_u;
02308     for(l=0; l < 2*m+2; l++)
02309       psij_const[2*(2*m+2)+l] = ths->psi[2*(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02310         ths->psi[2*(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02311 
02312     nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02313   }
02314       nfft_free(psij_const);
02315       return;
02316     } /* if(PRE_LIN_PSI) */
02317 
02318   /* no precomputed psi at all */
02319   psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02320   for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02321     {
02322       nfft_uo(ths,j,&u,&o,0);
02323       for(l=0;l<=2*m+1;l++)
02324   psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
02325 
02326       nfft_uo(ths,j,&u,&o,1);
02327       for(l=0;l<=2*m+1;l++)
02328   psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
02329 
02330       nfft_uo(ths,j,&u,&o,2);
02331       for(l=0;l<=2*m+1;l++)
02332   psij_const[2*(2*m+2)+l]=(PHI(*(xj+2)-((double)((u+l)))/n2,2));
02333 
02334       nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02335     }
02336   nfft_free(psij_const);
02337 }
02338 
02339 static void nfft_adjoint_3d_B(nfft_plan *ths)
02340 {
02341   int n0,N0,n1,N1,n2,N2,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
02342   double _Complex *fj,*g;
02343   double *psij, *psij_const, *xj, ip_y, ip_w;
02344 
02345   double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
02346 
02347   N0=ths->N[0];
02348   n0=ths->n[0];
02349   N1=ths->N[1];
02350   n1=ths->n[1];
02351   N2=ths->N[2];
02352   n2=ths->n[2];
02353   M=ths->M_total;
02354   m=ths->m;
02355 
02356   g=ths->g;
02357   memset(g,0,ths->n_total*sizeof(double _Complex));
02358 
02359   if(ths->nfft_flags & PRE_FULL_PSI)
02360     {
02361       psi_index_g=ths->psi_index_g;
02362       for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
02363         for(l=1, g[(*psi_index_g++)]=(*psij++) * (*fj); l<(2*m+2)*(2*m+2)*(2*m+2); l++)
02364     g[(*psi_index_g++)] += (*psij++) * (*fj);
02365       return;
02366     } /* if(PRE_FULL_PSI) */
02367 
02368   if(ths->nfft_flags & PRE_PSI)
02369     {
02370       for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=3)
02371   nfft_adjoint_3d_compute(fj, g, ths->psi+j*3*(2*m+2), ths->psi+(j*3+1)*(2*m+2), ths->psi+(j*3+2)*(2*m+2), xj, xj+1, xj+2, n0, n1, n2, m);
02372       return;
02373     } /* if(PRE_PSI) */
02374 
02375   if(ths->nfft_flags & PRE_FG_PSI)
02376     {
02377       psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02378       fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02379 
02380       nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02381       nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02382       nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02383 
02384       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02385   {
02386     fg_psij0 = ths->psi[2*j*3];
02387     fg_psij1 = ths->psi[2*j*3+1];
02388     fg_psij2 = 1.0;
02389     psij_const[0] = fg_psij0;
02390     for(l=1; l<=2*m+1; l++)
02391       {
02392         fg_psij2 *= fg_psij1;
02393         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02394       }
02395 
02396     fg_psij0 = ths->psi[2*(j*3+1)];
02397     fg_psij1 = ths->psi[2*(j*3+1)+1];
02398     fg_psij2 = 1.0;
02399     psij_const[2*m+2] = fg_psij0;
02400     for(l=1; l<=2*m+1; l++)
02401       {
02402         fg_psij2 *= fg_psij1;
02403         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02404       }
02405 
02406     fg_psij0 = ths->psi[2*(j*3+2)];
02407     fg_psij1 = ths->psi[2*(j*3+2)+1];
02408     fg_psij2 = 1.0;
02409     psij_const[2*(2*m+2)] = fg_psij0;
02410     for(l=1; l<=2*m+1; l++)
02411       {
02412         fg_psij2 *= fg_psij1;
02413         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02414       }
02415 
02416     nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02417   }
02418       nfft_free(fg_exp_l);
02419       nfft_free(psij_const);
02420       return;
02421     } /* if(PRE_FG_PSI) */
02422 
02423   if(ths->nfft_flags & FG_PSI)
02424     {
02425       psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02426       fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02427 
02428       nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02429       nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02430       nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02431 
02432       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02433   {
02434     nfft_uo(ths,j,&u,&o,0);
02435     fg_psij0 = (PHI(*xj-((double)u)/n0,0));
02436     fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
02437     fg_psij2 = 1.0;
02438     psij_const[0] = fg_psij0;
02439     for(l=1; l<=2*m+1; l++)
02440       {
02441         fg_psij2 *= fg_psij1;
02442         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02443       }
02444 
02445     nfft_uo(ths,j,&u,&o,1);
02446     fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
02447     fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
02448     fg_psij2 = 1.0;
02449     psij_const[2*m+2] = fg_psij0;
02450     for(l=1; l<=2*m+1; l++)
02451       {
02452         fg_psij2 *= fg_psij1;
02453         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02454       }
02455 
02456     nfft_uo(ths,j,&u,&o,2);
02457     fg_psij0 = (PHI(*(xj+2)-((double)u)/n2,2));
02458     fg_psij1 = exp(2.0*(n2*(*(xj+2)) - u)/ths->b[2]);
02459     fg_psij2 = 1.0;
02460     psij_const[2*(2*m+2)] = fg_psij0;
02461     for(l=1; l<=2*m+1; l++)
02462       {
02463         fg_psij2 *= fg_psij1;
02464         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02465       }
02466 
02467     nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02468   }
02469       nfft_free(fg_exp_l);
02470       nfft_free(psij_const);
02471       return;
02472     } /* if(FG_PSI) */
02473 
02474   if(ths->nfft_flags & PRE_LIN_PSI)
02475     {
02476       psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02477       K=ths->K;
02478       ip_s=K/(m+1);
02479 
02480       for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02481   {
02482     nfft_uo(ths,j,&u,&o,0);
02483     ip_y = fabs(n0*(*(xj+0)) - u)*((double)K)/(m+1);
02484     ip_u = LRINT(floor(ip_y));
02485     ip_w = ip_y-ip_u;
02486     for(l=0; l < 2*m+2; l++)
02487       psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02488         ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
02489 
02490     nfft_uo(ths,j,&u,&o,1);
02491     ip_y = fabs(n1*(*(xj+1)) - u)*((double)K)/(m+1);
02492     ip_u = LRINT(floor(ip_y));
02493     ip_w = ip_y-ip_u;
02494     for(l=0; l < 2*m+2; l++)
02495       psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02496         ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02497 
02498     nfft_uo(ths,j,&u,&o,2);
02499     ip_y = fabs(n2*(*(xj+2)) - u)*((double)K)/(m+1);
02500     ip_u = LRINT(floor(ip_y));
02501     ip_w = ip_y-ip_u;
02502     for(l=0; l < 2*m+2; l++)
02503       psij_const[2*(2*m+2)+l] = ths->psi[2*(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02504         ths->psi[2*(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02505 
02506     nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02507   }
02508       nfft_free(psij_const);
02509       return;
02510     } /* if(PRE_LIN_PSI) */
02511 
02512   /* no precomputed psi at all */
02513   psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02514   for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02515     {
02516       nfft_uo(ths,j,&u,&o,0);
02517       for(l=0;l<=2*m+1;l++)
02518   psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
02519 
02520       nfft_uo(ths,j,&u,&o,1);
02521       for(l=0;l<=2*m+1;l++)
02522   psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
02523 
02524       nfft_uo(ths,j,&u,&o,2);
02525       for(l=0;l<=2*m+1;l++)
02526   psij_const[2*(2*m+2)+l]=(PHI(*(xj+2)-((double)((u+l)))/n2,2));
02527 
02528       nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02529     }
02530   nfft_free(psij_const);
02531 }
02532 
02533 void nfft_trafo_3d(nfft_plan *ths)
02534 {
02535   int k0,k1,k2,n0,n1,n2,N0,N1,N2;
02536   double _Complex *g_hat,*f_hat;
02537   double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12, *c_phi_inv21, *c_phi_inv22;
02538   double ck01, ck02, ck11, ck12, ck21, ck22;
02539   double _Complex *g_hat111,*f_hat111,*g_hat211,*f_hat211,*g_hat121,*f_hat121,*g_hat221,*f_hat221;
02540   double _Complex *g_hat112,*f_hat112,*g_hat212,*f_hat212,*g_hat122,*f_hat122,*g_hat222,*f_hat222;
02541 
02542   ths->g_hat=ths->g1;
02543   ths->g=ths->g2;
02544 
02545   N0=ths->N[0];
02546   N1=ths->N[1];
02547   N2=ths->N[2];
02548   n0=ths->n[0];
02549   n1=ths->n[1];
02550   n2=ths->n[2];
02551 
02552   f_hat=ths->f_hat;
02553   g_hat=ths->g_hat;
02554 
02555   TIC(0)
02556   memset(ths->g_hat,0,ths->n_total*sizeof(double _Complex));
02557   if(ths->nfft_flags & PRE_PHI_HUT)
02558     {
02559       c_phi_inv01=ths->c_phi_inv[0];
02560       c_phi_inv02=&ths->c_phi_inv[0][N0/2];
02561 
02562       for(k0=0;k0<N0/2;k0++)
02563   {
02564     ck01=(*c_phi_inv01++);
02565     ck02=(*c_phi_inv02++);
02566     c_phi_inv11=ths->c_phi_inv[1];
02567     c_phi_inv12=&ths->c_phi_inv[1][N1/2];
02568 
02569     for(k1=0;k1<N1/2;k1++)
02570       {
02571         ck11=(*c_phi_inv11++);
02572         ck12=(*c_phi_inv12++);
02573         c_phi_inv21=ths->c_phi_inv[2];
02574         c_phi_inv22=&ths->c_phi_inv[2][N2/2];
02575 
02576         g_hat111=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2;
02577         f_hat111=f_hat + (k0*N1+k1)*N2;
02578         g_hat211=g_hat + (k0*n1+n1-N1/2+k1)*n2+n2-N2/2;
02579         f_hat211=f_hat + ((N0/2+k0)*N1+k1)*N2;
02580         g_hat121=g_hat + ((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2;
02581         f_hat121=f_hat + (k0*N1+N1/2+k1)*N2;
02582         g_hat221=g_hat + (k0*n1+k1)*n2+n2-N2/2;
02583         f_hat221=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2;
02584 
02585         g_hat112=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2;
02586         f_hat112=f_hat + (k0*N1+k1)*N2+N2/2;
02587         g_hat212=g_hat + (k0*n1+n1-N1/2+k1)*n2;
02588         f_hat212=f_hat + ((N0/2+k0)*N1+k1)*N2+N2/2;
02589         g_hat122=g_hat + ((n0-N0/2+k0)*n1+k1)*n2;
02590         f_hat122=f_hat + (k0*N1+N1/2+k1)*N2+N2/2;
02591         g_hat222=g_hat + (k0*n1+k1)*n2;
02592         f_hat222=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2+N2/2;
02593 
02594         for(k2=0;k2<N2/2;k2++)
02595     {
02596       ck21=(*c_phi_inv21++);
02597       ck22=(*c_phi_inv22++);
02598 
02599       (*g_hat111++) = (*f_hat111++) * ck01 * ck11 * ck21;
02600       (*g_hat211++) = (*f_hat211++) * ck02 * ck11 * ck21;
02601       (*g_hat121++) = (*f_hat121++) * ck01 * ck12 * ck21;
02602       (*g_hat221++) = (*f_hat221++) * ck02 * ck12 * ck21;
02603 
02604       (*g_hat112++) = (*f_hat112++) * ck01 * ck11 * ck22;
02605       (*g_hat212++) = (*f_hat212++) * ck02 * ck11 * ck22;
02606       (*g_hat122++) = (*f_hat122++) * ck01 * ck12 * ck22;
02607       (*g_hat222++) = (*f_hat222++) * ck02 * ck12 * ck22;
02608     }
02609       }
02610   }
02611     }
02612   else
02613     for(k0=0;k0<N0/2;k0++)
02614       {
02615   ck01=1./(PHI_HUT(k0-N0/2,0));
02616   ck02=1./(PHI_HUT(k0,0));
02617   for(k1=0;k1<N1/2;k1++)
02618     {
02619       ck11=1./(PHI_HUT(k1-N1/2,1));
02620       ck12=1./(PHI_HUT(k1,1));
02621 
02622       for(k2=0;k2<N2/2;k2++)
02623         {
02624     ck21=1./(PHI_HUT(k2-N2/2,2));
02625     ck22=1./(PHI_HUT(k2,2));
02626 
02627     g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] = f_hat[(k0*N1+k1)*N2+k2]                  * ck01 * ck11 * ck21;
02628     g_hat[(k0*n1+n1-N1/2+k1)*n2+n2-N2/2+k2]           = f_hat[((N0/2+k0)*N1+k1)*N2+k2]           * ck02 * ck11 * ck21;
02629     g_hat[((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2+k2]         = f_hat[(k0*N1+N1/2+k1)*N2+k2]             * ck01 * ck12 * ck21;
02630     g_hat[(k0*n1+k1)*n2+n2-N2/2+k2]                   = f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+k2]      * ck02 * ck12 * ck21;
02631 
02632     g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+k2]         = f_hat[(k0*N1+k1)*N2+N2/2+k2]             * ck01 * ck11 * ck22;
02633     g_hat[(k0*n1+n1-N1/2+k1)*n2+k2]                   = f_hat[((N0/2+k0)*N1+k1)*N2+N2/2+k2]      * ck02 * ck11 * ck22;
02634     g_hat[((n0-N0/2+k0)*n1+k1)*n2+k2]                 = f_hat[(k0*N1+N1/2+k1)*N2+N2/2+k2]        * ck01 * ck12 * ck22;
02635     g_hat[(k0*n1+k1)*n2+k2]                           = f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+N2/2+k2] * ck02 * ck12 * ck22;
02636         }
02637     }
02638       }
02639 
02640   TOC(0)
02641 
02642   TIC_FFTW(1)
02643   fftw_execute(ths->my_fftw_plan1);
02644   TOC_FFTW(1);
02645 
02646   TIC(2);
02647   nfft_trafo_3d_B(ths);
02648   TOC(2);
02649 }
02650 
02651 void nfft_adjoint_3d(nfft_plan *ths)
02652 {
02653   int k0,k1,k2,n0,n1,n2,N0,N1,N2;
02654   double _Complex *g_hat,*f_hat;
02655   double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12, *c_phi_inv21, *c_phi_inv22;
02656   double ck01, ck02, ck11, ck12, ck21, ck22;
02657   double _Complex *g_hat111,*f_hat111,*g_hat211,*f_hat211,*g_hat121,*f_hat121,*g_hat221,*f_hat221;
02658   double _Complex *g_hat112,*f_hat112,*g_hat212,*f_hat212,*g_hat122,*f_hat122,*g_hat222,*f_hat222;
02659 
02660   ths->g_hat=ths->g1;
02661   ths->g=ths->g2;
02662 
02663   N0=ths->N[0];
02664   N1=ths->N[1];
02665   N2=ths->N[2];
02666   n0=ths->n[0];
02667   n1=ths->n[1];
02668   n2=ths->n[2];
02669 
02670   f_hat=ths->f_hat;
02671   g_hat=ths->g_hat;
02672 
02673   TIC(2);
02674   nfft_adjoint_3d_B(ths);
02675   TOC(2);
02676 
02677   TIC_FFTW(1)
02678   fftw_execute(ths->my_fftw_plan2);
02679   TOC_FFTW(1);
02680 
02681   TIC(0)
02682   if(ths->nfft_flags & PRE_PHI_HUT)
02683     {
02684       c_phi_inv01=ths->c_phi_inv[0];
02685       c_phi_inv02=&ths->c_phi_inv[0][N0/2];
02686 
02687       for(k0=0;k0<N0/2;k0++)
02688   {
02689     ck01=(*c_phi_inv01++);
02690     ck02=(*c_phi_inv02++);
02691     c_phi_inv11=ths->c_phi_inv[1];
02692     c_phi_inv12=&ths->c_phi_inv[1][N1/2];
02693 
02694     for(k1=0;k1<N1/2;k1++)
02695       {
02696         ck11=(*c_phi_inv11++);
02697         ck12=(*c_phi_inv12++);
02698         c_phi_inv21=ths->c_phi_inv[2];
02699         c_phi_inv22=&ths->c_phi_inv[2][N2/2];
02700 
02701         g_hat111=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2;
02702         f_hat111=f_hat + (k0*N1+k1)*N2;
02703         g_hat211=g_hat + (k0*n1+n1-N1/2+k1)*n2+n2-N2/2;
02704         f_hat211=f_hat + ((N0/2+k0)*N1+k1)*N2;
02705         g_hat121=g_hat + ((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2;
02706         f_hat121=f_hat + (k0*N1+N1/2+k1)*N2;
02707         g_hat221=g_hat + (k0*n1+k1)*n2+n2-N2/2;
02708         f_hat221=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2;
02709 
02710         g_hat112=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2;
02711         f_hat112=f_hat + (k0*N1+k1)*N2+N2/2;
02712         g_hat212=g_hat + (k0*n1+n1-N1/2+k1)*n2;
02713         f_hat212=f_hat + ((N0/2+k0)*N1+k1)*N2+N2/2;
02714         g_hat122=g_hat + ((n0-N0/2+k0)*n1+k1)*n2;
02715         f_hat122=f_hat + (k0*N1+N1/2+k1)*N2+N2/2;
02716         g_hat222=g_hat + (k0*n1+k1)*n2;
02717         f_hat222=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2+N2/2;
02718 
02719         for(k2=0;k2<N2/2;k2++)
02720     {
02721       ck21=(*c_phi_inv21++);
02722       ck22=(*c_phi_inv22++);
02723 
02724       (*f_hat111++) = (*g_hat111++) * ck01 * ck11 * ck21;
02725       (*f_hat211++) = (*g_hat211++) * ck02 * ck11 * ck21;
02726       (*f_hat121++) = (*g_hat121++) * ck01 * ck12 * ck21;
02727       (*f_hat221++) = (*g_hat221++) * ck02 * ck12 * ck21;
02728 
02729       (*f_hat112++) = (*g_hat112++) * ck01 * ck11 * ck22;
02730       (*f_hat212++) = (*g_hat212++) * ck02 * ck11 * ck22;
02731       (*f_hat122++) = (*g_hat122++) * ck01 * ck12 * ck22;
02732       (*f_hat222++) = (*g_hat222++) * ck02 * ck12 * ck22;
02733     }
02734       }
02735   }
02736     }
02737   else
02738     for(k0=0;k0<N0/2;k0++)
02739       {
02740   ck01=1./(PHI_HUT(k0-N0/2,0));
02741   ck02=1./(PHI_HUT(k0,0));
02742   for(k1=0;k1<N1/2;k1++)
02743     {
02744       ck11=1./(PHI_HUT(k1-N1/2,1));
02745       ck12=1./(PHI_HUT(k1,1));
02746 
02747       for(k2=0;k2<N2/2;k2++)
02748         {
02749     ck21=1./(PHI_HUT(k2-N2/2,2));
02750     ck22=1./(PHI_HUT(k2,2));
02751 
02752     f_hat[(k0*N1+k1)*N2+k2]                  = g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] * ck01 * ck11 * ck21;
02753     f_hat[((N0/2+k0)*N1+k1)*N2+k2]           = g_hat[(k0*n1+n1-N1/2+k1)*n2+n2-N2/2+k2]           * ck02 * ck11 * ck21;
02754     f_hat[(k0*N1+N1/2+k1)*N2+k2]             = g_hat[((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2+k2]         * ck01 * ck12 * ck21;
02755     f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+k2]      = g_hat[(k0*n1+k1)*n2+n2-N2/2+k2]                   * ck02 * ck12 * ck21;
02756 
02757     f_hat[(k0*N1+k1)*N2+N2/2+k2]             = g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+k2]         * ck01 * ck11 * ck22;
02758     f_hat[((N0/2+k0)*N1+k1)*N2+N2/2+k2]      = g_hat[(k0*n1+n1-N1/2+k1)*n2+k2]                   * ck02 * ck11 * ck22;
02759     f_hat[(k0*N1+N1/2+k1)*N2+N2/2+k2]        = g_hat[((n0-N0/2+k0)*n1+k1)*n2+k2]                 * ck01 * ck12 * ck22;
02760     f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+N2/2+k2] = g_hat[(k0*n1+k1)*n2+k2]                           * ck02 * ck12 * ck22;
02761         }
02762     }
02763       }
02764 
02765   TOC(0)
02766 }
02767 
02770 void nfft_trafo(nfft_plan *ths)
02771 {
02772   switch(ths->d)
02773     {
02774     case 1: nfft_trafo_1d(ths); break;
02775     case 2: nfft_trafo_2d(ths); break;
02776     case 3: nfft_trafo_3d(ths); break;
02777     default:
02778     /* use ths->my_fftw_plan1 */
02779     ths->g_hat=ths->g1;
02780     ths->g=ths->g2;
02781 
02785     TIC(0)
02786     nfft_D_A(ths);
02787     TOC(0)
02788 
02789     
02793     TIC_FFTW(1)
02794     fftw_execute(ths->my_fftw_plan1);
02795     TOC_FFTW(1)
02796 
02797     
02800     TIC(2)
02801     nfft_B_A(ths);
02802     TOC(2)
02803     }
02804 } /* nfft_trafo */
02805 
02806 void nfft_adjoint(nfft_plan *ths)
02807 {
02808   switch(ths->d)
02809     {
02810     case 1: nfft_adjoint_1d(ths); break;
02811     case 2: nfft_adjoint_2d(ths); break;
02812     case 3: nfft_adjoint_3d(ths); break;
02813     default:
02814       /* use ths->my_fftw_plan2 */
02815       ths->g_hat=ths->g1;
02816       ths->g=ths->g2;
02817       
02821       TIC(2)
02822       nfft_B_T(ths);
02823       TOC(2)
02824   
02825       
02829       TIC_FFTW(1)
02830       fftw_execute(ths->my_fftw_plan2);
02831       TOC_FFTW(1)
02832   
02833       
02836       TIC(0)
02837       nfft_D_T(ths);
02838       TOC(0)
02839     }
02840 } /* nfft_adjoint */
02841 
02842 
02845 static void nfft_precompute_phi_hut(nfft_plan *ths)
02846 {
02847   int ks[ths->d];                       
02848   int t;                                
02850   ths->c_phi_inv = (double**) nfft_malloc(ths->d*sizeof(double*));
02851 
02852   for(t=0; t<ths->d; t++)
02853     {
02854       ths->c_phi_inv[t]= (double*)nfft_malloc(ths->N[t]*sizeof(double));
02855       for(ks[t]=0; ks[t]<ths->N[t]; ks[t]++)
02856   ths->c_phi_inv[t][ks[t]]= 1.0/(PHI_HUT(ks[t]-ths->N[t]/2,t));
02857     }
02858 } /* nfft_phi_hut */
02859 
02865 void nfft_precompute_lin_psi(nfft_plan *ths)
02866 {
02867   int t;                                
02868   int j;                                
02869   double step;                          
02871   for (t=0; t<ths->d; t++)
02872     {
02873       step=((double)(ths->m+1))/(ths->K*ths->n[t]);
02874       for(j=0;j<=ths->K;j++)
02875   {
02876     ths->psi[(ths->K+1)*t + j] = PHI(j*step,t);
02877   } /* for(j) */
02878     } /* for(t) */
02879 }
02880 
02881 static void nfft_precompute_fg_psi(nfft_plan *ths)
02882 {
02883   int t;                                
02884   int j;                                
02885   int u, o;                             
02887   for (t=0; t<ths->d; t++)
02888     for(j=0;j<ths->M_total;j++)
02889       {
02890   nfft_uo(ths,j,&u,&o,t);
02891 
02892         ths->psi[2*(j*ths->d+t)]=
02893             (PHI((ths->x[j*ths->d+t]-((double)u)/ths->n[t]),t));
02894 
02895         ths->psi[2*(j*ths->d+t)+1]=
02896             exp(2.0*(ths->n[t]*ths->x[j*ths->d+t] - u) / ths->b[t]);
02897       } /* for(j) */
02898   /* for(t) */
02899 } /* nfft_precompute_fg_psi */
02900 
02901 void nfft_precompute_psi(nfft_plan *ths)
02902 {
02903   int t;                                
02904   int j;                                
02905   int l;                                
02906   int lj;                               
02907   int u, o;                             
02909   for (t=0; t<ths->d; t++)
02910     for(j=0;j<ths->M_total;j++)
02911       {
02912   nfft_uo(ths,j,&u,&o,t);
02913 
02914   for(l=u, lj=0; l <= o; l++, lj++)
02915     ths->psi[(j*ths->d+t)*(2*ths->m+2)+lj]=
02916       (PHI((ths->x[j*ths->d+t]-((double)l)/ths->n[t]),t));
02917       } /* for(j) */
02918   /* for(t) */
02919 } /* nfft_precompute_psi */
02920 
02921 void nfft_precompute_full_psi(nfft_plan *ths)
02922 {
02923   int t,t2;                             
02924   int j;                                
02925   int l_L;                              
02926   int l[ths->d];                        
02927   int lj[ths->d];                       
02928   int ll_plain[ths->d+1];               
02929   int lprod;                            
02930   int u[ths->d], o[ths->d];             
02932   double phi_prod[ths->d+1];
02933 
02934   int ix,ix_old;
02935 
02936   phi_prod[0]=1;
02937   ll_plain[0]=0;
02938 
02939   for(t=0,lprod = 1; t<ths->d; t++)
02940       lprod *= 2*ths->m+2;
02941 
02942   for(j=0,ix=0,ix_old=0; j<ths->M_total; j++)
02943     {
02944       MACRO_init_uo_l_lj_t;
02945 
02946       for(l_L=0; l_L<lprod; l_L++, ix++)
02947   {
02948     MACRO_update_phi_prod_ll_plain(without_PRE_PSI);
02949 
02950     ths->psi_index_g[ix]=ll_plain[ths->d];
02951     ths->psi[ix]=phi_prod[ths->d];
02952 
02953     MACRO_count_uo_l_lj_t;
02954   } /* for(l_L) */
02955 
02956 
02957       ths->psi_index_f[j]=ix-ix_old;
02958       ix_old=ix;
02959     } /* for(j) */
02960 }
02961 
02962 void nfft_precompute_one_psi(nfft_plan *ths)
02963 {
02964   if(ths->nfft_flags & PRE_LIN_PSI)
02965     nfft_precompute_lin_psi(ths);
02966   if(ths->nfft_flags & PRE_FG_PSI)
02967     nfft_precompute_fg_psi(ths);
02968   if(ths->nfft_flags & PRE_PSI)
02969     nfft_precompute_psi(ths);
02970   if(ths->nfft_flags & PRE_FULL_PSI)
02971     nfft_precompute_full_psi(ths);
02972 }
02973 
02974 
02975 static void nfft_init_help(nfft_plan *ths)
02976 {
02977   int t;                                
02978   int lprod;                            
02980   ths->N_total=nfft_prod_int(ths->N, ths->d);
02981   ths->n_total=nfft_prod_int(ths->n, ths->d);
02982 
02983   ths->sigma = (double*) nfft_malloc(ths->d*sizeof(double));
02984   for(t = 0;t < ths->d; t++)
02985     ths->sigma[t] = ((double)ths->n[t])/ths->N[t];
02986 
02987   WINDOW_HELP_INIT;
02988 
02989   if(ths->nfft_flags & MALLOC_X)
02990     ths->x = (double*)nfft_malloc(ths->d*ths->M_total*sizeof(double));
02991 
02992   if(ths->nfft_flags & MALLOC_F_HAT)
02993     ths->f_hat = (double _Complex*)nfft_malloc(ths->N_total*sizeof(double _Complex));
02994 
02995   if(ths->nfft_flags & MALLOC_F)
02996     ths->f = (double _Complex*)nfft_malloc(ths->M_total*sizeof(double _Complex));
02997 
02998   if(ths->nfft_flags & PRE_PHI_HUT)
02999     nfft_precompute_phi_hut(ths);
03000 
03001   if(ths->nfft_flags & PRE_LIN_PSI)
03002   {
03003       ths->K=(1U<< 10)*(ths->m+1);
03004       ths->psi = (double*) nfft_malloc((ths->K+1)*ths->d*sizeof(double));
03005   }
03006 
03007   if(ths->nfft_flags & PRE_FG_PSI)
03008     ths->psi = (double*) nfft_malloc(ths->M_total*ths->d*2*sizeof(double));
03009 
03010   if(ths->nfft_flags & PRE_PSI)
03011     ths->psi = (double*) nfft_malloc(ths->M_total*ths->d*
03012              (2*ths->m+2)*sizeof(double));
03013 
03014   if(ths->nfft_flags & PRE_FULL_PSI)
03015   {
03016       for(t=0,lprod = 1; t<ths->d; t++)
03017     lprod *= 2*ths->m+2;
03018 
03019       ths->psi = (double*) nfft_malloc(ths->M_total*lprod*sizeof(double));
03020 
03021       ths->psi_index_f = (int*) nfft_malloc(ths->M_total*sizeof(int));
03022       ths->psi_index_g = (int*) nfft_malloc(ths->M_total*lprod*sizeof(int));
03023   }
03024 
03025   if(ths->nfft_flags & FFTW_INIT)
03026   {
03027     ths->g1=(double _Complex*)nfft_malloc(ths->n_total*sizeof(double _Complex));
03028 
03029     if(ths->nfft_flags & FFT_OUT_OF_PLACE)
03030       ths->g2 = (double _Complex*) nfft_malloc(ths->n_total*sizeof(double _Complex));
03031     else
03032       ths->g2 = ths->g1;
03033 
03034     ths->my_fftw_plan1 = fftw_plan_dft(ths->d, ths->n, ths->g1, ths->g2, FFTW_FORWARD, ths->fftw_flags);
03035     ths->my_fftw_plan2 = fftw_plan_dft(ths->d, ths->n, ths->g2, ths->g1,
03036       FFTW_BACKWARD, ths->fftw_flags);
03037   }
03038 
03039   ths->mv_trafo = (void (*) (void* ))nfft_trafo;
03040   ths->mv_adjoint = (void (*) (void* ))nfft_adjoint;
03041 }
03042 
03043 void nfft_init(nfft_plan *ths, int d, int *N, int M_total)
03044 {
03045   int t;                                
03047   ths->d = d;
03048 
03049   ths->N=(int*) nfft_malloc(d*sizeof(int));
03050   for(t = 0;t < d; t++)
03051     ths->N[t] = N[t];
03052 
03053   ths->M_total = M_total;
03054 
03055   ths->n = (int*) nfft_malloc(d*sizeof(int));
03056   for(t = 0;t < d; t++)
03057     ths->n[t] = 2*nfft_next_power_of_2(ths->N[t]);
03058 
03059   WINDOW_HELP_ESTIMATE_m;
03060 
03061   ths->nfft_flags = PRE_PHI_HUT| PRE_PSI| MALLOC_X| MALLOC_F_HAT| MALLOC_F|
03062                     FFTW_INIT| FFT_OUT_OF_PLACE;
03063   ths->fftw_flags= FFTW_ESTIMATE| FFTW_DESTROY_INPUT;
03064 
03065   nfft_init_help(ths);
03066 }
03067 
03068 void nfft_init_guru(nfft_plan *ths, int d, int *N, int M_total, int *n,
03069       int m, unsigned nfft_flags, unsigned fftw_flags)
03070 {
03071   int t;                                
03073   ths->d =d;
03074   ths->N= (int*) nfft_malloc(ths->d*sizeof(int));
03075   for(t=0; t<d; t++)
03076     ths->N[t]= N[t];
03077   ths->M_total= M_total;
03078   ths->n= (int*) nfft_malloc(ths->d*sizeof(int));
03079   for(t=0; t<d; t++)
03080     ths->n[t]= n[t];
03081   ths->m= m;
03082   ths->nfft_flags= nfft_flags;
03083   ths->fftw_flags= fftw_flags;
03084 
03085   nfft_init_help(ths);
03086 }
03087 
03088 void nfft_init_1d(nfft_plan *ths, int N1, int M_total)
03089 {
03090   int N[1];
03091 
03092   N[0]=N1;
03093   nfft_init(ths,1,N,M_total);
03094 }
03095 
03096 void nfft_init_2d(nfft_plan *ths, int N1, int N2, int M_total)
03097 {
03098   int N[2];
03099 
03100   N[0]=N1;
03101   N[1]=N2;
03102   nfft_init(ths,2,N,M_total);
03103 }
03104 
03105 void nfft_init_3d(nfft_plan *ths, int N1, int N2, int N3, int M_total)
03106 {
03107   int N[3];
03108 
03109   N[0]=N1;
03110   N[1]=N2;
03111   N[2]=N3;
03112   nfft_init(ths,3,N,M_total);
03113 }
03114 
03115 void nfft_check(nfft_plan *ths)
03116 {
03117   int j;
03118 
03119   for(j=0;j<ths->M_total*ths->d;j++)
03120     if((ths->x[j]<-0.5) || (ths->x[j]>=0.5))
03121       fprintf(stderr,"nfft_check: ths->x[%d]=%e out of range [-0.5,0.5)\n",
03122         j,ths->x[j]);
03123 
03124   for(j=0;j<ths->d;j++)
03125     {
03126       if(ths->sigma[j]<=1)
03127   fprintf(stderr,"nfft_check: oversampling factor too small\n");
03128 
03129       if(ths->N[j]<=ths->m)
03130   fprintf(stderr,
03131     "nfft_check: polynomial degree N is smaller than cut-off m\n");
03132 
03133       if(ths->N[j]%2==1)
03134   fprintf(stderr,"nfft_check: polynomial degree N has to be even\n");
03135     }
03136 }
03137 
03138 void nfft_finalize(nfft_plan *ths)
03139 {
03140   int t; /* index over dimensions */
03141 
03142   if(ths->nfft_flags & FFTW_INIT)
03143   {
03144     fftw_destroy_plan(ths->my_fftw_plan2);
03145     fftw_destroy_plan(ths->my_fftw_plan1);
03146 
03147     if(ths->nfft_flags & FFT_OUT_OF_PLACE)
03148       nfft_free(ths->g2);
03149 
03150     nfft_free(ths->g1);
03151   }
03152 
03153   if(ths->nfft_flags & PRE_FULL_PSI)
03154     {
03155       nfft_free(ths->psi_index_g);
03156       nfft_free(ths->psi_index_f);
03157       nfft_free(ths->psi);
03158     }
03159 
03160   if(ths->nfft_flags & PRE_PSI)
03161     nfft_free(ths->psi);
03162 
03163   if(ths->nfft_flags & PRE_FG_PSI)
03164     nfft_free(ths->psi);
03165 
03166   if(ths->nfft_flags & PRE_LIN_PSI)
03167     nfft_free(ths->psi);
03168 
03169   if(ths->nfft_flags & PRE_PHI_HUT)
03170     {
03171       for(t=0; t<ths->d; t++)
03172         nfft_free(ths->c_phi_inv[t]);
03173       nfft_free(ths->c_phi_inv);
03174     }
03175 
03176   if(ths->nfft_flags & MALLOC_F)
03177     nfft_free(ths->f);
03178 
03179   if(ths->nfft_flags & MALLOC_F_HAT)
03180     nfft_free(ths->f_hat);
03181 
03182   if(ths->nfft_flags & MALLOC_X)
03183   nfft_free(ths->x);
03184 
03185   WINDOW_HELP_FINALIZE;
03186 
03187   nfft_free(ths->sigma);
03188   nfft_free(ths->n);
03189   nfft_free(ths->N);
03190 }

Generated on 17 Aug 2009 by Doxygen 1.5.3