qpms/ewaldsf.c

   1 #include "ewald.h"
   2 #include <gsl/gsl_sf_gamma.h>
   3 #include <gsl/gsl_sf_expint.h>
   4 #include <gsl/gsl_sf_exp.h>
   5 #include <gsl/gsl_sf_result.h>
   6 #include <gsl/gsl_errno.h>
   7 #include <gsl/gsl_machine.h> // Maybe I should rather use DBL_EPSILON instead of GSL_DBL_EPSILON.
   8 #include "kahansum.h"
   9 #include <math.h>
  10 #include <complex.h>
  11 //#include <gsl/gsl_integration.h>
  12 #include <gsl/gsl_errno.h>
  13 #include <float.h>
  14 #include <stdbool.h>
  15 #include <Faddeeva.h>
  16 #include "tiny_inlines.h"
  17
  18 // Some magic constants
  19
  20 #ifndef COMPLEXPART_REL_ZERO_LIMIT
  21 #define COMPLEXPART_REL_ZERO_LIMIT 1e-14
  22 #endif
  23
  24 #ifndef DELTA_RECURRENT_EXPOVERFLOW_LIMIT
  25 #define DELTA_RECURRENT_EXPOVERFLOW_LIMIT 10.
  26 #endif
  27
  28 gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler;
  29
  30 void IgnoreUnderflowsGSLErrorHandler (const char * reason,
  31                     const char * file,
  32                     const int line,
  33                     const int gsl_errno) {
  34   if (gsl_errno == GSL_EUNDRFLW)
  35     return;
  36
  37   gsl_stream_printf ("ERROR", file, line, reason);
  38
  39   fflush(stdout);
  40   fprintf (stderr, "Underflow-ignoring error handler invoked.\n");
  41   fflush(stderr);
  42
  43   abort();
  44 }
  45
  46 // DLMF 8.7.3 (latter expression) for complex second argument
  47 // BTW if a is large negative, it might take a while to evaluate.
  48 // This can't be used for non-positive integer a due to
  49 // Г(a) in the formula.
  50 int cx_gamma_inc_series_e(const double a, const complex double z, qpms_csf_result * result) {
  51   if (a <= 0 && a == (int) a) {
  52     result->val = NAN + NAN*I;
  53     result->err = NAN;
  54     GSL_ERROR("Undefined for non-positive integer values", GSL_EDOM);
  55   }
  56   gsl_sf_result fullgamma;
  57   int retval = gsl_sf_gamma_e(a, &fullgamma);
  58   if (GSL_EUNDRFLW == retval)
  59     result->err += DBL_MIN;
  60   else if (GSL_SUCCESS != retval){
  61     result->val = NAN + NAN*I; result->err = NAN;
  62     return retval;
  63   }
  64
  65   complex double sumprefac = cpow(z, a) * cexp(-z);
  66   double sumprefac_abs = cabs(sumprefac);
  67   complex double sum, sumc; ckahaninit(&sum, &sumc);
  68   double err, errc; kahaninit(&err, &errc);
  69
  70   bool breakswitch = false;
  71   for (int k = 0; (!breakswitch) && (a + k + 1 <= GSL_SF_GAMMA_XMAX); ++k) {
  72     gsl_sf_result fullgamma_ak;
  73     if (GSL_SUCCESS != (retval = gsl_sf_gamma_e(a+k+1, &fullgamma_ak))) {
  74       result->val = NAN + NAN*I; result->err = NAN;
  75       return retval;
  76     }
  77     complex double summand = - cpow(z, k) / fullgamma_ak.val; // TODO test branch selection here with cimag(z) = -0.0
  78     ckahanadd(&sum, &sumc, summand);
  79     double summanderr = fabs(fullgamma_ak.err * cabs(summand / fullgamma_ak.val));
  80     // TODO add also the rounding error
  81     kahanadd(&err, &errc, summanderr);
  82     // TODO put some smarter cutoff break here?
  83     if (a + k >= 18 && (cabs(summand) < err || cabs(summand) < DBL_EPSILON))
  84       breakswitch = true;
  85   }
  86   sum *= sumprefac; // Not sure if not breaking the Kahan summation here
  87   sumc *= sumprefac;
  88   err *= sumprefac_abs;
  89   errc *= sumprefac_abs;
  90   ckahanadd(&sum, &sumc, 1.);
  91   kahanadd(&err, &errc, DBL_EPSILON);
  92   result->err = cabs(sum) * fullgamma.err + err * fabs(fullgamma.val);
  93   result->val = sum * fullgamma.val; // + sumc*fullgamma.val???
  94   if (breakswitch)
  95     return GSL_SUCCESS;
  96   else GSL_ERROR("Overflow; the absolute value of the z argument is probably too large.", GSL_ETOL); // maybe different error code...
  97 }
  98
  99 /* Continued fraction which occurs in evaluation
 100  * of Q(a,z) or Gamma(a,z).
 101  * Borrowed from GSL and adapted for complex z.
 102  *
 103  *              1   (1-a)/z  1/z  (2-a)/z   2/z  (3-a)/z
 104  *   F(a,z) =  ---- ------- ----- -------- ----- -------- ...
 105  *             1 +   1 +     1 +   1 +      1 +   1 +
 106  *
 107  */
 108 static int
 109 cx_gamma_inc_F_CF(const double a, const complex double z, qpms_csf_result * result)
 110 {
 111   const int    nmax  =  5000;
 112   const double small =  DBL_EPSILON * DBL_EPSILON * DBL_EPSILON;
 113
 114   complex double hn = 1.0;           /* convergent */
 115   complex double Cn = 1.0 / small;
 116   complex double Dn = 1.0;
 117   int n;
 118
 119   /* n == 1 has a_1, b_1, b_0 independent of a,z,
 120      so that has been done by hand                */
 121   for ( n = 2 ; n < nmax ; n++ )
 122   {
 123     complex double an;
 124     complex double delta;
 125
 126     if(n % 2)
 127       an = 0.5*(n-1)/z;
 128     else
 129       an = (0.5*n-a)/z;
 130
 131     Dn = 1.0 + an * Dn;
 132     if ( cabs(Dn) < small )
 133       Dn = small;
 134     Cn = 1.0 + an/Cn;
 135     if ( cabs(Cn) < small )
 136       Cn = small;
 137     Dn = 1.0 / Dn;
 138     delta = Cn * Dn;
 139     hn *= delta;
 140     if(cabs(delta-1.0) < DBL_EPSILON) break;
 141   }
 142
 143   result->val = hn;
 144   result->err = 2.0*GSL_DBL_EPSILON * cabs(hn);
 145   result->err += GSL_DBL_EPSILON * (2.0 + 0.5*n) * cabs(result->val);
 146
 147   if(n == nmax)
 148     GSL_ERROR ("error in CF for F(a,x)", GSL_EMAXITER);
 149   else
 150     return GSL_SUCCESS;
 151 }
 152
 153 // Incomplete gamma fuction with complex second argument as continued fraction.
 154 int cx_gamma_inc_CF_e(const double a, const complex double z, qpms_csf_result *result)
 155 {
 156   qpms_csf_result F;
 157   gsl_sf_result pre;
 158   const complex double am1lgz = (a-1.0)*clog(z); // TODO check branches
 159   const int stat_F = cx_gamma_inc_F_CF(a, z, &F);
 160   const int stat_E = gsl_sf_exp_err_e(creal(am1lgz - z), GSL_DBL_EPSILON*cabs(am1lgz), &pre);
 161   complex double cpre = pre.val * cexp(I*cimag(am1lgz - z));// TODO add the error estimate for this
 162   //complex double cpre = cpow(z, a-1) * cexp(-z);
 163
 164
 165   result->val = F.val * cpre;
 166   result->err = fabs(F.err * pre.val) + fabs(F.val * pre.err);
 167   result->err += 2.0 * GSL_DBL_EPSILON * fabs(result->val);
 168
 169   return GSL_ERROR_SELECT_2(stat_F, stat_E);
 170 }
 171
 172
 173 // Incomplete gamma function for complex second argument.
 174 int complex_gamma_inc_e(double a, complex double x, int m, qpms_csf_result *result) {
 175   int retval;
 176   if (creal(x) >= 0 &&
 177       (0 == fabs(cimag(x)) || // x is real positive; just use the real fun
 178       fabs(cimag(x)) < fabs(creal(x)) * COMPLEXPART_REL_ZERO_LIMIT)) {
 179     gsl_sf_result real_gamma_inc_result;
 180     retval = gsl_sf_gamma_inc_e(a, creal(x), &real_gamma_inc_result);
 181     result->val = real_gamma_inc_result.val;
 182     result->err = real_gamma_inc_result.err;
 183   } else if (creal(x) >= 0 && cabs(x) > 0.5)
 184     retval = cx_gamma_inc_CF_e(a, x, result);
 185   else if (QPMS_LIKELY(a > 0 || fmod(a, 1.0)))
 186     retval = cx_gamma_inc_series_e(a, x, result);
 187   else
 188   /* FIXME cx_gamma_inc_series_e() probably fails for non-positive integer a.
 189    * This does not matter for 2D lattices in 3D space,
 190    * but it might cause problems in the other cases.
 191    */
 192     QPMS_NOT_IMPLEMENTED("Incomplete Gamma function with non-positive integer a.");
 193   if (m) { // Non-principal branch.
 194     /* This might be sub-optimal, as Γ(a) has probably been already evaluated
 195      * somewhere in the functions called above. */
 196     gsl_sf_result fullgamma;
 197     int retval_fg = gsl_sf_gamma_e(a, &fullgamma);
 198     if (GSL_EUNDRFLW == retval_fg)
 199       fullgamma.err += DBL_MIN;
 200     else if (GSL_SUCCESS != retval_fg){
 201       result->val = NAN + NAN*I; result->err = NAN;
 202       return GSL_ERROR_SELECT_2(retval_fg, retval);
 203     }
 204     complex double f = cexp(2*m*M_PI*a*I);
 205     result->val *= f;
 206     f = -f + 1;
 207     result->err += cabs(f) * fullgamma.err;
 208     result->val += f * fullgamma.val;
 209   }
 210   return retval;
 211 }
 212
 213 // Exponential integral for complex argument; !UNTESTED! and probably not needed, as I expressed everything in terms of inc. gammas anyways.
 214 int complex_expint_n_e(int n, complex double x, qpms_csf_result *result) {
 215   if (creal(x) >= 0 &&
 216     (0 == fabs(cimag(x)) || // x is real positive; just use the real fun
 217     fabs(cimag(x)) < fabs(creal(x)) * COMPLEXPART_REL_ZERO_LIMIT)) {
 218     gsl_sf_result real_expint_result;
 219     int retval = gsl_sf_expint_En_e(n, creal(x), &real_expint_result);
 220     result->val = real_expint_result.val;
 221     result->err = real_expint_result.err;
 222     return retval;
 223   } else {
 224     int retval = complex_gamma_inc_e(-n+1, x, 0, result);
 225     complex double f = cpow(x, 2*n-2);
 226     result->val *= f;
 227     result->err *= cabs(f);
 228     return retval;
 229   }
 230 }
 231
 232 // inspired by GSL's hyperg_2F1_series
 233 int hyperg_2F2_series(const double a, const double b, const double c, const double d,
 234     const double x, gsl_sf_result *result
 235     )
 236 {
 237   double sum_pos = 1.0;
 238   double sum_neg = 0.0;
 239   double del_pos = 1.0;
 240   double del_neg = 0.0;
 241   double del = 1.0;
 242   double del_prev;
 243   double k = 0.0;
 244   int i = 0;
 245
 246   if(fabs(c) < GSL_DBL_EPSILON || fabs(d) < GSL_DBL_EPSILON) {
 247     result->val = NAN;
 248     result->err = INFINITY;
 249     GSL_ERROR ("error", GSL_EDOM);
 250   }
 251
 252   do {
 253     if(++i > 30000) {
 254       result->val  = sum_pos - sum_neg;
 255       result->err  = del_pos + del_neg;
 256       result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
 257       result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k)+1.0) * fabs(result->val);
 258       GSL_ERROR ("error", GSL_EMAXITER);
 259     }
 260     del_prev = del;
 261     del *= (a+k)*(b+k) * x / ((c+k) * (d+k) * (k+1.0));  /* Gauss series */
 262
 263     if(del > 0.0) {
 264       del_pos  =  del;
 265       sum_pos +=  del;
 266     }
 267     else if(del == 0.0) {
 268       /* Exact termination (a or b was a negative integer).
 269        */
 270       del_pos = 0.0;
 271       del_neg = 0.0;
 272       break;
 273     }
 274     else {
 275       del_neg  = -del;
 276       sum_neg -=  del;
 277     }
 278
 279     /*
 280      * This stopping criteria is taken from the thesis
 281      * "Computation of Hypergeometic Functions" by J. Pearson, pg. 31
 282      * (http://people.maths.ox.ac.uk/porterm/research/pearson_final.pdf)
 283      * and fixes bug #45926
 284      */
 285     if (fabs(del_prev / (sum_pos - sum_neg)) < GSL_DBL_EPSILON &&
 286         fabs(del / (sum_pos - sum_neg)) < GSL_DBL_EPSILON)
 287       break;
 288
 289     k += 1.0;
 290   } while(fabs((del_pos + del_neg)/(sum_pos-sum_neg)) > GSL_DBL_EPSILON);
 291
 292   result->val  = sum_pos - sum_neg;
 293   result->err  = del_pos + del_neg;
 294   result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
 295   result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k) + 1.0) * fabs(result->val);
 296
 297   return GSL_SUCCESS;
 298 }
 299
 300 // Complex square root with branch selection
 301 static inline complex double csqrt_branch(complex double x, int xbranch) {
 302   return csqrt(x) * min1pow(xbranch);
 303 }
 304
 305
 306 // The Delta_n factor from [Kambe II], Appendix 3
 307 // \f[ \Delta_n = \int_n^\infty t^{-1/2 - n} \exp(-t + z^2/(4t))\ud t \f]
 308 /* If |Im z| is big, Faddeeva_z might cause double overflow. In such case,
 309  * use bigimz = true to use a slightly different formula to initialise
 310  * the first two elements.
 311  *
 312  * The actual choice is done outside this function in order to enable
 313  * testing/comparison of the results.
 314  */
 315 void ewald3_2_sigma_long_Delta_recurrent(complex double *target, double *err,
 316     int maxn, complex double x, int xbranch, complex double z, bool bigimz) {
 317   complex double expfac = cexp(-x + 0.25 * z*z / x);
 318   complex double sqrtx = csqrt_branch(x, xbranch); // TODO check carefully, which branch is needed
 319   double w_plus_abs = NAN, w_minus_abs = NAN; // Used only if err != NULL
 320   QPMS_ASSERT(maxn >= 0);
 321   // These are used to fill the first two elements for recurrence
 322   if(!bigimz) {
 323     complex double w_plus  = Faddeeva_w(+z/(2*sqrtx) + I*sqrtx, 0);
 324     complex double w_minus = Faddeeva_w(-z/(2*sqrtx) + I*sqrtx, 0);
 325     if (maxn >= 0)
 326       target[0] = 0.5 * M_SQRTPI * expfac * (w_minus + w_plus);
 327     if (maxn >= 1)
 328       target[1] = I / z * M_SQRTPI * expfac * (w_minus - w_plus);
 329     if(err) { w_plus_abs = cabs(w_plus); w_minus_abs = cabs(w_minus); }
 330   } else {
 331     /* A different strategy to avoid double overflow using the formula
 332      * w(y) = exp(-y*y) * erfc(-I*y):
 333      * Labeling
 334      * ž_± = ±z/(2*sqrtx) + I * sqrtx
 335      * and expfac = exp(ž**2), where ž**2 = -x + (z*z/4/x),
 336      * we have
 337      * expfac * w(ž_±) = exp(ž**2 - ž_±**2) erfc(-I * ž_±)
 338      *                 =       exp(∓ I * z) erfc(-I * ž_±)
 339      */
 340     complex double w_plus_n =  cexp(- I * z) * Faddeeva_erfc(-I * (+z/(2*sqrtx) + I*sqrtx), 0);
 341     complex double w_minus_n = cexp(+ I * z) * Faddeeva_erfc(-I * (-z/(2*sqrtx) + I*sqrtx), 0);
 342     if (maxn >= 0)
 343       target[0] = 0.5 * M_SQRTPI * (w_minus_n + w_plus_n);
 344     if (maxn >= 1)
 345       target[1] = I / z * M_SQRTPI * (w_minus_n - w_plus_n);
 346   }
 347   for(int n = 1; n < maxn; ++n) { // The rest via recurrence
 348     // TODO The cpow(x, 0.5 - n) might perhaps better be replaced with a recurrently computed variant
 349     target[n+1] = -(4 / (z*z)) * (-(0.5 - n) * target[n] + target[n-1] - sqrtx * cpow(x, -n) * expfac);
 350     if(isnan(creal(target[n+1])) || isnan(cimag(target[n+1]))) {
 351       QPMS_WARN("Encountered NaN.");
 352     }
 353   }
 354   if (err) {
 355     // The error estimates for library math functions are based on
 356     // https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
 357     // and are not guaranteed to be extremely precise.
 358     // The error estimate for Faddeeva's functions is based on the package's authors at
 359     // http://ab-initio.mit.edu/wiki/index.php/Faddeeva_Package
 360     // "we find that the accuracy is typically at at least 13 significant digits in both the real and imaginary parts"
 361     // FIXME the error estimate seems might be off by several orders of magnitude (try parameters x = -3, z = 0.5, maxn=20)
 362     // FIXME the error estimate does not take into account the alternative recurrence init. formula (bigimz)
 363     double expfac_abs = cabs(expfac);
 364     double w_plus_err = w_plus_abs * 1e-13, w_minus_err = w_minus_abs * 1e-13; // LPTODO argument error contrib.
 365     double expfac_err = expfac_abs * (4 * DBL_EPSILON); // LPTODO add argument error contrib.
 366     double z_abs = cabs(z);
 367     double z_err = z_abs * DBL_EPSILON;
 368     double x_abs = cabs(x);
 369     if (maxn >= 0)
 370       err[0] = 0.5 * M_SQRTPI * (expfac_abs * (w_minus_err + w_plus_err) + (w_minus_abs + w_plus_abs) * expfac_err);
 371     if (maxn >= 1)
 372       err[1] = 2 * err[0] / z_abs + cabs(target[1]) * z_err / (z_abs*z_abs);
 373     for(int n = 1; n < maxn; ++n) {
 374       err[n+1] = (2 * cabs(target[n+1]) / z_abs + 4 * ((0.5+n) * err[n] + err[n-1] +
 375             pow(x_abs, 0.5 - n) * (2*DBL_EPSILON * expfac_abs + expfac_err))  // LPTODO not ideal, pow() call is an overkill
 376             ) * z_err / (z_abs*z_abs);
 377     }
 378   }
 379 }
 380
 381
 382 void ewald3_2_sigma_long_Delta_series(complex double *target, double *err,
 383     int maxn, complex double x, int xbranch, complex double z) {
 384   complex double w = 0.25*z*z;
 385   double w_abs = cabs(w);
 386   int maxk;
 387   if (w_abs == 0)
 388     maxk = 0; // Delta is equal to the respective incomplete Gamma functions
 389   else {
 390     // Estimate a suitable maximum k, using Stirling's formula, so that w**maxk / maxk! is less than DBL_EPSILON
 391     // This implementation is quite stupid, but it is still cheap compared to the actual computation, so LPTODO better one
 392     maxk = 1;
 393     double log_w_abs = log(w_abs);
 394     while (maxk * (log_w_abs - log(maxk) + 1) >= -DBL_MANT_DIG)
 395       ++maxk;
 396   }
 397   // TODO asserts on maxn, maxk
 398
 399   complex double *Gammas;
 400   double *Gammas_err = NULL, *Gammas_abs = NULL;
 401   QPMS_CRASHING_CALLOC(Gammas, maxk+maxn+1, sizeof(*Gammas));
 402   if(err) {
 403     QPMS_CRASHING_CALLOC(Gammas_err, maxk+maxn+1, sizeof(*Gammas_err));
 404     QPMS_CRASHING_MALLOC(Gammas_abs, (maxk+maxn+1) * sizeof(*Gammas_abs));
 405   }
 406
 407   for(int j = 0; j <= maxn+maxk; ++j) {
 408     qpms_csf_result g;
 409     QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(0.5-j, x, xbranch, &g));
 410     Gammas[j] = g.val;
 411     if(err) {
 412       Gammas_abs[j] = cabs(g.val);
 413       Gammas_err[j] = g.err;
 414     }
 415   }
 416
 417   for(int n = 0; n <= maxn; ++n) target[n] = 0;
 418   if(err) for(int n = 0; n <= maxn; ++n) err[n] = 0;
 419
 420   complex double wpowk_over_fack = 1.;
 421   double wpowk_over_fack_abs = 1.;
 422   for(int k = 0; k <= maxk; ++k, wpowk_over_fack *= w/k) { // TODO? Kahan sum, Horner's method?
 423     // Also TODO? for small n, continue for higher k if possible/needed
 424     for(int n = 0; n <= maxn; ++n) {
 425       target[n] += Gammas[n+k] * wpowk_over_fack;
 426       if(err) {
 427         // DBL_EPSILON might not be the best estimate here, but...
 428         err[n] += wpowk_over_fack_abs * Gammas_err[n+k] + DBL_EPSILON * Gammas_abs[n+k];
 429         wpowk_over_fack_abs *= w_abs / (k+1);
 430       }
 431     }
 432   }
 433
 434   // TODO add an error estimate for the k-cutoff!!!
 435
 436   free(Gammas);
 437   free(Gammas_err);
 438   free(Gammas_abs);
 439 }
 440
 441
 442 void ewald3_2_sigma_long_Delta(complex double *target, double *err,
 443     int maxn, complex double x, int xbranch, complex double z) {
 444   double absz = cabs(z);
 445   if (absz < 2.) // TODO take into account also the other parameters
 446     ewald3_2_sigma_long_Delta_series(target, err, maxn, x, xbranch, z);
 447   else
 448     ewald3_2_sigma_long_Delta_recurrent(target, err, maxn, x, xbranch, z,
 449         fabs(cimag(z)) > DELTA_RECURRENT_EXPOVERFLOW_LIMIT);
 450 }
 451