Fix qpms_vswf_set_reindex().
[qpms.git] / qpms / ewaldsf.c
blobc2ac92d782e3520c72560ee949069912a27bbe69
1 #include "ewald.h"
2 #include <gsl/gsl_sf_gamma.h>
3 #include <gsl/gsl_sf_expint.h>
4 #include <gsl/gsl_sf_exp.h>
5 #include <gsl/gsl_sf_result.h>
6 #include <gsl/gsl_errno.h>
7 #include <gsl/gsl_machine.h> // Maybe I should rather use DBL_EPSILON instead of GSL_DBL_EPSILON.
8 #include "kahansum.h"
9 #include <math.h>
10 #include <complex.h>
11 //#include <gsl/gsl_integration.h>
12 #include <gsl/gsl_errno.h>
13 #include <float.h>
14 #include <stdbool.h>
15 #include <Faddeeva.h>
16 #include "tiny_inlines.h"
18 // Some magic constants
20 #ifndef COMPLEXPART_REL_ZERO_LIMIT
21 #define COMPLEXPART_REL_ZERO_LIMIT 1e-14
22 #endif
24 #ifndef DELTA_RECURRENT_EXPOVERFLOW_LIMIT
25 #define DELTA_RECURRENT_EXPOVERFLOW_LIMIT 10.
26 #endif
28 gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler;
30 void IgnoreUnderflowsGSLErrorHandler (const char * reason,
31 const char * file,
32 const int line,
33 const int gsl_errno) {
34 if (gsl_errno == GSL_EUNDRFLW)
35 return;
37 gsl_stream_printf ("ERROR", file, line, reason);
39 fflush(stdout);
40 fprintf (stderr, "Underflow-ignoring error handler invoked.\n");
41 fflush(stderr);
43 abort();
46 // DLMF 8.7.3 (latter expression) for complex second argument
47 // BTW if a is large negative, it might take a while to evaluate.
48 // This can't be used for non-positive integer a due to
49 // Г(a) in the formula.
50 int cx_gamma_inc_series_e(const double a, const complex double z, qpms_csf_result * result) {
51 if (a <= 0 && a == (int) a) {
52 result->val = NAN + NAN*I;
53 result->err = NAN;
54 GSL_ERROR("Undefined for non-positive integer values", GSL_EDOM);
56 gsl_sf_result fullgamma;
57 int retval = gsl_sf_gamma_e(a, &fullgamma);
58 if (GSL_EUNDRFLW == retval)
59 result->err += DBL_MIN;
60 else if (GSL_SUCCESS != retval){
61 result->val = NAN + NAN*I; result->err = NAN;
62 return retval;
65 complex double sumprefac = cpow(z, a) * cexp(-z);
66 double sumprefac_abs = cabs(sumprefac);
67 complex double sum, sumc; ckahaninit(&sum, &sumc);
68 double err, errc; kahaninit(&err, &errc);
70 bool breakswitch = false;
71 for (int k = 0; (!breakswitch) && (a + k + 1 <= GSL_SF_GAMMA_XMAX); ++k) {
72 gsl_sf_result fullgamma_ak;
73 if (GSL_SUCCESS != (retval = gsl_sf_gamma_e(a+k+1, &fullgamma_ak))) {
74 result->val = NAN + NAN*I; result->err = NAN;
75 return retval;
77 complex double summand = - cpow(z, k) / fullgamma_ak.val; // TODO test branch selection here with cimag(z) = -0.0
78 ckahanadd(&sum, &sumc, summand);
79 double summanderr = fabs(fullgamma_ak.err * cabs(summand / fullgamma_ak.val));
80 // TODO add also the rounding error
81 kahanadd(&err, &errc, summanderr);
82 // TODO put some smarter cutoff break here?
83 if (a + k >= 18 && (cabs(summand) < err || cabs(summand) < DBL_EPSILON))
84 breakswitch = true;
86 sum *= sumprefac; // Not sure if not breaking the Kahan summation here
87 sumc *= sumprefac;
88 err *= sumprefac_abs;
89 errc *= sumprefac_abs;
90 ckahanadd(&sum, &sumc, 1.);
91 kahanadd(&err, &errc, DBL_EPSILON);
92 result->err = cabs(sum) * fullgamma.err + err * fabs(fullgamma.val);
93 result->val = sum * fullgamma.val; // + sumc*fullgamma.val???
94 if (breakswitch)
95 return GSL_SUCCESS;
96 else GSL_ERROR("Overflow; the absolute value of the z argument is probably too large.", GSL_ETOL); // maybe different error code...
99 /* Continued fraction which occurs in evaluation
100 * of Q(a,z) or Gamma(a,z).
101 * Borrowed from GSL and adapted for complex z.
103 * 1 (1-a)/z 1/z (2-a)/z 2/z (3-a)/z
104 * F(a,z) = ---- ------- ----- -------- ----- -------- ...
105 * 1 + 1 + 1 + 1 + 1 + 1 +
108 static int
109 cx_gamma_inc_F_CF(const double a, const complex double z, qpms_csf_result * result)
111 const int nmax = 5000;
112 const double small = DBL_EPSILON * DBL_EPSILON * DBL_EPSILON;
114 complex double hn = 1.0; /* convergent */
115 complex double Cn = 1.0 / small;
116 complex double Dn = 1.0;
117 int n;
119 /* n == 1 has a_1, b_1, b_0 independent of a,z,
120 so that has been done by hand */
121 for ( n = 2 ; n < nmax ; n++ )
123 complex double an;
124 complex double delta;
126 if(n % 2)
127 an = 0.5*(n-1)/z;
128 else
129 an = (0.5*n-a)/z;
131 Dn = 1.0 + an * Dn;
132 if ( cabs(Dn) < small )
133 Dn = small;
134 Cn = 1.0 + an/Cn;
135 if ( cabs(Cn) < small )
136 Cn = small;
137 Dn = 1.0 / Dn;
138 delta = Cn * Dn;
139 hn *= delta;
140 if(cabs(delta-1.0) < DBL_EPSILON) break;
143 result->val = hn;
144 result->err = 2.0*GSL_DBL_EPSILON * cabs(hn);
145 result->err += GSL_DBL_EPSILON * (2.0 + 0.5*n) * cabs(result->val);
147 if(n == nmax)
148 GSL_ERROR ("error in CF for F(a,x)", GSL_EMAXITER);
149 else
150 return GSL_SUCCESS;
153 // Incomplete gamma fuction with complex second argument as continued fraction.
154 int cx_gamma_inc_CF_e(const double a, const complex double z, qpms_csf_result *result)
156 qpms_csf_result F;
157 gsl_sf_result pre;
158 const complex double am1lgz = (a-1.0)*clog(z); // TODO check branches
159 const int stat_F = cx_gamma_inc_F_CF(a, z, &F);
160 const int stat_E = gsl_sf_exp_err_e(creal(am1lgz - z), GSL_DBL_EPSILON*cabs(am1lgz), &pre);
161 complex double cpre = pre.val * cexp(I*cimag(am1lgz - z));// TODO add the error estimate for this
162 //complex double cpre = cpow(z, a-1) * cexp(-z);
165 result->val = F.val * cpre;
166 result->err = fabs(F.err * pre.val) + fabs(F.val * pre.err);
167 result->err += 2.0 * GSL_DBL_EPSILON * fabs(result->val);
169 return GSL_ERROR_SELECT_2(stat_F, stat_E);
173 // Incomplete gamma function for complex second argument.
174 int complex_gamma_inc_e(double a, complex double x, int m, qpms_csf_result *result) {
175 int retval;
176 if (creal(x) >= 0 &&
177 (0 == fabs(cimag(x)) || // x is real positive; just use the real fun
178 fabs(cimag(x)) < fabs(creal(x)) * COMPLEXPART_REL_ZERO_LIMIT)) {
179 gsl_sf_result real_gamma_inc_result;
180 retval = gsl_sf_gamma_inc_e(a, creal(x), &real_gamma_inc_result);
181 result->val = real_gamma_inc_result.val;
182 result->err = real_gamma_inc_result.err;
183 } else if (creal(x) >= 0 && cabs(x) > 0.5)
184 retval = cx_gamma_inc_CF_e(a, x, result);
185 else if (QPMS_LIKELY(a > 0 || fmod(a, 1.0)))
186 retval = cx_gamma_inc_series_e(a, x, result);
187 else
188 /* FIXME cx_gamma_inc_series_e() probably fails for non-positive integer a.
189 * This does not matter for 2D lattices in 3D space,
190 * but it might cause problems in the other cases.
192 QPMS_NOT_IMPLEMENTED("Incomplete Gamma function with non-positive integer a.");
193 if (m) { // Non-principal branch.
194 /* This might be sub-optimal, as Γ(a) has probably been already evaluated
195 * somewhere in the functions called above. */
196 gsl_sf_result fullgamma;
197 int retval_fg = gsl_sf_gamma_e(a, &fullgamma);
198 if (GSL_EUNDRFLW == retval_fg)
199 fullgamma.err += DBL_MIN;
200 else if (GSL_SUCCESS != retval_fg){
201 result->val = NAN + NAN*I; result->err = NAN;
202 return GSL_ERROR_SELECT_2(retval_fg, retval);
204 complex double f = cexp(2*m*M_PI*a*I);
205 result->val *= f;
206 f = -f + 1;
207 result->err += cabs(f) * fullgamma.err;
208 result->val += f * fullgamma.val;
210 return retval;
213 // Exponential integral for complex argument; !UNTESTED! and probably not needed, as I expressed everything in terms of inc. gammas anyways.
214 int complex_expint_n_e(int n, complex double x, qpms_csf_result *result) {
215 if (creal(x) >= 0 &&
216 (0 == fabs(cimag(x)) || // x is real positive; just use the real fun
217 fabs(cimag(x)) < fabs(creal(x)) * COMPLEXPART_REL_ZERO_LIMIT)) {
218 gsl_sf_result real_expint_result;
219 int retval = gsl_sf_expint_En_e(n, creal(x), &real_expint_result);
220 result->val = real_expint_result.val;
221 result->err = real_expint_result.err;
222 return retval;
223 } else {
224 int retval = complex_gamma_inc_e(-n+1, x, 0, result);
225 complex double f = cpow(x, 2*n-2);
226 result->val *= f;
227 result->err *= cabs(f);
228 return retval;
232 // inspired by GSL's hyperg_2F1_series
233 int hyperg_2F2_series(const double a, const double b, const double c, const double d,
234 const double x, gsl_sf_result *result
237 double sum_pos = 1.0;
238 double sum_neg = 0.0;
239 double del_pos = 1.0;
240 double del_neg = 0.0;
241 double del = 1.0;
242 double del_prev;
243 double k = 0.0;
244 int i = 0;
246 if(fabs(c) < GSL_DBL_EPSILON || fabs(d) < GSL_DBL_EPSILON) {
247 result->val = NAN;
248 result->err = INFINITY;
249 GSL_ERROR ("error", GSL_EDOM);
252 do {
253 if(++i > 30000) {
254 result->val = sum_pos - sum_neg;
255 result->err = del_pos + del_neg;
256 result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
257 result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k)+1.0) * fabs(result->val);
258 GSL_ERROR ("error", GSL_EMAXITER);
260 del_prev = del;
261 del *= (a+k)*(b+k) * x / ((c+k) * (d+k) * (k+1.0)); /* Gauss series */
263 if(del > 0.0) {
264 del_pos = del;
265 sum_pos += del;
267 else if(del == 0.0) {
268 /* Exact termination (a or b was a negative integer).
270 del_pos = 0.0;
271 del_neg = 0.0;
272 break;
274 else {
275 del_neg = -del;
276 sum_neg -= del;
280 * This stopping criteria is taken from the thesis
281 * "Computation of Hypergeometic Functions" by J. Pearson, pg. 31
282 * (http://people.maths.ox.ac.uk/porterm/research/pearson_final.pdf)
283 * and fixes bug #45926
285 if (fabs(del_prev / (sum_pos - sum_neg)) < GSL_DBL_EPSILON &&
286 fabs(del / (sum_pos - sum_neg)) < GSL_DBL_EPSILON)
287 break;
289 k += 1.0;
290 } while(fabs((del_pos + del_neg)/(sum_pos-sum_neg)) > GSL_DBL_EPSILON);
292 result->val = sum_pos - sum_neg;
293 result->err = del_pos + del_neg;
294 result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
295 result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k) + 1.0) * fabs(result->val);
297 return GSL_SUCCESS;
300 // Complex square root with branch selection
301 static inline complex double csqrt_branch(complex double x, int xbranch) {
302 return csqrt(x) * min1pow(xbranch);
306 // The Delta_n factor from [Kambe II], Appendix 3
307 // \f[ \Delta_n = \int_n^\infty t^{-1/2 - n} \exp(-t + z^2/(4t))\ud t \f]
308 /* If |Im z| is big, Faddeeva_z might cause double overflow. In such case,
309 * use bigimz = true to use a slightly different formula to initialise
310 * the first two elements.
312 * The actual choice is done outside this function in order to enable
313 * testing/comparison of the results.
315 void ewald3_2_sigma_long_Delta_recurrent(complex double *target, double *err,
316 int maxn, complex double x, int xbranch, complex double z, bool bigimz) {
317 complex double expfac = cexp(-x + 0.25 * z*z / x);
318 complex double sqrtx = csqrt_branch(x, xbranch); // TODO check carefully, which branch is needed
319 double w_plus_abs = NAN, w_minus_abs = NAN; // Used only if err != NULL
320 QPMS_ASSERT(maxn >= 0);
321 // These are used to fill the first two elements for recurrence
322 if(!bigimz) {
323 complex double w_plus = Faddeeva_w(+z/(2*sqrtx) + I*sqrtx, 0);
324 complex double w_minus = Faddeeva_w(-z/(2*sqrtx) + I*sqrtx, 0);
325 if (maxn >= 0)
326 target[0] = 0.5 * M_SQRTPI * expfac * (w_minus + w_plus);
327 if (maxn >= 1)
328 target[1] = I / z * M_SQRTPI * expfac * (w_minus - w_plus);
329 if(err) { w_plus_abs = cabs(w_plus); w_minus_abs = cabs(w_minus); }
330 } else {
331 /* A different strategy to avoid double overflow using the formula
332 * w(y) = exp(-y*y) * erfc(-I*y):
333 * Labeling
334 * ž_± = ±z/(2*sqrtx) + I * sqrtx
335 * and expfac = exp(ž**2), where ž**2 = -x + (z*z/4/x),
336 * we have
337 * expfac * w(ž_±) = exp(ž**2 - ž_±**2) erfc(-I * ž_±)
338 * = exp(∓ I * z) erfc(-I * ž_±)
340 complex double w_plus_n = cexp(- I * z) * Faddeeva_erfc(-I * (+z/(2*sqrtx) + I*sqrtx), 0);
341 complex double w_minus_n = cexp(+ I * z) * Faddeeva_erfc(-I * (-z/(2*sqrtx) + I*sqrtx), 0);
342 if (maxn >= 0)
343 target[0] = 0.5 * M_SQRTPI * (w_minus_n + w_plus_n);
344 if (maxn >= 1)
345 target[1] = I / z * M_SQRTPI * (w_minus_n - w_plus_n);
347 for(int n = 1; n < maxn; ++n) { // The rest via recurrence
348 // TODO The cpow(x, 0.5 - n) might perhaps better be replaced with a recurrently computed variant
349 target[n+1] = -(4 / (z*z)) * (-(0.5 - n) * target[n] + target[n-1] - sqrtx * cpow(x, -n) * expfac);
350 if(isnan(creal(target[n+1])) || isnan(cimag(target[n+1]))) {
351 QPMS_WARN("Encountered NaN.");
354 if (err) {
355 // The error estimates for library math functions are based on
356 // https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
357 // and are not guaranteed to be extremely precise.
358 // The error estimate for Faddeeva's functions is based on the package's authors at
359 // http://ab-initio.mit.edu/wiki/index.php/Faddeeva_Package
360 // "we find that the accuracy is typically at at least 13 significant digits in both the real and imaginary parts"
361 // FIXME the error estimate seems might be off by several orders of magnitude (try parameters x = -3, z = 0.5, maxn=20)
362 // FIXME the error estimate does not take into account the alternative recurrence init. formula (bigimz)
363 double expfac_abs = cabs(expfac);
364 double w_plus_err = w_plus_abs * 1e-13, w_minus_err = w_minus_abs * 1e-13; // LPTODO argument error contrib.
365 double expfac_err = expfac_abs * (4 * DBL_EPSILON); // LPTODO add argument error contrib.
366 double z_abs = cabs(z);
367 double z_err = z_abs * DBL_EPSILON;
368 double x_abs = cabs(x);
369 if (maxn >= 0)
370 err[0] = 0.5 * M_SQRTPI * (expfac_abs * (w_minus_err + w_plus_err) + (w_minus_abs + w_plus_abs) * expfac_err);
371 if (maxn >= 1)
372 err[1] = 2 * err[0] / z_abs + cabs(target[1]) * z_err / (z_abs*z_abs);
373 for(int n = 1; n < maxn; ++n) {
374 err[n+1] = (2 * cabs(target[n+1]) / z_abs + 4 * ((0.5+n) * err[n] + err[n-1] +
375 pow(x_abs, 0.5 - n) * (2*DBL_EPSILON * expfac_abs + expfac_err)) // LPTODO not ideal, pow() call is an overkill
376 ) * z_err / (z_abs*z_abs);
382 void ewald3_2_sigma_long_Delta_series(complex double *target, double *err,
383 int maxn, complex double x, int xbranch, complex double z) {
384 complex double w = 0.25*z*z;
385 double w_abs = cabs(w);
386 int maxk;
387 if (w_abs == 0)
388 maxk = 0; // Delta is equal to the respective incomplete Gamma functions
389 else {
390 // Estimate a suitable maximum k, using Stirling's formula, so that w**maxk / maxk! is less than DBL_EPSILON
391 // This implementation is quite stupid, but it is still cheap compared to the actual computation, so LPTODO better one
392 maxk = 1;
393 double log_w_abs = log(w_abs);
394 while (maxk * (log_w_abs - log(maxk) + 1) >= -DBL_MANT_DIG)
395 ++maxk;
397 // TODO asserts on maxn, maxk
399 complex double *Gammas;
400 double *Gammas_err = NULL, *Gammas_abs = NULL;
401 QPMS_CRASHING_CALLOC(Gammas, maxk+maxn+1, sizeof(*Gammas));
402 if(err) {
403 QPMS_CRASHING_CALLOC(Gammas_err, maxk+maxn+1, sizeof(*Gammas_err));
404 QPMS_CRASHING_MALLOC(Gammas_abs, (maxk+maxn+1) * sizeof(*Gammas_abs));
407 for(int j = 0; j <= maxn+maxk; ++j) {
408 qpms_csf_result g;
409 QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(0.5-j, x, xbranch, &g));
410 Gammas[j] = g.val;
411 if(err) {
412 Gammas_abs[j] = cabs(g.val);
413 Gammas_err[j] = g.err;
417 for(int n = 0; n <= maxn; ++n) target[n] = 0;
418 if(err) for(int n = 0; n <= maxn; ++n) err[n] = 0;
420 complex double wpowk_over_fack = 1.;
421 double wpowk_over_fack_abs = 1.;
422 for(int k = 0; k <= maxk; ++k, wpowk_over_fack *= w/k) { // TODO? Kahan sum, Horner's method?
423 // Also TODO? for small n, continue for higher k if possible/needed
424 for(int n = 0; n <= maxn; ++n) {
425 target[n] += Gammas[n+k] * wpowk_over_fack;
426 if(err) {
427 // DBL_EPSILON might not be the best estimate here, but...
428 err[n] += wpowk_over_fack_abs * Gammas_err[n+k] + DBL_EPSILON * Gammas_abs[n+k];
429 wpowk_over_fack_abs *= w_abs / (k+1);
434 // TODO add an error estimate for the k-cutoff!!!
436 free(Gammas);
437 free(Gammas_err);
438 free(Gammas_abs);
442 void ewald3_2_sigma_long_Delta(complex double *target, double *err,
443 int maxn, complex double x, int xbranch, complex double z) {
444 double absz = cabs(z);
445 if (absz < 2.) // TODO take into account also the other parameters
446 ewald3_2_sigma_long_Delta_series(target, err, maxn, x, xbranch, z);
447 else
448 ewald3_2_sigma_long_Delta_recurrent(target, err, maxn, x, xbranch, z,
449 fabs(cimag(z)) > DELTA_RECURRENT_EXPOVERFLOW_LIMIT);