WIP Ewald 2D in 3D general z != 0 constant factors.
qpms/ewald.c
#include "ewald.h"
#include <stdlib.h>
#include "indexing.h"
#include "kahansum.h"
#include <assert.h>
#include <string.h>
#include <complex.h>
#include <math.h>   // for M_PI, M_PI_2, M_SQRT1_2, M_2_SQRTPI, INFINITY
#include <float.h>  // for DBL_EPSILON
#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>
// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#ifndef M_SQRTPI
#define M_SQRTPI 1.7724538509055160272981674833411452L
#endif
// sloppy implementation of factorial
// We prefer to avoid tgamma/lgamma, as their errors are about 4+ bits
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above.)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}
// sloppy implementation of the double factorial n!!
static inline double double_factorial(int n) {
  assert(n >= 0);
  if (n <= 25) {
    double fac = 1;
    while (n > 0) {
      fac *= n;
      n -= 2;
    }
    return fac;
  } else {
    if (n % 2) { // odd, (2*k - 1)!! = 2**k * Γ(k + 0.5) / sqrt(π)
      const int k = n / 2 + 1;
      return pow(2, k) * tgamma(k + 0.5) / M_SQRTPI;
    } else { // even, n!! = 2**(n/2) * (n/2)!
      const int k = n / 2;
      return pow(2, k) * factorial(k);
    }
  }
}
// sloppy implementation of (n/2)! = Γ(n/2 + 1)
// It is _slightly_ more precise than a direct call of tgamma for small odd n
static inline double factorial_of_half(const int n2) {
  assert(n2 >= 0);
  if (n2 % 2 == 0) return factorial(n2/2);
  else {
    if (n2 <= 50) { // odd, use (k - 0.5)! = Γ(k + 0.5) = 2**(-k) * (2*k - 1)!! * sqrt(π) for small n2
      const int k = n2 / 2 + 1;
      double fac2 = 1;
      for (int j = 2*k - 1; j > 0; j -= 2)
        fac2 *= j;
      return fac2 * pow(2, -k) * M_SQRTPI;
    }
    else return tgamma(1. + 0.5*n2);
  }
}
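
// Illustrative sanity checks for the helpers above (hypothetical unit-test assertions,
// only stating values implied by the definitions):
//
//   assert(factorial(5) == 120.);
//   assert(double_factorial(7) == 105.);                            // 7 * 5 * 3 * 1
//   assert(fabs(factorial_of_half(5) - 1.875 * M_SQRTPI) < 1e-12);  // (5/2)! = Γ(3.5) = (15/8) * sqrt(π) ≈ 3.3234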
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }
/// Metadata describing the normalisation conventions used in qpms_ewald3_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_{pq}}$ term in [1, (4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;

// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;

  // ----- the "xy-plane constants" ------
  // determine sizes
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n - abs(m)) / 2);
    else
      c->s1_jMaxes[y] = -1;
  }
  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j) {
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j))
              * pow(0.5, 2*j-1);
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }
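
  /* For reference, in the EWALD32_CONSTANTS_AGNOSTIC case used above
   * (y corresponding to an (m, n) pair with m + n even):
   *
   *                        -2 * I**(n+1) * sqrt(π) * ((n-m)/2)! * ((n+m)/2)! * (-1)**j
   *   s1_constfacs[y][j] = -----------------------------------------------------------
   *                                 j! * ((n-m)/2 - j)! * ((n+m)/2 - j)!
   */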
  // ------ the "z-axis constants" ------
  // determine sizes
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
                                     : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1) * sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);
  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64 kB of extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));
  return c;
}
void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
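
/* Illustrative usage sketch: a constants object is created once per lMax and
 * Condon-Shortley phase choice, shared by the σ-evaluation routines below, and
 * freed afterwards, e.g.
 *
 *   qpms_ewald3_constants_t *c = qpms_ewald3_constants_init(lMax, csphase); // csphase as in gsl_sf_legendre_array_e()
 *   // ... pass c to ewald3_sigma0() / ewald3_sigma_long() / ewald3_sigma_short() ...
 *   qpms_ewald3_constants_free(c);
 *
 * The csphase value should match the convention used elsewhere (cf. translations.c).
 */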
int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
    // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
    QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if (err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
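
/* Summary of the above: with z = -(k/(2η))², the function evaluates
 *   σ₀ = P_0^0(0) · Γ(-1/2, z) / (2·sqrt(π)),   P_0^0(0) = 1,
 * on the Γ-function branch selected above. A minimal (hypothetical) call:
 *
 *   complex double s0; double s0_err;
 *   ewald3_sigma0(&s0, &s0_err, c, eta, k);
 */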
// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}
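
// Note: the special cases above matter e.g. at the Γ point, where the base rbeta_pq/k may be
// exactly zero while the exponent n - 2*j is zero or positive; a plain cpow(0, 0) is not
// guaranteed to return 1 (it may yield NaN via cexp(0 * clog(0))), and cpow(0, positive)
// need not be exactly 0 either, whereas the limits wanted in the sums below are
// 0**0 == 1 and 0**positive == 0.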
int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;
  // Manual init of the Ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON, to speed up the calculation.
   * This can currently work only for "suitable" PGens that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * at run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  if (k_is_real)
    assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double rbeta_pq_prev;
#endif
  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I(?) * κ * γ * particle_shift.z
  complex double x; // κ * γ / (2 * η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (otherwise it stays 1)
  // space for the Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];
  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }
    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused
    // CHOOSE POINT END

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart, particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    // the radius can be recycled only if the generator guarantees it has not changed
    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);
    // R-DEPENDENT BEGIN
    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq/k);
      x = gamma_pq*k/(2*eta);
      const complex double x2 = x*x;
      if (particle_shift.z == 0) {
        for (qpms_l_t j = 0; j <= lMax/2; ++j) {
          qpms_csf_result Gam;
          int retval = complex_gamma_inc_e(0.5-j, x2,
            // we take the branch which is principal for the Re x² < 0, Im x² < 0 quadrant, cf. [Linton, p. 642 in the middle]
            QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
          Gamma_pq[j] = Gam.val;
          if (err) Gamma_pq_err[j] = Gam.err;
        }
        if (latdim & LAT1D)
          factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
      } else if (latdim & LAT2D) {
        QPMS_UNTESTED;
        // TODO check the branches/phases!
        z = I * k * gamma_pq * particle_shift.z;
        ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, 0 /* FIXME */, z);
      } else {
        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line are not implemented");
      }
    }
    // R-DEPENDENT END
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in an array,
    // and just fetched for each n, m pair
    for (qpms_l_t n = 0; n <= lMax; ++n)
      for (qpms_m_t m = -n; m <= n; ++m) {
        if ((m+n) % 2 != 0) // odd coefficients are zero.
          continue;
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum the errors?
        assert((n-abs(m))/2 == c->s1_jMaxes[y]);
        for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
          complex double summand = cpow_0lim_zi(rbeta_pq/k, n-2*j)
            * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n, abs(m))] * min1pow_m_neg(m) // This line can actually go outside the j-loop
            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] below (GGG) after the error computation
            * c->s1_constfacs[y][j];
          if (err) {
            // FIXME include also other errors than Gamma_pq's relative error
            kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
          }
          summand *= Gamma_pq[j]; // GGG
          ckahanadd(&jsum, &jsum_c, summand);
        }
        jsum *= phasefac * factor1d; // PFC
        ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
        if (err) kahanadd(err + y, err_c + y, jsum_err);
      }
#ifndef NDEBUG
    rbeta_pq_prev = rbeta_pq;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if (lsum < DBL_EPSILON * cursum_min)
        goto ewald3_21_xy_sigma_long_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
#endif
  free(err_c);
  free(target_c);
  for (qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if (err)
    for (qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}
// 1D chain along the z axis; not many optimisations here, as the same
// shifted beta radius could be recycled only once anyway
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  // Manual init of the Ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for the Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum))
  qpms_csf_result Gamma_pq[lMax/2+1];
  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);
    // CHOOSE POINT END

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
    // R-DEPENDENT BEGIN
    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta)); // ... so the square (this) is in fact real.
    for (qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
        // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
        QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }
    // R-DEPENDENT END
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in an array,
    // and just fetched for each n
    for (qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum the errors?
      for (qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = cpow_0lim_zi(rbeta_mu/k, n-2*j) // covers the rbeta_mu == 0, n == 2*j case
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
               c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
               (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after the error computation
          * c->s1_constfacs_1Dz[n][j];
          /* s1_constfacs_1Dz[n][j] =
           *
           *    -I**(n+1) * (-1)**j * n!
           *   ----------------------------
           *   j! * 2**(2*j) * (n - 2*j)!
           */
        if (err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if (err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP
  free(err_c);
  free(target_c);
  for (qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if (err)
    for (qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}
int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside the xy plane) Ewald sum.");
}
struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
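
/* Note on the rescaling above: the quantity wanted is
 *
 *   ∫_η^∞ exp(-(r·ξ)² + (k/(2ξ))²) ξ^(2n) dξ .
 *
 * Substituting ξ' = R0·ξ (and scaling the parameters as above: η → η·R0, k → k·R0,
 * r → r/R0, so that p.R == 1) turns the integral computed by gsl_integration_qagiu()
 * into R0^(2n+1) times the wanted one; multiplying the result and the error estimate
 * by normfac = R0^(-2n-1) undoes that scaling.
 */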
// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice,
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the Legendre polynomials (used only in the general case)

  // Manual init of the Ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double r_pq_shifted_prev;
#endif
  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    // CHOOSE POINT END
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;

    // if the radius is the same as in the previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);
    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double *legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags & PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
        break;
    }
    for (qpms_l_t n = 0; n <= lMax; ++n) {
      const complex double prefacn = - I * (k_is_real ? pow(2./creal(k), n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres + n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m) {
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if (speedup_regime == LAT_2D_IN_3D_XYONLY) { // 2D lattice inside the xy plane
          if ((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // coefficients with non-zero m are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m, n);
        if (err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }
#ifndef NDEBUG
    r_pq_shifted_prev = r_pq_shifted;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if (lsum < DBL_EPSILON * cursum_min)
        goto ewald3_sigma_short_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
#endif

  gsl_integration_workspace_free(workspace);
  if (err) free(err_c);
  free(target_c);
  return 0;
}
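
/* Illustrative usage sketch: a caller typically combines the short-range part, the
 * long-range part and - for the origin (self-interaction) term only - the σ₀
 * contribution. The PGen generators (pgen_R for direct-lattice points, pgen_K for
 * reciprocal-lattice points) are constructed elsewhere in qpms and are assumed to
 * exist here, as are k, eta, latdim, unitcell_volume, beta and particle_shift.
 *
 *   complex double sigma[nelem_sc], sigma_lr[nelem_sc];
 *   double serr[nelem_sc], serr_lr[nelem_sc];
 *   ewald3_sigma_short(sigma, serr, c, eta, k, latdim, pgen_R, false, beta, particle_shift);
 *   ewald3_sigma_long(sigma_lr, serr_lr, c, eta, k, unitcell_volume, latdim,
 *       pgen_K, false, beta, particle_shift);
 *   for (qpms_y_t y = 0; y < nelem_sc; ++y) sigma[y] += sigma_lr[y];
 *   if (particle_shift.x == 0 && particle_shift.y == 0 && particle_shift.z == 0) {
 *     complex double s0; double s0_err;
 *     ewald3_sigma0(&s0, &s0_err, c, eta, k);
 *     sigma[0] += s0; // the y == 0 element corresponding to (n, m) == (0, 0)
 *   }
 *
 * The exact combination rules and convention-dependent prefactors live in the callers
 * (cf. translations.c), so treat the above only as a sketch.
 */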