Implementation of the Δ_n factor as a series; error estimates.
[qpms.git] / qpms / ewald.c
#include "ewald.h"
#include <stdlib.h>
#include "indexing.h"
#include "kahansum.h"
#include <assert.h>
#include <string.h>
#include <complex.h>
#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>
// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#ifndef M_SQRTPI
#define M_SQRTPI 1.7724538509055160272981674833411452
#endif
// sloppy implementation of factorial
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above.)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}
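/* Sanity-check sketch (not part of the build): the two branches of factorial() can
 * be compared around the n = 20 crossover; tgamma(n + 1) is expected to agree with
 * the exact iterated product to within a few ulps there.  A hypothetical standalone
 * check could read:
 *
 *   #include <math.h>
 *   #include <stdio.h>
 *   int main(void) {
 *     for (int n = 18; n <= 22; ++n) {
 *       double f = 1;
 *       for (int i = 1; i <= n; ++i) f *= i;   // n! by repeated multiplication
 *       printf("%d! : %.17g vs tgamma: %.17g\n", n, f, tgamma(n + 1.));
 *     }
 *     return 0;
 *   }
 */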
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }
/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_{pq}}$ term in [1,(4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;
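/* For reference, with one common spherical-harmonic convention,
 *
 *   Y_n^m(pi/2, alpha) = sqrt( (2n+1)/(4*pi) * (n-m)!/(n+m)! ) * P_n^m(0) * e^{i*m*alpha},
 *
 * so replacing the bare e^{i*m*alpha_pq} of [1,(4.5)] by Y_n^m(pi/2, alpha_pq) moves the
 * normalisation- and csphase-dependent factors out of the precomputed constants; in the
 * summation loops below the P_n^|m|(0) values (legendre0) and the e^{i*m*alpha_pq} phase
 * are applied at run time instead.
 */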
// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;

  // ----- the "xy-plane constants" ------
  // determine sizes
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j))
              * pow(0.5, 2*j-1);
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }

  // ------ the "z-axis constants" -----
  // determine sizes
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
      : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);

  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. GSL) takes about 64 kB of extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));
  return c;
}
void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
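/* Usage sketch (hypothetical caller code; the numerical values are only illustrative):
 * the constants object is built once per lMax and csphase and shared by all subsequent
 * sigma evaluations in this file.
 *
 *   qpms_ewald3_constants_t *ec = qpms_ewald3_constants_init(3, -1); // lMax = 3, csphase = -1
 *   // ... pass ec as the `c` argument of ewald3_sigma0() / ewald3_sigma_long() /
 *   //     ewald3_sigma_short() below ...
 *   qpms_ewald3_constants_free(ec);
 */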
int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if(err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
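/* Written out, the expression evaluated above is (with P_0^0(0) = 1 stored in legendre0[0])
 *
 *   sigma_0 = Gamma(-1/2, -(k/(2*eta))^2) / (2*sqrt(pi)),
 *
 * with the branch of the incomplete Gamma function chosen as described in the comment
 * at the complex_gamma_inc_e() call.
 */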
// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // a naive cpow() would go through clog(0) and may produce NaNs
    return 0;
  else
    return cpow(x, z);
}
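/* Minimal illustration (hypothetical; the exact outcome of the naive call depends on
 * the libm implementation):
 *
 *   complex double a = cpow(0.0, 2);          // evaluated via exp(2*clog(0)), may contain NaN
 *   complex double b = cpow_0lim_zi(0.0, 2);  // always exactly 0, as needed for the k->0 limit below
 */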
int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON, to speed up the calculation.
   * This obviously can currently work only for "suitable" PGens that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * at run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  if (k_is_real)
    assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double rbeta_pq_prev;
#endif
  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];

  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }

    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

    // CHOOSE POINT END

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    // true unless the generator flagged the radius as repeated from the previous point
    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);

    // R-DEPENDENT BEGIN
    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq/k);
      complex double x = gamma_pq*k/(2*eta);
      complex double x2 = x*x;
      if(particle_shift.z == 0) {
        for(qpms_l_t j = 0; j <= lMax/2; ++j) {
          qpms_csf_result Gam;
          int retval = complex_gamma_inc_e(0.5-j, x2,
              // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
              QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
          Gamma_pq[j] = Gam.val;
          if(err) Gamma_pq_err[j] = Gam.err;
        }
        if (latdim & LAT1D)
          factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
      } else if (latdim & LAT2D) {
        QPMS_UNTESTED;
        // TODO check the branches/phases!
        complex double z = I * k * gamma_pq * particle_shift.z;
        ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, z);
      } else {
        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
      }
    }
    // R-DEPENDENT END

    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in an array,
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((m+n) % 2 != 0) // odd coefficients are zero.
          continue;
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum the errors?
        assert((n-abs(m))/2 == c->s1_jMaxes[y]);
        for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
          complex double summand = cpow_0lim_zi(rbeta_pq/k, n-2*j)
            * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside the j-loop
            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] below (GGG) after error computation
            * c->s1_constfacs[y][j];
          if(err) {
            // FIXME include also other errors than Gamma_pq's relative error
            kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
          }
          summand *= Gamma_pq[j]; // GGG
          ckahanadd(&jsum, &jsum_c, summand);
        }
        jsum *= phasefac * factor1d; // PFC
        ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
        if(err) kahanadd(err + y, err_c + y, jsum_err);
      }
#ifndef NDEBUG
    rbeta_pq_prev = rbeta_pq;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_21_xy_sigma_long_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
#endif

  free(err_c);
  free(target_c);
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}
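/* Structure of the per-point accumulation above, written out (this mirrors the code,
 * it is not quoted from a reference): for each (n, m) with n + m even and
 * y = qpms_mn2y_sc(m, n),
 *
 *   target[y] += e^{i K_pq . particle_shift} * factor1d *
 *       sum_{j=0}^{(n-|m|)/2}  s1_constfacs[y][j] * (|beta_pq|/k)^{n-2j}
 *           * e^{i m alpha_pq} * P_n^{|m|}(0) * min1pow_m_neg(m)
 *           * gamma_pq^{2j-1} * Gamma_pq[j],
 *
 * and the accumulated sum is finally multiplied by the common factor 1/(k^2 * unitcell_volume).
 */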
// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyway
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum)).
  qpms_csf_result Gamma_pq[lMax/2+1];

  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);
    // CHOOSE POINT END

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    // R-DEPENDENT BEGIN
    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta)); // ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }
    // R-DEPENDENT END
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in an array,
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum the errors?
      for(qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = pow(rbeta_mu/k, n-2*j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
             c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
             (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *   -I**(n+1) * (-1)**j * n!
         *   --------------------------
         *   j! * 2**(2*j) * (n - 2*j)!
         */

        if(err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if(err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP

  free(err_c);
  free(target_c);
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}
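/* The rewrite of the exponential integrals in terms of incomplete Gamma functions
 * mentioned at the Gamma_pq declaration above uses the standard identity
 *
 *   E_p(x) = x^(p-1) * Gamma(1 - p, x),
 *
 * so with p = j + 1/2 the exponential integrals E_{j+1/2} of the 1D z-axis long-range
 * sum map onto the Gamma(1/2 - j, z) values computed by complex_gamma_inc_e() in the
 * point loop.
 */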
int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
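/* Call sketch (hypothetical caller; the PGen construction, the constants object c and
 * the numerical inputs come from elsewhere in the library and are not shown here):
 *
 *   complex double sigma_lr[nelem_sc];
 *   double sigma_lr_err[nelem_sc];
 *   int retval = ewald3_sigma_long(sigma_lr, sigma_lr_err, c, eta, k,
 *       unitcell_area, LAT_2D_IN_3D_XYONLY, pgen_K,
 *       false, // pgen_K yields the unshifted reciprocal lattice; beta is added internally
 *       beta, particle_shift);
 */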
struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
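/* With the parameters set up in ewald32_sr_integral() below (R = 1, rescaled k and eta),
 * the quantity handed to the quadrature routine is
 *
 *   int_{eta}^{infinity} exp( -(R*xi)^2 + (k/(2*xi))^2 ) * xi^(2n) d xi ,
 *
 * i.e. the radial integral appearing in the short-range (real-space) part of the Ewald sum.
 */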
static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but keep it simple for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
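/* Why the rescaling by R0 works: substituting xi = xi'/R0 in the integral gives
 *
 *   int_{eta}^{inf} exp(-(r*xi)^2 + (k/(2*xi))^2) xi^(2n) d xi
 *     = R0^(-2n-1) * int_{eta*R0}^{inf} exp(-((r/R0)*xi')^2 + ((k*R0)/(2*xi'))^2) xi'^(2n) d xi',
 *
 * which is exactly what is evaluated above with p.R = r/R0 (= 1), p.k = k*R0, the lower
 * limit eta*R0, and the final multiplication by normfac = R0^(-2n-1).
 */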
// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice,
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but keep it simple for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the Legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double r_pq_shifted_prev;
#endif
  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    // CHOOSE POINT END
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimisations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;

    // if the radius is the same as in the previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);

    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags & PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
        break;
    }

    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { // 2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-nonzero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if(err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }

#ifndef NDEBUG
    r_pq_shifted_prev = r_pq_shifted;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_sigma_short_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
#endif

  gsl_integration_workspace_free(workspace);
  if(err) free(err_c);
  free(target_c);
  return 0;
}
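/* How the pieces of this file are typically combined (sketch only; the actual driver
 * lives elsewhere in QPMS and the sign/offset conventions are decided there):
 *
 *   complex double sigma[nelem_sc], sigma_sr[nelem_sc];
 *   double err_lr[nelem_sc], err_sr[nelem_sc];
 *   ewald3_sigma_long (sigma,    err_lr, c, eta, k, uc_volume, latdim, pgen_K, false, beta, pshift);
 *   ewald3_sigma_short(sigma_sr, err_sr, c, eta, k,            latdim, pgen_R, false, beta, pshift);
 *   for (qpms_y_t y = 0; y < nelem_sc; ++y) sigma[y] += sigma_sr[y];
 *   // and, when the lattice point at the origin is part of the sums, the ewald3_sigma0()
 *   // term additionally enters the y = 0 (l = m = 0) element.
 */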