#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>
// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#define M_SQRTPI 1.7724538509055160272981674833411452L
// sloppy implementation of factorial
// We prefer to avoid tgamma/lgamma, as their errors are about 4+ bits
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  } else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}
// sloppy implementation of double factorial n!!
static inline double double_factorial(int n) {
  if (n % 2) { // odd, (2*k - 1)!! = 2**k * Γ(k + 0.5) / sqrt(π)
    const int k = n / 2 + 1;
    return pow(2, k) * tgamma(k + 0.5) / M_SQRTPI;
  } else { // even, n!! = 2**(n/2) * (n/2)!
    const int k = n / 2;
    return pow(2, k) * factorial(k);
  }
}
// sloppy implementation of (n/2)! = Γ(n/2 + 1)
// It is _slightly_ more precise than a direct call of tgamma for small odd n
static inline double factorial_of_half(const int n2) {
  if (n2 % 2 == 0) return factorial(n2/2);
  else {
    if (n2 <= 50) { // odd, use (k - 0.5)! = Γ(k + 0.5) = 2**(-k) (2*k - 1)!! sqrt(π) for small n2
      const int k = n2 / 2 + 1;
      double fac2 = 1;
      for(int j = 2*k - 1; j > 0; j -= 2)
        fac2 *= j;
      return fac2 * pow(2, -k) * M_SQRTPI;
    }
    else return tgamma(1. + 0.5*n2);
  }
}
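
// Quick sanity check of the conventions above (illustrative values, not part of the original code):
// factorial_of_half(4) = 2! = 2, and factorial_of_half(5) = Γ(3.5) = (5/2)(3/2)(1/2)√π = 15√π/8 ≈ 3.3234.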
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }
/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_pq}$ term in [1,(4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;

// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
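
// With the AGNOSTIC convention the spherical-harmonic normalisation is thus not baked into
// s1_constfacs; it enters only at evaluation time, through the precomputed legendre0 /
// legendre_plus1 / legendre_minus1 arrays (together with the e^{im\alpha} factors) below.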
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;
  // ----- the "xy-plane constants" ------
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
    else
      c->s1_jMaxes[y] = -1;
  }
  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }
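
  // (Layout recap: s1_constfacs is a ragged array packed into the single s1_constfacs_base
  // allocation; row y holds the j = 0 .. s1_jMaxes[y] coefficients of the 2D long-range sum
  // for multipole index y, while rows with m + n odd stay NULL because those coefficients
  // vanish identically.)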
  // ------ the "z-axis constants" -----
  size_t s1_constfacs_1Dz_sz;
  const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
  s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
                                   : isq(sz_n_lMax);
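  // (Counting check: the loop below stores n/2 + 1 coefficients for each n = 0 .. lMax,
  // i.e. sum_{n=0}^{lMax} (n/2 + 1) terms in total, which evaluates to isq(sz_n_lMax) for even
  // lMax and to isq(sz_n_lMax) + sz_n_lMax for odd lMax, exactly the expression above;
  // the assert after the loop re-checks this.)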
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);
  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));
void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if (err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
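
// (For orientation: under the GSL "none" normalisation legendre0[Y_0^0] equals 1, so the above
// evaluates Γ(-1/2, -(k/(2η))²) / (2√π), presumably the p = q self-interaction term of the
// Ewald sum, with the incomplete-Γ branch chosen as discussed around [Linton, p. 642].)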
// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}
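
// (Likely rationale for the special-casing above: cpow(0, z) is typically evaluated as
// cexp(z*clog(0)), and clog(0) has real part -inf, so the intermediate inf*0 products yield
// NaN instead of the limit values 1 and 0 returned explicitly here.)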
int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }
#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON to speed up the calculation.
   * This obviously can currently work only for "suitable PGen"s that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
  double rbeta_pq_prev;
  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];
  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }
    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);
    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq/k);
      complex double x = gamma_pq*k/(2*eta);
      complex double x2 = x*x;
      if(particle_shift.z == 0) {
        for(qpms_l_t j = 0; j <= lMax/2; ++j) {
          qpms_csf_result Gam;
          int retval = complex_gamma_inc_e(0.5-j, x2,
              // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
              QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
          Gamma_pq[j] = Gam.val;
          if(err) Gamma_pq_err[j] = Gam.err;
        }
        if (latdim & LAT1D)
          factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
      } else if (latdim & LAT2D) {
        // TODO check the branches/phases!
        complex double z = I * k * gamma_pq * particle_shift.z;
        ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, 0 /* FIXME */, z);
      } else
        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
    }
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((m+n) % 2 != 0) // odd coefficients are zero.
          continue;
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum errors?
        assert((n-abs(m))/2 == c->s1_jMaxes[y]);
        for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
          complex double summand = cpow_0lim_zi(rbeta_pq/k, n-2*j)
            * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] below (GGG) after error computation
            * c->s1_constfacs[y][j];
          if(err) {
            // FIXME include also other errors than Gamma_pq's relative error
            kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
          }
          summand *= Gamma_pq[j]; // GGG
          ckahanadd(&jsum, &jsum_c, summand);
        }
        jsum *= phasefac * factor1d; // PFC
        ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
        if(err) kahanadd(err + y, err_c + y, jsum_err);
      }

    rbeta_pq_prev = rbeta_pq;
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_21_xy_sigma_long_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP

#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
  ;
#endif
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= cabs(commonfac);
  free(target_c);
  free(err_c);
  return 0;
}
// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyways
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum))
  qpms_csf_result Gamma_pq[lMax/2+1];
  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);
    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta)); // ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum errors?
      for(qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = pow(rbeta_mu/k, n-2*j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
              c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
              (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *    -I**(n+1) (-1)**j * n!
         *   --------------------------
         *   j! * 2**(2*j) * (n - 2*j)!
         */

        // FIXME include also other errors than Gamma_pq's relative error
        kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if(err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  free(target_c);
  free(err_c);
  return 0;
}
int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
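
// (I.e. sigma2_integrand(ξ) = exp(-(Rξ)² + k²/(4ξ²)) ξ^(2n); integrated from η to ∞ by
// gsl_integration_qagiu below, this is presumably the integral referred to as (4.6) in the
// quadrature-parameter comment at the top of this file.)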
static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
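
// (Why normfac: substituting ξ -> R0·ξ in ∫_η^∞ exp(-(rξ)² + (k/(2ξ))²) ξ^(2n) dξ turns the
// parameters into p.R = r/R0, p.k = k·R0 and the lower limit into η·R0, and the rescaled
// integral equals R0^(2n+1) times the original one; multiplying by R0^(-2n-1) undoes that.
// With R0 = r this simply normalises p.R to 1.)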
// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice,
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the Legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }
#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  PGenSphReturnData pgen_retdata;
  double r_pq_shifted_prev;
  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];
  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }
    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;

    // if the radius is the same as in previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);

    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;
    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags & PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
    }
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { //2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-non-zero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if (err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }

    r_pq_shifted_prev = r_pq_shifted;
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_sigma_short_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP

#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
  ;
#endif

  gsl_integration_workspace_free(workspace);