8 #include "tiny_inlines.h"
9 #include "qpms_error.h"
10 #include <gsl/gsl_integration.h>
11 #include <gsl/gsl_errno.h>
12 #include <gsl/gsl_sf_legendre.h>
13 #include <gsl/gsl_sf_expint.h>
15 // parameters for the quadrature of integral in (4.6)
16 #ifndef INTEGRATION_WORKSPACE_LIMIT
17 #define INTEGRATION_WORKSPACE_LIMIT 30000
20 #ifndef INTEGRATION_EPSABS
21 #define INTEGRATION_EPSABS 1e-13
24 #ifndef INTEGRATION_EPSREL
25 #define INTEGRATION_EPSREL 1e-13
29 #define M_SQRTPI 1.7724538509055160272981674833411452L
// sloppy implementation of factorial
// We prefer to avoid tgamma/lgamma, as their errors are about 4+ bits
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}

// sloppy implementation of double factorial n!!
static inline double double_factorial(int n) {
  if (n % 2) { // odd, (2*k - 1)!! = 2**k * Γ(k + 0.5) / sqrt(π)
    const int k = n / 2 + 1;
    return pow(2, k) * tgamma(k + 0.5) / M_SQRTPI;
  } else { // even, n!! = 2**(n/2) * (n/2)!
    const int k = n / 2;
    return pow(2, k) * factorial(k);
  }
}

// sloppy implementation of (n/2)! = Γ(n/2 + 1)
// It is _slightly_ more precise than a direct call of tgamma for small odd n
static inline double factorial_of_half(const int n2) {
  if (n2 % 2 == 0) return factorial(n2/2);
  else {
    if (n2 <= 50) { // odd, use (k - 0.5)! = Γ(k + 0.5) = 2**(-k) (2*k - 1)!! sqrt(π) for small n2
      const int k = n2 / 2 + 1;
      double fac2 = 1;
      for(int j = 2*k - 1; j > 0; j -= 2)
        fac2 *= j;
      return fac2 * pow(2, -k) * M_SQRTPI;
    }
    else return tgamma(1. + 0.5*n2);
  }
}
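
/* A minimal consistency-check sketch for the helpers above (not part of the
 * original code; the EWALD_FACTORIAL_SELFTEST guard is hypothetical). It only
 * uses the identities n! = Γ(n+1), (n/2)! = Γ(n/2 + 1) and the direct product
 * definition of n!! that the comments above rely on. */
#ifdef EWALD_FACTORIAL_SELFTEST
static void factorial_selftest(void) {
  for (int n = 0; n <= 20; ++n) {
    // n! against Γ(n+1)
    assert(fabs(factorial(n) - tgamma(n + 1)) <= 1e-12 * tgamma(n + 1));
    // (n/2)! against Γ(n/2 + 1)
    assert(fabs(factorial_of_half(n) - tgamma(0.5*n + 1)) <= 1e-12 * tgamma(0.5*n + 1));
    // n!! against the direct product n * (n-2) * (n-4) * ...
    double df = 1;
    for (int i = n; i > 0; i -= 2) df *= i;
    assert(fabs(double_factorial(n) - df) <= 1e-12 * df);
  }
}
#endif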
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }

/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_pq}$ term in [1,(4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;

// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;

  // ----- the "xy-plane constants" ------
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }
  // ----- New generation 2D-in-3D constants ------
  // TODO it is not necessary to treat +|m| and -|m| cases separately
  // N.B. currently, this is only valid for EWALD32_CONSTANTS_AGNOSTIC (NOT CHECKED!)
  c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));

  size_t S1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2;
          (s <= 2 * j) && (s <= L_M);
          s += 2) {
        ++S1_constfacs_sz;
      }
    }
  }

  c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
  size_t S1_constfacs_sz_cumsum = 0; // second count
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI
      * factorial((n-m)/2) * factorial((n+m)/2);
    c->S1_constfacs[y] = c->S1_constfacs_base + S1_constfacs_sz_cumsum;
    size_t coeffs_per_y = 0;
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2;
          (s <= 2 * j) && (s <= L_M);
          s += 2) {
        c->S1_constfacs_base[S1_constfacs_sz_cumsum] =
          yfactor * min1pow(j)
          / factorial(2*j-s) / factorial(s - j)
          / factorial_of_half(n - m - s) / factorial_of_half(n + m - s);
        ++S1_constfacs_sz_cumsum;
      }
    }
    S1_constfacs_sz_cumsum += coeffs_per_y;
  }
  QPMS_ASSERT(S1_constfacs_sz_cumsum == S1_constfacs_sz);
  c->S1_constfacs[c->nelem_sc] = c->S1_constfacs_base + S1_constfacs_sz; // For easier limit checks
  // ------ the "z-axis constants" -----
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
      : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);
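  /* Illustration of the size formula above: for lMax = 3 the loops visit the
   * (n, j) pairs (0,0), (1,0), (2,0), (2,1), (3,0), (3,1), i.e. 6 coefficients,
   * and indeed sz_n_lMax = 3/2 + 1 = 2 gives isq(2) + 2 = 6 for odd lMax. */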
  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use the GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));

  return c;
}
void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->S1_constfacs);
  free(c->S1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
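
/* Hypothetical usage sketch (illustrative only; the csphase value is an
 * assumption, not taken from the original sources): the constants object is
 * built once per lMax and Condon-Shortley phase and then shared by the
 * σ-summation routines below.
 *
 *   qpms_ewald3_constants_t *ec = qpms_ewald3_constants_init(lMax, -1);
 *   // ... pass ec to ewald3_sigma0() / ewald3_sigma_long() / ewald3_sigma_short() ...
 *   qpms_ewald3_constants_free(ec);
 */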
int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if (err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}
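
/* Illustration of the special-cased limits (these follow directly from the
 * wrapper above; cpow() itself evaluates 0**z via clog(0), i.e. on the
 * logarithm's singularity):
 *   cpow_0lim_zi(0.0, 0) == 1
 *   cpow_0lim_zi(0.0, 3) == 0
 *   cpow_0lim_zi(x,   z) == cpow(x, z)  otherwise
 */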
int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON to speed up the calculation.
   * This obviously can currently work only for "suitable PGen"s that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  if (k_is_real)
    assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double rbeta_pq_prev;
#endif
  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I?*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];

  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }

    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);

    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq/k);
      complex double x = gamma_pq*k/(2*eta);
      complex double x2 = x*x;
      if(particle_shift.z == 0) {
        for(qpms_l_t j = 0; j <= lMax/2; ++j) {
          qpms_csf_result Gam;
          int retval = complex_gamma_inc_e(0.5-j, x2,
              // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
              QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
          Gamma_pq[j] = Gam.val;
          if(err) Gamma_pq_err[j] = Gam.err;
        }
        if (latdim & LAT1D)
          factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
      } else if (latdim & LAT2D) {
        // TODO check the branches/phases!
        complex double z = I * k * gamma_pq * particle_shift.z;
        ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, 0 /* FIXME */, z);
      } else
        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
    }

    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
          continue; // nothing needed, already done by memset
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        size_t constidx = 0; // constants offset
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        if (particle_shift.z == 0) { // TODO remove when the general case is stable and tested
          assert((n-abs(m))/2 == c->s1_jMaxes[y]);
          for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
            complex double summand = cpow_0lim_zi(rbeta_pq/k, n-2*j)
              * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
              * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] below (GGG) after error computation
              * c->s1_constfacs[y][j];
            if(err) {
              // FIXME include also other errors than Gamma_pq's relative error
              kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
            }
            summand *= Gamma_pq[j]; // GGG
            ckahanadd(&jsum, &jsum_c, summand);
          }
          jsum *= phasefac * factor1d; // PFC
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        } else { // particle_shift.z != 0
          const qpms_l_t L_M = n - abs(m);
          for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
            complex double ssum, ssum_c; ckahaninit(&ssum, &ssum_c);
            // TODO errors of ssum
            // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
            for(qpms_l_t s = j + (L_M - j) % 2;
                (s <= 2 * j) && (s <= L_M);
                s += 2) {
              complex double ssummand = c->S1_constfacs[y][constidx]
                * cpow(-k * particle_shift.z, 2*j - s) * cpow_0lim_zi(rbeta_pq / k, n - s);
              ckahanadd(&ssum, &ssum_c, ssummand);
              ++constidx;
            }
            const complex double jfactor = e_imalpha_pq * Gamma_pq[j] * cpow(gamma_pq, 2*j - 1);
            if (err) { // FIXME include also other sources of error than Gamma_pq's relative error
              double jfactor_err = Gamma_pq_err[j] * pow(cabs(gamma_pq), 2*j - 1);
              kahanadd(&jsum_err, &jsum_err_c, jfactor_err * cabs(ssum));
            }
            complex double jsummand = jfactor * ssum;
            ckahanadd(&jsum, &jsum_c, jsummand);
          }
          jsum *= phasefac; // factor1d not here, off-axis sums not implemented/allowed.
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        }
      }
#ifndef NDEBUG
    rbeta_pq_prev = rbeta_pq;
#endif
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_21_xy_sigma_long_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
#endif

  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= cabs(commonfac);
  free(target_c);
  free(err_c);
  return 0;
}
// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyways
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum)).
  qpms_csf_result Gamma_pq[lMax/2+1];

  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta));// ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }

    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
      for(qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = pow(rbeta_mu/k, n-2*j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
              c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
              (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *       -I**(n+1) * (-1)**j * n!
         *     ----------------------------
         *     j! * 2**(2*j) * (n - 2*j)!
         */
        if(err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if(err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP

  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  free(target_c);
  free(err_c);
  return 0;
}
int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
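
/* Combined with gsl_integration_qagiu() in ewald32_sr_integral() below, this
 * integrand yields the short-range radial integral
 *
 *   ∫_η^∞ exp(-(r ξ)² + (k/(2ξ))²) ξ^(2n) dξ ,
 *
 * since the R0 rescaling done there and the final normfac = R0^(-2n-1) factor
 * cancel each other out (cf. the "(4.6)" quadrature comment near the top of
 * this file).
 */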
static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0;  // i.e. p.R = 1

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0;  // i.e. p.R = 1

  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
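
/* Hypothetical usage sketch of the two short-range integral helpers above
 * (the numeric values are illustrative assumptions, not from the original):
 *
 *   gsl_integration_workspace *ws =
 *       gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);
 *   complex double res; double abserr;
 *   int stat = ewald32_sr_integral_ck(1.2, 2*M_PI + 0.1*I, 3, 1.5,
 *                                     &res, &abserr, ws);
 *   // stat is the GSL return code of the last quadrature call
 *   gsl_integration_workspace_free(ws);
 */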
int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double r_pq_shifted_prev;
#endif
  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;

    // if the radius is the same as in previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);

    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags&PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
        break;
    }

    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { //2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-non-zero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if (err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
                * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }
#ifndef NDEBUG
    r_pq_shifted_prev = r_pq_shifted;
#endif
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_sigma_short_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
#endif

  gsl_integration_workspace_free(workspace);