#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>

// parameters for the quadrature of the integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#define M_SQRTPI 1.7724538509055160272981674833411452L
// sloppy implementation of factorial
// We prefer to avoid tgamma/lgamma, as their errors are about 4+ bits
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}
// sloppy implementation of double factorial n!!
static inline double double_factorial(int n) {
  assert(n >= 0);
  if (n % 2) { // odd, (2*k - 1)!! = 2**k * Γ(k + 0.5) / sqrt(π)
    const int k = n / 2 + 1;
    return pow(2, k) * tgamma(k + 0.5) / M_SQRTPI;
  } else { // even, n!! = 2**(n/2) * (n/2)!
    const int k = n / 2;
    return pow(2, k) * factorial(k);
  }
}
// sloppy implementation of (n/2)! = Γ(n/2 + 1)
// It is _slightly_ more precise than direct call of tgamma for small odd n
static inline double factorial_of_half(const int n2) {
  if (n2 % 2 == 0) return factorial(n2/2);
  else {
    if (n2 <= 50) { // odd, use (k - 0.5)! = Γ(k + 0.5) = 2**(-k) (2*k - 1)!! sqrt(π) for small n2
      const int k = n2 / 2 + 1;
      double fac2 = 1;
      for(int j = 2*k - 1; j > 0; j -= 2)
        fac2 *= j;
      return fac2 * pow(2, -k) * M_SQRTPI;
    }
    else return tgamma(1. + 0.5*n2);
  }
}
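
/* Quick sanity check of the identities used above (illustrative only, not part of the build):
 * 5!! = 15, and the odd-case formula with k = 5/2 + 1 = 3 gives
 *   2**3 * Γ(3.5) / sqrt(π) = 8 * (15 sqrt(π) / 8) / sqrt(π) = 15.
 * Likewise factorial_of_half(5) = Γ(3.5) = 15 sqrt(π) / 8 ≈ 3.3234,
 * which the small-n2 loop reproduces as fac2 * 2**(-3) * sqrt(π) with fac2 = 5*3*1 = 15.
 */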
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }
/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_pq}$ term in [1,(4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;

// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;
  // ----- the "xy-plane constants" ------
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }
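
  /* The s1_constfacs layout is a single contiguous buffer (s1_constfacs_base) with
   * per-y row pointers; rows exist only for even m + n and hold s1_jMaxes[y] + 1 entries.
   * Minimal traversal sketch (illustrative only; do_something_with() is hypothetical):
   *
   *   for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
   *     if (c->s1_jMaxes[y] < 0) continue; // odd m + n, no coefficients
   *     for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j)
   *       do_something_with(c->s1_constfacs[y][j]);
   *   }
   */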
  // ----- New generation 2D-in-3D constants ------
  // TODO it is not necessary to treat +|m| and -|m| cases separately
  // N.B. currently, this is only valid for EWALD32_CONSTANTS_AGNOSTIC (NOT CHECKED!)
  c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
  size_t S1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2;
          (s <= 2 * j) && (s <= L_M);
          s += 2)
        ++S1_constfacs_sz;
    }
  }

  c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
  size_t S1_constfacs_sz_cumsum = 0; // second count
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI
      * factorial((n-m)/2) * factorial((n+m)/2);
    c->S1_constfacs[y] = c->S1_constfacs_base + S1_constfacs_sz_cumsum;
    size_t coeffs_per_y = 0;
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2;
          (s <= 2 * j) && (s <= L_M);
          s += 2) {
        c->S1_constfacs_base[S1_constfacs_sz_cumsum] =
            yfactor * min1pow(j)
          / factorial(2*j-s) / factorial(s - j)
          / factorial_of_half(n - m - s) / factorial_of_half(n + m - s);
        ++S1_constfacs_sz_cumsum;
      }
    }
    S1_constfacs_sz_cumsum += coeffs_per_y;
  }
  QPMS_ASSERT(S1_constfacs_sz_cumsum == S1_constfacs_sz);
  c->S1_constfacs[c->nelem_sc] = c->S1_constfacs_base + S1_constfacs_sz; // For easier limit checks
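
  /* The S1_constfacs rows are filled with a single running offset in the same (j, s)
   * order as the nested loops above; the summation code in ewald3_21_xy_sigma_long()
   * below advances its constidx counter through exactly the same loop structure, so the
   * two stay in sync. Read-out sketch (illustrative only; use_coefficient() is hypothetical):
   *
   *   size_t constidx = 0;
   *   const qpms_l_t L_M = n - abs(m);
   *   for (qpms_l_t j = 0; j <= L_M; ++j)
   *     for (qpms_l_t s = j + (L_M - j) % 2; (s <= 2*j) && (s <= L_M); s += 2)
   *       use_coefficient(c->S1_constfacs[y][constidx++]);
   */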
  // ------ the "z-axis constants" -----
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
                                     : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);
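
  /* Example values of the closed form -I**(n+1) (-1)**j n! / (j! 2**(2j) (n-2j)!)
   * used just above (illustrative check): for n = 2,
   *   j = 0:  -I**3 * 2!/(0! * 1 * 2!) =  I
   *   j = 1:  -I**3 * (-1) * 2!/(1! * 4 * 0!) = -I/2
   * so s1_constfacs_1Dz[2] holds {I, -0.5*I}.
   */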
  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use the GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));

  return c;
}
void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->S1_constfacs);
  free(c->S1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
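
/* Typical lifecycle of the constants object (illustrative sketch; the csphase argument
 * follows the GSL Condon-Shortley phase convention, i.e. +1 or -1):
 *
 *   qpms_ewald3_constants_t *ec = qpms_ewald3_constants_init(lMax, -1);
 *   // ... use ec in ewald3_sigma0() / ewald3_sigma_long() / ewald3_sigma_short() ...
 *   qpms_ewald3_constants_free(ec);
 */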
int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if (err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
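
/* In other words, the code above evaluates
 *
 *   sigma_0 = Γ(-1/2, -(k/(2η))²) * P_0^0(0) / (2 sqrt(π)),
 *
 * with the branch of the incomplete Γ chosen as described in the comment, and the
 * error estimate inherited from the relative error reported by complex_gamma_inc_e().
 */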
// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}
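
/* Intended limit values (illustrative): cpow_0lim_zi(0, 0) == 1 and cpow_0lim_zi(0, z) == 0
 * for z > 0, avoiding the NaNs that cpow() may produce for a zero base; any other input
 * falls through to cpow(x, z).
 */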
int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON to speed up the calculation.
   * This obviously can currently work only for "suitable PGen"s that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
  double rbeta_pq_prev;

  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I?*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];
  // cpow() is expensive, so we want to save and reuse these too:
  complex double minus_k_shiftz_pow[2*lMax + 1]; // __[i] = cpow(-k * particle_shift.z, i)
  {
    long complex double x = 1;
    for(qpms_l_t i = 0; i <= 2*lMax; ++i) {
      minus_k_shiftz_pow[i] = x;
      x *= -k * particle_shift.z;
    }
  }
  complex double rbeta_pq_div_k_pow[lMax + 1]; // __[i] = cpow_0lim_zi(rbeta_pq/k, i)
  complex double gamma_pq_powm1[2*lMax + 1]; // __[i] = cpow(gamma_pq, i-1)
  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }

    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);
    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {

    gamma_pq = clilgamma(rbeta_pq/k);
    { // fill gamma_pq_powm1[] and rbeta_pq_div_k_pow[]
      long complex double x = 1./gamma_pq;
      for(qpms_l_t i = 0; i <= 2*lMax; ++i) {
        gamma_pq_powm1[i] = x;
        x *= gamma_pq;
      }
      for(qpms_l_t i = 0; i <= lMax; ++i) // not fastest, but foolproof
        rbeta_pq_div_k_pow[i] = cpow_0lim_zi(rbeta_pq / k, i);
    }

    complex double x = gamma_pq*k/(2*eta);
    complex double x2 = x*x;
    if(particle_shift.z == 0) {
      for(qpms_l_t j = 0; j <= lMax/2; ++j) {
        qpms_csf_result Gam;
        int retval = complex_gamma_inc_e(0.5-j, x2,
            // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
            QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
        QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
        Gamma_pq[j] = Gam.val;
        if(err) Gamma_pq_err[j] = Gam.err;
      }
      if (latdim & LAT1D)
        factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
    } else if (latdim & LAT2D) {
      // TODO check the branches/phases!
      complex double z = I * k * gamma_pq * particle_shift.z;
      ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, 0 /* FIXME */, z);
    } else
      QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
          continue; // nothing needed, already done by memset
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        size_t constidx = 0; // constants offset
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        if (particle_shift.z == 0) { // TODO remove when the general case is stable and tested
          assert((n-abs(m))/2 == c->s1_jMaxes[y]);
          for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
            complex double summand = rbeta_pq_div_k_pow[n-2*j]
              * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
              * gamma_pq_powm1[2*j] // * Gamma_pq[j] below (GGG) after error computation
              * c->s1_constfacs[y][j];
            if(err) {
              // FIXME include also other errors than Gamma_pq's relative error
              kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
            }
            summand *= Gamma_pq[j]; // GGG
            ckahanadd(&jsum, &jsum_c, summand);
          }
          jsum *= phasefac * factor1d; // PFC
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        } else { // particle_shift.z != 0
          const qpms_l_t L_M = n - abs(m);
          for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
            complex double ssum, ssum_c; ckahaninit(&ssum, &ssum_c);
            // TODO errors of ssum
            // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
            for(qpms_l_t s = j + (L_M - j) % 2;
                (s <= 2 * j) && (s <= L_M);
                s += 2) {
              complex double ssummand = c->S1_constfacs[y][constidx]
                * minus_k_shiftz_pow[2*j - s] * rbeta_pq_div_k_pow[n - s];
              ckahanadd(&ssum, &ssum_c, ssummand);
              ++constidx;
            }
            const complex double jfactor = e_imalpha_pq * Gamma_pq[j] * gamma_pq_powm1[2*j];
            if (err) { // FIXME include also other sources of error than Gamma_pq's relative error
              double jfactor_err = Gamma_pq_err[j] * pow(cabs(gamma_pq), 2*j - 1);
              kahanadd(&jsum_err, &jsum_err_c, jfactor_err * cabs(ssum));
            }
            complex double jsummand = jfactor * ssum;
            ckahanadd(&jsum, &jsum_c, jsummand);
          }
          jsum *= phasefac; // factor1d not here, off-axis sums not implemented/allowed.
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        }
      }
    rbeta_pq_prev = rbeta_pq;
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_21_xy_sigma_long_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
  ;
#endif

  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= cabs(commonfac);
  free(err_c);
  free(target_c);
  return 0;
}
// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyways
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum).
  qpms_csf_result Gamma_pq[lMax/2+1];

  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta));// ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
      for(qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = pow(rbeta_mu/k, n-2*j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
              c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
              (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *    -I**(n+1) (-1)**j * n!
         *   --------------------------
         *   j! * 2**(2*j) * (n - 2*j)!
         */
        if (err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if(err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  free(err_c);
  free(target_c);
  return 0;
}
int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
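
/* How the pieces are meant to fit together (illustrative sketch only; pgen_K, pgen_R,
 * beta, particle_shift and the output buffers are hypothetical variables the caller
 * provides, typically via the lattice point generators):
 *
 *   complex double sigma[nelem_sc], sigma_part[nelem_sc];
 *   ewald3_sigma_long (sigma,      NULL, c, eta, k, unitcell_volume, latdim,
 *                      pgen_K, false, beta, particle_shift);
 *   ewald3_sigma_short(sigma_part, NULL, c, eta, k, latdim,
 *                      pgen_R, false, beta, particle_shift);
 *   for (qpms_y_t y = 0; y < nelem_sc; ++y) sigma[y] += sigma_part[y];
 *   // plus the ewald3_sigma0() term added to the y == 0 element for the on-site
 *   // (r == 0) contribution when applicable.
 */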
struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
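
/* sigma2_integrand(ksi) is the integrand of the short-range (sigma_2) integral
 *
 *   int_eta^infinity exp(-(R ksi)^2 + k^2/(4 ksi^2)) ksi^(2n) d ksi,
 *
 * evaluated below by gsl_integration_qagiu() on the semi-infinite interval
 * [eta, infinity) (cf. the short-range part of the Ewald sum, e.g. [1]).
 */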
static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1
  eta *= R0; // rescale the lower limit consistently with p.k and p.R

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
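
/* The R0 rescaling above amounts to the substitution ksi -> ksi/R0 in the integral:
 * assuming p.R = r/R0 (= 1 here), p.k = k*R0 and a lower limit of eta*R0, multiplying
 * the quadrature result by R0^(-2n-1) recovers the original integral
 * int_eta^inf exp(-(r ksi)^2 + k^2/(4 ksi^2)) ksi^(2n) d ksi.
 */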
// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1
  eta *= R0; // the same rescaling as in ewald32_sr_integral()

  gsl_function F;
  F.params = &p;

  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  PGenSphReturnData pgen_retdata;
  double r_pq_shifted_prev;

  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;
    // if the radius is the same as in previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);
    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags &PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
    }
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { //2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-non-zero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if (err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }
    r_pq_shifted_prev = r_pq_shifted;
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_sigma_short_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
  ;
#endif

  gsl_integration_workspace_free(workspace);