#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>
// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#ifndef M_SQRTPI
#define M_SQRTPI 1.7724538509055160272981674833411452
#endif
// sloppy implementation of factorial
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }
/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_{pq}}$ term in [1, (4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;

// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;

  // ----- the "xy-plane constants" ------
  // determine the storage size first
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) // odd coefficients are zero
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n - abs(m)) / 2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j) {
        switch (type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }
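
  /* For the EWALD32_CONSTANTS_AGNOSTIC convention used here, the loop above should
   * thus fill (cf. notes/ewald.lyx):
   *
   *                             -2 * I**(n+1) * sqrt(pi) * ((n-m)/2)! * ((n+m)/2)! * (-1)**j
   *   s1_constfacs[y(m,n)][j] = ------------------------------------------------------------
   *                                        j! * ((n-m)/2 - j)! * ((n+m)/2 - j)!
   *
   * for even m+n and 0 <= j <= (n-|m|)/2; the remaining, point-dependent factors of
   * [1, (4.5)] (the incomplete Gamma, the powers of gamma_pq and beta_pq/k, and the
   * Y_n^m factor) are multiplied in at summation time in ewald3_21_xy_sigma_long() below.
   */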

  // ------ the "z-axis constants" -----
  // determine the storage size first
  size_t s1_constfacs_1Dz_sz;
  const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
  s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
      : isq(sz_n_lMax);

  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1) * sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch (type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);

  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use the GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));

  return c;
}

void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
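
/* A rough usage sketch (not a complete example; lMax and csphase are whatever the
 * caller uses elsewhere, csphase being +1 or -1 as in GSL's gsl_sf_legendre_array_e()):
 *
 *   qpms_ewald3_constants_t *c = qpms_ewald3_constants_init(lMax, csphase);
 *   // ... pass c to ewald3_sigma0() / ewald3_sigma_short() / ewald3_sigma_long() ...
 *   qpms_ewald3_constants_free(c);
 */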

int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if (err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
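
/* Since P_0^0 = 1, the value computed above should amount to
 * Gamma(-1/2, -(k/(2*eta))^2) / (2*sqrt(pi)), i.e. the constant "self" term of the
 * Ewald-split lattice sum, cf. [Linton, p. 642].
 */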

// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}
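
/* Rationale: a conforming cpow() may be implemented as cexp(z*clog(x)), so cpow(0, n)
 * can yield NaNs even for non-negative integer exponents; the special-casing above
 * keeps the rbeta_pq/k -> 0 limit finite. */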


int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }
#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON to speed up the calculation.
   * This obviously can currently work only for "suitable PGen"s that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
  double rbeta_pq_prev;

  // recycleable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];
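
  /* These per-point quantities are recomputed only when the point generator yields a
   * new radius |beta_pq| (cf. the PGEN_OLD_R flag test below); for generators that
   * return points grouped by equal radius this saves most of the incomplete Gamma
   * evaluations.
   */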

  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }

    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);

    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq/k);
      complex double x = gamma_pq*k/(2*eta);
      complex double x2 = x*x;
      if(particle_shift.z == 0) {
        for(qpms_l_t j = 0; j <= lMax/2; ++j) {
          qpms_csf_result Gam;
          int retval = complex_gamma_inc_e(0.5-j, x2,
              // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
              QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
          Gamma_pq[j] = Gam.val;
          if(err) Gamma_pq_err[j] = Gam.err;
        }
        if (latdim & LAT1D)
          factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
      } else if (latdim & LAT2D) {
        // TODO check the branches/phases!
        complex double z = I * k * gamma_pq * particle_shift.z;
        ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, z);
      } else
        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
    }
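
    /* At this point Gamma_pq[j] should hold the incomplete-Gamma-type factors of the
     * long-range sum: for particle_shift.z == 0 simply Gamma(1/2 - j, x^2) with
     * x = κ γ_pq / (2η) (branch chosen above), and for the z-shifted 2D case the
     * Delta_j functions evaluated by ewald3_2_sigma_long_Delta(); cf. notes/ewald.lyx.
     */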

    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((m+n) % 2 != 0) // odd coefficients are zero.
          continue;
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        assert((n-abs(m))/2 == c->s1_jMaxes[y]);
        for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
          complex double summand = cpow_0lim_zi(rbeta_pq/k, n-2*j)
            * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] below (GGG) after error computation
            * c->s1_constfacs[y][j];
          if(err) {
            // FIXME include also other errors than Gamma_pq's relative error
            kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
          }
          summand *= Gamma_pq[j]; // GGG
          ckahanadd(&jsum, &jsum_c, summand);
        }
        jsum *= phasefac * factor1d; // PFC
        ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
        if(err) kahanadd(err + y, err_c + y, jsum_err);
      }
    rbeta_pq_prev = rbeta_pq;
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_21_xy_sigma_long_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
#endif

  free(err_c);
  free(target_c);

  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}


// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyways
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum).)
  qpms_csf_result Gamma_pq[lMax/2+1];

  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta)); // ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }

    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
      for(qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = pow(rbeta_mu/k, n-2*j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
              c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
              (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *   -I**(n+1) * (-1)**j * n!
         *   --------------------------
         *   j! * 2**(2*j) * (n - 2*j)!
         */
        if(err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if(err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP

  free(err_c);
  free(target_c);

  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}


int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
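
/* Roughly speaking, the pieces fit together as in the usual Ewald split (cf.
 * notes/ewald.lyx and Linton's review): the lattice sum of the scalar translation
 * coefficients is recovered as
 *
 *   sigma = sigma_short + sigma_long (+ sigma0 for the n = m = 0 origin term),
 *
 * with ewald3_sigma_short() summing over the real-space lattice generated by pgen_R,
 * ewald3_sigma_long() over the reciprocal lattice generated by pgen_K, and
 * ewald3_sigma0() supplying the constant self-term; all three must be evaluated with
 * the same eta, k and constants from qpms_ewald3_constants_init().
 */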


struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
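
/* i.e. the integrand of the short-range integral [1, (4.6)]:
 *   exp(-(R*ksi)^2 + (k/(2*ksi))^2) * ksi^(2n),
 * to be integrated over ksi from eta to infinity (see ewald32_sr_integral() below).
 */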

static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0;  // i.e. p.R = 1

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
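
/* The rescaling by R0 = r above substitutes the integration variable by R0*ksi,
 * which is why the lower limit and k get multiplied by R0 and the result by
 * R0^(-2n-1); with R0 = r the Gaussian factor becomes exp(-(p.R*ksi)^2) = exp(-ksi^2),
 * keeping the quadrature well scaled regardless of the actual lattice-point distance r.
 */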

// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice,
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0;  // i.e. p.R = 1

  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}


int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }
#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  PGenSphReturnData pgen_retdata;
  double r_pq_shifted_prev;
  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;

    // if the radius is the same as in previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);

    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags & PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
        break;
    }

    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { //2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-non-zero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if(err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
                * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }
    r_pq_shifted_prev = r_pq_shifted;
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_sigma_short_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
#endif

  gsl_integration_workspace_free(workspace);
  free(err_c);
  free(target_c);
  return 0;
}
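
/* Practical note: the Ewald parameter eta passed to ewald3_sigma_short() and
 * ewald3_sigma_long() trades off how fast the real-space and reciprocal-space parts
 * converge; both functions must be called with the same eta (and the same k and
 * lattice data) for the split to sum back to the original lattice sum.
 */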