2 #include "qpms_types.h"
3 #include "qpms_specfunc.h"
5 #include "translations.h"
6 #include "indexing.h" // TODO replace size_t and int with own index types here
8 #include <gsl/gsl_sf_legendre.h>
9 #include <gsl/gsl_sf_bessel.h>
10 #include "tiny_inlines.h"
11 #include "assert_cython_workaround.h"
13 #include <gsl/gsl_sf_coupling.h>
14 #include "qpms_error.h"
15 #include "normalisation.h"
16 #include "translations_inlines.h"
19 * Define macros with additional factors that "should not be there" according
20 * to the "original" formulae but are needed to work with my vswfs.
21 * (actually, I don't know whether the error is in using "wrong" implementation
22 * of vswfs, "wrong" implementation of Xu's translation coefficient formulae,
23 * error/inconsintency in Xu's paper or something else)
24 * Anyway, the zeroes give the correct _numerical_ values according to Xu's
25 * paper tables (without Xu's typos, of course), while
26 * the predefined macros give the correct translations of the VSWFs for the
27 * QPMS_NORMALIZATION_TAYLOR_CS norm.
29 #if !(defined AN0 || defined AN1 || defined AN2 || defined AN3)
30 #pragma message "using AN1 macro as default"
33 //#if !(defined AM0 || defined AM2)
36 #if !(defined BN0 || defined BN1 || defined BN2 || defined BN3)
37 #pragma message "using BN1 macro as default"
40 //#if !(defined BM0 || defined BM2)
43 //#if !(defined BF0 || defined BF1 || defined BF2 || defined BF3)
47 // if defined, the pointer B_multipliers[y] corresponds to the q = 1 element;
48 // otherwise, it corresponds to the q = 0 element, which should be identically zero
49 #ifdef QPMS_PACKED_B_MULTIPLIERS
56 // Translation operators for real sph. harm. based waves are not yet implemented...
57 static inline void TROPS_ONLY_EIMF_IMPLEMENTED(qpms_normalisation_t norm
) {
58 if (norm
& (QPMS_NORMALISATION_SPHARM_REAL
| QPMS_NORMALISATION_REVERSE_AZIMUTHAL_PHASE
))
59 QPMS_NOT_IMPLEMENTED("Translation operators for real or inverse complex spherical harmonics based waves are not implemented.");
62 // Use if only the symmetric form [[A, B], [B, A]] (without additional factors) of translation operator is allowed.
63 static inline void TROPS_ONLY_AB_SYMMETRIC_NORMS_IMPLEMENTED(qpms_normalisation_t norm
) {
64 switch (norm
& QPMS_NORMALISATION_NORM_BITS
) {
65 case QPMS_NORMALISATION_NORM_SPHARM
:
66 case QPMS_NORMALISATION_NORM_POWER
:
69 QPMS_NOT_IMPLEMENTED("Only spherical harmonic and power normalisation supported.");
72 ( !(norm
& QPMS_NORMALISATION_N_I
) != !(norm
& QPMS_NORMALISATION_M_I
) )
74 ( !(norm
& QPMS_NORMALISATION_N_MINUS
) != !(norm
& QPMS_NORMALISATION_M_MINUS
) )
76 QPMS_NOT_IMPLEMENTED("Only normalisations without a phase factors between M and N waves are supported.");
82 * [Xu_old] Yu-Lin Xu, Journal of Computational Physics 127, 285–298 (1996)
83 * [Xu] Yu-Lin Xu, Journal of Computational Physics 139, 137–165 (1998)
87 * GENERAL TODO: use normalised Legendre functions for Kristensson and Taylor conventions directly
88 * instead of normalising them here (the same applies for csphase).
91 static const double sqrtpi
= 1.7724538509055160272981674833411451827975494561223871;
92 //static const double ln2 = 0.693147180559945309417232121458176568075500134360255254120;
94 // Associated Legendre polynomial at zero argument (DLMF 14.5.1)
95 double qpms_legendre0(int m
, int n
) {
96 return pow(2,m
) * sqrtpi
/ tgamma(.5*n
- .5*m
+ .5) / tgamma(.5*n
-.5*m
);
99 // Derivative of associated Legendre polynomial at zero argument (DLMF 14.5.2)
100 double qpms_legendreD0(int m
, int n
) {
101 return -2 * qpms_legendre0(m
, n
);
105 static inline int imin(int x
, int y
) {
106 return x
> y
? y
: x
;
109 // The uppermost value of q index for the B coefficient terms from [Xu](60).
110 // N.B. this is different from [Xu_old](79) due to the n vs. n+1 difference.
111 // However, the trailing terms in [Xu_old] are analytically zero (although
112 // the numerical values will carry some non-zero rounding error).
113 static inline int gauntB_Q_max(int M
, int n
, int mu
, int nu
) {
114 return imin(n
, imin(nu
, (n
+nu
+1-abs(M
+mu
))/2));
117 static inline double qpms_trans_normlogfac(qpms_normalisation_t norm
,
118 int m
, int n
, int mu
, int nu
) {
119 return -0.5*(lgamma(n
+m
+1)-lgamma(n
-m
+1)+lgamma(nu
-mu
+1)-lgamma(nu
+mu
+1));
122 static inline double qpms_trans_normfac(qpms_normalisation_t norm
,
123 int m
, int n
, int mu
, int nu
) {
124 int csphase
= qpms_normalisation_t_csphase(norm
);
125 /* Account for csphase here. Alternatively, this could be done by
126 * using appropriate csphase in the legendre polynomials when calculating
127 * the translation operator.
129 double normfac
= (1 == csphase
) ? min1pow(m
-mu
) : 1.;
130 normfac
*= sqrt((n
*(n
+1.))/(nu
*(nu
+1.)));
131 normfac
*= sqrt((2.*n
+1)/(2.*nu
+1));
135 complex double qpms_trans_single_A(qpms_normalisation_t norm
,
136 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
137 bool r_ge_d
, qpms_bessel_t J
) {
138 TROPS_ONLY_EIMF_IMPLEMENTED(norm
);
139 if(r_ge_d
) J
= QPMS_BESSEL_REGULAR
;
141 double costheta
= cos(kdlj
.theta
);
143 int qmax
= gaunt_q_max(-m
,n
,mu
,nu
); // nemá tu být +m?
147 gaunt_xu(-m
,n
,mu
,nu
,qmax
,a1q
,&err
);
148 QPMS_ENSURE_SUCCESS(err
);
149 double a1q0
= a1q
[0];
151 double leg
[gsl_sf_legendre_array_n(n
+nu
)];
152 QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE
,n
+nu
,costheta
,-1,leg
));
153 complex double bes
[n
+nu
+1];
154 QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J
, n
+nu
, kdlj
.r
, bes
));
155 complex double sum
= 0;
156 for(int q
= 0; q
<= qmax
; ++q
) {
159 //if(p < abs(Pp_order)) continue; // FIXME raději nastav lépe meze
160 assert(p
>= abs(Pp_order
));
161 double a1q_n
= a1q
[q
] / a1q0
;
162 double Pp
= leg
[gsl_sf_legendre_array_index(p
, abs(Pp_order
))];
163 if (Pp_order
< 0) Pp
*= min1pow(mu
-m
) * exp(lgamma(1+p
+Pp_order
)-lgamma(1+p
-Pp_order
));
164 complex double zp
= bes
[p
];
165 complex double summandq
= (n
*(n
+1) + nu
*(nu
+1) - p
*(p
+1)) * min1pow(q
) * a1q_n
* zp
* Pp
;
166 sum
+= summandq
; // TODO KAHAN
169 double exponent
=(lgamma(2*n
+1)-lgamma(n
+2)+lgamma(2*nu
+3)-lgamma(nu
+2)
170 +lgamma(n
+nu
+m
-mu
+1)-lgamma(n
-m
+1)-lgamma(nu
+mu
+1)
171 +lgamma(n
+nu
+1) - lgamma(2*(n
+nu
)+1));
172 complex double presum
= exp(exponent
);
173 presum
*= cexp(I
*(mu
-m
)*kdlj
.phi
) * min1pow(m
) * ipow(nu
+n
) / (4*n
);
175 double normlogfac
= qpms_trans_normlogfac(norm
,m
,n
,mu
,nu
);
176 double normfac
= qpms_trans_normfac(norm
,m
,n
,mu
,nu
);
177 /// N<-N type coefficients w.r.t. Kristensson's convention. Csphase has been already taken into acct ^^^.
178 normfac
*= qpms_normalisation_factor_N_noCS(norm
, nu
, mu
)
179 / qpms_normalisation_factor_N_noCS(norm
, n
, m
);
180 // ipow(n-nu) is the difference from the Taylor formula!
181 presum
*= /*ipow(n-nu) * */
182 (normfac
* exp(normlogfac
))
204 complex double qpms_trans_single_B(qpms_normalisation_t norm
,
205 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
206 bool r_ge_d
, qpms_bessel_t J
) {
207 TROPS_ONLY_EIMF_IMPLEMENTED(norm
);
208 if(r_ge_d
) J
= QPMS_BESSEL_REGULAR
;
209 double costheta
= cos(kdlj
.theta
);
211 int q2max
= gaunt_q_max(-m
-1,n
+1,mu
+1,nu
);
212 int Qmax
= gaunt_q_max(-m
,n
+1,mu
,nu
);
213 int realQmax
= gauntB_Q_max(-m
,n
,mu
,nu
);
214 double a2q
[q2max
+1], a3q
[Qmax
+1], a2q0
, a3q0
;
217 for (int q
= 0; q
<= q2max
; ++q
)
222 gaunt_xu(-m
-1,n
+1,mu
+1,nu
,q2max
,a2q
,&err
);
223 QPMS_ENSURE_SUCCESS(err
);
226 gaunt_xu(-m
,n
+1,mu
,nu
,Qmax
,a3q
,&err
);
227 QPMS_ENSURE_SUCCESS(err
);
230 double leg
[gsl_sf_legendre_array_n(n
+nu
+1)];
231 QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE
,n
+nu
+1,costheta
,-1,leg
));
232 complex double bes
[n
+nu
+2];
233 QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J
, n
+nu
+1, kdlj
.r
, bes
));
235 complex double sum
= 0;
236 for (int q
= 0; q
<= realQmax
; ++q
) {
238 double a2q_n
= a2q
[q
]/a2q0
;
239 double a3q_n
= a3q
[q
]/a3q0
;
240 complex double zp_
= bes
[p
+1];
241 int Pp_order_
= mu
-m
;
242 //if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze
243 assert(p
+1 >= abs(Pp_order_
));
244 double Pp_
= leg
[gsl_sf_legendre_array_index(p
+1, abs(Pp_order_
))];
245 if (Pp_order_
< 0) Pp_
*= min1pow(mu
-m
) * exp(lgamma(1+1+p
+Pp_order_
)-lgamma(1+1+p
-Pp_order_
));
246 complex double summandq
= ((2*(n
+1)*(nu
-mu
)*a2q_n
247 -(-nu
*(nu
+1) - n
*(n
+3) - 2*mu
*(n
+1)+p
*(p
+3))* a3q_n
)
248 *min1pow(q
) * zp_
* Pp_
);
249 sum
+= summandq
; //TODO KAHAN
252 double exponent
=(lgamma(2*n
+3)-lgamma(n
+2)+lgamma(2*nu
+3)-lgamma(nu
+2)
253 +lgamma(n
+nu
+m
-mu
+2)-lgamma(n
-m
+1)-lgamma(nu
+mu
+1)
254 +lgamma(n
+nu
+2) - lgamma(2*(n
+nu
)+3));
255 complex double presum
= exp(exponent
);
256 presum
*= cexp(I
*(mu
-m
)*kdlj
.phi
) * min1pow(m
) * ipow(nu
+n
+1) / (
257 (4*n
)*(n
+1)*(n
+m
+1));
259 double normlogfac
= qpms_trans_normlogfac(norm
,m
,n
,mu
,nu
);
260 double normfac
= qpms_trans_normfac(norm
,m
,n
,mu
,nu
);
261 /// N<-M type coefficients w.r.t. Kristensson's convention. Csphase has been already taken into acct ^^^.
262 normfac
*= qpms_normalisation_factor_M_noCS(norm
, nu
, mu
)
263 / qpms_normalisation_factor_N_noCS(norm
, n
, m
);
265 // ipow(n-nu) is the difference from the "old Taylor" formula
266 presum
*= /*ipow(n-nu) * */(exp(normlogfac
) * normfac
)
288 void qpms_trans_calculator_free(qpms_trans_calculator
*c
) {
289 free(c
->A_multipliers
[0]);
290 free(c
->A_multipliers
);
291 free(c
->B_multipliers
[0]);
292 free(c
->B_multipliers
);
294 qpms_ewald3_constants_free(c
->e3c
);
300 static inline size_t qpms_trans_calculator_index_mnmunu(const qpms_trans_calculator
*c
,
301 int m
, int n
, int mu
, int nu
){
302 return c
->nelem
* qpms_mn2y(m
,n
) + qpms_mn2y(mu
,nu
);
305 static inline size_t qpms_trans_calculator_index_yyu(const qpms_trans_calculator
*c
,
306 size_t y
, size_t yu
) {
307 return c
->nelem
* y
+ yu
;
311 static inline double fsq(double x
) {return x
* x
; }
313 static void qpms_trans_calculator_multipliers_A(
314 qpms_normalisation_t norm
,
315 complex double *dest
, int m
, int n
, int mu
, int nu
, int qmax
) {
316 assert(qmax
== gaunt_q_max(-m
,n
,mu
,nu
));
319 gaunt_xu(-m
,n
,mu
,nu
,qmax
,a1q
,&err
);
320 QPMS_ENSURE_SUCCESS(err
);
321 double a1q0
= a1q
[0];
323 double normlogfac
= qpms_trans_normlogfac(norm
,m
,n
,mu
,nu
);
324 double normfac
= qpms_trans_normfac(norm
,m
,n
,mu
,nu
);
325 /// N<-N type coefficients w.r.t. Kristensson's convention. Csphase has been already taken into acct ^^^.
326 normfac
*= qpms_normalisation_factor_N_noCS(norm
, nu
, mu
)
327 / qpms_normalisation_factor_N_noCS(norm
, n
, m
);
329 normfac
*= min1pow(m
); //different from old Taylor
331 double exponent
=(lgamma(2*n
+1)-lgamma(n
+2)+lgamma(2*nu
+3)-lgamma(nu
+2)
332 +lgamma(n
+nu
+m
-mu
+1)-lgamma(n
-m
+1)-lgamma(nu
+mu
+1)
333 +lgamma(n
+nu
+1) - lgamma(2*(n
+nu
)+1))
335 complex double presum
= exp(exponent
);
336 presum
*= normfac
/ (4.*n
);
337 presum
*= ipow(n
+nu
); // different from old Taylor
339 for(int q
= 0; q
<= qmax
; q
++) {
341 int Pp_order
= mu
- m
;
342 assert(p
>= abs(Pp_order
));
343 double a1q_n
= a1q
[q
] / a1q0
;
344 // Assuming non_normalized legendre polynomials (normalisation done here by hand)!
345 double Ppfac
= (Pp_order
>= 0) ? 1 :
346 min1pow(mu
-m
) * exp(lgamma(1+p
+Pp_order
)-lgamma(1+p
-Pp_order
));
347 double summandfac
= (n
*(n
+1) + nu
*(nu
+1) - p
*(p
+1)) * min1pow(q
) * a1q_n
;
348 dest
[q
] = presum
* summandfac
* Ppfac
366 // FIXME I might not need complex here
372 static double cruzan_bfactor(int M
, int n
, int mu
, int nu
, int p
) {
373 double logprefac
= lgamma(n
+M
+1) - lgamma(n
-M
+1) + lgamma(nu
+mu
+1) - lgamma(nu
-mu
+1)
374 + lgamma(p
-M
-mu
+2) - lgamma(p
+M
+mu
+2);
376 return min1pow(mu
+M
) * (2*p
+3) * exp(logprefac
)
377 * gsl_sf_coupling_3j(2*n
, 2*nu
, 2*(p
+1), 2*M
, 2*mu
, 2*(-M
-mu
))
378 * gsl_sf_coupling_3j(2*n
, 2*nu
, 2*p
, 0, 0, 0);
382 void qpms_trans_calculator_multipliers_B(
383 qpms_normalisation_t norm
,
384 complex double *dest
, int m
, int n
, int mu
, int nu
, int Qmax
){
385 // This is according to the Cruzan-type formula [Xu](59)
386 assert(Qmax
== gauntB_Q_max(-m
,n
,mu
,nu
));
388 double normlogfac
= qpms_trans_normlogfac(norm
,m
,n
,mu
,nu
);
389 double normfac
= qpms_trans_normfac(norm
,m
,n
,mu
,nu
);
390 /// N<-M type coefficients w.r.t. Kristensson's convention. Csphase has been already taken into acct ^^^.
391 normfac
*= qpms_normalisation_factor_M_noCS(norm
, nu
, mu
)
392 / qpms_normalisation_factor_N_noCS(norm
, n
, m
);
394 double presum
= min1pow(1-m
) * (2*nu
+1)/(2.*(n
*(n
+1)))
395 * exp(lgamma(n
+m
+1) - lgamma(n
-m
+1) + lgamma(nu
-mu
+1) - lgamma(nu
+mu
+1)
399 for(int q
= BQ_OFFSET
; q
<= Qmax
; ++q
) {
401 int Pp_order
= mu
- m
;
402 // Assuming non-normalised Legendre polynomials, normalise here by hand.
403 // Ppfac_ differs from Ppfac in the A-case by the substitution p->p+1
404 double Ppfac_
= (Pp_order
>= 0)? 1 :
405 min1pow(mu
-m
) * exp(lgamma(1+1+p
+Pp_order
)-lgamma(1+1+p
-Pp_order
));
408 * (isq(n
+nu
+1)-isq(p
+1))
410 dest
[q
-BQ_OFFSET
] = presum
* t
* Ppfac_
411 * cruzan_bfactor(-m
,n
,mu
,nu
,p
) * ipow(p
+1)
439 qpms_trans_calculator
440 *qpms_trans_calculator_init (const int lMax
, const qpms_normalisation_t normalisation
) {
441 TROPS_ONLY_EIMF_IMPLEMENTED(normalisation
);
443 qpms_trans_calculator
*c
= malloc(sizeof(qpms_trans_calculator
));
445 c
->nelem
= lMax
* (lMax
+2);
446 c
->A_multipliers
= malloc((1+SQ(c
->nelem
)) * sizeof(complex double *));
447 c
->B_multipliers
= malloc((1+SQ(c
->nelem
)) * sizeof(complex double *));
448 c
->normalisation
= normalisation
;
449 size_t *qmaxes
= malloc(SQ(c
->nelem
) * sizeof(size_t));
451 for(size_t y
= 0; y
< c
->nelem
; y
++)
452 for(size_t yu
= 0; yu
< c
->nelem
; yu
++) {
454 qpms_y2mn_p(y
,&m
,&n
);
455 qpms_y2mn_p(yu
,&mu
,&nu
);
457 qmaxes
[qpms_trans_calculator_index_yyu(c
,y
,yu
)]
458 = gaunt_q_max(-m
,n
,mu
,nu
));
460 c
->A_multipliers
[0] = malloc(qmaxsum
* sizeof(complex double));
461 // calculate multiplier beginnings
462 for(size_t i
= 0; i
< SQ(c
->nelem
); ++i
)
463 c
->A_multipliers
[i
+1] = c
->A_multipliers
[i
] + qmaxes
[i
] + 1;
464 // calculate the multipliers
465 for(size_t y
= 0; y
< c
->nelem
; ++y
)
466 for(size_t yu
= 0; yu
< c
->nelem
; ++yu
) {
467 size_t i
= y
* c
->nelem
+ yu
;
469 qpms_y2mn_p(y
, &m
, &n
);
470 qpms_y2mn_p(yu
, &mu
, &nu
);
471 qpms_trans_calculator_multipliers_A(normalisation
,
472 c
->A_multipliers
[i
], m
, n
, mu
, nu
, qmaxes
[i
]);
476 for(size_t y
=0; y
< c
->nelem
; y
++)
477 for(size_t yu
= 0; yu
< c
->nelem
; yu
++) {
479 qpms_y2mn_p(y
,&m
,&n
);
480 qpms_y2mn_p(yu
,&mu
,&nu
);
481 qmaxsum
+= (1 - BQ_OFFSET
) + (
482 qmaxes
[qpms_trans_calculator_index_yyu(c
,y
,yu
)]
483 = gauntB_Q_max(-m
,n
,mu
,nu
));
485 c
->B_multipliers
[0] = malloc(qmaxsum
* sizeof(complex double));
486 // calculate multiplier beginnings
487 for(size_t i
= 0; i
< SQ(c
->nelem
); ++i
)
488 c
->B_multipliers
[i
+1] = c
->B_multipliers
[i
] + qmaxes
[i
] + (1 - BQ_OFFSET
);
489 // calculate the multipliers
490 for(size_t y
= 0; y
< c
->nelem
; ++y
)
491 for(size_t yu
= 0; yu
< c
->nelem
; ++yu
) {
492 size_t i
= y
* c
->nelem
+ yu
;
494 qpms_y2mn_p(y
, &m
, &n
);
495 qpms_y2mn_p(yu
, &mu
, &nu
);
496 qpms_trans_calculator_multipliers_B(normalisation
,
497 c
->B_multipliers
[i
], m
, n
, mu
, nu
, qmaxes
[i
]);
502 c
->e3c
= qpms_ewald3_constants_init(2 * lMax
+ 1, /*csphase*/ qpms_normalisation_t_csphase(normalisation
));
504 c
->legendre0
= malloc(gsl_sf_legendre_array_n(2*lMax
+1) * sizeof(double));
505 QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE
,2*lMax
+1,
506 0,-1,c
->legendre0
)); // TODO maybe use some "precise" analytical formula instead?
510 static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_trans_calculator
*c
,
511 int m
, int n
, int mu
, int nu
, double kdlj_phi
,
512 const complex double *bessel_buf
, const double *legendre_buf
) {
513 TROPS_ONLY_EIMF_IMPLEMENTED(c
->normalisation
);
514 size_t i
= qpms_trans_calculator_index_mnmunu(c
, m
, n
, mu
, nu
);
515 size_t qmax
= c
->A_multipliers
[i
+1] - c
->A_multipliers
[i
] - 1;
516 assert(qmax
== gaunt_q_max(-m
,n
,mu
,nu
));
517 complex double sum
, kahanc
;
518 ckahaninit(&sum
, &kahanc
);
519 for(size_t q
= 0; q
<= qmax
; ++q
) {
521 double Pp
= legendre_buf
[gsl_sf_legendre_array_index(p
, abs(mu
-m
))];
522 complex double zp
= bessel_buf
[p
];
523 complex double multiplier
= c
->A_multipliers
[i
][q
];
524 ckahanadd(&sum
, &kahanc
, Pp
* zp
* multiplier
);
526 complex double eimf
= cexp(I
*(mu
-m
)*kdlj_phi
);
530 complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator
*c
,
531 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
532 bool r_ge_d
, qpms_bessel_t J
,
533 complex double *bessel_buf
, double *legendre_buf
) {
534 // This functions gets preallocated memory for bessel and legendre functions, but computes them itself
535 if (r_ge_d
) J
= QPMS_BESSEL_REGULAR
;
536 if (0 == kdlj
.r
&& J
!= QPMS_BESSEL_REGULAR
)
539 int csphase
= qpms_normalisation_t_csphase(c
->normalisation
);
541 double costheta
= cos(kdlj
.theta
);
542 QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE
,n
+nu
,
543 costheta
,csphase
,legendre_buf
));
544 QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J
, n
+nu
+1, kdlj
.r
, bessel_buf
));
545 return qpms_trans_calculator_get_A_precalcbuf(c
,m
,n
,mu
,nu
,
546 kdlj
.phi
,bessel_buf
,legendre_buf
);
549 static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_trans_calculator
*c
,
550 int m
, int n
, int mu
, int nu
, double kdlj_phi
,
551 const complex double *bessel_buf
, const double *legendre_buf
) {
552 TROPS_ONLY_EIMF_IMPLEMENTED(c
->normalisation
);
553 size_t i
= qpms_trans_calculator_index_mnmunu(c
, m
, n
, mu
, nu
);
554 size_t qmax
= c
->B_multipliers
[i
+1] - c
->B_multipliers
[i
] - (1 - BQ_OFFSET
);
555 assert(qmax
== gauntB_Q_max(-m
,n
,mu
,nu
));
556 complex double sum
, kahanc
;
557 ckahaninit(&sum
, &kahanc
);
558 for(int q
= BQ_OFFSET
; q
<= qmax
; ++q
) {
560 double Pp_
= legendre_buf
[gsl_sf_legendre_array_index(p
+1, abs(mu
-m
))];
561 complex double zp_
= bessel_buf
[p
+1];
562 complex double multiplier
= c
->B_multipliers
[i
][q
-BQ_OFFSET
];
563 ckahanadd(&sum
, &kahanc
, Pp_
* zp_
* multiplier
);
565 complex double eimf
= cexp(I
*(mu
-m
)*kdlj_phi
);
569 complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator
*c
,
570 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
571 bool r_ge_d
, qpms_bessel_t J
,
572 complex double *bessel_buf
, double *legendre_buf
) {
573 // This functions gets preallocated memory for bessel and legendre functions, but computes them itself
574 if (r_ge_d
) J
= QPMS_BESSEL_REGULAR
;
575 if (0 == kdlj
.r
&& J
!= QPMS_BESSEL_REGULAR
)
578 int csphase
= qpms_normalisation_t_csphase(c
->normalisation
);
579 double costheta
= cos(kdlj
.theta
);
580 QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE
,n
+nu
+1,
581 costheta
,csphase
,legendre_buf
));
582 QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J
, n
+nu
+1, kdlj
.r
, bessel_buf
));
583 return qpms_trans_calculator_get_B_precalcbuf(c
,m
,n
,mu
,nu
,
584 kdlj
.phi
,bessel_buf
,legendre_buf
);
587 int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator
*c
,
588 complex double *Adest
, complex double *Bdest
,
589 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
590 bool r_ge_d
, qpms_bessel_t J
,
591 complex double *bessel_buf
, double *legendre_buf
) {
592 if (r_ge_d
) J
= QPMS_BESSEL_REGULAR
;
593 if (0 == kdlj
.r
&& J
!= QPMS_BESSEL_REGULAR
) {
596 // TODO warn? different return value?
599 double costheta
= cos(kdlj
.theta
);
600 QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE
,n
+nu
+1,
601 costheta
,-1,legendre_buf
));
602 QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J
, n
+nu
+1, kdlj
.r
, bessel_buf
));
603 *Adest
= qpms_trans_calculator_get_A_precalcbuf(c
,m
,n
,mu
,nu
,
604 kdlj
.phi
,bessel_buf
,legendre_buf
);
605 *Bdest
= qpms_trans_calculator_get_B_precalcbuf(c
,m
,n
,mu
,nu
,
606 kdlj
.phi
,bessel_buf
,legendre_buf
);
610 int qpms_trans_calculator_get_AB_arrays_precalcbuf(const qpms_trans_calculator
*c
,
611 complex double *Adest
, complex double *Bdest
,
612 size_t deststride
, size_t srcstride
, double kdlj_phi
,
613 const complex double *bessel_buf
, const double *legendre_buf
) {
614 size_t desti
= 0, srci
= 0;
615 for (int n
= 1; n
<= c
->lMax
; ++n
) for (int m
= -n
; m
<= n
; ++m
) {
616 for (int nu
= 1; nu
<= c
->lMax
; ++nu
) for (int mu
= -nu
; mu
<= nu
; ++mu
) {
618 size_t assertindex
= qpms_trans_calculator_index_mnmunu(c
,m
,n
,mu
,nu
);
620 assert(assertindex
== desti
*c
->nelem
+ srci
);
621 *(Adest
+ deststride
* desti
+ srcstride
* srci
) =
622 qpms_trans_calculator_get_A_precalcbuf(c
,m
,n
,mu
,nu
,
623 kdlj_phi
, bessel_buf
, legendre_buf
);
624 *(Bdest
+ deststride
* desti
+ srcstride
* srci
) =
625 qpms_trans_calculator_get_B_precalcbuf(c
,m
,n
,mu
,nu
,
626 kdlj_phi
,bessel_buf
,legendre_buf
);
635 int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator
*c
,
636 complex double *Adest
, complex double *Bdest
,
637 size_t deststride
, size_t srcstride
,
638 csph_t kdlj
, bool r_ge_d
, qpms_bessel_t J
,
639 complex double *bessel_buf
, double *legendre_buf
) {
640 if (r_ge_d
) J
= QPMS_BESSEL_REGULAR
;
641 if (0 == kdlj
.r
&& J
!= QPMS_BESSEL_REGULAR
) {
642 for (size_t i
= 0; i
< c
->nelem
; ++i
)
643 for (size_t j
= 0; j
< c
->nelem
; ++j
) {
644 *(Adest
+ i
*srcstride
+ j
*deststride
) = NAN
+I
*NAN
;
645 *(Bdest
+ i
*srcstride
+ j
*deststride
) = NAN
+I
*NAN
;
647 // TODO warn? different return value?
651 double costheta
= cos(kdlj
.theta
);
652 QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE
,2*c
->lMax
+1,
653 costheta
,-1,legendre_buf
));
654 QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J
, 2*c
->lMax
+1, kdlj
.r
, bessel_buf
));
656 return qpms_trans_calculator_get_AB_arrays_precalcbuf(c
, Adest
, Bdest
,
657 deststride
, srcstride
, kdlj
.phi
, bessel_buf
, legendre_buf
);
660 complex double qpms_trans_calculator_get_A(const qpms_trans_calculator
*c
,
661 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
662 bool r_ge_d
, qpms_bessel_t J
) {
663 double leg
[gsl_sf_legendre_array_n(n
+nu
)];
664 complex double bes
[n
+nu
+1]; // maximum order is 2n for A coeffs, plus the zeroth.
665 return qpms_trans_calculator_get_A_buf(c
,m
,n
,mu
,nu
,kdlj
,r_ge_d
,J
,
669 complex double qpms_trans_calculator_get_B(const qpms_trans_calculator
*c
,
670 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
671 bool r_ge_d
, qpms_bessel_t J
) {
672 double leg
[gsl_sf_legendre_array_n(n
+nu
+1)];
673 complex double bes
[n
+nu
+2]; // maximum order is 2n+1 for B coeffs, plus the zeroth.
674 return qpms_trans_calculator_get_B_buf(c
,m
,n
,mu
,nu
,kdlj
,r_ge_d
,J
,
678 int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator
*c
,
679 complex double *Adest
, complex double *Bdest
,
680 int m
, int n
, int mu
, int nu
, csph_t kdlj
,
681 bool r_ge_d
, qpms_bessel_t J
) {
682 double leg
[gsl_sf_legendre_array_n(2*c
->lMax
+1)];
683 complex double bes
[2*c
->lMax
+2]; // maximum order is 2n+1 for B coeffs, plus the zeroth.
684 return qpms_trans_calculator_get_AB_buf_p(c
,Adest
, Bdest
,m
,n
,mu
,nu
,kdlj
,r_ge_d
,J
,
688 int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator
*c
,
689 complex double *Adest
, complex double *Bdest
,
690 size_t deststride
, size_t srcstride
,
691 csph_t kdlj
, bool r_ge_d
, qpms_bessel_t J
) {
692 double leg
[gsl_sf_legendre_array_n(c
->lMax
+c
->lMax
+1)];
693 complex double bes
[2*c
->lMax
+2]; // maximum order is 2n+1 for B coeffs, plus the zeroth.
694 return qpms_trans_calculator_get_AB_arrays_buf(c
,
695 Adest
, Bdest
, deststride
, srcstride
,
700 // Convenience functions using VSWF base specs
701 qpms_errno_t
qpms_trans_calculator_get_trans_array(const qpms_trans_calculator
*c
,
702 complex double *target
,
703 /// Must be destspec->lMax <= c-> lMax && destspec->norm == c->norm.
704 const qpms_vswf_set_spec_t
*destspec
, size_t deststride
,
705 /// Must be srcspec->lMax <= c-> lMax && srcspec->norm == c->norm.
706 const qpms_vswf_set_spec_t
*srcspec
, size_t srcstride
,
707 csph_t kdlj
, bool r_ge_d
, qpms_bessel_t J
)
709 TROPS_ONLY_AB_SYMMETRIC_NORMS_IMPLEMENTED(c
->normalisation
);
710 assert(c
->normalisation
== destspec
->norm
&& c
->normalisation
== srcspec
->norm
);
711 assert(c
->lMax
>= destspec
->lMax
&& c
->lMax
>= srcspec
->lMax
);
712 assert(destspec
->lMax_L
< 0 && srcspec
->lMax_L
< 0);
713 // TODO don't use c->lMax etc. if both destspec->lMax and srcspec->lMax are smaller
714 complex double A
[c
->nelem
][c
->nelem
];
715 complex double B
[c
->nelem
][c
->nelem
];
716 qpms_errno_t retval
= qpms_trans_calculator_get_AB_arrays(c
,
717 A
[0], B
[0], c
->nelem
, 1,
719 qpms_trans_array_from_AB(target
, destspec
, deststride
, srcspec
, srcstride
,
720 A
[0], B
[0], c
->lMax
);
724 qpms_errno_t
qpms_trans_calculator_get_trans_array_e32_e(const qpms_trans_calculator
*c
,
725 complex double *target
, double *err
,
726 /// Must be destspec->lMax <= c-> lMax && destspec->norm == c->norm.
727 const qpms_vswf_set_spec_t
*destspec
, size_t deststride
,
728 /// Must be srcspec->lMax <= c-> lMax && srcspec->norm == c->norm.
729 const qpms_vswf_set_spec_t
*srcspec
, size_t srcstride
,
730 const double eta
, const complex double k
,
731 cart2_t b1
, cart2_t b2
,
733 const cart3_t particle_shift
,
734 double maxR
, double maxK
,
735 const qpms_ewald_part parts
738 TROPS_ONLY_AB_SYMMETRIC_NORMS_IMPLEMENTED(c
->normalisation
);
739 QPMS_ENSURE(c
->normalisation
== destspec
->norm
&& c
->normalisation
== srcspec
->norm
,
740 "The normalisation conventions must be the same");
741 assert(c
->lMax
>= destspec
->lMax
&& c
->lMax
>= srcspec
->lMax
);
742 assert(destspec
->lMax_L
< 0 && srcspec
->lMax_L
< 0);
743 // TODO don't use c->lMax etc. if both destspec->lMax and srcspec->lMax are smaller
744 const ptrdiff_t ldAB
= c
->nelem
;
745 complex double *A
, *B
;
746 double *Aerr
= NULL
, *Berr
= NULL
;
747 QPMS_CRASHING_MALLOC(A
, c
->nelem
*c
->nelem
*sizeof(complex double));
748 QPMS_CRASHING_MALLOC(B
, c
->nelem
*c
->nelem
*sizeof(complex double));
750 QPMS_CRASHING_MALLOC(Aerr
, c
->nelem
*c
->nelem
*sizeof(double));
751 QPMS_CRASHING_MALLOC(Berr
, c
->nelem
*c
->nelem
*sizeof(double));
753 qpms_errno_t retval
= qpms_trans_calculator_get_AB_arrays_e32_e(c
,
754 A
, Aerr
, B
, Berr
, ldAB
, 1,
755 eta
, k
, b1
, b2
, beta
, particle_shift
, maxR
, maxK
, parts
);
756 for (size_t desti
= 0; desti
< destspec
->n
; ++desti
) {
757 // TODO replace with (modified) qpms_trans_array_from_AB()
758 qpms_y_t desty
; qpms_vswf_type_t destt
;
759 if(QPMS_SUCCESS
!= qpms_uvswfi2ty(destspec
->ilist
[desti
], &destt
, &desty
))
760 qpms_pr_error_at_flf(__FILE__
,__LINE__
,__func__
,
761 "Invalid u. vswf index %llx.", destspec
->ilist
[desti
]);
762 for (size_t srci
= 0; srci
< srcspec
->n
; ++srci
){
763 qpms_y_t srcy
; qpms_vswf_type_t srct
;
764 if(QPMS_SUCCESS
!= qpms_uvswfi2ty(srcspec
->ilist
[srci
], &srct
, &srcy
))
765 qpms_pr_error_at_flf(__FILE__
,__LINE__
,__func__
,
766 "Invalid u. vswf index %llx.", srcspec
->ilist
[srci
]);
767 target
[srci
* srcstride
+ desti
* deststride
]
768 = (srct
== destt
) ? A
[ldAB
*desty
+ srcy
] : B
[ldAB
*desty
+ srcy
];
769 if(err
) err
[srci
* srcstride
+ desti
* deststride
]
770 = (srct
== destt
) ? Aerr
[ldAB
*desty
+ srcy
] : Berr
[ldAB
*desty
+ srcy
];
774 if (err
) { free(Aerr
); free(Berr
); }
778 qpms_errno_t
qpms_trans_calculator_get_trans_array_e32(const qpms_trans_calculator
*c
,
779 complex double *target
, double *err
,
780 /// Must be destspec->lMax <= c-> lMax && destspec->norm == c->norm.
781 const qpms_vswf_set_spec_t
*destspec
, size_t deststride
,
782 /// Must be srcspec->lMax <= c-> lMax && srcspec->norm == c->norm.
783 const qpms_vswf_set_spec_t
*srcspec
, size_t srcstride
,
784 const double eta
, const complex double k
,
785 cart2_t b1
, cart2_t b2
,
787 const cart3_t particle_shift
,
788 double maxR
, double maxK
791 return qpms_trans_calculator_get_trans_array_e32_e(c
, target
, err
, destspec
, deststride
,
792 srcspec
, srcstride
, eta
, k
, b1
, b2
, beta
, particle_shift
, maxR
, maxK
, QPMS_EWALD_FULL
);
796 qpms_errno_t
qpms_trans_calculator_get_trans_array_lc3p(
797 const qpms_trans_calculator
*c
,
798 complex double *target
,
799 /// Must be destspec->lMax <= c-> lMax && destspec->norm == c->norm.
800 const qpms_vswf_set_spec_t
*destspec
, size_t deststride
,
801 /// Must be srcspec->lMax <= c-> lMax && srcspec->norm == c->norm.
802 const qpms_vswf_set_spec_t
*srcspec
, size_t srcstride
,
803 complex double k
, cart3_t destpos
, cart3_t srcpos
, qpms_bessel_t J
804 /// Workspace has to be at least 2 * c->neleme**2 long
807 csph_t kdlj
= cart2csph(cart3_substract(destpos
, srcpos
));
809 return qpms_trans_calculator_get_trans_array(c
, target
,
810 destspec
, deststride
, srcspec
, srcstride
, kdlj
,
815 int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_trans_calculator
*c
,
816 complex double * const Adest
, double * const Aerr
,
817 complex double * const Bdest
, double * const Berr
,
818 const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
819 /* qpms_bessel_t J*/ // assume QPMS_HANKEL_PLUS
820 const double eta
, const double k
, const double unitcell_area
,
821 const size_t nRpoints
, const cart2_t
*Rpoints
, // n.b. can't contain 0; TODO automatic recognition and skip
822 const size_t nKpoints
, const cart2_t
*Kpoints
,
823 const double beta
,//DIFF21
824 const double particle_shift
//DIFF21
828 const qpms_y_t nelem2_sc
= qpms_lMax2nelem_sc(c
->e3c
->lMax
);
829 //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
830 const bool doerr
= Aerr
|| Berr
;
831 const bool do_sigma0
= (particle_shift
== 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
833 complex double *sigmas_short
= malloc(sizeof(complex double)*nelem2_sc
);
834 complex double *sigmas_long
= malloc(sizeof(complex double)*nelem2_sc
);
835 complex double *sigmas_total
= malloc(sizeof(complex double)*nelem2_sc
);
836 double *serr_short
, *serr_long
, *serr_total
;
838 serr_short
= malloc(sizeof(double)*nelem2_sc
);
839 serr_long
= malloc(sizeof(double)*nelem2_sc
);
840 serr_total
= malloc(sizeof(double)*nelem2_sc
);
841 } else serr_short
= serr_long
= serr_total
= NULL
;
843 QPMS_ENSURE_SUCCESS(ewald31z_sigma_long_points_and_shift(sigmas_long
, serr_long
, //DIFF21
844 c
->e3c
, eta
, k
, unitcell_area
, nKpoints
, Kpoints
, beta
, particle_shift
));
846 QPMS_ENSURE_SUCCESS(ewald31z_sigma_short_points_and_shift(sigmas_short
, serr_short
, //DIFF21
847 c
->e3c
, eta
, k
, nRpoints
, Rpoints
, beta
, particle_shift
));
849 for(qpms_y_t y
= 0; y
< nelem2_sc
; ++y
)
850 sigmas_total
[y
] = sigmas_short
[y
] + sigmas_long
[y
];
851 if (doerr
) for(qpms_y_t y
= 0; y
< nelem2_sc
; ++y
)
852 serr_total
[y
] = serr_short
[y
] + serr_long
[y
];
854 complex double sigma0
= 0; double sigma0_err
= 0;
856 QPMS_ENSURE_SUCCESS(ewald31z_sigma0(&sigma0
, &sigma0_err
, c
->e3c
, eta
, k
));
857 const qpms_l_t y
= qpms_mn2y_sc(0,0);
858 sigmas_total
[y
] += sigma0
;
859 if(doerr
) serr_total
[y
] += sigma0_err
;
863 ptrdiff_t desti
= 0, srci
= 0;
864 for (qpms_l_t n
= 1; n
<= c
->lMax
; ++n
) for (qpms_m_t m
= -n
; m
<= n
; ++m
) {
865 for (qpms_l_t nu
= 1; nu
<= c
->lMax
; ++nu
) for (qpms_m_t mu
= -nu
; mu
<= nu
; ++mu
){
866 const size_t i
= qpms_trans_calculator_index_mnmunu(c
, m
, n
, mu
, nu
);
867 const size_t qmax
= c
->A_multipliers
[i
+1] - c
->A_multipliers
[i
] - 1;
868 complex double Asum
, Asumc
; ckahaninit(&Asum
, &Asumc
);
869 double Asumerr
, Asumerrc
; if(Aerr
) kahaninit(&Asumerr
, &Asumerrc
);
871 const qpms_m_t mu_m
= mu
- m
;
872 // TODO skip if ... (N.B. skip will be different for 31z and 32)
873 for(qpms_l_t q
= 0; q
<= qmax
; ++q
) {
874 const qpms_l_t p
= n
+ nu
- 2*q
;
875 const qpms_y_t y_sc
= qpms_mn2y_sc(mu_m
, p
);
876 const complex double multiplier
= c
->A_multipliers
[i
][q
];
877 complex double sigma
= sigmas_total
[y_sc
];
878 ckahanadd(&Asum
, &Asumc
, multiplier
* sigma
);
879 if (Aerr
) kahanadd(&Asumerr
, &Asumerrc
, multiplier
* serr_total
[y_sc
]);
882 *(Adest
+ deststride
* desti
+ srcstride
* srci
) = Asum
;
883 if (Aerr
) *(Aerr
+ deststride
* desti
+ srcstride
* srci
) = Asumerr
;
886 complex double Bsum
, Bsumc
; ckahaninit(&Bsum
, &Bsumc
);
887 double Bsumerr
, Bsumerrc
; if(Berr
) kahaninit(&Bsumerr
, &Bsumerrc
);
888 for(qpms_l_t q
= 0; q
<= qmax
; ++q
) {
889 const qpms_l_t p_
= n
+ nu
- 2*q
+ 1;
890 const qpms_y_t y_sc
= qpms_mn2y_sc(mu_m
, p_
);
891 const complex double multiplier
= c
->B_multipliers
[i
][q
-BQ_OFFSET
];
892 complex double sigma
= sigmas_total
[y_sc
];
893 ckahanadd(&Bsum
, &Bsumc
, multiplier
* sigma
);
894 if (Berr
) kahanadd(&Bsumerr
, &Bsumerrc
, multiplier
* serr_total
[y_sc
]);
897 *(Bdest
+ deststride
* desti
+ srcstride
* srci
) = Bsum
;
898 if (Berr
) *(Berr
+ deststride
* desti
+ srcstride
* srci
) = Bsumerr
;
917 #endif // LATTICESUMS_31
922 // N.B. alternative point generation strategy toggled by macro GEN_RSHIFTEDPOINTS
923 // and GEN_KSHIFTEDPOINTS.
924 // The results should be the same. The performance can slightly differ (especially
925 // if some optimizations in the point generators are implemented.)
926 int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator
*c
,
927 complex double * const Adest
, double * const Aerr
,
928 complex double * const Bdest
, double * const Berr
,
929 const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
930 /* qpms_bessel_t J*/ // assume QPMS_HANKEL_PLUS
931 const double eta
, const complex double k
,
932 const cart2_t b1
, const cart2_t b2
,
934 const cart3_t particle_shift
,
935 double maxR
, double maxK
,
936 const qpms_ewald_part parts
940 const qpms_y_t nelem2_sc
= qpms_lMax2nelem_sc(c
->e3c
->lMax
);
941 //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
942 const bool doerr
= Aerr
|| Berr
;
943 const bool do_sigma0
= ((particle_shift
.x
== 0) && (particle_shift
.y
== 0) && (particle_shift
.z
== 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
945 complex double *sigmas_short
= malloc(sizeof(complex double)*nelem2_sc
);
946 complex double *sigmas_long
= malloc(sizeof(complex double)*nelem2_sc
);
947 complex double *sigmas_total
= malloc(sizeof(complex double)*nelem2_sc
);
948 double *serr_short
, *serr_long
, *serr_total
;
950 serr_short
= malloc(sizeof(double)*nelem2_sc
);
951 serr_long
= malloc(sizeof(double)*nelem2_sc
);
952 serr_total
= malloc(sizeof(double)*nelem2_sc
);
953 } else serr_short
= serr_long
= serr_total
= NULL
;
955 const double unitcell_area
= l2d_unitcell_area(b1
, b2
);
956 cart2_t rb1
, rb2
; // reciprocal basis
957 QPMS_ENSURE_SUCCESS(l2d_reciprocalBasis2pi(b1
, b2
, &rb1
, &rb2
));
959 if (parts
& QPMS_EWALD_LONG_RANGE
) {
960 PGen Kgen
= PGen_xyWeb_new(rb1
, rb2
, BASIS_RTOL
,
961 #ifdef GEN_KSHIFTEDPOINTS
966 0, true, maxK
, false);
968 QPMS_ENSURE_SUCCESS(ewald3_sigma_long(sigmas_long
, serr_long
, c
->e3c
, eta
, k
,
969 unitcell_area
, LAT_2D_IN_3D_XYONLY
, &Kgen
,
970 #ifdef GEN_KSHIFTEDPOINTS
975 cart22cart3xy(beta
), particle_shift
));
976 if(Kgen
.stateData
) // PGen not consumed entirely (converged earlier)
980 if (parts
& QPMS_EWALD_SHORT_RANGE
) {
981 PGen Rgen
= PGen_xyWeb_new(b1
, b2
, BASIS_RTOL
,
982 #ifdef GEN_RSHIFTEDPOINTS
983 cart2_scale(-1 /*CHECKSIGN*/, cart3xy2cart2(particle_shift
)),
987 0, !do_sigma0
, maxR
, false);
988 #ifdef GEN_RSHIFTEDPOINTS // rather ugly hacks, LPTODO cleanup
989 if (particle_shift
.z
!= 0) {
990 const cart3_t zshift
= {0, 0, -particle_shift
.z
/*CHECKSIGN*/};
991 Rgen
= Pgen_shifted_new(Rgen
, zshift
);
996 QPMS_ENSURE_SUCCESS(ewald3_sigma_short(sigmas_short
, serr_short
, c
->e3c
, eta
, k
,
997 particle_shift
.z
? LAT_2D_IN_3D
: LAT_2D_IN_3D_XYONLY
, &Rgen
,
998 #ifdef GEN_RSHIFTEDPOINTS
1003 cart22cart3xy(beta
), particle_shift
));
1005 if(Rgen
.stateData
) // PGen not consumed entirely (converged earlier)
1006 PGen_destroy(&Rgen
);
1009 for(qpms_y_t y
= 0; y
< nelem2_sc
; ++y
)
1010 sigmas_total
[y
] = ((parts
& QPMS_EWALD_SHORT_RANGE
) ? sigmas_short
[y
] : 0)
1011 + ((parts
& QPMS_EWALD_LONG_RANGE
) ? sigmas_long
[y
] : 0);
1012 if (doerr
) for(qpms_y_t y
= 0; y
< nelem2_sc
; ++y
)
1013 serr_total
[y
] = ((parts
& QPMS_EWALD_SHORT_RANGE
) ? serr_short
[y
] : 0)
1014 + ((parts
& QPMS_EWALD_LONG_RANGE
) ? serr_long
[y
] : 0);
1016 complex double sigma0
= 0; double sigma0_err
= 0;
1017 if (do_sigma0
&& (parts
& QPMS_EWALD_0TERM
)) {
1018 QPMS_ENSURE_SUCCESS(ewald3_sigma0(&sigma0
, &sigma0_err
, c
->e3c
, eta
, k
));
1019 const qpms_l_t y
= qpms_mn2y_sc(0,0);
1020 sigmas_total
[y
] += sigma0
;
1021 if(doerr
) serr_total
[y
] += sigma0_err
;
1025 ptrdiff_t desti
= 0, srci
= 0;
1026 for (qpms_l_t n
= 1; n
<= c
->lMax
; ++n
) for (qpms_m_t m
= -n
; m
<= n
; ++m
) {
1027 for (qpms_l_t nu
= 1; nu
<= c
->lMax
; ++nu
) for (qpms_m_t mu
= -nu
; mu
<= nu
; ++mu
){
1028 const size_t i
= qpms_trans_calculator_index_mnmunu(c
, m
, n
, mu
, nu
);
1029 const size_t qmax
= c
->A_multipliers
[i
+1] - c
->A_multipliers
[i
] - 1;
1030 complex double Asum
, Asumc
; ckahaninit(&Asum
, &Asumc
);
1031 double Asumerr
, Asumerrc
; if(Aerr
) kahaninit(&Asumerr
, &Asumerrc
);
1033 const qpms_m_t mu_m
= mu
- m
;
1035 for(qpms_l_t q
= 0; q
<= qmax
; ++q
) {
1036 const qpms_l_t p
= n
+ nu
- 2*q
;
1037 const qpms_y_t y_sc
= qpms_mn2y_sc(mu_m
, p
);
1038 const complex double multiplier
= c
->A_multipliers
[i
][q
];
1039 complex double sigma
= sigmas_total
[y_sc
];
1040 ckahanadd(&Asum
, &Asumc
, multiplier
* sigma
);
1041 if (Aerr
) kahanadd(&Asumerr
, &Asumerrc
, multiplier
* serr_total
[y_sc
]);
1044 *(Adest
+ deststride
* desti
+ srcstride
* srci
) = Asum
;
1045 if (Aerr
) *(Aerr
+ deststride
* desti
+ srcstride
* srci
) = Asumerr
;
1048 complex double Bsum
, Bsumc
; ckahaninit(&Bsum
, &Bsumc
);
1049 double Bsumerr
, Bsumerrc
; if(Berr
) kahaninit(&Bsumerr
, &Bsumerrc
);
1050 for(qpms_l_t q
= 0; q
<= qmax
; ++q
) {
1051 const qpms_l_t p_
= n
+ nu
- 2*q
+ 1;
1052 const qpms_y_t y_sc
= qpms_mn2y_sc(mu_m
, p_
);
1053 const complex double multiplier
= c
->B_multipliers
[i
][q
-BQ_OFFSET
];
1054 complex double sigma
= sigmas_total
[y_sc
];
1055 ckahanadd(&Bsum
, &Bsumc
, multiplier
* sigma
);
1056 if (Berr
) kahanadd(&Bsumerr
, &Bsumerrc
, multiplier
* serr_total
[y_sc
]);
1059 *(Bdest
+ deststride
* desti
+ srcstride
* srci
) = Bsum
;
1060 if (Berr
) *(Berr
+ deststride
* desti
+ srcstride
* srci
) = Bsumerr
;
1080 int qpms_trans_calculator_get_AB_arrays_e32(const qpms_trans_calculator
*c
,
1081 complex double * const Adest
, double * const Aerr
,
1082 complex double * const Bdest
, double * const Berr
,
1083 const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
1084 /* qpms_bessel_t J*/ // assume QPMS_HANKEL_PLUS
1085 const double eta
, const complex double k
,
1086 const cart2_t b1
, const cart2_t b2
,
1088 const cart3_t particle_shift
,
1089 double maxR
, double maxK
)
1091 return qpms_trans_calculator_get_AB_arrays_e32_e(
1092 c
, Adest
, Aerr
, Bdest
, Berr
, deststride
, srcstride
,
1093 eta
, k
, b1
, b2
, beta
, particle_shift
, maxR
, maxK
, QPMS_EWALD_FULL
);
1096 #endif // LATTICESUMS32
1099 complex double qpms_trans_calculator_get_A_ext(const qpms_trans_calculator
*c
,
1100 int m
, int n
, int mu
, int nu
,
1101 complex double kdlj_r
, double kdlj_theta
, double kdlj_phi
,
1102 int r_ge_d
, int J
) {
1103 csph_t kdlj
= {kdlj_r
, kdlj_theta
, kdlj_phi
};
1104 return qpms_trans_calculator_get_A(c
,m
,n
,mu
,nu
,kdlj
,r_ge_d
,J
);
1107 complex double qpms_trans_calculator_get_B_ext(const qpms_trans_calculator
*c
,
1108 int m
, int n
, int mu
, int nu
,
1109 complex double kdlj_r
, double kdlj_theta
, double kdlj_phi
,
1110 int r_ge_d
, int J
) {
1111 csph_t kdlj
= {kdlj_r
, kdlj_theta
, kdlj_phi
};
1112 return qpms_trans_calculator_get_B(c
,m
,n
,mu
,nu
,kdlj
,r_ge_d
,J
);
1115 int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator
*c
,
1116 complex double *Adest
, complex double *Bdest
,
1117 int m
, int n
, int mu
, int nu
,
1118 complex double kdlj_r
, double kdlj_theta
, double kdlj_phi
,
1119 int r_ge_d
, int J
) {
1120 csph_t kdlj
= {kdlj_r
, kdlj_theta
, kdlj_phi
};
1121 return qpms_trans_calculator_get_AB_p(c
,Adest
,Bdest
,m
,n
,mu
,nu
,kdlj
,r_ge_d
,J
);
1124 int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator
*c
,
1125 complex double *Adest
, complex double *Bdest
,
1126 size_t deststride
, size_t srcstride
,
1127 complex double kdlj_r
, double kdlj_theta
, double kdlj_phi
,
1128 int r_ge_d
, int J
) {
1129 csph_t kdlj
= {kdlj_r
, kdlj_theta
, kdlj_phi
};
1130 return qpms_trans_calculator_get_AB_arrays(c
,Adest
,Bdest
,deststride
,srcstride
,