Implementation of the Δ_n factor as a series; error estimates.
[qpms.git] / qpms / ewald.c
#include "ewald.h"
#include <stdlib.h>
#include "indexing.h"
#include "kahansum.h"
#include <assert.h>
#include <string.h>
#include <complex.h>
#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>
// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#ifndef M_SQRTPI
#define M_SQRTPI 1.7724538509055160272981674833411452
#endif
// sloppy implementation of factorial
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above.)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}
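/* Sanity-check sketch (not part of the build): the two branches of factorial() can
 * be compared around the n = 20 crossover; tgamma(n + 1) is expected to agree with
 * the exact iterated product to within a few ulps there.  A hypothetical standalone
 * check could read:
 *
 *   #include <math.h>
 *   #include <stdio.h>
 *   int main(void) {
 *     for (int n = 18; n <= 22; ++n) {
 *       double f = 1;
 *       for (int i = 1; i <= n; ++i) f *= i;   // n! by repeated multiplication
 *       printf("%d! : %.17g vs tgamma: %.17g\n", n, f, tgamma(n + 1.));
 *     }
 *     return 0;
 *   }
 */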
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }
/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_{pq}}$ term in [1,(4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;
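/* For reference, with one common spherical-harmonic convention,
 *
 *   Y_n^m(pi/2, alpha) = sqrt( (2n+1)/(4*pi) * (n-m)!/(n+m)! ) * P_n^m(0) * e^{i*m*alpha},
 *
 * so replacing the bare e^{i*m*alpha_pq} of [1,(4.5)] by Y_n^m(pi/2, alpha_pq) moves the
 * normalisation- and csphase-dependent factors out of the precomputed constants; in the
 * summation loops below the P_n^|m|(0) values (legendre0) and the e^{i*m*alpha_pq} phase
 * are applied at run time instead.
 */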
// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;

  // ----- the "xy-plane constants" ------
  // determine sizes
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j))
              * pow(0.5, 2*j-1);
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }

  // ------ the "z-axis constants" -----
  // determine sizes
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
      : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);

  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. GSL) takes about 64 kB of extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));
  return c;
}
void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
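/* Usage sketch (hypothetical caller code; the numerical values are only illustrative):
 * the constants object is built once per lMax and csphase and shared by all subsequent
 * sigma evaluations in this file.
 *
 *   qpms_ewald3_constants_t *ec = qpms_ewald3_constants_init(3, -1); // lMax = 3, csphase = -1
 *   // ... pass ec as the `c` argument of ewald3_sigma0() / ewald3_sigma_long() /
 *   //     ewald3_sigma_short() below ...
 *   qpms_ewald3_constants_free(ec);
 */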
int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if(err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
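/* Written out, the expression evaluated above is (with P_0^0(0) = 1 stored in legendre0[0])
 *
 *   sigma_0 = Gamma(-1/2, -(k/(2*eta))^2) / (2*sqrt(pi)),
 *
 * with the branch of the incomplete Gamma function chosen as described in the comment
 * at the complex_gamma_inc_e() call.
 */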
// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // a naive cpow() would go through clog(0) and may produce NaNs
    return 0;
  else
    return cpow(x, z);
}
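/* Minimal illustration (hypothetical; the exact outcome of the naive call depends on
 * the libm implementation):
 *
 *   complex double a = cpow(0.0, 2);          // evaluated via exp(2*clog(0)), may contain NaN
 *   complex double b = cpow_0lim_zi(0.0, 2);  // always exactly 0, as needed for the k->0 limit below
 */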
int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON, to speed up the calculation.
   * This obviously can currently work only for "suitable" PGens that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * at run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  if (k_is_real)
    assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double rbeta_pq_prev;
#endif
  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];

  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }

    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

    // CHOOSE POINT END

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    // true unless the generator flagged the radius as repeated from the previous point
    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);

    // R-DEPENDENT BEGIN
    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq/k);
      complex double x = gamma_pq*k/(2*eta);
      complex double x2 = x*x;
      if(particle_shift.z == 0) {
        for(qpms_l_t j = 0; j <= lMax/2; ++j) {
          qpms_csf_result Gam;
          int retval = complex_gamma_inc_e(0.5-j, x2,
              // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
              QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
          Gamma_pq[j] = Gam.val;
          if(err) Gamma_pq_err[j] = Gam.err;
        }
        if (latdim & LAT1D)
          factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
      } else if (latdim & LAT2D) {
        QPMS_UNTESTED;
        // TODO check the branches/phases!
        complex double z = I * k * gamma_pq * particle_shift.z;
        ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, z);
      } else {
        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
      }
    }
    // R-DEPENDENT END

    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in an array,
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((m+n) % 2 != 0) // odd coefficients are zero.
          continue;
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum the errors?
        assert((n-abs(m))/2 == c->s1_jMaxes[y]);
        for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
          complex double summand = cpow_0lim_zi(rbeta_pq/k, n-2*j)
            * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside the j-loop
            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] below (GGG) after error computation
            * c->s1_constfacs[y][j];
          if(err) {
            // FIXME include also other errors than Gamma_pq's relative error
            kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
          }
          summand *= Gamma_pq[j]; // GGG
          ckahanadd(&jsum, &jsum_c, summand);
        }
        jsum *= phasefac * factor1d; // PFC
        ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
        if(err) kahanadd(err + y, err_c + y, jsum_err);
      }
#ifndef NDEBUG
    rbeta_pq_prev = rbeta_pq;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_21_xy_sigma_long_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
#endif

  free(err_c);
  free(target_c);
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}
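/* Structure of the per-point accumulation above, written out (this mirrors the code,
 * it is not quoted from a reference): for each (n, m) with n + m even and
 * y = qpms_mn2y_sc(m, n),
 *
 *   target[y] += e^{i K_pq . particle_shift} * factor1d *
 *       sum_{j=0}^{(n-|m|)/2}  s1_constfacs[y][j] * (|beta_pq|/k)^{n-2j}
 *           * e^{i m alpha_pq} * P_n^{|m|}(0) * min1pow_m_neg(m)
 *           * gamma_pq^{2j-1} * Gamma_pq[j],
 *
 * and the accumulated sum is finally multiplied by the common factor 1/(k^2 * unitcell_volume).
 */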
// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyway
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum)).
  qpms_csf_result Gamma_pq[lMax/2+1];

  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);
    // CHOOSE POINT END

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    // R-DEPENDENT BEGIN
    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta)); // ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }
    // R-DEPENDENT END
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in an array,
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to Kahan-sum the errors?
      for(qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = pow(rbeta_mu/k, n-2*j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
             c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
             (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *   -I**(n+1) * (-1)**j * n!
         *   --------------------------
         *   j! * 2**(2*j) * (n - 2*j)!
         */

        if(err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if(err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP

  free(err_c);
  free(target_c);
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}
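/* The rewrite of the exponential integrals in terms of incomplete Gamma functions
 * mentioned at the Gamma_pq declaration above uses the standard identity
 *
 *   E_p(x) = x^(p-1) * Gamma(1 - p, x),
 *
 * so with p = j + 1/2 the exponential integrals E_{j+1/2} of the 1D z-axis long-range
 * sum map onto the Gamma(1/2 - j, z) values computed by complex_gamma_inc_e() in the
 * point loop.
 */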
int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
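/* Call sketch (hypothetical caller; the PGen construction, the constants object c and
 * the numerical inputs come from elsewhere in the library and are not shown here):
 *
 *   complex double sigma_lr[nelem_sc];
 *   double sigma_lr_err[nelem_sc];
 *   int retval = ewald3_sigma_long(sigma_lr, sigma_lr_err, c, eta, k,
 *       unitcell_area, LAT_2D_IN_3D_XYONLY, pgen_K,
 *       false, // pgen_K yields the unshifted reciprocal lattice; beta is added internally
 *       beta, particle_shift);
 */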
struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
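/* With the parameters set up in ewald32_sr_integral() below (R = 1, rescaled k and eta),
 * the quantity handed to the quadrature routine is
 *
 *   int_{eta}^{infinity} exp( -(R*xi)^2 + (k/(2*xi))^2 ) * xi^(2n) d xi ,
 *
 * i.e. the radial integral appearing in the short-range (real-space) part of the Ewald sum.
 */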
static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but keep it simple for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
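/* Why the rescaling by R0 works: substituting xi = xi'/R0 in the integral gives
 *
 *   int_{eta}^{inf} exp(-(r*xi)^2 + (k/(2*xi))^2) xi^(2n) d xi
 *     = R0^(-2n-1) * int_{eta*R0}^{inf} exp(-((r/R0)*xi')^2 + ((k*R0)/(2*xi'))^2) xi'^(2n) d xi',
 *
 * which is exactly what is evaluated above with p.R = r/R0 (= 1), p.k = k*R0, the lower
 * limit eta*R0, and the final multiplication by normfac = R0^(-2n-1).
 */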
// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice,
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but keep it simple for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1

  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the Legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + Kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double r_pq_shifted_prev;
#endif
  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    // CHOOSE POINT END
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimisations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;

    // if the radius is the same as in the previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);

    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags & PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
        break;
    }

    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { // 2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-nonzero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if(err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }

#ifndef NDEBUG
    r_pq_shifted_prev = r_pq_shifted;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_sigma_short_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
#endif

  gsl_integration_workspace_free(workspace);
  if(err) free(err_c);
  free(target_c);
  return 0;
}
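/* How the pieces of this file are typically combined (sketch only; the actual driver
 * lives elsewhere in QPMS and the sign/offset conventions are decided there):
 *
 *   complex double sigma[nelem_sc], sigma_sr[nelem_sc];
 *   double err_lr[nelem_sc], err_sr[nelem_sc];
 *   ewald3_sigma_long (sigma,    err_lr, c, eta, k, uc_volume, latdim, pgen_K, false, beta, pshift);
 *   ewald3_sigma_short(sigma_sr, err_sr, c, eta, k,            latdim, pgen_R, false, beta, pshift);
 *   for (qpms_y_t y = 0; y < nelem_sc; ++y) sigma[y] += sigma_sr[y];
 *   // and, when the lattice point at the origin is part of the sums, the ewald3_sigma0()
 *   // term additionally enters the y = 0 (l = m = 0) element.
 */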