#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>

// parameters for the quadrature of the integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#define M_SQRTPI 1.7724538509055160272981674833411452L
// sloppy implementation of factorial
// We prefer to avoid tgamma/lgamma, as their errors are about 4+ bits
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}
// sloppy implementation of double factorial n!!
static inline double double_factorial(int n) {
  assert(n >= 0);
  if (n % 2) { // odd, (2*k - 1)!! = 2**k * Γ(k + 0.5) / sqrt(π)
    const int k = n / 2 + 1;
    return pow(2, k) * tgamma(k + 0.5) / M_SQRTPI;
  } else { // even, n!! = 2**(n/2) * (n/2)!
    const int k = n / 2;
    return pow(2, k) * factorial(k);
  }
}
// sloppy implementation of (n/2)! = Γ(n/2 + 1)
// It is _slightly_ more precise than direct call of tgamma for small odd n
static inline double factorial_of_half(const int n2) {
  if (n2 % 2 == 0) return factorial(n2/2);
  else {
    if (n2 <= 50) { // odd, use (k - 0.5)! = Γ(k + 0.5) = 2**(-k) (2*k - 1)!! sqrt(π) for small n2
      const int k = n2 / 2 + 1;
      double fac2 = 1;
      for(int j = 2*k - 1; j > 0; j -= 2)
        fac2 *= j;
      return fac2 * pow(2, -k) * M_SQRTPI;
    }
    else return tgamma(1. + 0.5*n2);
  }
}
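
/* Quick sanity check of the identities used above (illustrative only, not part of the build):
 * 5!! = 15, and the odd-case formula with k = 5/2 + 1 = 3 gives
 *   2**3 * Γ(3.5) / sqrt(π) = 8 * (15 sqrt(π) / 8) / sqrt(π) = 15.
 * Likewise factorial_of_half(5) = Γ(3.5) = 15 sqrt(π) / 8 ≈ 3.3234,
 * which the small-n2 loop reproduces as fac2 * 2**(-3) * sqrt(π) with fac2 = 5*3*1 = 15.
 */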
static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }
/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4.5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_pq}$ term in [1,(4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
                              * spherical harmonic. See notes/ewald.lyx.
                              */
} ewald3_constants_option;

// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;
  // ----- the "xy-plane constants" ------
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }
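
  /* The s1_constfacs layout is a single contiguous buffer (s1_constfacs_base) with
   * per-y row pointers; rows exist only for even m + n and hold s1_jMaxes[y] + 1 entries.
   * Minimal traversal sketch (illustrative only; do_something_with() is hypothetical):
   *
   *   for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
   *     if (c->s1_jMaxes[y] < 0) continue; // odd m + n, no coefficients
   *     for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j)
   *       do_something_with(c->s1_constfacs[y][j]);
   *   }
   */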
  // ----- New generation 2D-in-3D constants ------
  // TODO it is not necessary to treat +|m| and -|m| cases separately
  // N.B. currently, this is only valid for EWALD32_CONSTANTS_AGNOSTIC (NOT CHECKED!)
  c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
  size_t S1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2;
          (s <= 2 * j) && (s <= L_M);
          s += 2)
        ++S1_constfacs_sz;
    }
  }

  c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
  size_t S1_constfacs_sz_cumsum = 0; // second count
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI
      * factorial((n-m)/2) * factorial((n+m)/2);
    c->S1_constfacs[y] = c->S1_constfacs_base + S1_constfacs_sz_cumsum;
    size_t coeffs_per_y = 0;
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2;
          (s <= 2 * j) && (s <= L_M);
          s += 2) {
        c->S1_constfacs_base[S1_constfacs_sz_cumsum] =
            yfactor * min1pow(j)
          / factorial(2*j-s) / factorial(s - j)
          / factorial_of_half(n - m - s) / factorial_of_half(n + m - s);
        ++S1_constfacs_sz_cumsum;
      }
    }
    S1_constfacs_sz_cumsum += coeffs_per_y;
  }
  QPMS_ASSERT(S1_constfacs_sz_cumsum == S1_constfacs_sz);
  c->S1_constfacs[c->nelem_sc] = c->S1_constfacs_base + S1_constfacs_sz; // For easier limit checks
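
  /* The S1_constfacs rows are filled with a single running offset in the same (j, s)
   * order as the nested loops above; the summation code in ewald3_21_xy_sigma_long()
   * below advances its constidx counter through exactly the same loop structure, so the
   * two stay in sync. Read-out sketch (illustrative only; use_coefficient() is hypothetical):
   *
   *   size_t constidx = 0;
   *   const qpms_l_t L_M = n - abs(m);
   *   for (qpms_l_t j = 0; j <= L_M; ++j)
   *     for (qpms_l_t s = j + (L_M - j) % 2; (s <= 2*j) && (s <= L_M); s += 2)
   *       use_coefficient(c->S1_constfacs[y][constidx++]);
   */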
  // ------ the "z-axis constants" -----
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
                                     : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);
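
  /* Example values of the closed form -I**(n+1) (-1)**j n! / (j! 2**(2j) (n-2j)!)
   * used just above (illustrative check): for n = 2,
   *   j = 0:  -I**3 * 2!/(0! * 1 * 2!) =  I
   *   j = 1:  -I**3 * (-1) * 2!/(1! * 4 * 0!) = -I/2
   * so s1_constfacs_1Dz[2] holds {I, -0.5*I}.
   */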
  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use the GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));

  return c;
}
void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->S1_constfacs);
  free(c->S1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
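
/* Typical lifecycle of the constants object (illustrative sketch; the csphase argument
 * follows the GSL Condon-Shortley phase convention, i.e. +1 or -1):
 *
 *   qpms_ewald3_constants_t *ec = qpms_ewald3_constants_init(lMax, -1);
 *   // ... use ec in ewald3_sigma0() / ewald3_sigma_long() / ewald3_sigma_short() ...
 *   qpms_ewald3_constants_free(ec);
 */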
int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if (err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}
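
/* In other words, the code above evaluates
 *
 *   sigma_0 = Γ(-1/2, -(k/(2η))²) * P_0^0(0) / (2 sqrt(π)),
 *
 * with the branch of the incomplete Γ chosen as described in the comment, and the
 * error estimate inherited from the relative error reported by complex_gamma_inc_e().
 */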
// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}
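
/* Intended limit values (illustrative): cpow_0lim_zi(0, 0) == 1 and cpow_0lim_zi(0, z) == 0
 * for z > 0, avoiding the NaNs that cpow() may produce for a zero base; any other input
 * falls through to cpow(x, z).
 */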
int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON to speed up the calculation.
   * This obviously can currently work only for "suitable PGen"s that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
  double rbeta_pq_prev;

  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I?*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];
  // cpow() is expensive, so we want to save and reuse these too:
  complex double minus_k_shiftz_pow[2*lMax + 1]; // __[i] = cpow(-k * particle_shift.z, i)
  {
    long complex double x = 1;
    for(qpms_l_t i = 0; i <= 2*lMax; ++i) {
      minus_k_shiftz_pow[i] = x;
      x *= -k * particle_shift.z;
    }
  }
  complex double rbeta_pq_div_k_pow[lMax + 1]; // __[i] = cpow_0lim_zi(rbeta_pq/k, i)
  complex double gamma_pq_powm1[2*lMax + 1]; // __[i] = cpow(gamma_pq, i-1)
  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }

    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);
    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {

    gamma_pq = clilgamma(rbeta_pq/k);
    { // fill gamma_pq_powm1[] and rbeta_pq_div_k_pow[]
      long complex double x = 1./gamma_pq;
      for(qpms_l_t i = 0; i <= 2*lMax; ++i) {
        gamma_pq_powm1[i] = x;
        x *= gamma_pq;
      }
      for(qpms_l_t i = 0; i <= lMax; ++i) // not fastest, but foolproof
        rbeta_pq_div_k_pow[i] = cpow_0lim_zi(rbeta_pq / k, i);
    }

    complex double x = gamma_pq*k/(2*eta);
    complex double x2 = x*x;
    if(particle_shift.z == 0) {
      for(qpms_l_t j = 0; j <= lMax/2; ++j) {
        qpms_csf_result Gam;
        int retval = complex_gamma_inc_e(0.5-j, x2,
            // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
            QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
        QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
        Gamma_pq[j] = Gam.val;
        if(err) Gamma_pq_err[j] = Gam.err;
      }
      if (latdim & LAT1D)
        factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
    } else if (latdim & LAT2D) {
      // TODO check the branches/phases!
      complex double z = I * k * gamma_pq * particle_shift.z;
      ewald3_2_sigma_long_Delta(Gamma_pq, err ? Gamma_pq_err : NULL, lMax/2, x, 0 /* FIXME */, z);
    } else
      QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
          continue; // nothing needed, already done by memset
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        size_t constidx = 0; // constants offset
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        if (particle_shift.z == 0) { // TODO remove when the general case is stable and tested
          assert((n-abs(m))/2 == c->s1_jMaxes[y]);
          for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
            complex double summand = rbeta_pq_div_k_pow[n-2*j]
              * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
              * gamma_pq_powm1[2*j] // * Gamma_pq[j] below (GGG) after error computation
              * c->s1_constfacs[y][j];
            if(err) {
              // FIXME include also other errors than Gamma_pq's relative error
              kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
            }
            summand *= Gamma_pq[j]; // GGG
            ckahanadd(&jsum, &jsum_c, summand);
          }
          jsum *= phasefac * factor1d; // PFC
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        } else { // particle_shift.z != 0
          const qpms_l_t L_M = n - abs(m);
          for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
            complex double ssum, ssum_c; ckahaninit(&ssum, &ssum_c);
            // TODO errors of ssum
            // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
            for(qpms_l_t s = j + (L_M - j) % 2;
                (s <= 2 * j) && (s <= L_M);
                s += 2) {
              complex double ssummand = c->S1_constfacs[y][constidx]
                * minus_k_shiftz_pow[2*j - s] * rbeta_pq_div_k_pow[n - s];
              ckahanadd(&ssum, &ssum_c, ssummand);
              ++constidx;
            }
            const complex double jfactor = e_imalpha_pq * Gamma_pq[j] * gamma_pq_powm1[2*j];
            if (err) { // FIXME include also other sources of error than Gamma_pq's relative error
              double jfactor_err = Gamma_pq_err[j] * pow(cabs(gamma_pq), 2*j - 1);
              kahanadd(&jsum_err, &jsum_err_c, jfactor_err * cabs(ssum));
            }
            complex double jsummand = jfactor * ssum;
            ckahanadd(&jsum, &jsum_c, jsummand);
          }
          jsum *= phasefac; // factor1d not here, off-axis sums not implemented/allowed.
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        }
      }
    rbeta_pq_prev = rbeta_pq;
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_21_xy_sigma_long_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
  ;
#endif

  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= cabs(commonfac);
  free(err_c);
  free(target_c);
  return 0;
}
// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyways
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum).
  qpms_csf_result Gamma_pq[lMax/2+1];

  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
    complex double gamma_pq = clilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta));// ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
      for(qpms_l_t j = 0; j <= n/2; ++j) {
        complex double summand = pow(rbeta_mu/k, n-2*j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
              c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
              (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *    -I**(n+1) (-1)**j * n!
         *   --------------------------
         *   j! * 2**(2*j) * (n - 2*j)!
         */
        if (err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if(err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  free(err_c);
  free(target_c);
  return 0;
}
int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
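
/* How the pieces are meant to fit together (illustrative sketch only; pgen_K, pgen_R,
 * beta, particle_shift and the output buffers are hypothetical variables the caller
 * provides, typically via the lattice point generators):
 *
 *   complex double sigma[nelem_sc], sigma_part[nelem_sc];
 *   ewald3_sigma_long (sigma,      NULL, c, eta, k, unitcell_volume, latdim,
 *                      pgen_K, false, beta, particle_shift);
 *   ewald3_sigma_short(sigma_part, NULL, c, eta, k, latdim,
 *                      pgen_R, false, beta, particle_shift);
 *   for (qpms_y_t y = 0; y < nelem_sc; ++y) sigma[y] += sigma_part[y];
 *   // plus the ewald3_sigma0() term added to the y == 0 element for the on-site
 *   // (r == 0) contribution when applicable.
 */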
struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}
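
/* sigma2_integrand(ksi) is the integrand of the short-range (sigma_2) integral
 *
 *   int_eta^infinity exp(-(R ksi)^2 + k^2/(4 ksi^2)) ksi^(2n) d ksi,
 *
 * evaluated below by gsl_integration_qagiu() on the semi-infinite interval
 * [eta, infinity) (cf. the short-range part of the Ewald sum, e.g. [1]).
 */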
static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1
  eta *= R0; // rescale the lower limit consistently with p.k and p.R

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
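
/* The R0 rescaling above amounts to the substitution ksi -> ksi/R0 in the integral:
 * assuming p.R = r/R0 (= 1 here), p.k = k*R0 and a lower limit of eta*R0, multiplying
 * the quadrature result by R0^(-2n-1) recovers the original integral
 * int_eta^inf exp(-(r ksi)^2 + k^2/(4 ksi^2)) ksi^(2n) d ksi.
 */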
// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;

  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1
  eta *= R0; // the same rescaling as in ewald32_sr_integral()

  gsl_function F;
  F.params = &p;

  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  PGenSphReturnData pgen_retdata;
  double r_pq_shifted_prev;

  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

  // CHOOSE POINT BEGIN
  // TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;
    // if the radius is the same as in previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);
    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags &PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
    }
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { //2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-non-zero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if (err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }
    r_pq_shifted_prev = r_pq_shifted;
#ifdef EWALD_AUTO_CUTOFF
    {
      ++li;
      double cursum_min = INFINITY;
      for (qpms_y_t y = 0; y < nelem_sc; ++y) {
        const double a = cabs(target[y]);
        if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
          cursum_min = MIN(cursum_min, a);
      }
      if (li >= 12 * lw + 1) {
        if(lsum < DBL_EPSILON * cursum_min)
          goto ewald3_sigma_short_end_point_loop;
        kahaninit(&lsum, &lsum_c);
        li = 0;
        ++lw;
      }
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
  ;
#endif

  gsl_integration_workspace_free(workspace);