Infinite periodic system scattered field "bases"
[qpms.git] / qpms / scatsystem.c
blobf36c0deededcfac58c749f938233ccee1c9c8421
1 #include <stdlib.h>
2 #define lapack_int int
3 #define lapack_complex_double complex double
4 #define lapack_complex_double_real(z) (creal(z))
5 #define lapack_complex_double_imag(z) (cimag(z))
6 #include <lapacke.h>
7 #include <cblas.h>
8 #include <lapacke.h>
9 #include "scatsystem.h"
10 #include "indexing.h"
11 #include "vswf.h"
12 #include "groups.h"
13 #include "symmetries.h"
14 #include <assert.h>
15 #include <unistd.h>
16 #include "vectors.h"
17 #include "quaternions.h"
18 #include <string.h>
19 #include "qpms_error.h"
20 #include "translations.h"
21 #include "tmatrices.h"
22 #include <pthread.h>
23 #include "kahansum.h"
24 #include "tolerances.h"
25 #include "beyn.h"
26 #include "tiny_inlines.h"
28 #ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
29 #include "qpmsblas.h"
30 #define SERIAL_ZGEMM qpms_zgemm
31 #else
32 #define SERIAL_ZGEMM cblas_zgemm
33 #endif
35 #define QPMS_SCATSYS_LEN_RTOL 1e-13
36 #define QPMS_SCATSYS_TMATRIX_ATOL 1e-12
37 #define QPMS_SCATSYS_TMATRIX_RTOL 1e-12
39 // This is used in adjustment of Ewald parameter to avoid high frequency breakdown.
40 // Very roughly, the value of 16 should lead to allowing terms containing incomplete Gamma
41 // functions with magnitudes around exp(16) == 8.9e6
42 static const double QPMS_SCATSYS_EWALD_MAX_EXPONENT = 16.;
44 long qpms_scatsystem_nthreads_default = 4;
45 long qpms_scatsystem_nthreads_override = 0;
47 void qpms_scatsystem_set_nthreads(long n) {
48 qpms_scatsystem_nthreads_override = n;
51 static inline void qpms_ss_ensure_periodic(const qpms_scatsys_t *ss) {
52 QPMS_ENSURE(ss->lattice_dimension > 0, "This method is applicable only to periodic systems.");
55 static inline void qpms_ss_ensure_periodic_a(const qpms_scatsys_t *ss, const char *s) {
56 QPMS_ENSURE(ss->lattice_dimension > 0, "This method is applicable only to periodic systems. Use %s instead.", s);
59 static inline void qpms_ss_ensure_nonperiodic(const qpms_scatsys_t *ss) {
60 QPMS_ENSURE(ss->lattice_dimension == 0, "This method is applicable only to nonperiodic systems.");
63 static inline void qpms_ss_ensure_nonperiodic_a(const qpms_scatsys_t *ss, const char *s) {
64 QPMS_ENSURE(ss->lattice_dimension == 0, "This method is applicable only to nonperiodic systems. Use %s instead.", s);
67 // Adjust Ewald parameter to avoid high-frequency breakdown
68 double qpms_ss_adjusted_eta(const qpms_scatsys_t *ss, complex double wavenumber, const double k[3]) {
69 qpms_ss_ensure_periodic(ss);
70 const double eta_default = ss->per.eta;
71 // FIXME here we silently assume that k lies in the first Brillioun zone, we should ensure that.
72 const double k2 = k[0]*k[0] + k[1]*k[1] + k[2] * k[2];
73 const double kappa2 = SQ(cabs(wavenumber)); // maybe creal would be enough
74 if(kappa2 < k2) // This should happen only for pretty low frequencies
75 return eta_default;
76 const qpms_l_t maxj = ss->c->lMax; // Based on ewald.c:301, note that VSWF (c's) lMax is already half of corresponding translation matrix Ewald factors' (c->e3c's) lMax
77 const double mina = 0.5 * (ss->lattice_dimension - 1) - maxj; // minimum incomplete Gamma first argument, based on ewald.c:301; CHECKME whether this is fine also for 3D lattice
78 const double eta_min = sqrt(fabs((kappa2 - k2) * (mina - 1.) / QPMS_SCATSYS_EWALD_MAX_EXPONENT));
79 return MAX(eta_default, eta_min);
82 // ------------ Stupid implementation of qpms_scatsys_apply_symmetry() -------------
84 // The following functions are just to make qpms_scatsys_apply_symmetry more readable.
85 // They are not to be used elsewhere, as they do not perform any array capacity checks etc.
87 /// Compare two orbit types in a scattering system.
88 static bool orbit_types_equal(const qpms_ss_orbit_type_t *a, const qpms_ss_orbit_type_t *b) {
89 if (a->size != b->size) return false;
90 if (memcmp(a->action, b->action, a->size*sizeof(qpms_ss_orbit_pi_t))) return false;
91 if (memcmp(a->tmatrices, b->tmatrices, a->size*sizeof(qpms_ss_tmi_t))) return false;
92 return true;
95 // Extend the action to all particles in orbit if only the action on the 0th
96 // particle has been filled.
97 static void extend_orbit_action(qpms_scatsys_t *ss, qpms_ss_orbit_type_t *ot) {
98 for(qpms_ss_orbit_pi_t src = 1; src < ot->size; ++src) {
99 // find any element g that sends 0 to src:
100 qpms_gmi_t g;
101 for (g = 0; g < ss->sym->order; ++g)
102 if (ot->action[g] == src) break;
103 assert(g < ss->sym->order);
104 // invg sends src to 0
105 qpms_gmi_t invg = qpms_finite_group_inv(ss->sym, g);
106 for (qpms_gmi_t f = 0; f < ss->sym->order; ++f)
107 // if f sends 0 to dest, then f * invg sends src to dest
108 ot->action[src * ss->sym->order +
109 qpms_finite_group_mul(ss->sym,f,invg)] = ot->action[f];
113 //Add orbit type to a scattering system, updating the ss->otspace_end pointer accordingly
114 static void add_orbit_type(qpms_scatsys_t *ss, const qpms_ss_orbit_type_t *ot_current) {
115 qpms_ss_orbit_type_t * const ot_new = & (ss->orbit_types[ss->orbit_type_count]);
116 ot_new->size = ot_current->size;
118 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_tmi(ss, ot_current->tmatrices[0]);
119 const size_t bspecn = bspec->n;
120 ot_new->bspecn = bspecn;
122 const size_t actionsiz = sizeof(ot_current->action[0]) * ot_current->size
123 * ss->sym->order;
124 ot_new->action = (void *) (ss->otspace_end);
125 memcpy(ot_new->action, ot_current->action, actionsiz);
126 // N.B. we copied mostly garbage ^^^, most of it is initialized just now:
127 extend_orbit_action(ss, ot_new);
128 #ifdef DUMP_ORBIT_ACTION
129 fprintf(stderr, "Orbit action:\n");
130 for (qpms_gmi_t gmi = 0; gmi < ss->sym->order; ++gmi) {
131 const qpms_quat4d_t q = qpms_quat_4d_from_2c(ss->sym->rep3d[gmi].rot);
132 fprintf(stderr, "%+d[%g %g %g %g] ", (int)ss->sym->rep3d[gmi].det,
133 q.c1, q.ci, q.cj, q.ck);
134 fprintf(stderr, "%s\t", (ss->sym->permrep && ss->sym->permrep[gmi])?
135 ss->sym->permrep[gmi] : "");
136 for (qpms_ss_orbit_pi_t pi = 0; pi < ot_new->size; ++pi)
137 fprintf(stderr, "%d\t", (int) ot_new->action[gmi + pi*ss->sym->order]);
138 fprintf(stderr, "\n");
140 #endif
141 ss->otspace_end += actionsiz;
143 const size_t tmsiz = sizeof(ot_current->tmatrices[0]) * ot_current->size;
144 ot_new->tmatrices = (void *) (ss->otspace_end);
145 memcpy(ot_new->tmatrices, ot_current->tmatrices, tmsiz);
146 ss->otspace_end += tmsiz;
148 const size_t irbase_sizes_siz = sizeof(ot_new->irbase_sizes[0]) * ss->sym->nirreps;
149 ot_new->irbase_sizes = (void *) (ss->otspace_end);
150 ss->otspace_end += irbase_sizes_siz;
151 ot_new->irbase_cumsizes = (void *) (ss->otspace_end);
152 ss->otspace_end += irbase_sizes_siz;
153 ot_new->irbase_offsets = (void *) (ss->otspace_end);
154 ss->otspace_end += irbase_sizes_siz;
156 const size_t irbases_siz = sizeof(ot_new->irbases[0]) * SQ(ot_new->size * bspecn);
157 ot_new->irbases = (void *) (ss->otspace_end);
158 ss->otspace_end += irbases_siz;
160 size_t lastbs, bs_cumsum = 0;
161 for(qpms_iri_t iri = 0; iri < ss->sym->nirreps; ++iri) {
162 ot_new->irbase_offsets[iri] = bs_cumsum * bspecn * ot_new->size;
163 qpms_orbit_irrep_basis(&lastbs,
164 ot_new->irbases + bs_cumsum*ot_new->size*bspecn,
165 ot_new, bspec, ss->sym, iri);
166 ot_new->irbase_sizes[iri] = lastbs;
167 bs_cumsum += lastbs;
168 ot_new->irbase_cumsizes[iri] = bs_cumsum;
170 QPMS_ENSURE(bs_cumsum == ot_new->size * bspecn,
171 "The cumulative size of the symmetry-adapted bases is wrong; "
172 "expected %d = %d * %d, got %d.",
173 ot_new->size * bspecn, ot_new->size, bspecn, bs_cumsum);
174 ot_new->instance_count = 0;
175 ss->orbit_type_count++;
178 // Standardises the lattice base vectors, and fills the other contents of ss->per[0].
179 // LPTODO split the functionality in smaller functions, these might be useful elsewhere.
180 static void process_lattice_bases(qpms_scatsys_t *ss, const qpms_tolerance_spec_t *tol) {
181 switch(ss->lattice_dimension) {
182 case 1: {
183 double normsq;
184 normsq = cart3_normsq(ss->per.lattice_basis[0]);
185 ss->per.unitcell_volume = sqrt(normsq);
186 ss->per.reciprocal_basis[0] = cart3_divscale(ss->per.lattice_basis[0], normsq);
187 ss->per.eta = 2. / M_2_SQRTPI / ss->per.unitcell_volume;
188 } break;
189 case 2: {
190 // (I) Create orthonormal basis of the plane in which the basis vectors lie
191 cart3_t onbasis[2];
192 double norm0 = cart3norm(ss->per.lattice_basis[0]);
193 // 0: Just normalised 0. basis vector
194 onbasis[0] = cart3_divscale(ss->per.lattice_basis[0], norm0);
195 // 1: Gram-Schmidt complement of the other basis vector
196 const double b0norm_dot_b1 = cart3_dot(onbasis[0], ss->per.lattice_basis[1]);
197 onbasis[1] = cart3_substract(ss->per.lattice_basis[1],
198 cart3_scale(b0norm_dot_b1, onbasis[0]));
199 onbasis[1] = cart3_divscale(onbasis[1], cart3norm(onbasis[1]));
200 // (II) Express the lattice basis in terms of the new 2d plane vector basis onbasis
201 cart2_t b2d[2] = {{.x = norm0, .y = 0},
202 {.x = b0norm_dot_b1, .y = cart3_dot(onbasis[1], ss->per.lattice_basis[1])}};
203 // (III) Reduce lattice basis and get reciprocal bases in the 2D plane
204 l2d_reduceBasis(b2d[0], b2d[1], &b2d[0], &b2d[1]);
205 ss->per.unitcell_volume = l2d_unitcell_area(b2d[0], b2d[1]);
206 ss->per.eta = 2. / M_2_SQRTPI / sqrt(ss->per.unitcell_volume);
207 cart2_t r2d[2];
208 QPMS_ENSURE_SUCCESS(l2d_reciprocalBasis1(b2d[0], b2d[1], &r2d[0], &r2d[1]));
209 // (IV) Rotate everything back to the original 3D space.
210 for(int i = 0; i < 2; ++i) {
211 ss->per.lattice_basis[i] = cart3_add(
212 cart3_scale(b2d[i].x, onbasis[0]),
213 cart3_scale(b2d[i].y, onbasis[1]));
214 ss->per.reciprocal_basis[i] = cart3_add(
215 cart3_scale(r2d[i].x, onbasis[0]),
216 cart3_scale(r2d[i].y, onbasis[1]));
218 } break;
219 case 3: {
220 l3d_reduceBasis(ss->per.lattice_basis, ss->per.lattice_basis);
221 ss->per.unitcell_volume = fabs(l3d_reciprocalBasis1(ss->per.lattice_basis,
222 ss->per.reciprocal_basis));
223 ss->per.eta = 2. / M_2_SQRTPI / cbrt(ss->per.unitcell_volume);
224 // TODO check unitcell_volume for sanity w.r.t tolerance
225 } break;
226 default:
227 QPMS_WTF;
232 // Almost 200 lines. This whole thing deserves a rewrite!
233 qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
234 const qpms_finite_group_t *sym, complex double omega,
235 const qpms_tolerance_spec_t *tol) {
236 if (sym == NULL) {
237 QPMS_WARN("No symmetry group pointer provided, assuming trivial symmetry"
238 " (this is currently the only option for periodic systems).");
239 sym = &QPMS_FINITE_GROUP_TRIVIAL;
242 // TODO check data sanity
244 qpms_l_t lMax = 0; // the overall lMax of all base specs.
245 qpms_normalisation_t normalisation = QPMS_NORMALISATION_UNDEF;
247 // First, determine the rough radius of the array; it should be nonzero
248 // in order to particle position equivalence work correctly
249 double lenscale = 0;
251 double minx = +INFINITY, miny = +INFINITY, minz = +INFINITY;
252 double maxx = -INFINITY, maxy = -INFINITY, maxz = -INFINITY;
253 for (qpms_ss_pi_t i = 0; i < orig->p_count; ++i) {
254 minx = MIN(minx,orig->p[i].pos.x);
255 miny = MIN(miny,orig->p[i].pos.y);
256 minz = MIN(minz,orig->p[i].pos.z);
257 maxx = MAX(maxx,orig->p[i].pos.x);
258 maxy = MAX(maxy,orig->p[i].pos.y);
259 maxz = MAX(maxz,orig->p[i].pos.z);
261 lenscale = (fabs(maxx)+fabs(maxy)+fabs(maxz)+(maxx-minx)+(maxy-miny)+(maxz-minz)) / 3;
264 // Second, check that there are no duplicit positions in the input system.
265 for (qpms_ss_pi_t i = 0; i < orig->p_count; ++i)
266 for (qpms_ss_pi_t j = 0; j < i; ++j)
267 assert(!cart3_isclose(orig->p[i].pos, orig->p[j].pos, 0, QPMS_SCATSYS_LEN_RTOL * lenscale));
270 // Allocate T-matrix, particle and particle orbit info arrays
271 qpms_scatsys_t *ss;
272 QPMS_CRASHING_MALLOC(ss, sizeof(*ss));
273 ss->lattice_dimension = orig->lattice_dimension;
274 // TODO basic periodic lattices related stuff here.
275 if (ss->lattice_dimension) {
276 ss->per = orig->per;
277 process_lattice_bases(ss, tol);
279 for(int i = 0; i < ss->lattice_dimension; ++i) // extend lenscale by basis vectors
280 lenscale = MAX(lenscale, cart3norm(ss->per.lattice_basis[i]));
281 ss->lenscale = lenscale;
282 ss->sym = sym;
283 ss->medium = orig->medium;
285 // Copy the qpms_tmatrix_fuction_t from orig
286 ss->tmg_count = orig->tmg_count;
287 QPMS_CRASHING_MALLOC(ss->tmg, ss->tmg_count * sizeof(*(ss->tmg)));
288 memcpy(ss->tmg, orig->tmg, ss->tmg_count * sizeof(*(ss->tmg)));
290 ss->tm_capacity = sym->order * orig->tm_count;
291 QPMS_CRASHING_MALLOC(ss->tm, ss->tm_capacity * sizeof(*(ss->tm)));
293 ss->p_capacity = sym->order * orig->p_count;
294 QPMS_CRASHING_MALLOC(ss->p, ss->p_capacity * sizeof(qpms_particle_tid_t));
295 QPMS_CRASHING_MALLOC(ss->p_orbitinfo, ss->p_capacity * sizeof(qpms_ss_particle_orbitinfo_t));
296 for (qpms_ss_pi_t pi = 0; pi < ss->p_capacity; ++pi) {
297 ss->p_orbitinfo[pi].t = QPMS_SS_P_ORBITINFO_UNDEF;
298 ss->p_orbitinfo[pi].p = QPMS_SS_P_ORBITINFO_UNDEF;
301 // Evaluate the original T-matrices at omega
302 qpms_tmatrix_t **tm_orig_omega;
303 QPMS_CRASHING_MALLOC(tm_orig_omega, orig->tmg_count * sizeof(*tm_orig_omega));
304 for(qpms_ss_tmgi_t i = 0; i < orig->tmg_count; ++i)
305 tm_orig_omega[i] = qpms_tmatrix_init_from_function(orig->tmg[i], omega);
307 // Evaluate the medium and derived T-matrices at omega.
308 qpms_scatsys_at_omega_t *ssw;
309 QPMS_CRASHING_MALLOC(ssw, sizeof(*ssw)); // returned
310 ssw->ss = ss;
311 ssw->omega = omega;
312 ssw->medium = qpms_epsmu_generator_eval(ss->medium, omega);
313 ssw->wavenumber = qpms_wavenumber(omega, ssw->medium);
314 // we will be using ss->tm_capacity also for ssw->tm
315 QPMS_CRASHING_MALLOC(ssw->tm, ss->tm_capacity * sizeof(*(ssw->tm))); // returned
317 // Evaluate T-matrices at omega; checking for duplicities
319 ss->max_bspecn = 0; // We'll need it later.for memory alloc estimates.
321 qpms_ss_tmi_t tm_dupl_remap[ss->tm_capacity]; // Auxilliary array to label remapping the indices after ignoring t-matrix duplicities; VLA!
322 ss->tm_count = 0;
323 for (qpms_ss_tmi_t i = 0; i < orig->tm_count; ++i) {
324 qpms_tmatrix_t *ti = qpms_tmatrix_apply_operation(&(orig->tm[i].op), tm_orig_omega[orig->tm[i].tmgi]);
325 qpms_ss_tmi_t j;
326 for (j = 0; j < ss->tm_count; ++j)
327 if (qpms_tmatrix_isclose(ti, ssw->tm[j], tol->rtol, tol->atol)) {
328 break;
330 if (j == ss->tm_count) { // duplicity not found, save both the "abstract" and "at omega" T-matrices
331 qpms_tmatrix_operation_copy(&ss->tm[j].op, &orig->tm[i].op);
332 ss->tm[j].tmgi = orig->tm[i].tmgi; // T-matrix functions are preserved.
333 ssw->tm[j] = ti;
334 ss->max_bspecn = MAX(ssw->tm[j]->spec->n, ss->max_bspecn);
335 lMax = MAX(lMax, ssw->tm[j]->spec->lMax);
336 ++(ss->tm_count);
338 else qpms_tmatrix_free(ti);
339 tm_dupl_remap[i] = j;
340 if (normalisation == QPMS_NORMALISATION_UNDEF)
341 normalisation = ssw->tm[i]->spec->norm;
342 // We expect all bspec norms to be the same.
343 else QPMS_ENSURE(normalisation == ssw->tm[j]->spec->norm,
344 "Normalisation convention must be the same for all T-matrices."
345 " %d != %d\n", normalisation, ssw->tm[j]->spec->norm);
348 // Free the original T-matrices at omega
349 for(qpms_ss_tmgi_t i = 0; i < orig->tmg_count; ++i)
350 qpms_tmatrix_free(tm_orig_omega[i]);
351 free(tm_orig_omega);
353 // Copy particles, remapping the t-matrix indices
354 for (qpms_ss_pi_t i = 0; i < orig->p_count; ++(i)) {
355 ss->p[i] = orig->p[i];
356 ss->p[i].tmatrix_id = tm_dupl_remap[ss->p[i].tmatrix_id];
358 ss->p_count = orig->p_count;
360 // allocate t-matrix symmetry map
361 QPMS_CRASHING_MALLOC(ss->tm_sym_map, sizeof(qpms_ss_tmi_t) * sym->order * sym->order * ss->tm_count);
363 // Extend the T-matrices list by the symmetry operations
364 for (qpms_ss_tmi_t tmi = 0; tmi < ss->tm_count; ++tmi)
365 for (qpms_gmi_t gmi = 0; gmi < sym->order; ++gmi){
366 const size_t d = ssw->tm[tmi]->spec->n;
367 complex double *m;
368 QPMS_CRASHING_MALLOC(m, d*d*sizeof(complex double)); // ownership passed to ss->tm[ss->tm_count].op
369 qpms_irot3_uvswfi_dense(m, ssw->tm[tmi]->spec, sym->rep3d[gmi]);
370 qpms_tmatrix_t *transformed = qpms_tmatrix_apply_symop(ssw->tm[tmi], m);
371 qpms_ss_tmi_t tmj;
372 for (tmj = 0; tmj < ss->tm_count; ++tmj)
373 if (qpms_tmatrix_isclose(transformed, ssw->tm[tmj], tol->rtol, tol->atol))
374 break;
375 if (tmj < ss->tm_count) { // HIT, transformed T-matrix already exists
376 //TODO some "rounding error cleanup" symmetrisation could be performed here?
377 qpms_tmatrix_free(transformed);
378 free(m);
379 } else { // MISS, save the matrix (also the "abstract" one)
380 ssw->tm[ss->tm_count] = transformed;
381 ss->tm[ss->tm_count].tmgi = ss->tm[tmi].tmgi;
382 qpms_tmatrix_operation_compose_chain_init(&(ss->tm[ss->tm_count].op), 2, 1); // FIXME MEMLEAK
383 struct qpms_tmatrix_operation_compose_chain * const o = &(ss->tm[ss->tm_count].op.op.compose_chain);
384 o->ops[0] = & ss->tm[tmi].op; // Let's just borrow this
385 o->ops_owned[0] = false;
386 o->opmem[0].typ = QPMS_TMATRIX_OPERATION_LRMATRIX;
387 o->opmem[0].op.lrmatrix.m = m;
388 o->opmem[0].op.lrmatrix.owns_m = true;
389 o->opmem[0].op.lrmatrix.m_size = d * d;
390 o->ops[1] = o->opmem;
391 o->ops_owned[1] = true;
392 ++(ss->tm_count);
394 ss->tm_sym_map[gmi + tmi * sym->order] = tmj; // In any case, tmj now indexes the correct transformed matrix
396 // Possibly free some space using the new ss->tm_count instead of (old) ss->tm_count*sym->order
397 QPMS_CRASHING_REALLOC(ss->tm_sym_map, sizeof(qpms_ss_tmi_t) * sym->order * ss->tm_count);
398 // tm could be realloc'd as well, but those are just pointers, not likely many.
401 // allocate particle symmetry map
402 QPMS_CRASHING_MALLOC(ss->p_sym_map, sizeof(qpms_ss_pi_t) * sym->order * sym->order * ss->p_count);
403 // allocate orbit type array (TODO realloc later if too long)
404 ss->orbit_type_count = 0;
405 QPMS_CRASHING_CALLOC(ss->orbit_types, ss->p_count, sizeof(qpms_ss_orbit_type_t));
407 QPMS_CRASHING_MALLOC(ss->otspace, // reallocate later
408 (sizeof(qpms_ss_orbit_pi_t) * sym->order * sym->order
409 + sizeof(qpms_ss_tmi_t) * sym->order
410 + 3*sizeof(size_t) * sym->nirreps
411 + sizeof(complex double) * SQ(sym->order * ss->max_bspecn)) * ss->p_count
413 ss->otspace_end = ss->otspace;
415 // Workspace for the orbit type determination
416 qpms_ss_orbit_type_t ot_current;
417 qpms_ss_orbit_pi_t ot_current_action[sym->order * sym->order];
418 qpms_ss_tmi_t ot_current_tmatrices[sym->order];
420 qpms_ss_pi_t current_orbit[sym->order];
421 ot_current.action = ot_current_action;
422 ot_current.tmatrices = ot_current_tmatrices;
425 // Extend the particle list by the symmetry operations, check that particles mapped by symmetry ops on themselves
426 // have the correct symmetry
427 // TODO this could be sped up to O(npart * log(npart)); let's see later whether needed.
428 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
429 const bool new_orbit = (ss->p_orbitinfo[pi].t == QPMS_SS_P_ORBITINFO_UNDEF); // TODO build the orbit!!!
430 if (new_orbit){
431 current_orbit[0] = pi;
432 ot_current.size = 1;
433 ot_current.tmatrices[0] = ss->p[pi].tmatrix_id;
434 #ifdef DUMP_PARTICLE_POSITIONS
435 cart3_t pos = ss->p[pi].pos;
436 fprintf(stderr, "An orbit [%.4g, %.4g, %.4g] => ", pos.x, pos.y, pos.z);
437 #endif
440 for (qpms_gmi_t gmi = 0; gmi < sym->order; ++gmi) {
441 cart3_t newpoint = qpms_irot3_apply_cart3(sym->rep3d[gmi], ss->p[pi].pos);
442 qpms_ss_tmi_t new_tmi = ss->tm_sym_map[gmi + ss->p[pi].tmatrix_id * sym->order]; // transformed t-matrix index
443 qpms_ss_pi_t pj;
444 for (pj = 0; pj < ss->p_count; ++pj)
445 if (cart3_isclose(newpoint, ss->p[pj].pos, 0, ss->lenscale * QPMS_SCATSYS_LEN_RTOL)) {
446 if (new_tmi != ss->p[pj].tmatrix_id)
447 qpms_pr_error("The %d. particle with coords (%lg, %lg, %lg) "
448 "is mapped to %d. another (or itself) with cords (%lg, %lg, %lg) "
449 "without having the required symmetry", (int)pi,
450 ss->p[pi].pos.x, ss->p[pi].pos.y, ss->p[pi].pos.z,
451 (int)pj, ss->p[pj].pos.x, ss->p[pj].pos.y, ss->p[pj].pos.z);
452 break;
454 if (pj < ss->p_count) { // HIT, the particle is transformed to an "existing" one.
456 } else { // MISS, the symmetry transforms the particle to a new location, so add it.
457 qpms_particle_tid_t newparticle = {newpoint, new_tmi};
458 ss->p[ss->p_count] = newparticle;
459 ++(ss->p_count);
460 #ifdef DUMP_PARTICLE_POSITIONS
461 if(new_orbit)
462 fprintf(stderr, "[%.4g, %.4g, %.4g] ", newpoint.x, newpoint.y, newpoint.z);
463 #endif
465 ss->p_sym_map[gmi + pi * sym->order] = pj;
467 if (new_orbit) {
468 // Now check whether the particle (result of the symmetry op) is already in the current orbit
469 qpms_ss_orbit_pi_t opj;
470 for (opj = 0; opj < ot_current.size; ++opj)
471 if (current_orbit[opj] == pj) break; // HIT, pj already on current orbit
472 if (opj == ot_current.size) { // MISS, pj is new on the orbit, extend the size and set the T-matrix id
473 current_orbit[opj] = pj;
474 ++ot_current.size;
475 ot_current.tmatrices[opj] = ss->p[pj].tmatrix_id;
477 ot_current.action[gmi] = opj;
480 if (new_orbit) { // Now compare if the new orbit corresponds to some of the existing types.
481 #ifdef DUMP_PARTICLE_POSITIONS
482 fputc('\n', stderr);
483 #endif
484 qpms_ss_oti_t oti;
485 for(oti = 0; oti < ss->orbit_type_count; ++oti)
486 if (orbit_types_equal(&ot_current, &(ss->orbit_types[oti]))) break; // HIT, orbit type already exists
487 assert(0 == sym->order % ot_current.size);
488 if (oti == ss->orbit_type_count) // MISS, add the new orbit type
489 add_orbit_type(ss, &ot_current);
491 // Walk through the orbit again and set up the orbit info of the particles
492 for (qpms_ss_orbit_pi_t opi = 0; opi < ot_current.size; ++opi) {
493 const qpms_ss_pi_t pi_opi = current_orbit[opi];
494 ss->p_orbitinfo[pi_opi].t = oti;
495 ss->p_orbitinfo[pi_opi].p = opi;
496 ss->p_orbitinfo[pi_opi].osn = ss->orbit_types[oti].instance_count;
498 ss->orbit_types[oti].instance_count++;
501 // Possibly free some space using the new ss->p_count instead of (old) ss->p_count*sym->order
502 QPMS_CRASHING_REALLOC(ss->p_sym_map, sizeof(qpms_ss_pi_t) * sym->order * ss->p_count);
503 QPMS_CRASHING_REALLOC(ss->p, sizeof(qpms_particle_tid_t) * ss->p_count);
504 QPMS_CRASHING_REALLOC(ss->p_orbitinfo, sizeof(qpms_ss_particle_orbitinfo_t)*ss->p_count);
505 ss->p_capacity = ss->p_count;
507 { // Reallocate the orbit type data space and update the pointers if needed.
508 size_t otspace_sz = ss->otspace_end - ss->otspace;
509 char *old_otspace = ss->otspace;
510 QPMS_CRASHING_REALLOC(ss->otspace, otspace_sz);
511 ptrdiff_t shift = ss->otspace - old_otspace;
512 if(shift) {
513 for (size_t oi = 0; oi < ss->orbit_type_count; ++oi) {
514 ss->orbit_types[oi].action = (void *)(((char *) (ss->orbit_types[oi].action)) + shift);
515 ss->orbit_types[oi].tmatrices = (void *)(((char *) (ss->orbit_types[oi].tmatrices)) + shift);
516 ss->orbit_types[oi].irbase_sizes = (void *)(((char *) (ss->orbit_types[oi].irbase_sizes)) + shift);
517 ss->orbit_types[oi].irbase_cumsizes = (void *)(((char *) (ss->orbit_types[oi].irbase_cumsizes)) + shift);
518 ss->orbit_types[oi].irbase_offsets = (void *)(((char *) (ss->orbit_types[oi].irbase_offsets)) + shift);
519 ss->orbit_types[oi].irbases = (void *)(((char *) (ss->orbit_types[oi].irbases)) + shift);
521 ss->otspace_end += shift;
525 // Set ss->fecv_size and ss->fecv_pstarts
526 ss->fecv_size = 0;
527 QPMS_CRASHING_MALLOC(ss->fecv_pstarts, ss->p_count * sizeof(size_t));
528 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
529 ss->fecv_pstarts[pi] = ss->fecv_size;
530 ss->fecv_size += ssw->tm[ss->p[pi].tmatrix_id]->spec->n; // That's a lot of dereferencing!
533 QPMS_CRASHING_MALLOC(ss->saecv_sizes, sizeof(size_t) * sym->nirreps);
534 QPMS_CRASHING_MALLOC(ss->saecv_ot_offsets, sizeof(size_t) * sym->nirreps * ss->orbit_type_count);
535 for(qpms_iri_t iri = 0; iri < sym->nirreps; ++iri) {
536 ss->saecv_sizes[iri] = 0;
537 for(qpms_ss_oti_t oti = 0; oti < ss->orbit_type_count; ++oti) {
538 ss->saecv_ot_offsets[iri * ss->orbit_type_count + oti] = ss->saecv_sizes[iri];
539 const qpms_ss_orbit_type_t *ot = &(ss->orbit_types[oti]);
540 ss->saecv_sizes[iri] += ot->instance_count * ot->irbase_sizes[iri];
544 qpms_ss_pi_t p_ot_cumsum = 0;
545 for (qpms_ss_oti_t oti = 0; oti < ss->orbit_type_count; ++oti) {
546 qpms_ss_orbit_type_t *ot = ss->orbit_types + oti;
547 ot->p_offset = p_ot_cumsum;
548 p_ot_cumsum += ot->size * ot->instance_count;
551 // Set ss->p_by_orbit[]
552 QPMS_CRASHING_MALLOC(ss->p_by_orbit, sizeof(qpms_ss_pi_t) * ss->p_count);
553 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
554 const qpms_ss_particle_orbitinfo_t *oi = ss->p_orbitinfo + pi;
555 const qpms_ss_oti_t oti = oi->t;
556 const qpms_ss_orbit_type_t *ot = ss->orbit_types + oti;
557 ss->p_by_orbit[ot->p_offset + ot->size * oi->osn + oi->p] = pi;
560 ss->c = qpms_trans_calculator_init(lMax, normalisation);
562 return ssw;
566 void qpms_scatsys_free(qpms_scatsys_t *ss) {
567 if(ss) {
568 QPMS_ASSERT(ss->tm);
569 for(qpms_ss_tmi_t tmi = 0; tmi < ss->tm_count; ++tmi)
570 qpms_tmatrix_operation_clear(&ss->tm[tmi].op);
571 free(ss->tm);
572 free(ss->tmg);
573 free(ss->p);
574 free(ss->fecv_pstarts);
575 free(ss->tm_sym_map);
576 free(ss->p_sym_map);
577 free(ss->otspace);
578 free(ss->p_orbitinfo);
579 free(ss->orbit_types);
580 free(ss->saecv_ot_offsets);
581 free(ss->saecv_sizes);
582 free(ss->p_by_orbit);
583 qpms_trans_calculator_free(ss->c);
585 free(ss);
588 void qpms_scatsys_at_omega_refill(qpms_scatsys_at_omega_t *ssw,
589 complex double omega) {
590 const qpms_scatsys_t * const ss = ssw->ss;
591 ssw->omega = omega;
592 ssw->medium = qpms_epsmu_generator_eval(ss->medium, omega);
593 ssw->wavenumber = qpms_wavenumber(omega, ssw->medium);
594 qpms_tmatrix_t **tmatrices_preop;
595 QPMS_CRASHING_CALLOC(tmatrices_preop, ss->tmg_count, sizeof(*tmatrices_preop));
596 for (qpms_ss_tmgi_t tmgi = 0; tmgi < ss->tmg_count; ++tmgi)
597 tmatrices_preop[tmgi] = qpms_tmatrix_init_from_function(ss->tmg[tmgi], omega);
598 for (qpms_ss_tmi_t tmi = 0; tmi < ss->tm_count; ++tmi)
599 qpms_tmatrix_apply_operation_replace(ssw->tm[tmi], &ss->tm[tmi].op,
600 tmatrices_preop[ss->tm[tmi].tmgi]);
601 for (qpms_ss_tmgi_t tmgi = 0; tmgi < ss->tmg_count; ++tmgi)
602 qpms_tmatrix_free(tmatrices_preop[tmgi]);
603 free(tmatrices_preop);
606 qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss,
607 complex double omega) {
608 // TODO
609 qpms_scatsys_at_omega_t *ssw;
610 QPMS_CRASHING_MALLOC(ssw, sizeof(*ssw));
611 ssw->omega = omega;
612 ssw->ss = ss;
613 ssw->medium = qpms_epsmu_generator_eval(ss->medium, omega);
614 ssw->wavenumber = qpms_wavenumber(omega, ssw->medium);
615 QPMS_CRASHING_CALLOC(ssw->tm, ss->tm_count, sizeof(*ssw->tm));
616 qpms_tmatrix_t **tmatrices_preop;
617 QPMS_CRASHING_CALLOC(tmatrices_preop, ss->tmg_count, sizeof(*tmatrices_preop));
618 for (qpms_ss_tmgi_t tmgi = 0; tmgi < ss->tmg_count; ++tmgi)
619 tmatrices_preop[tmgi] = qpms_tmatrix_init_from_function(ss->tmg[tmgi], omega);
620 for (qpms_ss_tmi_t tmi = 0; tmi < ss->tm_count; ++tmi) {
621 ssw->tm[tmi] = qpms_tmatrix_apply_operation(&ss->tm[tmi].op,
622 tmatrices_preop[ss->tm[tmi].tmgi]); //<- main difference to .._refill()
623 QPMS_ENSURE(ssw->tm[tmi],
624 "Got NULL pointer from qpms_tmatrix_apply_operation");
626 for (qpms_ss_tmgi_t tmgi = 0; tmgi < ss->tmg_count; ++tmgi)
627 qpms_tmatrix_free(tmatrices_preop[tmgi]);
628 free(tmatrices_preop);
629 return ssw;
632 void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw) {
633 if (ssw) {
634 if(ssw->tm)
635 for(qpms_ss_tmi_t i = 0; i < ssw->ss->tm_count; ++i)
636 qpms_tmatrix_free(ssw->tm[i]);
637 free(ssw->tm);
639 free(ssw);
642 // (copypasta from symmetries.c)
643 // TODO at some point, maybe support also other norms.
644 // (perhaps just use qpms_normalisation_t_factor() at the right places)
645 static inline void check_norm_compat(const qpms_vswf_set_spec_t *s)
647 switch (s->norm & QPMS_NORMALISATION_NORM_BITS) {
648 case QPMS_NORMALISATION_NORM_POWER:
649 break;
650 case QPMS_NORMALISATION_NORM_SPHARM:
651 break;
652 default:
653 QPMS_WTF; // Only SPHARM and POWER norms are supported right now.
657 complex double *qpms_orbit_action_matrix(complex double *target,
658 const qpms_ss_orbit_type_t *ot, const qpms_vswf_set_spec_t *bspec,
659 const qpms_finite_group_t *sym, const qpms_gmi_t g) {
660 assert(sym); assert(g < sym->order);
661 assert(sym->rep3d);
662 assert(ot); assert(ot->size > 0);
663 // check_norm_compat(bspec); not needed here, the qpms_irot3_uvswfi_dense should complain if necessary
664 const size_t n = bspec->n;
665 const qpms_gmi_t N = ot->size;
666 if (target == NULL)
667 QPMS_CRASHING_MALLOC(target, n*n*N*N*sizeof(complex double));
668 memset(target, 0, n*n*N*N*sizeof(complex double));
669 complex double tmp[n][n]; // this is the 'per-particle' action
670 qpms_irot3_uvswfi_dense(tmp[0], bspec, sym->rep3d[g]);
671 for(qpms_ss_orbit_pi_t Col = 0; Col < ot->size; ++Col) {
672 // Row is the 'destination' of the symmetry operation, Col is the 'source'
673 const qpms_ss_orbit_pi_t Row = ot->action[sym->order * Col + g];
674 for(size_t row = 0; row < bspec->n; ++row)
675 for(size_t col = 0; col < bspec->n; ++col)
676 target[n*n*N*Row + n*Col + n*N*row + col] = conj(tmp[row][col]); //CHECKCONJ
678 #ifdef DUMP_ACTIONMATRIX
679 fprintf(stderr,"%d: %s\n",
680 (int) g, (sym->permrep && sym->permrep[g])?
681 sym->permrep[g] : "");
682 for (size_t Row = 0; Row < ot->size; ++Row) {
683 fprintf(stderr, "--------------------------\n");
684 for (size_t row = 0; row < bspec->n; ++row) {
685 for (size_t Col = 0; Col < ot->size; ++Col) {
686 fprintf(stderr, "| ");
687 for (size_t col = 0; col < bspec->n; ++col)
688 fprintf(stderr, "%+2.3f%+2.3fj ", creal(target[n*n*N*Row + n*Col + n*N*row + col]),cimag(target[n*n*N*Row + n*Col + n*N*row + col]));
690 fprintf(stderr, "|\n");
693 fprintf(stderr, "-------------------------------\n\n");
694 #endif
696 return target;
699 complex double *qpms_orbit_irrep_projector_matrix(complex double *target,
700 const qpms_ss_orbit_type_t *ot, const qpms_vswf_set_spec_t *bspec,
701 const qpms_finite_group_t *sym, const qpms_iri_t iri) {
702 assert(sym);
703 assert(sym->rep3d);
704 assert(ot); assert(ot->size > 0);
705 assert(iri < sym->nirreps); assert(sym->irreps);
706 // check_norm_compat(bspec); // probably not needed here, let the called functions complain if necessary, but CHEKME
707 const size_t n = bspec->n;
708 const qpms_gmi_t N = ot->size;
709 if (target == NULL)
710 QPMS_CRASHING_MALLOC(target, n*n*N*N*sizeof(complex double));
711 memset(target, 0, n*n*N*N*sizeof(complex double));
712 // Workspace for the orbit group action matrices
713 complex double *tmp = malloc(n*n*N*N*sizeof(complex double));
714 const int d = sym->irreps[iri].dim;
715 double prefac = d / (double) sym->order;
716 for(int partner = 0; partner < d; ++partner) {
717 for(qpms_gmi_t g = 0; g < sym->order; ++g) {
718 // We use the diagonal elements of D_g
719 complex double D_g_conj = sym->irreps[iri].m[g*d*d + partner*d + partner];
720 #ifdef DUMP_ACTIONMATRIX
721 fprintf(stderr,"(factor %+g%+gj) ", creal(D_g_conj), cimag(D_g_conj));
722 #endif
723 qpms_orbit_action_matrix(tmp, ot, bspec, sym, g);
724 // TODO kahan sum?
725 for(size_t i = 0; i < n*n*N*N; ++i)
726 target[i] += prefac * D_g_conj * tmp[i];
729 free(tmp);
730 #ifdef DUMP_PROJECTORMATRIX
731 fprintf(stderr,"Projector %d (%s):\n", (int) iri,
732 sym->irreps[iri].name?sym->irreps[iri].name:"");
733 for (size_t Row = 0; Row < ot->size; ++Row) {
734 fprintf(stderr, "--------------------------\n");
735 for (size_t row = 0; row < bspec->n; ++row) {
736 for (size_t Col = 0; Col < ot->size; ++Col) {
737 fprintf(stderr, "| ");
738 for (size_t col = 0; col < bspec->n; ++col)
739 fprintf(stderr, "%+2.3f%+2.3fj ", creal(target[n*n*N*Row + n*Col + n*N*row + col]),cimag(target[n*n*N*Row + n*Col + n*N*row + col]));
741 fprintf(stderr, "|\n");
744 fprintf(stderr, "-------------------------------\n\n");
745 #endif
746 return target;
749 #define SVD_ATOL 1e-8
751 complex double *qpms_orbit_irrep_basis(size_t *basis_size,
752 complex double *target,
753 const qpms_ss_orbit_type_t *ot, const qpms_vswf_set_spec_t *bspec,
754 const qpms_finite_group_t *sym, const qpms_iri_t iri) {
755 assert(sym);
756 assert(sym->rep3d);
757 assert(ot); assert(ot->size > 0);
758 assert(iri < sym->nirreps); assert(sym->irreps);
759 check_norm_compat(bspec); // Here I'm not sure; CHECKME
760 const size_t n = bspec->n;
761 const qpms_gmi_t N = ot->size;
762 const bool newtarget = (target == NULL);
763 if (newtarget)
764 QPMS_CRASHING_MALLOC(target,n*n*N*N*sizeof(complex double));
765 memset(target, 0, n*n*N*N*sizeof(complex double));
767 // Get the projector (also workspace for right sg. vect.)
768 complex double *projector = qpms_orbit_irrep_projector_matrix(NULL,
769 ot, bspec, sym, iri);
770 QPMS_ENSURE(projector != NULL, "got NULL from qpms_orbit_irrep_projector_matrix()");
771 // Workspace for the right singular vectors.
772 complex double *V_H; QPMS_CRASHING_MALLOC(V_H, n*n*N*N*sizeof(complex double));
773 // THIS SHOULD NOT BE NECESSARY
774 complex double *U; QPMS_CRASHING_MALLOC(U, n*n*N*N*sizeof(complex double));
775 double *s; QPMS_CRASHING_MALLOC(s, n*N*sizeof(double));
777 QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR,
778 'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
779 n*N /* m */, n*N /* n */, projector /* a */, n*N /* lda */,
780 s /* s */, U /* u */, n*N /* ldu, irrelev. */, V_H /* vt */,
781 n*N /* ldvt */));
783 size_t bs;
784 for(bs = 0; bs < n*N; ++bs) {
785 QPMS_ENSURE(s[bs] <= 1 + SVD_ATOL, "%zd. SV too large: %.16lf", bs, s[bs]);
786 QPMS_ENSURE(!(s[bs] > SVD_ATOL && fabs(1-s[bs]) > SVD_ATOL),
787 "%zd. SV in the 'wrong' interval: %.16lf", bs, s[bs]);
788 if(s[bs] < SVD_ATOL) break;
791 memcpy(target, V_H, bs*N*n*sizeof(complex double));
792 if(newtarget) QPMS_CRASHING_REALLOC(target, bs*N*n*sizeof(complex double));
793 if(basis_size) *basis_size = bs;
795 free(s);
796 free(U);
797 free(V_H);
798 free(projector);
799 return target;
802 complex double *qpms_scatsys_irrep_transform_matrix(complex double *U,
803 const qpms_scatsys_t *ss, qpms_iri_t iri) {
804 const size_t packedlen = ss->saecv_sizes[iri];
805 const size_t full_len = ss->fecv_size;
806 if (U == NULL)
807 QPMS_CRASHING_MALLOC(U,full_len * packedlen * sizeof(complex double));
808 memset(U, 0, full_len * packedlen * sizeof(complex double));
810 size_t fullvec_offset = 0;
812 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
813 const qpms_ss_oti_t oti = ss->p_orbitinfo[pi].t;
814 const qpms_ss_orbit_type_t *const ot = ss->orbit_types + oti;
815 const qpms_ss_osn_t osn = ss->p_orbitinfo[pi].osn;
816 const qpms_ss_orbit_pi_t opi = ss->p_orbitinfo[pi].p;
817 // This is where the particle's orbit starts in the "packed" vector:
818 const size_t packed_orbit_offset = ss->saecv_ot_offsets[iri*ss->orbit_type_count + oti]
819 + osn * ot->irbase_sizes[iri];
820 // Orbit coeff vector's full size:
821 const size_t orbit_fullsize = ot->size * ot->bspecn;
822 const size_t orbit_packedsize = ot->irbase_sizes[iri];
823 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
824 const complex double *om = ot->irbases + ot->irbase_offsets[iri];
826 for (size_t prow = 0; prow < orbit_packedsize; ++prow)
827 for (size_t pcol = 0; pcol < ot->bspecn; ++pcol)
828 U[full_len * (packed_orbit_offset + prow) + (fullvec_offset + pcol)]
829 = om[orbit_fullsize * prow + (opi * ot->bspecn + pcol)];
830 fullvec_offset += ot->bspecn;
833 return U;
836 complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed,
837 const complex double *orig_full, const qpms_scatsys_t *ss,
838 qpms_iri_t iri){
839 const size_t packedlen = ss->saecv_sizes[iri];
840 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
841 return target_packed;
842 const size_t full_len = ss->fecv_size;
843 if (target_packed == NULL)
844 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
845 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
847 // Workspace for the intermediate matrix
848 complex double *tmp;
849 QPMS_CRASHING_MALLOC(tmp, full_len * packedlen * sizeof(complex double));
851 complex double *U = qpms_scatsys_irrep_transform_matrix(NULL, ss, iri);
853 const complex double one = 1, zero = 0;
855 // tmp = F U*
856 cblas_zgemm(
857 CblasRowMajor, CblasNoTrans, CblasConjTrans,
858 full_len /*M*/, packedlen /*N*/, full_len /*K*/,
859 &one /*alpha*/, orig_full/*A*/, full_len/*ldA*/,
860 U /*B*/, full_len/*ldB*/,
861 &zero /*beta*/, tmp /*C*/, packedlen /*LDC*/);
862 // target = U tmp
863 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
864 packedlen /*M*/, packedlen /*N*/, full_len /*K*/,
865 &one /*alpha*/, U/*A*/, full_len/*ldA*/,
866 tmp /*B*/, packedlen /*ldB*/, &zero /*beta*/,
867 target_packed /*C*/, packedlen /*ldC*/);
869 free(tmp);
870 free(U);
871 return target_packed;
874 /// Transforms a big "packed" matrix into the full basis (trivial implementation).
875 complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full,
876 const complex double *orig_packed, const qpms_scatsys_t *ss,
877 qpms_iri_t iri, bool add){
878 const size_t packedlen = ss->saecv_sizes[iri];
879 const size_t full_len = ss->fecv_size;
880 if (target_full == NULL)
881 QPMS_CRASHING_MALLOC(target_full, SQ(full_len)*sizeof(complex double));
882 if(!add) memset(target_full, 0, SQ(full_len)*sizeof(complex double));
884 if(!packedlen) return target_full; // Empty irrep, do nothing.
886 // Workspace for the intermediate matrix result
887 complex double *tmp;
888 QPMS_CRASHING_MALLOC(tmp, full_len * packedlen * sizeof(complex double));
890 complex double *U = qpms_scatsys_irrep_transform_matrix(NULL, ss, iri);
892 const complex double one = 1, zero = 0;
894 // tmp = P U
895 cblas_zgemm(
896 CblasRowMajor, CblasNoTrans, CblasNoTrans,
897 packedlen /*M*/, full_len /*N*/, packedlen /*K*/,
898 &one /*alpha*/, orig_packed/*A*/, packedlen/*ldA*/,
899 U /*B*/, full_len/*ldB*/,
900 &zero /*beta*/, tmp /*C*/, full_len /*LDC*/);
901 // target += U* tmp
902 cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans,
903 full_len /*M*/, full_len /*N*/, packedlen /*K*/,
904 &one /*alpha*/, U/*A*/, full_len/*ldA*/,
905 tmp /*B*/, full_len /*ldB*/, &one /*beta*/,
906 target_full /*C*/, full_len /*ldC*/);
907 free(tmp);
908 free(U);
909 return target_full;
912 complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed,
913 const complex double *orig_full, const qpms_scatsys_t *ss,
914 qpms_iri_t iri){
915 const size_t packedlen = ss->saecv_sizes[iri];
916 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
917 return target_packed;
918 const size_t full_len = ss->fecv_size;
919 if (target_packed == NULL)
920 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
921 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
923 // Workspace for the intermediate particle-orbit matrix result
924 complex double *tmp;
925 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
927 const complex double one = 1, zero = 0;
929 size_t fullvec_offsetR = 0;
930 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) { //Row loop
931 const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t;
932 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
933 const qpms_ss_osn_t osnR = ss->p_orbitinfo[piR].osn;
934 const qpms_ss_orbit_pi_t opiR = ss->p_orbitinfo[piR].p;
935 // This is where the particle's orbit starts in the "packed" vector:
936 const size_t packed_orbit_offsetR =
937 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
938 + osnR * otR->irbase_sizes[iri];
939 // Orbit coeff vector's full size:
940 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
941 const size_t particle_fullsizeR = otR->bspecn;
942 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
943 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
944 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
946 size_t fullvec_offsetC = 0;
947 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
948 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
949 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
950 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
951 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
952 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
953 // This is where the particle's orbit starts in the "packed" vector:
954 const size_t packed_orbit_offsetC =
955 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
956 + osnC * otC->irbase_sizes[iri];
957 // Orbit coeff vector's full size:
958 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
959 const size_t particle_fullsizeC = otC->bspecn;
960 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
961 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
962 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
964 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
965 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
966 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
967 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
968 &one /*alpha*/, orig_full + full_len*fullvec_offsetR + fullvec_offsetC/*A*/,
969 full_len/*ldA*/,
970 omC + opiC*particle_fullsizeC /*B*/,
971 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
972 tmp /*C*/, orbit_packedsizeC /*LDC*/);
974 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
975 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
976 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
977 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/,
978 orbit_fullsizeR/*ldA*/,
979 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
980 target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
981 packedlen /*ldC*/);
983 fullvec_offsetC += otC->bspecn;
986 fullvec_offsetR += otR->bspecn;
988 free(tmp);
989 return target_packed;
993 /// Transforms a big "packed" matrix into the full basis.
994 /** TODO doc */
995 complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full,
996 const complex double *orig_packed, const qpms_scatsys_t *ss,
997 qpms_iri_t iri, bool add){
998 const size_t packedlen = ss->saecv_sizes[iri];
999 const size_t full_len = ss->fecv_size;
1000 if (target_full == NULL)
1001 QPMS_CRASHING_MALLOC(target_full, SQ(full_len)*sizeof(complex double));
1002 if(!add) memset(target_full, 0, SQ(full_len)*sizeof(complex double));
1004 if(!packedlen) return target_full; // Empty irrep, do nothing.
1006 // Workspace for the intermediate particle-orbit matrix result
1007 complex double *tmp;
1008 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1010 const complex double one = 1, zero = 0;
1012 size_t fullvec_offsetR = 0;
1013 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) { //Row loop
1014 const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t;
1015 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1016 const qpms_ss_osn_t osnR = ss->p_orbitinfo[piR].osn;
1017 const qpms_ss_orbit_pi_t opiR = ss->p_orbitinfo[piR].p;
1018 // This is where the particle's orbit starts in the "packed" vector:
1019 const size_t packed_orbit_offsetR =
1020 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1021 + osnR * otR->irbase_sizes[iri];
1022 // Orbit coeff vector's full size:
1023 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1024 const size_t particle_fullsizeR = otR->bspecn;
1025 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1026 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1027 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1029 size_t fullvec_offsetC = 0;
1030 if (orbit_packedsizeR) // avoid crash on empty irrep
1031 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1032 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1033 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1034 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1035 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1036 // This is where the particle's orbit starts in the "packed" vector:
1037 const size_t packed_orbit_offsetC =
1038 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1039 + osnC * otC->irbase_sizes[iri];
1040 // Orbit coeff vector's full size:
1041 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1042 const size_t particle_fullsizeC = otC->bspecn;
1043 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1044 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1045 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1046 if (orbit_packedsizeC) { // avoid crash on empty irrep
1047 // tmp = P U
1048 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC]
1049 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1050 orbit_packedsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeC /*K*/,
1051 &one /*alpha*/, orig_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC/*A*/,
1052 packedlen/*ldA*/,
1053 omC + opiC*particle_fullsizeC /*B*/,
1054 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1055 tmp /*C*/, particle_fullsizeC /*LDC*/);
1057 // target[oiR|piR,oiC|piC] += U*[...] tmp[...]
1058 cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans,
1059 particle_fullsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeR /*K*/,
1060 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/,
1061 orbit_fullsizeR/*ldA*/,
1062 tmp /*B*/, particle_fullsizeC /*ldB*/, &one /*beta*/,
1063 target_full + full_len*fullvec_offsetR + fullvec_offsetC /*C*/,
1064 full_len /*ldC*/);
1066 fullvec_offsetC += otC->bspecn;
1068 fullvec_offsetR += otR->bspecn;
1071 free(tmp);
1072 return target_full;
1075 /// Projects a "big" vector onto an irrep.
1076 /** TODO doc */
1077 complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed,
1078 const complex double *orig_full, const qpms_scatsys_t *ss,
1079 const qpms_iri_t iri) {
1080 const size_t packedlen = ss->saecv_sizes[iri];
1081 if (!packedlen) return target_packed; // Empty irrep
1082 if (target_packed == NULL)
1083 QPMS_CRASHING_MALLOC(target_packed, packedlen*sizeof(complex double));
1084 memset(target_packed, 0, packedlen*sizeof(complex double));
1086 const complex double one = 1;
1088 size_t fullvec_offset = 0;
1089 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
1090 const qpms_ss_oti_t oti = ss->p_orbitinfo[pi].t;
1091 const qpms_ss_orbit_type_t *const ot = ss->orbit_types + oti;
1092 const qpms_ss_osn_t osn = ss->p_orbitinfo[pi].osn;
1093 const qpms_ss_orbit_pi_t opi = ss->p_orbitinfo[pi].p;
1094 // This is where the particle's orbit starts in the "packed" vector:
1095 const size_t packed_orbit_offset = ss->saecv_ot_offsets[iri*ss->orbit_type_count + oti]
1096 + osn * ot->irbase_sizes[iri];
1097 // Orbit coeff vector's full size:
1098 const size_t orbit_fullsize = ot->size * ot->bspecn;
1099 const size_t particle_fullsize = ot->bspecn;
1100 const size_t orbit_packedsize = ot->irbase_sizes[iri];
1101 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1102 const complex double *om = ot->irbases + ot->irbase_offsets[iri];
1103 if (orbit_packedsize) // avoid crash on empty irrep
1104 cblas_zgemv(CblasRowMajor/*order*/, CblasNoTrans/*transA*/,
1105 orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/,
1106 om + opi*particle_fullsize/*A*/, orbit_fullsize/*lda*/,
1107 orig_full+fullvec_offset/*X*/, 1/*incX*/,
1108 &one/*beta*/, target_packed+packed_orbit_offset/*Y*/, 1/*incY*/);
1109 fullvec_offset += ot->bspecn;
1111 return target_packed;
1114 /// Transforms a big "packed" vector into the full basis.
1115 /** TODO doc */
1116 complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full,
1117 const complex double *orig_packed, const qpms_scatsys_t *ss,
1118 const qpms_iri_t iri, bool add) {
1119 const size_t full_len = ss->fecv_size;
1120 if (target_full == NULL)
1121 QPMS_CRASHING_MALLOC(target_full, full_len*sizeof(complex double));
1122 if (!add) memset(target_full, 0, full_len*sizeof(complex double));
1124 const complex double one = 1;
1125 const size_t packedlen = ss->saecv_sizes[iri];
1126 if (!packedlen) return target_full; // Completely empty irrep
1128 size_t fullvec_offset = 0;
1129 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
1130 const qpms_ss_oti_t oti = ss->p_orbitinfo[pi].t;
1131 const qpms_ss_orbit_type_t *const ot = ss->orbit_types + oti;
1132 const qpms_ss_osn_t osn = ss->p_orbitinfo[pi].osn;
1133 const qpms_ss_orbit_pi_t opi = ss->p_orbitinfo[pi].p;
1134 // This is where the particle's orbit starts in the "packed" vector:
1135 const size_t packed_orbit_offset = ss->saecv_ot_offsets[iri*ss->orbit_type_count + oti]
1136 + osn * ot->irbase_sizes[iri];
1137 // Orbit coeff vector's full size:
1138 const size_t orbit_fullsize = ot->size * ot->bspecn;
1139 const size_t particle_fullsize = ot->bspecn;
1140 const size_t orbit_packedsize = ot->irbase_sizes[iri];
1141 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1142 const complex double *om = ot->irbases + ot->irbase_offsets[iri];
1144 if (orbit_packedsize) // empty irrep, avoid zgemv crashing.
1145 cblas_zgemv(CblasRowMajor/*order*/, CblasConjTrans/*transA*/,
1146 orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/, om + opi*particle_fullsize/*A*/,
1147 orbit_fullsize/*lda*/, orig_packed+packed_orbit_offset /*X*/, 1/*incX*/, &one/*beta*/,
1148 target_full+fullvec_offset/*Y*/, 1/*incY*/);
1150 fullvec_offset += ot->bspecn;
1152 return target_full;
1155 complex double *qpms_scatsys_build_translation_matrix_full(
1156 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1157 complex double *target,
1158 const qpms_scatsys_t *ss,
1159 complex double k ///< Wave number to use in the translation matrix.
1162 return qpms_scatsys_build_translation_matrix_e_full(
1163 target, ss, k, QPMS_HANKEL_PLUS);
1166 complex double *qpms_scatsyswk_build_translation_matrix_full(
1167 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1168 complex double *target,
1169 const qpms_scatsys_at_omega_k_t *sswk
1172 const qpms_scatsys_at_omega_t *ssw = sswk->ssw;
1173 const complex double wavenumber = ssw->wavenumber;
1174 const qpms_scatsys_t *ss = ssw->ss;
1175 qpms_ss_ensure_periodic(ss);
1176 const cart3_t k_cart3 = cart3_from_double_array(sswk->k);
1177 return qpms_scatsys_periodic_build_translation_matrix_full(target, ss, wavenumber, &k_cart3, sswk->eta);
1180 complex double *qpms_scatsys_build_translation_matrix_e_full(
1181 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1182 complex double *target,
1183 const qpms_scatsys_t *ss,
1184 complex double k, ///< Wave number to use in the translation matrix.
1185 qpms_bessel_t J ///< Bessel function type.
1188 qpms_ss_ensure_nonperiodic(ss);
1189 const size_t full_len = ss->fecv_size;
1190 if(!target)
1191 QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
1192 memset(target, 0, SQ(full_len) * sizeof(complex double)); //unnecessary?
1193 { // Non-diagonal part; M[piR, piC] = T[piR] S(piR<-piC)
1194 size_t fullvec_offsetR = 0;
1195 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) {
1196 const qpms_vswf_set_spec_t *bspecR = qpms_ss_bspec_pi(ss, piR);
1197 const cart3_t posR = ss->p[piR].pos;
1198 size_t fullvec_offsetC = 0;
1199 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
1200 const qpms_vswf_set_spec_t *bspecC = qpms_ss_bspec_pi(ss, piC);
1201 if(piC != piR) { // The diagonal will be dealt with later.
1202 const cart3_t posC = ss->p[piC].pos;
1203 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1204 target + fullvec_offsetR*full_len + fullvec_offsetC,
1205 bspecR, full_len, bspecC, 1,
1206 k, posR, posC, J));
1208 fullvec_offsetC += bspecC->n;
1210 assert(fullvec_offsetC == full_len);
1211 fullvec_offsetR += bspecR->n;
1213 assert(fullvec_offsetR == full_len);
1216 return target;
1219 static inline int qpms_ss_ppair_W32xy(const qpms_scatsys_t *ss,
1220 qpms_ss_pi_t pdest, qpms_ss_pi_t psrc, complex double wavenumber, const cart2_t kvector,
1221 complex double *target, const ptrdiff_t deststride, const ptrdiff_t srcstride,
1222 qpms_ewald_part parts, double eta) {
1223 const qpms_vswf_set_spec_t *srcspec = qpms_ss_bspec_pi(ss, psrc);
1224 const qpms_vswf_set_spec_t *destspec = qpms_ss_bspec_pi(ss, pdest);
1226 // This might be a bit arbitrary, roughly "copied" from Unitcell constructor. TODO review.
1227 const double maxR = sqrt(ss->per.unitcell_volume) * 64;
1228 const double maxK = 2048 * 2 * M_PI / maxR;
1230 return qpms_trans_calculator_get_trans_array_e32_e(ss->c,
1231 target, NULL /*err*/, destspec, deststride, srcspec, srcstride,
1232 eta, wavenumber,
1233 cart3xy2cart2(ss->per.lattice_basis[0]), cart3xy2cart2(ss->per.lattice_basis[1]),
1234 kvector,
1235 cart3_substract(ss->p[pdest].pos, ss->p[psrc].pos),
1236 maxR, maxK, parts);
1239 static inline int qpms_ss_ppair_W(const qpms_scatsys_t *ss,
1240 qpms_ss_pi_t pdest, qpms_ss_pi_t psrc, complex double wavenumber, const double wavevector[],
1241 complex double *target, const ptrdiff_t deststride, const ptrdiff_t srcstride,
1242 qpms_ewald_part parts, double eta) {
1243 if(ss->lattice_dimension == 2 && // Currently, we can only the xy-plane
1244 !ss->per.lattice_basis[0].z && !ss->per.lattice_basis[1].z &&
1245 !wavevector[2])
1246 return qpms_ss_ppair_W32xy(ss, pdest, psrc, wavenumber, cart2_from_double_array(wavevector),
1247 target, deststride, srcstride, parts, eta);
1248 else
1249 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1252 complex double *qpms_scatsys_periodic_build_translation_matrix_full(
1253 complex double *target, const qpms_scatsys_t *ss,
1254 complex double wavenumber, const cart3_t *wavevector, double eta) {
1255 QPMS_UNTESTED;
1256 qpms_ss_ensure_periodic(ss);
1257 if (eta == 0 || isnan(eta)) {
1258 double tmp[3];
1259 cart3_to_double_array(tmp, *wavevector);
1260 eta = qpms_ss_adjusted_eta(ss, wavenumber, tmp);
1263 const size_t full_len = ss->fecv_size;
1264 if(!target)
1265 QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
1266 const ptrdiff_t deststride = ss->fecv_size, srcstride = 1;
1267 // We have some limitations in the current implementation
1268 if(ss->lattice_dimension == 2 && // Currently, we can only the xy-plane
1269 !ss->per.lattice_basis[0].z && !ss->per.lattice_basis[1].z &&
1270 !wavevector->z) {
1271 for (qpms_ss_pi_t pd = 0; pd < ss->p_count; ++pd)
1272 for (qpms_ss_pi_t ps = 0; ps < ss->p_count; ++ps) {
1273 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W32xy(ss, pd, ps, wavenumber, cart3xy2cart2(*wavevector),
1274 target + deststride * ss->fecv_pstarts[pd] + srcstride * ss->fecv_pstarts[ps],
1275 deststride, srcstride, QPMS_EWALD_FULL, eta));
1277 } else
1278 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1279 return target;
1282 // Common implementation of qpms_scatsysw[k]_build_modeproblem_matrix_full
1283 static inline complex double *qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1284 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1285 complex double *target,
1286 const qpms_scatsys_at_omega_t *ssw,
1287 const double k[], // NULL if non-periodic
1288 const double eta // ignored if non-periodic
1291 const complex double wavenumber = ssw->wavenumber;
1292 const qpms_scatsys_t *ss = ssw->ss;
1293 const size_t full_len = ss->fecv_size;
1294 if(!target)
1295 QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
1296 complex double *tmp; // translation matrix, S or W
1297 QPMS_CRASHING_MALLOC(tmp, SQ(ss->max_bspecn) * sizeof(complex double));
1298 memset(target, 0, SQ(full_len) * sizeof(complex double));
1299 const complex double zero = 0, minusone = -1;
1300 { // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
1301 size_t fullvec_offsetR = 0;
1302 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) {
1303 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec;
1304 const cart3_t posR = ss->p[piR].pos;
1305 size_t fullvec_offsetC = 0;
1306 // dest particle T-matrix
1307 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1308 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
1309 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1310 if (k == NULL) { // non-periodic case
1311 if(piC != piR) { // No "self-interaction" in non-periodic case
1312 const cart3_t posC = ss->p[piC].pos;
1313 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1314 tmp, // tmp is S(piR<-piC)
1315 bspecR, bspecC->n, bspecC, 1,
1316 wavenumber, posR, posC, QPMS_HANKEL_PLUS));
1318 } else { // periodic case
1319 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W(ss, piR, piC, wavenumber, k,
1320 tmp /*target*/, bspecC->n /*deststride*/, 1 /*srcstride*/,
1321 QPMS_EWALD_FULL, eta));
1324 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1325 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1326 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1327 tmp/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1328 target + fullvec_offsetR*full_len + fullvec_offsetC /*c*/,
1329 full_len /*ldc*/);
1331 fullvec_offsetC += bspecC->n;
1333 fullvec_offsetR += bspecR->n;
1337 // Add the identity, diagonal part M[pi,pi] += 1
1338 for (size_t i = 0; i < full_len; ++i) target[full_len * i + i] += 1;
1340 free(tmp);
1341 return target;
1344 complex double *qpms_scatsysw_build_modeproblem_matrix_full(
1345 complex double *target, const qpms_scatsys_at_omega_t *ssw) {
1346 qpms_ss_ensure_nonperiodic_a(ssw->ss, "qpms_scatsyswk_build_modeproblem_matrix_full()");
1347 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1348 target, ssw, NULL, NAN);
1351 complex double *qpms_scatsyswk_build_modeproblem_matrix_full(
1352 complex double *target, const qpms_scatsys_at_omega_k_t *sswk)
1354 qpms_ss_ensure_periodic_a(sswk->ssw->ss, "qpms_scatsysw_build_modeproblem_matrix_full()");
1355 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(target, sswk->ssw, sswk->k, sswk->eta);
1359 // Serial reference implementation.
1360 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_serial(
1361 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1362 complex double *target_packed,
1363 const qpms_scatsys_at_omega_t *ssw,
1364 qpms_iri_t iri
1367 const qpms_scatsys_t *ss = ssw->ss;
1368 qpms_ss_ensure_nonperiodic(ss);
1369 const complex double k = ssw->wavenumber;
1370 const size_t packedlen = ss->saecv_sizes[iri];
1371 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1372 return target_packed;
1373 if (target_packed == NULL)
1374 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
1375 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1377 // some of the following workspaces are probably redundant; TODO optimize later.
1379 // workspaces for the uncompressed particle<-particle tranlation matrix block
1380 // and the result of multiplying with a T-matrix (times -1)
1381 complex double *Sblock, *TSblock;
1382 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1383 QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
1385 // Workspace for the intermediate particle-orbit matrix result
1386 complex double *tmp;
1387 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1389 const complex double one = 1, zero = 0, minusone = -1;
1391 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) { //Row loop
1392 const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t;
1393 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1394 const qpms_ss_osn_t osnR = ss->p_orbitinfo[piR].osn;
1395 const qpms_ss_orbit_pi_t opiR = ss->p_orbitinfo[piR].p;
1396 // This is where the particle's orbit starts in the "packed" vector:
1397 const size_t packed_orbit_offsetR =
1398 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1399 + osnR * otR->irbase_sizes[iri];
1400 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec;
1401 // Orbit coeff vector's full size:
1402 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1403 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1404 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1405 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1406 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1407 const cart3_t posR = ss->p[piR].pos;
1408 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1409 // dest particle T-matrix
1410 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1411 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1412 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1413 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1414 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1415 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1416 // This is where the particle's orbit starts in the "packed" vector:
1417 const size_t packed_orbit_offsetC =
1418 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1419 + osnC * otC->irbase_sizes[iri];
1420 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1421 // Orbit coeff vector's full size:
1422 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1423 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1424 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1425 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1426 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1428 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1429 if(piC != piR) { // non-diagonal, calculate TS
1430 const cart3_t posC = ss->p[piC].pos;
1431 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1432 Sblock, // Sblock is S(piR->piC)
1433 bspecR, bspecC->n, bspecC, 1,
1434 k, posR, posC, QPMS_HANKEL_PLUS));
1436 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1437 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1438 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1439 Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1440 TSblock /*c*/, bspecC->n /*ldc*/);
1441 } else { // diagonal, fill with diagonal +1
1442 for (size_t row = 0; row < bspecR->n; ++row)
1443 for (size_t col = 0; col < bspecC->n; ++col)
1444 TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
1447 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1448 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1449 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1450 &one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
1451 omC + opiC*particle_fullsizeC /*B*/,
1452 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1453 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1455 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1456 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1457 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1458 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1459 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1460 target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1461 packedlen /*ldC*/);
1466 free(tmp);
1467 free(Sblock);
1468 free(TSblock);
1469 return target_packed;
1472 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
1473 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1474 complex double *target_packed,
1475 const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri
1478 const qpms_scatsys_t *ss = ssw->ss;
1479 qpms_ss_ensure_nonperiodic(ss);
1480 const complex double k = ssw->wavenumber;
1481 const size_t packedlen = ss->saecv_sizes[iri];
1482 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1483 return target_packed;
1484 if (target_packed == NULL)
1485 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
1486 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1488 // some of the following workspaces are probably redundant; TODO optimize later.
1490 // workspaces for the uncompressed particle<-particle tranlation matrix block
1491 // and the result of multiplying with a T-matrix (times -1)
1492 complex double *Sblock, *TSblock;
1493 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1494 QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
1496 // Workspace for the intermediate particle-orbit matrix result
1497 complex double *tmp;
1498 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1500 const complex double one = 1, zero = 0, minusone = -1;
1502 for(qpms_ss_pi_t opistartR = 0; opistartR < ss->p_count;
1503 opistartR += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size //orbit_p_countR; might write a while() instead
1505 const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
1506 const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
1507 const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
1508 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1509 const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
1510 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1512 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1513 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1514 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[orbitstartpiR].tmatrix_id]->spec;
1515 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1516 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1517 // Orbit coeff vector's full size:
1518 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1519 // This is where the orbit starts in the "packed" vector:
1520 const size_t packed_orbit_offsetR =
1521 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1522 + osnR * otR->irbase_sizes[iri];
1523 for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
1524 qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
1525 assert(opiR == ss->p_orbitinfo[piR].p);
1526 assert(otiR == ss->p_orbitinfo[piR].t);
1527 assert(ss->p_orbitinfo[piR].osn == osnR);
1528 const cart3_t posR = ss->p[piR].pos;
1529 // dest particle T-matrix
1530 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1531 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1532 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1533 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1534 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1535 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1536 // This is where the particle's orbit starts in the "packed" vector:
1537 const size_t packed_orbit_offsetC =
1538 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1539 + osnC * otC->irbase_sizes[iri];
1540 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1541 // Orbit coeff vector's full size:
1542 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1543 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1544 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1545 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1546 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1548 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1549 if(piC != piR) { // non-diagonal, calculate TS
1550 const cart3_t posC = ss->p[piC].pos;
1551 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1552 Sblock, // Sblock is S(piR->piC)
1553 bspecR, bspecC->n, bspecC, 1,
1554 k, posR, posC, QPMS_HANKEL_PLUS));
1556 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1557 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1558 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1559 Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1560 TSblock /*c*/, bspecC->n /*ldc*/);
1561 } else { // diagonal, fill with diagonal +1
1562 for (size_t row = 0; row < bspecR->n; ++row)
1563 for (size_t col = 0; col < bspecC->n; ++col)
1564 TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
1567 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1568 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1569 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1570 &one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
1571 omC + opiC*particle_fullsizeC /*B*/,
1572 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1573 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1575 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1576 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1577 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1578 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1579 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1580 target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1581 packedlen /*ldC*/);
1587 free(tmp);
1588 free(Sblock);
1589 free(TSblock);
1590 return target_packed;
1593 struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg{
1594 const qpms_scatsys_at_omega_t *ssw;
1595 qpms_ss_pi_t *opistartR_ptr;
1596 pthread_mutex_t *opistartR_mutex;
1597 qpms_iri_t iri;
1598 complex double *target_packed;
1601 static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg)
1603 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1604 *a = arg;
1605 const qpms_scatsys_at_omega_t *ssw = a->ssw;
1606 const complex double k = ssw->wavenumber;
1607 const qpms_scatsys_t *ss = ssw->ss;
1608 const qpms_iri_t iri = a->iri;
1609 const size_t packedlen = ss->saecv_sizes[iri];
1611 // some of the following workspaces are probably redundant; TODO optimize later.
1613 // workspaces for the uncompressed particle<-particle tranlation matrix block
1614 // and the result of multiplying with a T-matrix (times -1)
1615 complex double *Sblock, *TSblock;
1616 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1617 QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
1619 // Workspace for the intermediate particle-orbit matrix result
1620 complex double *tmp;
1621 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1623 const complex double one = 1, zero = 0, minusone = -1;
1625 while(1) {
1626 // In the beginning, pick a target (row) orbit for this thread
1627 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a->opistartR_mutex));
1628 if(*(a->opistartR_ptr) >= ss->p_count) {// Everything is already done, end
1629 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1630 break;
1632 const qpms_ss_pi_t opistartR = *(a->opistartR_ptr);
1633 // Now increment it for another thread:
1634 *(a->opistartR_ptr) += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size;
1635 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1637 // Orbit picked (defined by opistartR), do the work.
1638 const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
1639 const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
1640 const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
1641 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1642 const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
1643 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1645 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1646 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1647 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[orbitstartpiR].tmatrix_id]->spec;
1648 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1649 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1650 // Orbit coeff vector's full size:
1651 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1652 // This is where the orbit starts in the "packed" vector:
1653 const size_t packed_orbit_offsetR =
1654 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1655 + osnR * otR->irbase_sizes[iri];
1656 for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
1657 qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
1658 assert(opiR == ss->p_orbitinfo[piR].p);
1659 assert(otiR == ss->p_orbitinfo[piR].t);
1660 assert(ss->p_orbitinfo[piR].osn == osnR);
1661 const cart3_t posR = ss->p[piR].pos;
1662 // dest particle T-matrix
1663 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1664 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1665 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1666 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1667 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1668 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1669 // This is where the particle's orbit starts in the "packed" vector:
1670 const size_t packed_orbit_offsetC =
1671 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1672 + osnC * otC->irbase_sizes[iri];
1673 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1674 // Orbit coeff vector's full size:
1675 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1676 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1677 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1678 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1679 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1681 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1682 if(piC != piR) { // non-diagonal, calculate TS
1683 const cart3_t posC = ss->p[piC].pos;
1684 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1685 Sblock, // Sblock is S(piR->piC)
1686 bspecR, bspecC->n, bspecC, 1,
1687 k, posR, posC, QPMS_HANKEL_PLUS));
1689 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1690 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1691 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1692 Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1693 TSblock /*c*/, bspecC->n /*ldc*/);
1694 } else { // diagonal, fill with diagonal +1
1695 for (size_t row = 0; row < bspecR->n; ++row)
1696 for (size_t col = 0; col < bspecC->n; ++col)
1697 TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
1700 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1701 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1702 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1703 &one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
1704 omC + opiC*particle_fullsizeC /*B*/,
1705 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1706 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1708 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1709 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1710 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1711 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1712 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1713 a->target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1714 packedlen /*ldC*/);
1723 free(tmp);
1724 free(Sblock);
1725 free(TSblock);
1726 return NULL;
1729 // this differs from the ...build_modeproblem_matrix... only by the `J`
1730 // maybe I should use this one there as well to save lines... TODO
1731 struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg{
1732 const qpms_scatsys_t *ss;
1733 qpms_ss_pi_t *opistartR_ptr;
1734 pthread_mutex_t *opistartR_mutex;
1735 qpms_iri_t iri;
1736 complex double *target_packed;
1737 complex double k;
1738 qpms_bessel_t J;
1741 static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread(void *arg)
1743 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1744 *a = arg;
1745 const qpms_scatsys_t *ss = a->ss;
1746 const qpms_iri_t iri = a->iri;
1747 const size_t packedlen = ss->saecv_sizes[iri];
1748 const qpms_bessel_t J = a->J;
1750 // some of the following workspaces are probably redundant; TODO optimize later.
1752 // workspace for the uncompressed particle<-particle tranlation matrix block
1753 complex double *Sblock;
1754 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1756 // Workspace for the intermediate particle-orbit matrix result
1757 complex double *tmp;
1758 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1760 const complex double one = 1, zero = 0;
1762 while(1) {
1763 // In the beginning, pick a target (row) orbit for this thread
1764 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a->opistartR_mutex));
1765 if(*(a->opistartR_ptr) >= ss->p_count) {// Everything is already done, end
1766 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1767 break;
1769 const qpms_ss_pi_t opistartR = *(a->opistartR_ptr);
1770 // Now increment it for another thread:
1771 *(a->opistartR_ptr) += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size;
1772 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1774 // Orbit picked (defined by opistartR), do the work.
1775 const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
1776 const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
1777 const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
1778 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1779 const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
1780 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1782 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1783 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1784 const qpms_vswf_set_spec_t *bspecR = qpms_ss_bspec_pi(ss, orbitstartpiR);
1785 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1786 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1787 // Orbit coeff vector's full size:
1788 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1789 // This is where the orbit starts in the "packed" vector:
1790 const size_t packed_orbit_offsetR =
1791 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1792 + osnR * otR->irbase_sizes[iri];
1793 for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
1794 qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
1795 assert(opiR == ss->p_orbitinfo[piR].p);
1796 assert(otiR == ss->p_orbitinfo[piR].t);
1797 assert(ss->p_orbitinfo[piR].osn == osnR);
1798 const cart3_t posR = ss->p[piR].pos;
1799 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1800 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1801 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1802 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1803 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1804 // This is where the particle's orbit starts in the "packed" vector:
1805 const size_t packed_orbit_offsetC =
1806 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1807 + osnC * otC->irbase_sizes[iri];
1808 const qpms_vswf_set_spec_t *bspecC = qpms_ss_bspec_pi(ss, piC);
1809 // Orbit coeff vector's full size:
1810 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1811 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1812 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1813 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1814 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1816 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1817 // THIS IS THE MAIN PART DIFFERENT FROM ...modeproblem...() TODO unify
1818 // somehow to save lines
1819 if(piC != piR) { // non-diagonal, calculate S
1820 const cart3_t posC = ss->p[piC].pos;
1821 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1822 Sblock, // Sblock is S(piR->piC)
1823 bspecR, bspecC->n, bspecC, 1,
1824 a->k, posR, posC, J));
1825 } else { // diagonal, fill with zeros; TODO does this make sense?
1826 // would unit matrix be better? or unit only for QPMS_BESSEL_REGULAR?
1827 for (size_t row = 0; row < bspecR->n; ++row)
1828 for (size_t col = 0; col < bspecC->n; ++col)
1829 Sblock[row * bspecC->n + col] = 0; //(row == col)? 1 : 0;
1832 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1833 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1834 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1835 &one /*alpha*/, Sblock/*A*/, particle_fullsizeC/*ldA*/,
1836 omC + opiC*particle_fullsizeC /*B*/,
1837 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1838 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1840 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1841 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1842 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1843 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1844 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1845 a->target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1846 packedlen /*ldC*/);
1855 free(tmp);
1856 free(Sblock);
1857 return NULL;
1860 // Almost the same as ...build_modeproblem_matrix_...parallelR
1861 // --> TODO write this in a more generic way to save LoC.
1862 complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
1863 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1864 complex double *target_packed,
1865 const qpms_scatsys_t *ss,
1866 qpms_iri_t iri,
1867 const complex double k,
1868 qpms_bessel_t J
1871 qpms_ss_ensure_nonperiodic(ss);
1872 QPMS_UNTESTED;
1873 const size_t packedlen = ss->saecv_sizes[iri];
1874 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1875 return target_packed;
1876 if (target_packed == NULL)
1877 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
1878 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1880 qpms_ss_pi_t opistartR = 0;
1881 pthread_mutex_t opistartR_mutex;
1882 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex, NULL));
1883 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1884 arg = {ss, &opistartR, &opistartR_mutex, iri, target_packed, k, J};
1886 // FIXME THIS IS NOT PORTABLE:
1887 long nthreads;
1888 if (qpms_scatsystem_nthreads_override > 0) {
1889 nthreads = qpms_scatsystem_nthreads_override;
1890 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "Using overriding value of %ld thread(s).",
1891 nthreads);
1892 } else {
1893 nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1894 if (nthreads < 1) {
1895 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1896 nthreads, qpms_scatsystem_nthreads_default);
1897 nthreads = qpms_scatsystem_nthreads_default;
1898 } else {
1899 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads);
1902 pthread_t thread_ids[nthreads];
1903 for(long thi = 0; thi < nthreads; ++thi)
1904 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids + thi, NULL,
1905 qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread,
1906 (void *) &arg));
1907 for(long thi = 0; thi < nthreads; ++thi) {
1908 void *retval;
1909 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids[thi], &retval));
1912 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex));
1913 return target_packed;
1917 // Parallel implementation, now default
1918 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
1919 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1920 complex double *target_packed,
1921 const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri
1924 qpms_ss_ensure_nonperiodic(ssw->ss);
1925 const size_t packedlen = ssw->ss->saecv_sizes[iri];
1926 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1927 return target_packed;
1928 if (target_packed == NULL)
1929 QPMS_CRASHING_MALLOC(target_packed,SQ(packedlen)*sizeof(complex double));
1930 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1932 qpms_ss_pi_t opistartR = 0;
1933 pthread_mutex_t opistartR_mutex;
1934 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex, NULL));
1935 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1936 arg = {ssw, &opistartR, &opistartR_mutex, iri, target_packed};
1938 // FIXME THIS IS NOT PORTABLE:
1939 long nthreads;
1940 if (qpms_scatsystem_nthreads_override > 0) {
1941 nthreads = qpms_scatsystem_nthreads_override;
1942 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "Using overriding value of %ld thread(s).",
1943 nthreads);
1944 } else {
1945 nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1946 if (nthreads < 1) {
1947 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1948 nthreads, qpms_scatsystem_nthreads_default);
1949 nthreads = qpms_scatsystem_nthreads_default;
1950 } else {
1951 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads);
1954 pthread_t thread_ids[nthreads];
1955 for(long thi = 0; thi < nthreads; ++thi)
1956 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids + thi, NULL,
1957 qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread,
1958 (void *) &arg));
1959 for(long thi = 0; thi < nthreads; ++thi) {
1960 void *retval;
1961 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids[thi], &retval));
1964 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex));
1965 return target_packed;
1969 complex double *qpms_scatsys_incident_field_vector_full(
1970 complex double *target_full, const qpms_scatsys_t *ss,
1971 qpms_incfield_t f, const void *args, bool add ) {
1972 QPMS_UNTESTED;
1973 if (!target_full) QPMS_CRASHING_CALLOC(target_full, ss->fecv_size,
1974 sizeof(complex double));
1975 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
1976 complex double *ptarget = target_full + ss->fecv_pstarts[pi];
1977 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_pi(ss, pi);
1978 const cart3_t pos = ss->p[pi].pos;
1979 QPMS_ENSURE_SUCCESS(f(ptarget, bspec, pos, args, add));
1981 return target_full;
1985 #if 0
1986 complex double *qpms_scatsys_incident_field_vector_irrep_packed(
1987 complex double *target_full, const qpms_scatsys_t *ss,
1988 const qpms_iri_t iri, qpms_incfield_t f,
1989 const void *args, bool add) {
1990 TODO;
1992 #endif
1995 complex double *qpms_scatsysw_apply_Tmatrices_full(
1996 complex double *target_full, const complex double *inc_full,
1997 const qpms_scatsys_at_omega_t *ssw) {
1998 QPMS_UNTESTED;
1999 const qpms_scatsys_t *ss = ssw->ss;
2000 if (!target_full) QPMS_CRASHING_CALLOC(target_full, ss->fecv_size,
2001 sizeof(complex double));
2002 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
2003 complex double *ptarget = target_full + ss->fecv_pstarts[pi];
2004 const complex double *psrc = inc_full + ss->fecv_pstarts[pi];
2005 // TODO check whether T-matrix is non-virtual after virtual t-matrices are implemented.
2006 const qpms_tmatrix_t *T = ssw->tm[ss->p[pi].tmatrix_id];
2007 qpms_apply_tmatrix(ptarget, psrc, T);
2009 return target_full;
2012 ccart3_t qpms_scatsys_scattered_E(const qpms_scatsys_t *ss,
2013 qpms_bessel_t btyp,
2014 const complex double k,
2015 const complex double *cvf,
2016 const cart3_t where
2018 QPMS_UNTESTED;
2019 ccart3_t res = {0,0,0};
2020 ccart3_t res_kc = {0,0,0}; // kahan sum compensation
2022 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
2023 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_pi(ss, pi);
2024 const cart3_t particle_pos = ss->p[pi].pos;
2025 const complex double *particle_cv = cvf + ss->fecv_pstarts[pi];
2027 const csph_t kr = sph_cscale(k, cart2sph(
2028 cart3_substract(where, particle_pos)));
2029 const csphvec_t E_sph = qpms_eval_uvswf(bspec, particle_cv, kr, btyp);
2030 const ccart3_t E_cart = csphvec2ccart_csph(E_sph, kr);
2031 ckahanadd(&(res.x), &(res_kc.x), E_cart.x);
2032 ckahanadd(&(res.y), &(res_kc.y), E_cart.y);
2033 ckahanadd(&(res.z), &(res_kc.z), E_cart.z);
2035 return res;
2038 ccart3_t qpms_scatsysw_scattered_E(const qpms_scatsys_at_omega_t *ssw,
2039 qpms_bessel_t btyp,
2040 const complex double *cvf, const cart3_t where) {
2041 qpms_ss_ensure_nonperiodic_a(ssw->ss, "qpms_scatsyswk_scattered_E()");
2042 return qpms_scatsys_scattered_E(ssw->ss, btyp, ssw->wavenumber,
2043 cvf, where);
2046 // Alternative implementation, using translation operator and regular dipole waves at zero
2047 ccart3_t qpms_scatsys_scattered_E__alt(const qpms_scatsys_t *ss,
2048 qpms_bessel_t btyp,
2049 const complex double k,
2050 const complex double *cvf,
2051 const cart3_t where
2053 QPMS_UNTESTED;
2054 qpms_ss_ensure_nonperiodic(ss);
2055 ccart3_t res = {0,0,0};
2056 ccart3_t res_kc = {0,0,0}; // kahan sum compensation
2058 static const int dipspecn = 3; // We have three basis vectors
2059 // bspec containing only electric dipoles
2060 const qpms_vswf_set_spec_t dipspec = {
2061 .n = dipspecn,
2062 .ilist = (qpms_uvswfi_t[]){
2063 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, -1, 1),
2064 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, 0, 1),
2065 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, +1, 1),
2067 .lMax=1, .lMax_M=0, .lMax_N=1, .lMax_L=-1,
2068 .capacity=0,
2069 .norm = ss->c->normalisation,
2072 ccart3_t regdipoles_0[dipspecn]; {
2073 const sph_t origin_sph = {.r = 0, .theta = M_PI_2, .phi=0}; // Should work with any theta/phi (TESTWORTHY)
2074 csphvec_t regdipoles_0_sph[dipspecn];
2075 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph, &dipspec,
2076 sph2csph(origin_sph), QPMS_BESSEL_REGULAR));
2077 for(int i = 0; i < dipspecn; ++i)
2078 regdipoles_0[i] = csphvec2ccart(regdipoles_0_sph[i], origin_sph);
2081 complex double *s; // Translation matrix
2082 QPMS_CRASHING_MALLOC(s, ss->max_bspecn * sizeof(*s) * dipspec.n);
2084 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
2085 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_pi(ss, pi);
2086 const cart3_t particle_pos = ss->p[pi].pos;
2087 const complex double *particle_cv = cvf + ss->fecv_pstarts[pi];
2089 const cart3_t origin_cart = {0, 0, 0};
2091 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(
2092 ss->c, s, &dipspec, 1, bspec, dipspecn, k, particle_pos, where, btyp));
2094 for(size_t i = 0; i < bspec->n; ++i)
2095 for(size_t j = 0; j < dipspecn; ++j){
2096 ccart3_t summand = ccart3_scale(particle_cv[i] * s[dipspecn*i+j], regdipoles_0[j]);
2097 ckahanadd(&(res.x), &(res_kc.x), summand.x);
2098 ckahanadd(&(res.y), &(res_kc.y), summand.y);
2099 ckahanadd(&(res.z), &(res_kc.z), summand.z);
2102 free(s);
2103 return res;
2106 ccart3_t qpms_scatsysw_scattered_E__alt(const qpms_scatsys_at_omega_t *ssw,
2107 qpms_bessel_t btyp, const complex double *cvf, const cart3_t where) {
2108 return qpms_scatsys_scattered_E__alt(ssw->ss, btyp, ssw->wavenumber,
2109 cvf, where);
2112 // For periodic lattices, we use directly the "alternative" implementation,
2113 // using translation operator and regular dipole waves at zero
2114 ccart3_t qpms_scatsyswk_scattered_E(const qpms_scatsys_at_omega_k_t *sswk,
2115 qpms_bessel_t btyp,
2116 const complex double *cvf,
2117 const cart3_t where
2119 QPMS_UNTESTED;
2120 if (btyp != QPMS_HANKEL_PLUS)
2121 QPMS_NOT_IMPLEMENTED("Only scattered field with first kind Hankel functions currently implemented.");
2122 const qpms_scatsys_t *ss = sswk->ssw->ss;
2123 if (ss->lattice_dimension != 2)
2124 QPMS_NOT_IMPLEMENTED("Only 2D-periodic lattices implemented");
2125 ccart3_t res = {0,0,0};
2126 ccart3_t res_kc = {0,0,0}; // kahan sum compensation
2128 static const int dipspecn = 3; // We have three basis vectors
2129 // bspec containing only electric dipoles
2130 const qpms_vswf_set_spec_t dipspec = {
2131 .n = dipspecn,
2132 .ilist = (qpms_uvswfi_t[]){
2133 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, -1, 1),
2134 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, 0, 1),
2135 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, +1, 1),
2137 .lMax=1, .lMax_M=0, .lMax_N=1, .lMax_L=-1,
2138 .capacity=0,
2139 .norm = ss->c->normalisation,
2142 ccart3_t regdipoles_0[dipspecn]; {
2143 const sph_t origin_sph = {.r = 0, .theta = M_PI_2, .phi=0}; // Should work with any theta/phi (TESTWORTHY)
2144 csphvec_t regdipoles_0_sph[dipspecn];
2145 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph, &dipspec,
2146 sph2csph(origin_sph), QPMS_BESSEL_REGULAR));
2147 for(int i = 0; i < dipspecn; ++i)
2148 regdipoles_0[i] = csphvec2ccart(regdipoles_0_sph[i], origin_sph);
2151 complex double *s; // Translation matrix
2152 QPMS_CRASHING_MALLOC(s, ss->max_bspecn * sizeof(*s) * dipspec.n);
2154 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
2155 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_pi(ss, pi);
2156 const cart3_t particle_pos = ss->p[pi].pos;
2157 const complex double *particle_cv = cvf + ss->fecv_pstarts[pi];
2159 const cart3_t origin_cart = {0, 0, 0};
2161 QPMS_ASSERT(sswk->k[2] == 0); // At least not implemented now
2162 QPMS_ASSERT(ss->per.lattice_basis[0].z == 0);
2163 QPMS_ASSERT(ss->per.lattice_basis[1].z == 0);
2165 // Same choices as in qpms_ss_ppair_W32xy; TODO make it more dynamic
2166 const double maxR = sqrt(ss->per.unitcell_volume) * 64;
2167 const double maxK = 2048 * 2 * M_PI / maxR;
2169 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_e32(
2170 ss->c, s, NULL,
2171 &dipspec, 1, bspec, dipspecn,
2172 sswk->eta, sswk->ssw->wavenumber,
2173 cart3xy2cart2(ss->per.lattice_basis[0]), cart3xy2cart2(ss->per.lattice_basis[1]),
2174 cart2_from_double_array(sswk->k), cart3_substract(where, particle_pos) /*CHECKSIGN*/,
2175 maxR, maxK));
2177 for(size_t i = 0; i < bspec->n; ++i)
2178 for(size_t j = 0; j < dipspecn; ++j){
2179 ccart3_t summand = ccart3_scale(particle_cv[i] * s[dipspecn*i+j], regdipoles_0[j]);
2180 ckahanadd(&(res.x), &(res_kc.x), summand.x);
2181 ckahanadd(&(res.y), &(res_kc.y), summand.y);
2182 ckahanadd(&(res.z), &(res_kc.z), summand.z);
2185 free(s);
2186 return res;
2189 qpms_errno_t qpms_scatsyswk_scattered_field_basis(
2190 ccart3_t *target,
2191 const qpms_scatsys_at_omega_k_t *sswk,
2192 const qpms_bessel_t btyp,
2193 const cart3_t where
2195 QPMS_UNTESTED;
2196 if (btyp != QPMS_HANKEL_PLUS)
2197 QPMS_NOT_IMPLEMENTED("Only scattered field with first kind Hankel functions currently implemented.");
2198 const qpms_scatsys_t *ss = sswk->ssw->ss;
2199 if (ss->lattice_dimension != 2)
2200 QPMS_NOT_IMPLEMENTED("Only 2D-periodic lattices implemented");
2201 //ccart3_t res = {0,0,0};
2202 //ccart3_t res_kc = {0,0,0}; // kahan sum compensation
2204 static const int dipspecn = 3; // We have three basis vectors
2205 // bspec containing only electric dipoles
2206 const qpms_vswf_set_spec_t dipspec = {
2207 .n = dipspecn,
2208 .ilist = (qpms_uvswfi_t[]){
2209 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, -1, 1),
2210 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, 0, 1),
2211 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC, +1, 1),
2213 .lMax=1, .lMax_M=0, .lMax_N=1, .lMax_L=-1,
2214 .capacity=0,
2215 .norm = ss->c->normalisation,
2218 ccart3_t regdipoles_0[dipspecn]; {
2219 const sph_t origin_sph = {.r = 0, .theta = M_PI_2, .phi=0}; // Should work with any theta/phi (TESTWORTHY)
2220 csphvec_t regdipoles_0_sph[dipspecn];
2221 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph, &dipspec,
2222 sph2csph(origin_sph), QPMS_BESSEL_REGULAR));
2223 for(int i = 0; i < dipspecn; ++i)
2224 regdipoles_0[i] = csphvec2ccart(regdipoles_0_sph[i], origin_sph);
2227 complex double *s; // Translation matrix
2228 QPMS_CRASHING_MALLOC(s, ss->max_bspecn * sizeof(*s) * dipspec.n);
2230 memset(target, 0, ss->fecv_size * sizeof(*target));
2232 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
2233 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_pi(ss, pi);
2234 const cart3_t particle_pos = ss->p[pi].pos;
2235 //const complex double *particle_cv = cvf + ss->fecv_pstarts[pi];
2237 const cart3_t origin_cart = {0, 0, 0};
2239 QPMS_ASSERT(sswk->k[2] == 0); // At least not implemented now
2240 QPMS_ASSERT(ss->per.lattice_basis[0].z == 0);
2241 QPMS_ASSERT(ss->per.lattice_basis[1].z == 0);
2243 // Same choices as in qpms_ss_ppair_W32xy; TODO make it more dynamic
2244 const double maxR = sqrt(ss->per.unitcell_volume) * 64;
2245 const double maxK = 2048 * 2 * M_PI / maxR;
2247 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_e32(
2248 ss->c, s, NULL,
2249 &dipspec, 1, bspec, dipspecn,
2250 sswk->eta, sswk->ssw->wavenumber,
2251 cart3xy2cart2(ss->per.lattice_basis[0]), cart3xy2cart2(ss->per.lattice_basis[1]),
2252 cart2_from_double_array(sswk->k), cart3_substract(where, particle_pos) /*CHECKSIGN*/,
2253 maxR, maxK));
2255 for(size_t i = 0; i < bspec->n; ++i)
2256 for(size_t j = 0; j < dipspecn; ++j){
2257 target[ss->fecv_pstarts[pi] + i] = ccart3_add(target[ss->fecv_pstarts[pi] + i],
2258 ccart3_scale(s[dipspecn*i+j], regdipoles_0[j]));
2259 //ccart3_t summand = ccart3_scale(particle_cv[i] * s[dipspecn*i+j], regdipoles_0[j]);
2260 //ckahanadd(&(res.x), &(res_kc.x), summand.x);
2261 //ckahanadd(&(res.y), &(res_kc.y), summand.y);
2262 //ckahanadd(&(res.z), &(res_kc.z), summand.z);
2265 free(s);
2266 return QPMS_SUCCESS;
2269 #if 0
2270 ccart3_t qpms_scatsys_scattered_E_irrep(const qpms_scatsys_t *ss,
2271 qpms_iri_t iri, const complex double *cvr, cart3_t where) {
2272 TODO;
2274 #endif
2276 void qpms_ss_LU_free(qpms_ss_LU lu) {
2277 free(lu.a);
2278 free(lu.ipiv);
2281 qpms_ss_LU qpms_scatsysw_modeproblem_matrix_full_factorise(complex double *mpmatrix_full,
2282 int *target_piv, const qpms_scatsys_at_omega_t *ssw, const qpms_scatsys_at_omega_k_t *sswk) {
2283 if (sswk) {
2284 QPMS_ASSERT(sswk->ssw == ssw || !ssw);
2285 ssw = sswk->ssw;
2286 QPMS_ASSERT(ssw->ss->lattice_dimension > 0);
2287 } else {
2288 QPMS_ASSERT(ssw->ss->lattice_dimension == 0);
2290 const qpms_scatsys_t *ss = ssw->ss;
2291 QPMS_ENSURE(mpmatrix_full, "A non-NULL pointer to the pre-calculated mode matrix is required");
2292 if (!target_piv) QPMS_CRASHING_MALLOC(target_piv, ss->fecv_size * sizeof(int));
2293 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR, ss->fecv_size, ss->fecv_size,
2294 mpmatrix_full, ss->fecv_size, target_piv));
2295 qpms_ss_LU lu;
2296 lu.a = mpmatrix_full;
2297 lu.ipiv = target_piv;
2298 lu.ssw = ssw;
2299 lu.sswk = sswk;
2300 lu.full = true;
2301 lu.iri = -1;
2302 return lu;
2305 qpms_ss_LU qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(complex double *mpmatrix_packed,
2306 int *target_piv, const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri) {
2307 QPMS_ENSURE(mpmatrix_packed, "A non-NULL pointer to the pre-calculated mode matrix is required");
2308 qpms_ss_ensure_nonperiodic(ssw->ss);
2309 size_t n = ssw->ss->saecv_sizes[iri];
2310 if (!target_piv) QPMS_CRASHING_MALLOC(target_piv, n * sizeof(int));
2311 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR, n, n,
2312 mpmatrix_packed, n, target_piv));
2313 qpms_ss_LU lu;
2314 lu.a = mpmatrix_packed;
2315 lu.ipiv = target_piv;
2316 lu.ssw = ssw;
2317 lu.full = false;
2318 lu.iri = iri;
2319 return lu;
2322 qpms_ss_LU qpms_scatsysw_build_modeproblem_matrix_full_LU(
2323 complex double *target, int *target_piv,
2324 const qpms_scatsys_at_omega_t *ssw){
2325 qpms_ss_ensure_nonperiodic_a(ssw->ss, "qpms_scatsyswk_build_modeproblem_matrix_full_LU()");
2326 target = qpms_scatsysw_build_modeproblem_matrix_full(target, ssw);
2327 return qpms_scatsysw_modeproblem_matrix_full_factorise(target, target_piv, ssw, NULL);
2330 qpms_ss_LU qpms_scatsyswk_build_modeproblem_matrix_full_LU(
2331 complex double *target, int *target_piv,
2332 const qpms_scatsys_at_omega_k_t *sswk){
2333 target = qpms_scatsyswk_build_modeproblem_matrix_full(target, sswk);
2334 return qpms_scatsysw_modeproblem_matrix_full_factorise(target, target_piv, sswk->ssw, sswk);
2337 qpms_ss_LU qpms_scatsysw_build_modeproblem_matrix_irrep_packed_LU(
2338 complex double *target, int *target_piv,
2339 const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri){
2340 target = qpms_scatsysw_build_modeproblem_matrix_irrep_packed(target, ssw, iri);
2341 return qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(target, target_piv, ssw, iri);
2344 complex double *qpms_scatsys_scatter_solve(
2345 complex double *f, const complex double *a_inc, qpms_ss_LU lu) {
2346 const size_t n = lu.full ? lu.ssw->ss->fecv_size : lu.ssw->ss->saecv_sizes[lu.iri];
2347 if (!f) QPMS_CRASHING_MALLOC(f, n * sizeof(complex double));
2348 memcpy(f, a_inc, n*sizeof(complex double)); // It will be rewritten by zgetrs
2349 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR, 'N' /*trans*/, n /*n*/, 1 /*nrhs number of right hand sides*/,
2350 lu.a /*a*/, n /*lda*/, lu.ipiv /*ipiv*/, f/*b*/, 1 /*ldb; CHECKME*/));
2351 return f;
2354 struct qpms_scatsys_finite_eval_Beyn_ImTS_param {
2355 const qpms_scatsys_t *ss;
2356 qpms_iri_t iri;
2359 /// Wrapper for Beyn algorithm (non-periodic system)
2360 static int qpms_scatsys_finite_eval_Beyn_ImTS(complex double *target,
2361 size_t m, complex double omega, void *params) {
2362 const struct qpms_scatsys_finite_eval_Beyn_ImTS_param *p = params;
2363 qpms_scatsys_at_omega_t *ssw = qpms_scatsys_at_omega(p->ss, omega);
2364 QPMS_ENSURE(ssw != NULL, "qpms_scatsys_at_omega() returned NULL");
2365 if (p->iri == QPMS_NO_IRREP) {
2366 QPMS_ASSERT(m == p->ss->fecv_size);
2367 QPMS_ENSURE(NULL != qpms_scatsysw_build_modeproblem_matrix_full(
2368 target, ssw),
2369 "qpms_scatsysw_build_modeproblem_matrix_full() returned NULL");
2370 } else {
2371 QPMS_ASSERT(m == p->ss->saecv_sizes[p->iri]);
2372 QPMS_ENSURE(NULL != qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
2373 target, ssw, p->iri),
2374 "qpms_scatsysw_build_modeproblem_matrix_irrep_packed() returned NULL");
2376 qpms_scatsys_at_omega_free(ssw);
2377 return QPMS_SUCCESS;
2380 beyn_result_t *qpms_scatsys_finite_find_eigenmodes(
2381 const qpms_scatsys_t * const ss, const qpms_iri_t iri,
2382 complex double omega_centre, double omega_rr, double omega_ri,
2383 size_t contour_npoints,
2384 double rank_tol, size_t rank_sel_min, double res_tol) {
2385 qpms_ss_ensure_nonperiodic_a(ss, "qpms_scatsys_periodic_find_eigenmodes()");
2386 size_t n; // matrix dimension
2387 if (qpms_iri_is_valid(ss->sym, iri)) {
2388 n = ss->saecv_sizes[iri];
2389 } else if (iri == QPMS_NO_IRREP) {
2390 n = ss->fecv_size;
2391 } else QPMS_WTF;
2393 beyn_contour_t *contour = beyn_contour_ellipse(omega_centre,
2394 omega_rr, omega_ri, contour_npoints);
2396 struct qpms_scatsys_finite_eval_Beyn_ImTS_param p = {ss, iri};
2397 beyn_result_t *result = beyn_solve(n, n /* possibly make smaller? */,
2398 qpms_scatsys_finite_eval_Beyn_ImTS, NULL, (void *) &p,
2399 contour, rank_tol, rank_sel_min, res_tol);
2400 QPMS_ENSURE(result != NULL, "beyn_solve() returned NULL");
2402 free(contour);
2403 return result;
2406 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param {
2407 const qpms_scatsys_t *ss;
2408 const double *k; ///< Wavevector in cartesian coordinates.
2411 /// Wrapper for Beyn algorithm (periodic system)
2412 static int qpms_scatsys_periodic_eval_Beyn_ImTW(complex double *target,
2413 size_t m, complex double omega, void *params){
2414 const struct qpms_scatsys_periodic_eval_Beyn_ImTW_param *p = params;
2415 qpms_scatsys_at_omega_t *ssw = qpms_scatsys_at_omega(p->ss, omega);
2416 QPMS_ENSURE(ssw != NULL, "qpms_scatsys_at_omega() returned NULL");
2417 qpms_scatsys_at_omega_k_t sswk = {
2418 .ssw = ssw,
2419 .k = {p->k[0], p->k[1], p->k[2]},
2420 .eta = qpms_ss_adjusted_eta(p->ss, ssw->wavenumber, p->k)
2422 QPMS_ASSERT(m == p->ss->fecv_size);
2423 QPMS_ENSURE(NULL !=
2424 qpms_scatsyswk_build_modeproblem_matrix_full(target, &sswk),
2425 "qpms_scatsyswk_build_modeproblem_matrix_full() returned NULL");
2426 qpms_scatsys_at_omega_free(ssw);
2427 return QPMS_SUCCESS;
2430 beyn_result_t *qpms_scatsys_periodic_find_eigenmodes(
2431 const qpms_scatsys_t * const ss, const double k[3],
2432 complex double omega_centre, double omega_rr, double omega_ri,
2433 size_t contour_npoints,
2434 double rank_tol, size_t rank_sel_min, double res_tol) {
2435 qpms_ss_ensure_periodic_a(ss, "qpms_scatsys_finite_find_eigenmodes()");
2436 size_t n = ss->fecv_size; // matrix dimension
2438 beyn_contour_t *contour = beyn_contour_ellipse(omega_centre,
2439 omega_rr, omega_ri, contour_npoints);
2441 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param p = {ss, k};
2442 beyn_result_t *result = beyn_solve(n, n,
2443 qpms_scatsys_periodic_eval_Beyn_ImTW, NULL, (void *) &p,
2444 contour, rank_tol, rank_sel_min, res_tol);
2445 QPMS_ENSURE(result != NULL, "beyn_solve() returned NULL");
2447 free(contour);
2448 return result;