Rewriting scatsystem: compiles without errors now.
[qpms.git] / qpms / scatsystem.c
blob8aa43f6cafd10751725ee830628f8e2f3b8fe2b7
1 #include <stdlib.h>
2 #define lapack_int int
3 #define lapack_complex_double complex double
4 #define lapack_complex_double_real(z) (creal(z))
5 #define lapack_complex_double_imag(z) (cimag(z))
6 #include <lapacke.h>
7 #include <cblas.h>
8 #include <lapacke.h>
9 #include "scatsystem.h"
10 #include "indexing.h"
11 #include "vswf.h"
12 #include "groups.h"
13 #include "symmetries.h"
14 #include <assert.h>
15 #include <unistd.h>
16 #include "vectors.h"
17 #include "quaternions.h"
18 #include <string.h>
19 #include "qpms_error.h"
20 #include "translations.h"
21 #include "tmatrices.h"
22 #include <pthread.h>
23 #include "kahansum.h"
24 #include "tolerances.h"
26 #ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
27 #include "qpmsblas.h"
28 #define SERIAL_ZGEMM qpms_zgemm
29 #else
30 #define SERIAL_ZGEMM cblas_zgemm
31 #endif
33 #define SQ(x) ((x)*(x))
34 #define QPMS_SCATSYS_LEN_RTOL 1e-13
35 #define QPMS_SCATSYS_TMATRIX_ATOL 1e-12
36 #define QPMS_SCATSYS_TMATRIX_RTOL 1e-12
38 long qpms_scatsystem_nthreads_default = 4;
39 long qpms_scatsystem_nthreads_override = 0;
41 void qpms_scatsystem_set_nthreads(long n) {
42 qpms_scatsystem_nthreads_override = n;
45 // ------------ Stupid implementation of qpms_scatsys_apply_symmetry() -------------
47 #define MIN(x,y) (((x)<(y))?(x):(y))
48 #define MAX(x,y) (((x)>(y))?(x):(y))
50 // The following functions are just to make qpms_scatsys_apply_symmetry more readable.
51 // They are not to be used elsewhere, as they do not perform any array capacity checks etc.
53 /// Compare two orbit types in a scattering system.
54 static bool orbit_types_equal(const qpms_ss_orbit_type_t *a, const qpms_ss_orbit_type_t *b) {
55 if (a->size != b->size) return false;
56 if (memcmp(a->action, b->action, a->size*sizeof(qpms_ss_orbit_pi_t))) return false;
57 if (memcmp(a->tmatrices, b->tmatrices, a->size*sizeof(qpms_ss_tmi_t))) return false;
58 return true;
61 // Extend the action to all particles in orbit if only the action on the 0th
62 // particle has been filled.
63 static void extend_orbit_action(qpms_scatsys_t *ss, qpms_ss_orbit_type_t *ot) {
64 for(qpms_ss_orbit_pi_t src = 1; src < ot->size; ++src) {
65 // find any element g that sends 0 to src:
66 qpms_gmi_t g;
67 for (g = 0; g < ss->sym->order; ++g)
68 if (ot->action[g] == src) break;
69 assert(g < ss->sym->order);
70 // invg sends src to 0
71 qpms_gmi_t invg = qpms_finite_group_inv(ss->sym, g);
72 for (qpms_gmi_t f = 0; f < ss->sym->order; ++f)
73 // if f sends 0 to dest, then f * invg sends src to dest
74 ot->action[src * ss->sym->order +
75 qpms_finite_group_mul(ss->sym,f,invg)] = ot->action[f];
79 //Add orbit type to a scattering system, updating the ss->otspace_end pointer accordingly
80 static void add_orbit_type(qpms_scatsys_t *ss, const qpms_ss_orbit_type_t *ot_current) {
81 qpms_ss_orbit_type_t * const ot_new = & (ss->orbit_types[ss->orbit_type_count]);
82 ot_new->size = ot_current->size;
84 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_tmi(ss, ot_current->tmatrices[0]);
85 const size_t bspecn = bspec->n;
86 ot_new->bspecn = bspecn;
88 const size_t actionsiz = sizeof(ot_current->action[0]) * ot_current->size
89 * ss->sym->order;
90 ot_new->action = (void *) (ss->otspace_end);
91 memcpy(ot_new->action, ot_current->action, actionsiz);
92 // N.B. we copied mostly garbage ^^^, most of it is initialized just now:
93 extend_orbit_action(ss, ot_new);
94 #ifdef DUMP_ORBIT_ACTION
95 fprintf(stderr, "Orbit action:\n");
96 for (qpms_gmi_t gmi = 0; gmi < ss->sym->order; ++gmi) {
97 const qpms_quat4d_t q = qpms_quat_4d_from_2c(ss->sym->rep3d[gmi].rot);
98 fprintf(stderr, "%+d[%g %g %g %g] ", (int)ss->sym->rep3d[gmi].det,
99 q.c1, q.ci, q.cj, q.ck);
100 fprintf(stderr, "%s\t", (ss->sym->permrep && ss->sym->permrep[gmi])?
101 ss->sym->permrep[gmi] : "");
102 for (qpms_ss_orbit_pi_t pi = 0; pi < ot_new->size; ++pi)
103 fprintf(stderr, "%d\t", (int) ot_new->action[gmi + pi*ss->sym->order]);
104 fprintf(stderr, "\n");
106 #endif
107 ss->otspace_end += actionsiz;
109 const size_t tmsiz = sizeof(ot_current->tmatrices[0]) * ot_current->size;
110 ot_new->tmatrices = (void *) (ss->otspace_end);
111 memcpy(ot_new->tmatrices, ot_current->tmatrices, tmsiz);
112 ss->otspace_end += tmsiz;
114 const size_t irbase_sizes_siz = sizeof(ot_new->irbase_sizes[0]) * ss->sym->nirreps;
115 ot_new->irbase_sizes = (void *) (ss->otspace_end);
116 ss->otspace_end += irbase_sizes_siz;
117 ot_new->irbase_cumsizes = (void *) (ss->otspace_end);
118 ss->otspace_end += irbase_sizes_siz;
119 ot_new->irbase_offsets = (void *) (ss->otspace_end);
120 ss->otspace_end += irbase_sizes_siz;
122 const size_t irbases_siz = sizeof(ot_new->irbases[0]) * SQ(ot_new->size * bspecn);
123 ot_new->irbases = (void *) (ss->otspace_end);
124 ss->otspace_end += irbases_siz;
126 size_t lastbs, bs_cumsum = 0;
127 for(qpms_iri_t iri = 0; iri < ss->sym->nirreps; ++iri) {
128 ot_new->irbase_offsets[iri] = bs_cumsum * bspecn * ot_new->size;
129 qpms_orbit_irrep_basis(&lastbs,
130 ot_new->irbases + bs_cumsum*ot_new->size*bspecn,
131 ot_new, bspec, ss->sym, iri);
132 ot_new->irbase_sizes[iri] = lastbs;
133 bs_cumsum += lastbs;
134 ot_new->irbase_cumsizes[iri] = bs_cumsum;
136 if(bs_cumsum != ot_new->size * bspecn)
137 qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
138 "The cumulative size of the symmetry-adapted bases is wrong; "
139 "expected %d = %d * %d, got %d.",
140 ot_new->size * bspecn, ot_new->size, bspecn, bs_cumsum);
141 ot_new->instance_count = 0;
142 ss->orbit_type_count++;
146 // Almost 200 lines. This whole thing deserves a rewrite!
147 qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
148 const qpms_finite_group_t *sym, complex double omega,
149 const qpms_tolerance_spec_t *tol) {
150 // TODO check data sanity
152 qpms_l_t lMax = 0; // the overall lMax of all base specs.
153 qpms_normalisation_t normalisation = QPMS_NORMALISATION_UNDEF;
155 // First, determine the rough radius of the array; it should be nonzero
156 // in order to particle position equivalence work correctly
157 double lenscale = 0;
159 double minx = +INFINITY, miny = +INFINITY, minz = +INFINITY;
160 double maxx = -INFINITY, maxy = -INFINITY, maxz = -INFINITY;
161 for (qpms_ss_pi_t i = 0; i < orig->p_count; ++i) {
162 minx = MIN(minx,orig->p[i].pos.x);
163 miny = MIN(miny,orig->p[i].pos.y);
164 minz = MIN(minz,orig->p[i].pos.z);
165 maxx = MAX(maxx,orig->p[i].pos.x);
166 maxy = MAX(maxy,orig->p[i].pos.y);
167 maxz = MAX(maxz,orig->p[i].pos.z);
169 lenscale = (fabs(maxx)+fabs(maxy)+fabs(maxz)+(maxx-minx)+(maxy-miny)+(maxz-minz)) / 3;
172 // Second, check that there are no duplicit positions in the input system.
173 for (qpms_ss_pi_t i = 0; i < orig->p_count; ++i)
174 for (qpms_ss_pi_t j = 0; j < i; ++j)
175 assert(!cart3_isclose(orig->p[i].pos, orig->p[j].pos, 0, QPMS_SCATSYS_LEN_RTOL * lenscale));
178 // Allocate T-matrix, particle and particle orbit info arrays
179 qpms_scatsys_t *ss;
180 QPMS_CRASHING_MALLOC(ss, sizeof(qpms_scatsys_t));
181 ss->lenscale = lenscale;
182 ss->sym = sym;
184 // Copy the qpms_tmatrix_fuction_t from orig
185 ss->tmg_count = orig->tmg_count;
186 QPMS_CRASHING_MALLOC(ss->tmg, ss->tmg_count * sizeof(*(ss->tmg)));
187 memcpy(ss->tmg, orig->tmg, ss->tmg_count * sizeof(*(ss->tmg)));
189 ss->tm_capacity = sym->order * orig->tm_count;
190 QPMS_CRASHING_MALLOC(ss->tm, ss->tm_capacity * sizeof(*(ss->tm)));
192 ss->p_capacity = sym->order * orig->p_count;
193 QPMS_CRASHING_MALLOC(ss->p, ss->p_capacity * sizeof(qpms_particle_tid_t));
194 QPMS_CRASHING_MALLOC(ss->p_orbitinfo, ss->p_capacity * sizeof(qpms_ss_particle_orbitinfo_t));
195 for (qpms_ss_pi_t pi = 0; pi < ss->p_capacity; ++pi) {
196 ss->p_orbitinfo[pi].t = QPMS_SS_P_ORBITINFO_UNDEF;
197 ss->p_orbitinfo[pi].p = QPMS_SS_P_ORBITINFO_UNDEF;
200 // Evaluate the original T-matrices at omega
201 qpms_tmatrix_t **tm_orig_omega;
202 QPMS_CRASHING_MALLOC(tm_orig_omega, orig->tmg_count * sizeof(*tm_orig_omega));
203 for(qpms_ss_tmgi_t i = 0; i < orig->tmg_count; ++i)
204 tm_orig_omega[i] = qpms_tmatrix_init_from_function(orig->tmg[i], omega);
206 // Evaluate the medium and derived T-matrices at omega.
207 qpms_scatsys_at_omega_t *ssw;
208 QPMS_CRASHING_MALLOC(ssw, sizeof(*ssw)); // returned
209 ssw->ss = ss;
210 ssw->omega = omega;
211 ssw->medium = qpms_epsmu_generator_eval(ss->medium, omega);
212 ssw->wavenumber = qpms_wavenumber(omega, ssw->medium);
213 // we will be using ss->tm_capacity also for ssw->tm
214 QPMS_CRASHING_MALLOC(ssw->tm, ss->tm_capacity * sizeof(*(ssw->tm))); // returned
216 // Evaluate T-matrices at omega; checking for duplicities
218 ss->max_bspecn = 0; // We'll need it later.for memory alloc estimates.
220 qpms_ss_tmi_t tm_dupl_remap[ss->tm_capacity]; // Auxilliary array to label remapping the indices after ignoring t-matrix duplicities; VLA!
221 ss->tm_count = 0;
222 for (qpms_ss_tmi_t i = 0; i < orig->tm_count; ++i) {
223 qpms_tmatrix_t *ti = qpms_tmatrix_apply_operation(&(orig->tm[i].op), tm_orig_omega[orig->tm[i].tmgi]);
224 qpms_ss_tmi_t j;
225 for (j = 0; j < ss->tm_count; ++j)
226 if (qpms_tmatrix_isclose(ssw->tm[i], ssw->tm[j], tol->rtol, tol->atol)) {
227 break;
229 if (j == ss->tm_count) { // duplicity not found, save both the "abstract" and "at omega" T-matrices
230 qpms_tmatrix_operation_copy(&ss->tm[j].op, &orig->tm[j].op);
231 ss->tm[j].tmgi = orig->tm[j].tmgi; // T-matrix functions are preserved.
232 ssw->tm[j] = ti;
233 ss->max_bspecn = MAX(ssw->tm[j]->spec->n, ss->max_bspecn);
234 lMax = MAX(lMax, ssw->tm[j]->spec->lMax);
235 ++(ss->tm_count);
237 else qpms_tmatrix_free(ti);
238 tm_dupl_remap[i] = j;
239 if (normalisation == QPMS_NORMALISATION_UNDEF)
240 normalisation = ssw->tm[i]->spec->norm;
241 // We expect all bspec norms to be the same.
242 else QPMS_ENSURE(normalisation == ssw->tm[j]->spec->norm,
243 "Normalisation convention must be the same for all T-matrices."
244 " %d != %d\n", normalisation, ssw->tm[j]->spec->norm);
247 // Free the original T-matrices at omega
248 for(qpms_ss_tmgi_t i = 0; i < orig->tmg_count; ++i)
249 qpms_tmatrix_free(tm_orig_omega[i]);
250 free(tm_orig_omega);
252 // Copy particles, remapping the t-matrix indices
253 for (qpms_ss_pi_t i = 0; i < orig->p_count; ++(i)) {
254 ss->p[i] = orig->p[i];
255 ss->p[i].tmatrix_id = tm_dupl_remap[ss->p[i].tmatrix_id];
257 ss->p_count = orig->p_count;
259 // allocate t-matrix symmetry map
260 QPMS_CRASHING_MALLOC(ss->tm_sym_map, sizeof(qpms_ss_tmi_t) * sym->order * sym->order * ss->tm_count);
262 // Extend the T-matrices list by the symmetry operations
263 for (qpms_ss_tmi_t tmi = 0; tmi < ss->tm_count; ++tmi)
264 for (qpms_gmi_t gmi = 0; gmi < sym->order; ++gmi){
265 const size_t d = ssw->tm[tmi]->spec->n;
266 complex double *m;
267 QPMS_CRASHING_MALLOC(m, d*d*sizeof(complex double)); // ownership passed to ss->tm[ss->tm_count].op
268 qpms_irot3_uvswfi_dense(m, ssw->tm[tmi]->spec, sym->rep3d[gmi]);
269 qpms_tmatrix_t *transformed = qpms_tmatrix_apply_symop(ssw->tm[tmi], m);
270 qpms_ss_tmi_t tmj;
271 for (tmj = 0; tmj < ss->tm_count; ++tmj)
272 if (qpms_tmatrix_isclose(transformed, ssw->tm[tmj], tol->rtol, tol->atol))
273 break;
274 if (tmj < ss->tm_count) { // HIT, transformed T-matrix already exists
275 //TODO some "rounding error cleanup" symmetrisation could be performed here?
276 qpms_tmatrix_free(transformed);
277 } else { // MISS, save the matrix (also the "abstract" one)
278 ssw->tm[ss->tm_count] = transformed;
279 qpms_tmatrix_operation_compose_chain_init(&(ss->tm[ss->tm_count].op), 2, 1);
280 struct qpms_tmatrix_operation_compose_chain * const o = &(ss->tm[ss->tm_count].op.op.compose_chain);
281 o->ops[0] = & ss->tm[tmj].op; // Let's just borrow this
282 o->ops_owned[0] = false;
283 o->opmem[0].typ = QPMS_TMATRIX_OPERATION_LRMATRIX;
284 o->opmem[0].op.lrmatrix.m = m;
285 o->opmem[0].op.lrmatrix.m_size = d * d;
286 o->ops[1] = o->opmem;
287 o->ops_owned[1] = true;
288 ++(ss->tm_count);
290 ss->tm_sym_map[gmi + tmi * sym->order] = tmj; // In any case, tmj now indexes the correct transformed matrix
292 // Possibly free some space using the new ss->tm_count instead of (old) ss->tm_count*sym->order
293 QPMS_CRASHING_REALLOC(ss->tm_sym_map, sizeof(qpms_ss_tmi_t) * sym->order * ss->tm_count);
294 // tm could be realloc'd as well, but those are just pointers, not likely many.
297 // allocate particle symmetry map
298 QPMS_CRASHING_MALLOC(ss->p_sym_map, sizeof(qpms_ss_pi_t) * sym->order * sym->order * ss->p_count);
299 // allocate orbit type array (TODO realloc later if too long)
300 ss->orbit_type_count = 0;
301 QPMS_CRASHING_CALLOC(ss->orbit_types, ss->p_count, sizeof(qpms_ss_orbit_type_t));
303 QPMS_CRASHING_MALLOC(ss->otspace, // reallocate later
304 (sizeof(qpms_ss_orbit_pi_t) * sym->order * sym->order
305 + sizeof(qpms_ss_tmi_t) * sym->order
306 + 3*sizeof(size_t) * sym->nirreps
307 + sizeof(complex double) * SQ(sym->order * ss->max_bspecn)) * ss->p_count
309 ss->otspace_end = ss->otspace;
311 // Workspace for the orbit type determination
312 qpms_ss_orbit_type_t ot_current;
313 qpms_ss_orbit_pi_t ot_current_action[sym->order * sym->order];
314 qpms_ss_tmi_t ot_current_tmatrices[sym->order];
316 qpms_ss_pi_t current_orbit[sym->order];
317 ot_current.action = ot_current_action;
318 ot_current.tmatrices = ot_current_tmatrices;
321 // Extend the particle list by the symmetry operations, check that particles mapped by symmetry ops on themselves
322 // have the correct symmetry
323 // TODO this could be sped up to O(npart * log(npart)); let's see later whether needed.
324 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
325 const bool new_orbit = (ss->p_orbitinfo[pi].t == QPMS_SS_P_ORBITINFO_UNDEF); // TODO build the orbit!!!
326 if (new_orbit){
327 current_orbit[0] = pi;
328 ot_current.size = 1;
329 ot_current.tmatrices[0] = ss->p[pi].tmatrix_id;
330 #ifdef DUMP_PARTICLE_POSITIONS
331 cart3_t pos = ss->p[pi].pos;
332 fprintf(stderr, "An orbit [%.4g, %.4g, %.4g] => ", pos.x, pos.y, pos.z);
333 #endif
336 for (qpms_gmi_t gmi = 0; gmi < sym->order; ++gmi) {
337 cart3_t newpoint = qpms_irot3_apply_cart3(sym->rep3d[gmi], ss->p[pi].pos);
338 qpms_ss_tmi_t new_tmi = ss->tm_sym_map[gmi + ss->p[pi].tmatrix_id * sym->order]; // transformed t-matrix index
339 qpms_ss_pi_t pj;
340 for (pj = 0; pj < ss->p_count; ++pj)
341 if (cart3_isclose(newpoint, ss->p[pj].pos, 0, ss->lenscale * QPMS_SCATSYS_LEN_RTOL)) {
342 if (new_tmi != ss->p[pj].tmatrix_id)
343 qpms_pr_error("The %d. particle with coords (%lg, %lg, %lg) "
344 "is mapped to %d. another (or itself) with cords (%lg, %lg, %lg) "
345 "without having the required symmetry", (int)pi,
346 ss->p[pi].pos.x, ss->p[pi].pos.y, ss->p[pi].pos.z,
347 (int)pj, ss->p[pj].pos.x, ss->p[pj].pos.y, ss->p[pj].pos.z);
348 break;
350 if (pj < ss->p_count) { // HIT, the particle is transformed to an "existing" one.
352 } else { // MISS, the symmetry transforms the particle to a new location, so add it.
353 qpms_particle_tid_t newparticle = {newpoint, new_tmi};
354 ss->p[ss->p_count] = newparticle;
355 ++(ss->p_count);
356 #ifdef DUMP_PARTICLE_POSITIONS
357 if(new_orbit)
358 fprintf(stderr, "[%.4g, %.4g, %.4g] ", newpoint.x, newpoint.y, newpoint.z);
359 #endif
361 ss->p_sym_map[gmi + pi * sym->order] = pj;
363 if (new_orbit) {
364 // Now check whether the particle (result of the symmetry op) is already in the current orbit
365 qpms_ss_orbit_pi_t opj;
366 for (opj = 0; opj < ot_current.size; ++opj)
367 if (current_orbit[opj] == pj) break; // HIT, pj already on current orbit
368 if (opj == ot_current.size) { // MISS, pj is new on the orbit, extend the size and set the T-matrix id
369 current_orbit[opj] = pj;
370 ++ot_current.size;
371 ot_current.tmatrices[opj] = ss->p[pj].tmatrix_id;
373 ot_current.action[gmi] = opj;
376 if (new_orbit) { // Now compare if the new orbit corresponds to some of the existing types.
377 #ifdef DUMP_PARTICLE_POSITIONS
378 fputc('\n', stderr);
379 #endif
380 qpms_ss_oti_t oti;
381 for(oti = 0; oti < ss->orbit_type_count; ++oti)
382 if (orbit_types_equal(&ot_current, &(ss->orbit_types[oti]))) break; // HIT, orbit type already exists
383 assert(0 == sym->order % ot_current.size);
384 if (oti == ss->orbit_type_count) // MISS, add the new orbit type
385 add_orbit_type(ss, &ot_current);
387 // Walk through the orbit again and set up the orbit info of the particles
388 for (qpms_ss_orbit_pi_t opi = 0; opi < ot_current.size; ++opi) {
389 const qpms_ss_pi_t pi_opi = current_orbit[opi];
390 ss->p_orbitinfo[pi_opi].t = oti;
391 ss->p_orbitinfo[pi_opi].p = opi;
392 ss->p_orbitinfo[pi_opi].osn = ss->orbit_types[oti].instance_count;
394 ss->orbit_types[oti].instance_count++;
397 // Possibly free some space using the new ss->p_count instead of (old) ss->p_count*sym->order
398 QPMS_CRASHING_REALLOC(ss->p_sym_map, sizeof(qpms_ss_pi_t) * sym->order * ss->p_count);
399 QPMS_CRASHING_REALLOC(ss->p, sizeof(qpms_particle_tid_t) * ss->p_count);
400 QPMS_CRASHING_REALLOC(ss->p_orbitinfo, sizeof(qpms_ss_particle_orbitinfo_t)*ss->p_count);
401 ss->p_capacity = ss->p_count;
403 { // Reallocate the orbit type data space and update the pointers if needed.
404 size_t otspace_sz = ss->otspace_end - ss->otspace;
405 char *old_otspace = ss->otspace;
406 QPMS_CRASHING_REALLOC(ss->otspace, otspace_sz);
407 ptrdiff_t shift = ss->otspace - old_otspace;
408 if(shift) {
409 for (size_t oi = 0; oi < ss->orbit_type_count; ++oi) {
410 ss->orbit_types[oi].action = (void *)(((char *) (ss->orbit_types[oi].action)) + shift);
411 ss->orbit_types[oi].tmatrices = (void *)(((char *) (ss->orbit_types[oi].tmatrices)) + shift);
412 ss->orbit_types[oi].irbase_sizes = (void *)(((char *) (ss->orbit_types[oi].irbase_sizes)) + shift);
413 ss->orbit_types[oi].irbase_cumsizes = (void *)(((char *) (ss->orbit_types[oi].irbase_cumsizes)) + shift);
414 ss->orbit_types[oi].irbase_offsets = (void *)(((char *) (ss->orbit_types[oi].irbase_offsets)) + shift);
415 ss->orbit_types[oi].irbases = (void *)(((char *) (ss->orbit_types[oi].irbases)) + shift);
417 ss->otspace_end += shift;
421 // Set ss->fecv_size and ss->fecv_pstarts
422 ss->fecv_size = 0;
423 QPMS_CRASHING_MALLOC(ss->fecv_pstarts, ss->p_count * sizeof(size_t));
424 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
425 ss->fecv_pstarts[pi] = ss->fecv_size;
426 ss->fecv_size += ssw->tm[ss->p[pi].tmatrix_id]->spec->n; // That's a lot of dereferencing!
429 QPMS_CRASHING_MALLOC(ss->saecv_sizes, sizeof(size_t) * sym->nirreps);
430 QPMS_CRASHING_MALLOC(ss->saecv_ot_offsets, sizeof(size_t) * sym->nirreps * ss->orbit_type_count);
431 for(qpms_iri_t iri = 0; iri < sym->nirreps; ++iri) {
432 ss->saecv_sizes[iri] = 0;
433 for(qpms_ss_oti_t oti = 0; oti < ss->orbit_type_count; ++oti) {
434 ss->saecv_ot_offsets[iri * ss->orbit_type_count + oti] = ss->saecv_sizes[iri];
435 const qpms_ss_orbit_type_t *ot = &(ss->orbit_types[oti]);
436 ss->saecv_sizes[iri] += ot->instance_count * ot->irbase_sizes[iri];
440 qpms_ss_pi_t p_ot_cumsum = 0;
441 for (qpms_ss_oti_t oti = 0; oti < ss->orbit_type_count; ++oti) {
442 qpms_ss_orbit_type_t *ot = ss->orbit_types + oti;
443 ot->p_offset = p_ot_cumsum;
444 p_ot_cumsum += ot->size * ot->instance_count;
447 // Set ss->p_by_orbit[]
448 QPMS_CRASHING_MALLOC(ss->p_by_orbit, sizeof(qpms_ss_pi_t) * ss->p_count);
449 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
450 const qpms_ss_particle_orbitinfo_t *oi = ss->p_orbitinfo + pi;
451 const qpms_ss_oti_t oti = oi->t;
452 const qpms_ss_orbit_type_t *ot = ss->orbit_types + oti;
453 ss->p_by_orbit[ot->p_offset + ot->size * oi->osn + oi->p] = pi;
456 ss->c = qpms_trans_calculator_init(lMax, normalisation);
457 return ssw;
461 void qpms_scatsys_free(qpms_scatsys_t *ss) {
462 if(ss) {
463 free(ss->tm);
464 free(ss->tmg);
465 free(ss->p);
466 free(ss->fecv_pstarts);
467 free(ss->tm_sym_map);
468 free(ss->p_sym_map);
469 free(ss->otspace);
470 free(ss->p_orbitinfo);
471 free(ss->orbit_types);
472 free(ss->saecv_sizes);
473 free(ss->p_by_orbit);
474 qpms_trans_calculator_free(ss->c);
476 free(ss);
481 // (copypasta from symmetries.c)
482 // TODO at some point, maybe support also other norms.
483 // (perhaps just use qpms_normalisation_t_factor() at the right places)
484 static inline void check_norm_compat(const qpms_vswf_set_spec_t *s)
486 switch (s->norm & QPMS_NORMALISATION_NORM_BITS) {
487 case QPMS_NORMALISATION_NORM_POWER:
488 break;
489 case QPMS_NORMALISATION_NORM_SPHARM:
490 break;
491 default:
492 QPMS_WTF; // Only SPHARM and POWER norms are supported right now.
496 complex double *qpms_orbit_action_matrix(complex double *target,
497 const qpms_ss_orbit_type_t *ot, const qpms_vswf_set_spec_t *bspec,
498 const qpms_finite_group_t *sym, const qpms_gmi_t g) {
499 assert(sym); assert(g < sym->order);
500 assert(sym->rep3d);
501 assert(ot); assert(ot->size > 0);
502 // check_norm_compat(bspec); not needed here, the qpms_irot3_uvswfi_dense should complain if necessary
503 const size_t n = bspec->n;
504 const qpms_gmi_t N = ot->size;
505 if (target == NULL)
506 QPMS_CRASHING_MALLOC(target, n*n*N*N*sizeof(complex double));
507 memset(target, 0, n*n*N*N*sizeof(complex double));
508 complex double tmp[n][n]; // this is the 'per-particle' action
509 qpms_irot3_uvswfi_dense(tmp[0], bspec, sym->rep3d[g]);
510 for(qpms_ss_orbit_pi_t Col = 0; Col < ot->size; ++Col) {
511 // Row is the 'destination' of the symmetry operation, Col is the 'source'
512 const qpms_ss_orbit_pi_t Row = ot->action[sym->order * Col + g];
513 for(size_t row = 0; row < bspec->n; ++row)
514 for(size_t col = 0; col < bspec->n; ++col)
515 target[n*n*N*Row + n*Col + n*N*row + col] = conj(tmp[row][col]); //CHECKCONJ
517 #ifdef DUMP_ACTIONMATRIX
518 fprintf(stderr,"%d: %s\n",
519 (int) g, (sym->permrep && sym->permrep[g])?
520 sym->permrep[g] : "");
521 for (size_t Row = 0; Row < ot->size; ++Row) {
522 fprintf(stderr, "--------------------------\n");
523 for (size_t row = 0; row < bspec->n; ++row) {
524 for (size_t Col = 0; Col < ot->size; ++Col) {
525 fprintf(stderr, "| ");
526 for (size_t col = 0; col < bspec->n; ++col)
527 fprintf(stderr, "%+2.3f%+2.3fj ", creal(target[n*n*N*Row + n*Col + n*N*row + col]),cimag(target[n*n*N*Row + n*Col + n*N*row + col]));
529 fprintf(stderr, "|\n");
532 fprintf(stderr, "-------------------------------\n\n");
533 #endif
535 return target;
538 complex double *qpms_orbit_irrep_projector_matrix(complex double *target,
539 const qpms_ss_orbit_type_t *ot, const qpms_vswf_set_spec_t *bspec,
540 const qpms_finite_group_t *sym, const qpms_iri_t iri) {
541 assert(sym);
542 assert(sym->rep3d);
543 assert(ot); assert(ot->size > 0);
544 assert(iri < sym->nirreps); assert(sym->irreps);
545 // check_norm_compat(bspec); // probably not needed here, let the called functions complain if necessary, but CHEKME
546 const size_t n = bspec->n;
547 const qpms_gmi_t N = ot->size;
548 if (target == NULL)
549 QPMS_CRASHING_MALLOC(target, n*n*N*N*sizeof(complex double));
550 memset(target, 0, n*n*N*N*sizeof(complex double));
551 // Workspace for the orbit group action matrices
552 complex double *tmp = malloc(n*n*N*N*sizeof(complex double));
553 const int d = sym->irreps[iri].dim;
554 double prefac = d / (double) sym->order;
555 for(int partner = 0; partner < d; ++partner) {
556 for(qpms_gmi_t g = 0; g < sym->order; ++g) {
557 // We use the diagonal elements of D_g
558 complex double D_g_conj = sym->irreps[iri].m[g*d*d + partner*d + partner];
559 #ifdef DUMP_ACTIONMATRIX
560 fprintf(stderr,"(factor %+g%+gj) ", creal(D_g_conj), cimag(D_g_conj));
561 #endif
562 qpms_orbit_action_matrix(tmp, ot, bspec, sym, g);
563 // TODO kahan sum?
564 for(size_t i = 0; i < n*n*N*N; ++i)
565 target[i] += prefac * D_g_conj * tmp[i];
568 free(tmp);
569 #ifdef DUMP_PROJECTORMATRIX
570 fprintf(stderr,"Projector %d (%s):\n", (int) iri,
571 sym->irreps[iri].name?sym->irreps[iri].name:"");
572 for (size_t Row = 0; Row < ot->size; ++Row) {
573 fprintf(stderr, "--------------------------\n");
574 for (size_t row = 0; row < bspec->n; ++row) {
575 for (size_t Col = 0; Col < ot->size; ++Col) {
576 fprintf(stderr, "| ");
577 for (size_t col = 0; col < bspec->n; ++col)
578 fprintf(stderr, "%+2.3f%+2.3fj ", creal(target[n*n*N*Row + n*Col + n*N*row + col]),cimag(target[n*n*N*Row + n*Col + n*N*row + col]));
580 fprintf(stderr, "|\n");
583 fprintf(stderr, "-------------------------------\n\n");
584 #endif
585 return target;
588 #define SVD_ATOL 1e-8
590 complex double *qpms_orbit_irrep_basis(size_t *basis_size,
591 complex double *target,
592 const qpms_ss_orbit_type_t *ot, const qpms_vswf_set_spec_t *bspec,
593 const qpms_finite_group_t *sym, const qpms_iri_t iri) {
594 assert(sym);
595 assert(sym->rep3d);
596 assert(ot); assert(ot->size > 0);
597 assert(iri < sym->nirreps); assert(sym->irreps);
598 check_norm_compat(bspec); // Here I'm not sure; CHECKME
599 const size_t n = bspec->n;
600 const qpms_gmi_t N = ot->size;
601 const bool newtarget = (target == NULL);
602 if (newtarget)
603 QPMS_CRASHING_MALLOC(target,n*n*N*N*sizeof(complex double));
604 memset(target, 0, n*n*N*N*sizeof(complex double));
606 // Get the projector (also workspace for right sg. vect.)
607 complex double *projector = qpms_orbit_irrep_projector_matrix(NULL,
608 ot, bspec, sym, iri);
609 if(!projector) abort();
610 // Workspace for the right singular vectors.
611 complex double *V_H; QPMS_CRASHING_MALLOC(V_H, n*n*N*N*sizeof(complex double));
612 // THIS SHOULD NOT BE NECESSARY
613 complex double *U; QPMS_CRASHING_MALLOC(U, n*n*N*N*sizeof(complex double));
614 double *s; QPMS_CRASHING_MALLOC(s, n*N*sizeof(double));
616 int info = LAPACKE_zgesdd(LAPACK_ROW_MAJOR,
617 'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
618 n*N /* m */, n*N /* n */, projector /* a */, n*N /* lda */,
619 s /* s */, U /* u */, n*N /* ldu, irrelev. */, V_H /* vt */,
620 n*N /* ldvt */);
621 if (info) qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
622 "Something went wrong with the SVD.");
625 size_t bs;
626 for(bs = 0; bs < n*N; ++bs) {
627 #if 0
628 qpms_pr_debug_at_flf(__FILE__, __LINE__, __func__,
629 "%d. irrep, %zd. SV: %.16lf", (int) iri, bs, s[bs]);
630 #endif
631 if(s[bs] > 1 + SVD_ATOL) qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
632 "%zd. SV too large: %.16lf", bs, s[bs]);
633 if(s[bs] > SVD_ATOL && fabs(1-s[bs]) > SVD_ATOL)
634 qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
635 "%zd. SV in the 'wrong' interval: %.16lf", bs, s[bs]);
636 if(s[bs] < SVD_ATOL) break;
639 memcpy(target, V_H, bs*N*n*sizeof(complex double));
640 if(newtarget) QPMS_CRASHING_REALLOC(target, bs*N*n*sizeof(complex double));
641 if(basis_size) *basis_size = bs;
643 free(U);
644 free(V_H);
645 free(projector);
646 return target;
649 complex double *qpms_scatsys_irrep_transform_matrix(complex double *U,
650 const qpms_scatsys_t *ss, qpms_iri_t iri) {
651 const size_t packedlen = ss->saecv_sizes[iri];
652 const size_t full_len = ss->fecv_size;
653 if (U == NULL)
654 QPMS_CRASHING_MALLOC(U,full_len * packedlen * sizeof(complex double));
655 memset(U, 0, full_len * packedlen * sizeof(complex double));
657 size_t fullvec_offset = 0;
659 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
660 const qpms_ss_oti_t oti = ss->p_orbitinfo[pi].t;
661 const qpms_ss_orbit_type_t *const ot = ss->orbit_types + oti;
662 const qpms_ss_osn_t osn = ss->p_orbitinfo[pi].osn;
663 const qpms_ss_orbit_pi_t opi = ss->p_orbitinfo[pi].p;
664 // This is where the particle's orbit starts in the "packed" vector:
665 const size_t packed_orbit_offset = ss->saecv_ot_offsets[iri*ss->orbit_type_count + oti]
666 + osn * ot->irbase_sizes[iri];
667 // Orbit coeff vector's full size:
668 const size_t orbit_fullsize = ot->size * ot->bspecn;
669 const size_t orbit_packedsize = ot->irbase_sizes[iri];
670 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
671 const complex double *om = ot->irbases + ot->irbase_offsets[iri];
673 for (size_t prow = 0; prow < orbit_packedsize; ++prow)
674 for (size_t pcol = 0; pcol < ot->bspecn; ++pcol)
675 U[full_len * (packed_orbit_offset + prow) + (fullvec_offset + pcol)]
676 = om[orbit_fullsize * prow + (opi * ot->bspecn + pcol)];
677 fullvec_offset += ot->bspecn;
680 return U;
683 complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed,
684 const complex double *orig_full, const qpms_scatsys_t *ss,
685 qpms_iri_t iri){
686 const size_t packedlen = ss->saecv_sizes[iri];
687 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
688 return target_packed;
689 const size_t full_len = ss->fecv_size;
690 if (target_packed == NULL)
691 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
692 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
694 // Workspace for the intermediate matrix
695 complex double *tmp;
696 QPMS_CRASHING_MALLOC(tmp, full_len * packedlen * sizeof(complex double));
698 complex double *U = qpms_scatsys_irrep_transform_matrix(NULL, ss, iri);
700 const complex double one = 1, zero = 0;
702 // tmp = F U*
703 cblas_zgemm(
704 CblasRowMajor, CblasNoTrans, CblasConjTrans,
705 full_len /*M*/, packedlen /*N*/, full_len /*K*/,
706 &one /*alpha*/, orig_full/*A*/, full_len/*ldA*/,
707 U /*B*/, full_len/*ldB*/,
708 &zero /*beta*/, tmp /*C*/, packedlen /*LDC*/);
709 // target = U tmp
710 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
711 packedlen /*M*/, packedlen /*N*/, full_len /*K*/,
712 &one /*alpha*/, U/*A*/, full_len/*ldA*/,
713 tmp /*B*/, packedlen /*ldB*/, &zero /*beta*/,
714 target_packed /*C*/, packedlen /*ldC*/);
716 free(tmp);
717 free(U);
718 return target_packed;
721 /// Transforms a big "packed" matrix into the full basis (trivial implementation).
722 complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full,
723 const complex double *orig_packed, const qpms_scatsys_t *ss,
724 qpms_iri_t iri, bool add){
725 const size_t packedlen = ss->saecv_sizes[iri];
726 const size_t full_len = ss->fecv_size;
727 if (target_full == NULL)
728 QPMS_CRASHING_MALLOC(target_full, SQ(full_len)*sizeof(complex double));
729 if(!add) memset(target_full, 0, SQ(full_len)*sizeof(complex double));
731 if(!packedlen) return target_full; // Empty irrep, do nothing.
733 // Workspace for the intermediate matrix result
734 complex double *tmp;
735 QPMS_CRASHING_MALLOC(tmp, full_len * packedlen * sizeof(complex double));
737 complex double *U = qpms_scatsys_irrep_transform_matrix(NULL, ss, iri);
739 const complex double one = 1, zero = 0;
741 // tmp = P U
742 cblas_zgemm(
743 CblasRowMajor, CblasNoTrans, CblasNoTrans,
744 packedlen /*M*/, full_len /*N*/, packedlen /*K*/,
745 &one /*alpha*/, orig_packed/*A*/, packedlen/*ldA*/,
746 U /*B*/, full_len/*ldB*/,
747 &zero /*beta*/, tmp /*C*/, full_len /*LDC*/);
748 // target += U* tmp
749 cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans,
750 full_len /*M*/, full_len /*N*/, packedlen /*K*/,
751 &one /*alpha*/, U/*A*/, full_len/*ldA*/,
752 tmp /*B*/, full_len /*ldB*/, &one /*beta*/,
753 target_full /*C*/, full_len /*ldC*/);
754 free(tmp);
755 free(U);
756 return target_full;
759 complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed,
760 const complex double *orig_full, const qpms_scatsys_t *ss,
761 qpms_iri_t iri){
762 const size_t packedlen = ss->saecv_sizes[iri];
763 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
764 return target_packed;
765 const size_t full_len = ss->fecv_size;
766 if (target_packed == NULL)
767 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
768 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
770 // Workspace for the intermediate particle-orbit matrix result
771 complex double *tmp;
772 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
774 const complex double one = 1, zero = 0;
776 size_t fullvec_offsetR = 0;
777 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) { //Row loop
778 const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t;
779 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
780 const qpms_ss_osn_t osnR = ss->p_orbitinfo[piR].osn;
781 const qpms_ss_orbit_pi_t opiR = ss->p_orbitinfo[piR].p;
782 // This is where the particle's orbit starts in the "packed" vector:
783 const size_t packed_orbit_offsetR =
784 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
785 + osnR * otR->irbase_sizes[iri];
786 // Orbit coeff vector's full size:
787 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
788 const size_t particle_fullsizeR = otR->bspecn;
789 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
790 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
791 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
793 size_t fullvec_offsetC = 0;
794 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
795 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
796 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
797 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
798 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
799 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
800 // This is where the particle's orbit starts in the "packed" vector:
801 const size_t packed_orbit_offsetC =
802 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
803 + osnC * otC->irbase_sizes[iri];
804 // Orbit coeff vector's full size:
805 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
806 const size_t particle_fullsizeC = otC->bspecn;
807 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
808 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
809 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
811 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
812 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
813 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
814 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
815 &one /*alpha*/, orig_full + full_len*fullvec_offsetR + fullvec_offsetC/*A*/,
816 full_len/*ldA*/,
817 omC + opiC*particle_fullsizeC /*B*/,
818 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
819 tmp /*C*/, orbit_packedsizeC /*LDC*/);
821 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
822 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
823 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
824 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/,
825 orbit_fullsizeR/*ldA*/,
826 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
827 target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
828 packedlen /*ldC*/);
830 fullvec_offsetC += otC->bspecn;
833 fullvec_offsetR += otR->bspecn;
835 free(tmp);
836 return target_packed;
840 /// Transforms a big "packed" matrix into the full basis.
841 /** TODO doc */
842 complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full,
843 const complex double *orig_packed, const qpms_scatsys_t *ss,
844 qpms_iri_t iri, bool add){
845 const size_t packedlen = ss->saecv_sizes[iri];
846 const size_t full_len = ss->fecv_size;
847 if (target_full == NULL)
848 QPMS_CRASHING_MALLOC(target_full, SQ(full_len)*sizeof(complex double));
849 if(!add) memset(target_full, 0, SQ(full_len)*sizeof(complex double));
851 if(!packedlen) return target_full; // Empty irrep, do nothing.
853 // Workspace for the intermediate particle-orbit matrix result
854 complex double *tmp;
855 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
857 const complex double one = 1, zero = 0;
859 size_t fullvec_offsetR = 0;
860 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) { //Row loop
861 const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t;
862 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
863 const qpms_ss_osn_t osnR = ss->p_orbitinfo[piR].osn;
864 const qpms_ss_orbit_pi_t opiR = ss->p_orbitinfo[piR].p;
865 // This is where the particle's orbit starts in the "packed" vector:
866 const size_t packed_orbit_offsetR =
867 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
868 + osnR * otR->irbase_sizes[iri];
869 // Orbit coeff vector's full size:
870 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
871 const size_t particle_fullsizeR = otR->bspecn;
872 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
873 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
874 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
876 size_t fullvec_offsetC = 0;
877 if (orbit_packedsizeR) // avoid crash on empty irrep
878 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
879 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
880 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
881 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
882 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
883 // This is where the particle's orbit starts in the "packed" vector:
884 const size_t packed_orbit_offsetC =
885 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
886 + osnC * otC->irbase_sizes[iri];
887 // Orbit coeff vector's full size:
888 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
889 const size_t particle_fullsizeC = otC->bspecn;
890 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
891 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
892 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
893 if (orbit_packedsizeC) { // avoid crash on empty irrep
894 // tmp = P U
895 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC]
896 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
897 orbit_packedsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeC /*K*/,
898 &one /*alpha*/, orig_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC/*A*/,
899 packedlen/*ldA*/,
900 omC + opiC*particle_fullsizeC /*B*/,
901 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
902 tmp /*C*/, particle_fullsizeC /*LDC*/);
904 // target[oiR|piR,oiC|piC] += U*[...] tmp[...]
905 cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans,
906 particle_fullsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeR /*K*/,
907 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/,
908 orbit_fullsizeR/*ldA*/,
909 tmp /*B*/, particle_fullsizeC /*ldB*/, &one /*beta*/,
910 target_full + full_len*fullvec_offsetR + fullvec_offsetC /*C*/,
911 full_len /*ldC*/);
913 fullvec_offsetC += otC->bspecn;
915 fullvec_offsetR += otR->bspecn;
918 free(tmp);
919 return target_full;
922 /// Projects a "big" vector onto an irrep.
923 /** TODO doc */
924 complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed,
925 const complex double *orig_full, const qpms_scatsys_t *ss,
926 const qpms_iri_t iri) {
927 const size_t packedlen = ss->saecv_sizes[iri];
928 if (!packedlen) return target_packed; // Empty irrep
929 if (target_packed == NULL)
930 QPMS_CRASHING_MALLOC(target_packed, packedlen*sizeof(complex double));
931 memset(target_packed, 0, packedlen*sizeof(complex double));
933 const complex double one = 1;
935 size_t fullvec_offset = 0;
936 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
937 const qpms_ss_oti_t oti = ss->p_orbitinfo[pi].t;
938 const qpms_ss_orbit_type_t *const ot = ss->orbit_types + oti;
939 const qpms_ss_osn_t osn = ss->p_orbitinfo[pi].osn;
940 const qpms_ss_orbit_pi_t opi = ss->p_orbitinfo[pi].p;
941 // This is where the particle's orbit starts in the "packed" vector:
942 const size_t packed_orbit_offset = ss->saecv_ot_offsets[iri*ss->orbit_type_count + oti]
943 + osn * ot->irbase_sizes[iri];
944 // Orbit coeff vector's full size:
945 const size_t orbit_fullsize = ot->size * ot->bspecn;
946 const size_t particle_fullsize = ot->bspecn;
947 const size_t orbit_packedsize = ot->irbase_sizes[iri];
948 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
949 const complex double *om = ot->irbases + ot->irbase_offsets[iri];
950 if (orbit_packedsize) // avoid crash on empty irrep
951 cblas_zgemv(CblasRowMajor/*order*/, CblasNoTrans/*transA*/,
952 orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/,
953 om + opi*particle_fullsize/*A*/, orbit_fullsize/*lda*/,
954 orig_full+fullvec_offset/*X*/, 1/*incX*/,
955 &one/*beta*/, target_packed+packed_orbit_offset/*Y*/, 1/*incY*/);
956 fullvec_offset += ot->bspecn;
958 return target_packed;
961 /// Transforms a big "packed" vector into the full basis.
962 /** TODO doc */
963 complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full,
964 const complex double *orig_packed, const qpms_scatsys_t *ss,
965 const qpms_iri_t iri, bool add) {
966 const size_t full_len = ss->fecv_size;
967 if (target_full == NULL)
968 QPMS_CRASHING_MALLOC(target_full, full_len*sizeof(complex double));
969 if (!add) memset(target_full, 0, full_len*sizeof(complex double));
971 const complex double one = 1;
972 const size_t packedlen = ss->saecv_sizes[iri];
973 if (!packedlen) return target_full; // Completely empty irrep
975 size_t fullvec_offset = 0;
976 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
977 const qpms_ss_oti_t oti = ss->p_orbitinfo[pi].t;
978 const qpms_ss_orbit_type_t *const ot = ss->orbit_types + oti;
979 const qpms_ss_osn_t osn = ss->p_orbitinfo[pi].osn;
980 const qpms_ss_orbit_pi_t opi = ss->p_orbitinfo[pi].p;
981 // This is where the particle's orbit starts in the "packed" vector:
982 const size_t packed_orbit_offset = ss->saecv_ot_offsets[iri*ss->orbit_type_count + oti]
983 + osn * ot->irbase_sizes[iri];
984 // Orbit coeff vector's full size:
985 const size_t orbit_fullsize = ot->size * ot->bspecn;
986 const size_t particle_fullsize = ot->bspecn;
987 const size_t orbit_packedsize = ot->irbase_sizes[iri];
988 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
989 const complex double *om = ot->irbases + ot->irbase_offsets[iri];
991 if (orbit_packedsize) // empty irrep, avoid zgemv crashing.
992 cblas_zgemv(CblasRowMajor/*order*/, CblasConjTrans/*transA*/,
993 orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/, om + opi*particle_fullsize/*A*/,
994 orbit_fullsize/*lda*/, orig_packed+packed_orbit_offset /*X*/, 1/*incX*/, &one/*beta*/,
995 target_full+fullvec_offset/*Y*/, 1/*incY*/);
997 fullvec_offset += ot->bspecn;
999 return target_full;
1002 complex double *qpms_scatsys_build_translation_matrix_full(
1003 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1004 complex double *target,
1005 const qpms_scatsys_t *ss,
1006 complex double k ///< Wave number to use in the translation matrix.
1009 return qpms_scatsys_build_translation_matrix_e_full(
1010 target, ss, k, QPMS_HANKEL_PLUS);
1013 complex double *qpms_scatsys_build_translation_matrix_e_full(
1014 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1015 complex double *target,
1016 const qpms_scatsys_t *ss,
1017 complex double k, ///< Wave number to use in the translation matrix.
1018 qpms_bessel_t J ///< Bessel function type.
1021 const size_t full_len = ss->fecv_size;
1022 if(!target)
1023 QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
1024 memset(target, 0, SQ(full_len) * sizeof(complex double)); //unnecessary?
1025 { // Non-diagonal part; M[piR, piC] = T[piR] S(piR<-piC)
1026 size_t fullvec_offsetR = 0;
1027 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) {
1028 const qpms_vswf_set_spec_t *bspecR = qpms_ss_bspec_pi(ss, piR);
1029 const cart3_t posR = ss->p[piR].pos;
1030 size_t fullvec_offsetC = 0;
1031 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
1032 const qpms_vswf_set_spec_t *bspecC = qpms_ss_bspec_pi(ss, piC);
1033 if(piC != piR) { // The diagonal will be dealt with later.
1034 const cart3_t posC = ss->p[piC].pos;
1035 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1036 target + fullvec_offsetR*full_len + fullvec_offsetC,
1037 bspecR, full_len, bspecC, 1,
1038 k, posR, posC, J));
1040 fullvec_offsetC += bspecC->n;
1042 assert(fullvec_offsetC = full_len);
1043 fullvec_offsetR += bspecR->n;
1045 assert(fullvec_offsetR == full_len);
1048 return target;
1053 complex double *qpms_scatsys_at_omega_build_modeproblem_matrix_full(
1054 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1055 complex double *target,
1056 const qpms_scatsys_at_omega_t *ssw
1059 const complex double k = ssw->wavenumber;
1060 const qpms_scatsys_t *ss = ssw->ss;
1061 const size_t full_len = ss->fecv_size;
1062 if(!target)
1063 QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
1064 complex double *tmp;
1065 QPMS_CRASHING_MALLOC(tmp, SQ(ss->max_bspecn) * sizeof(complex double));
1066 memset(target, 0, SQ(full_len) * sizeof(complex double)); //unnecessary?
1067 const complex double zero = 0, minusone = -1;
1068 { // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
1069 size_t fullvec_offsetR = 0;
1070 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) {
1071 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec;
1072 const cart3_t posR = ss->p[piR].pos;
1073 size_t fullvec_offsetC = 0;
1074 // dest particle T-matrix
1075 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1076 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
1077 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1078 if(piC != piR) { // The diagonal will be dealt with later.
1079 const cart3_t posC = ss->p[piC].pos;
1080 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1081 tmp, // tmp is S(piR<-piC)
1082 bspecR, bspecC->n, bspecC, 1,
1083 k, posR, posC, QPMS_HANKEL_PLUS));
1084 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1085 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1086 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1087 tmp/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1088 target + fullvec_offsetR*full_len + fullvec_offsetC /*c*/,
1089 full_len /*ldc*/);
1091 fullvec_offsetC += bspecC->n;
1093 fullvec_offsetR += bspecR->n;
1096 // diagonal part M[pi,pi] = +1
1097 for (size_t i = 0; i < full_len; ++i) target[full_len * i + i] = +1;
1099 free(tmp);
1100 return target;
1103 // Serial reference implementation.
1104 complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_serial(
1105 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1106 complex double *target_packed,
1107 const qpms_scatsys_at_omega_t *ssw,
1108 qpms_iri_t iri
1111 const qpms_scatsys_t *ss = ssw->ss;
1112 const complex double k = ssw->wavenumber;
1113 const size_t packedlen = ss->saecv_sizes[iri];
1114 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1115 return target_packed;
1116 if (target_packed == NULL)
1117 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
1118 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1120 // some of the following workspaces are probably redundant; TODO optimize later.
1122 // workspaces for the uncompressed particle<-particle tranlation matrix block
1123 // and the result of multiplying with a T-matrix (times -1)
1124 complex double *Sblock, *TSblock;
1125 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1126 QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
1128 // Workspace for the intermediate particle-orbit matrix result
1129 complex double *tmp;
1130 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1132 const complex double one = 1, zero = 0, minusone = -1;
1134 for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) { //Row loop
1135 const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t;
1136 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1137 const qpms_ss_osn_t osnR = ss->p_orbitinfo[piR].osn;
1138 const qpms_ss_orbit_pi_t opiR = ss->p_orbitinfo[piR].p;
1139 // This is where the particle's orbit starts in the "packed" vector:
1140 const size_t packed_orbit_offsetR =
1141 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1142 + osnR * otR->irbase_sizes[iri];
1143 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec;
1144 // Orbit coeff vector's full size:
1145 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1146 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1147 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1148 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1149 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1150 const cart3_t posR = ss->p[piR].pos;
1151 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1152 // dest particle T-matrix
1153 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1154 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1155 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1156 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1157 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1158 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1159 // This is where the particle's orbit starts in the "packed" vector:
1160 const size_t packed_orbit_offsetC =
1161 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1162 + osnC * otC->irbase_sizes[iri];
1163 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1164 // Orbit coeff vector's full size:
1165 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1166 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1167 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1168 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1169 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1171 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1172 if(piC != piR) { // non-diagonal, calculate TS
1173 const cart3_t posC = ss->p[piC].pos;
1174 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1175 Sblock, // Sblock is S(piR->piC)
1176 bspecR, bspecC->n, bspecC, 1,
1177 k, posR, posC, QPMS_HANKEL_PLUS));
1179 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1180 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1181 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1182 Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1183 TSblock /*c*/, bspecC->n /*ldc*/);
1184 } else { // diagonal, fill with diagonal +1
1185 for (size_t row = 0; row < bspecR->n; ++row)
1186 for (size_t col = 0; col < bspecC->n; ++col)
1187 TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
1190 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1191 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1192 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1193 &one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
1194 omC + opiC*particle_fullsizeC /*B*/,
1195 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1196 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1198 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1199 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1200 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1201 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1202 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1203 target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1204 packedlen /*ldC*/);
1209 free(tmp);
1210 free(Sblock);
1211 free(TSblock);
1212 return target_packed;
1215 complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR(
1216 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1217 complex double *target_packed,
1218 const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri
1221 const qpms_scatsys_t *ss = ssw->ss;
1222 const complex double k = ssw->wavenumber;
1223 const size_t packedlen = ss->saecv_sizes[iri];
1224 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1225 return target_packed;
1226 if (target_packed == NULL)
1227 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
1228 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1230 // some of the following workspaces are probably redundant; TODO optimize later.
1232 // workspaces for the uncompressed particle<-particle tranlation matrix block
1233 // and the result of multiplying with a T-matrix (times -1)
1234 complex double *Sblock, *TSblock;
1235 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1236 QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
1238 // Workspace for the intermediate particle-orbit matrix result
1239 complex double *tmp;
1240 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1242 const complex double one = 1, zero = 0, minusone = -1;
1244 for(qpms_ss_pi_t opistartR = 0; opistartR < ss->p_count;
1245 opistartR += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size //orbit_p_countR; might write a while() instead
1247 const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
1248 const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
1249 const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
1250 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1251 const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
1252 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1254 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1255 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1256 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[orbitstartpiR].tmatrix_id]->spec;
1257 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1258 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1259 // Orbit coeff vector's full size:
1260 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1261 // This is where the orbit starts in the "packed" vector:
1262 const size_t packed_orbit_offsetR =
1263 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1264 + osnR * otR->irbase_sizes[iri];
1265 for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
1266 qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
1267 assert(opiR == ss->p_orbitinfo[piR].p);
1268 assert(otiR == ss->p_orbitinfo[piR].t);
1269 assert(ss->p_orbitinfo[piR].osn == osnR);
1270 const cart3_t posR = ss->p[piR].pos;
1271 // dest particle T-matrix
1272 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1273 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1274 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1275 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1276 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1277 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1278 // This is where the particle's orbit starts in the "packed" vector:
1279 const size_t packed_orbit_offsetC =
1280 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1281 + osnC * otC->irbase_sizes[iri];
1282 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1283 // Orbit coeff vector's full size:
1284 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1285 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1286 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1287 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1288 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1290 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1291 if(piC != piR) { // non-diagonal, calculate TS
1292 const cart3_t posC = ss->p[piC].pos;
1293 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1294 Sblock, // Sblock is S(piR->piC)
1295 bspecR, bspecC->n, bspecC, 1,
1296 k, posR, posC, QPMS_HANKEL_PLUS));
1298 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1299 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1300 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1301 Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1302 TSblock /*c*/, bspecC->n /*ldc*/);
1303 } else { // diagonal, fill with diagonal +1
1304 for (size_t row = 0; row < bspecR->n; ++row)
1305 for (size_t col = 0; col < bspecC->n; ++col)
1306 TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
1309 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1310 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1311 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1312 &one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
1313 omC + opiC*particle_fullsizeC /*B*/,
1314 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1315 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1317 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1318 cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1319 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1320 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1321 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1322 target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1323 packedlen /*ldC*/);
1329 free(tmp);
1330 free(Sblock);
1331 free(TSblock);
1332 return target_packed;
1335 struct qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg{
1336 const qpms_scatsys_at_omega_t *ssw;
1337 qpms_ss_pi_t *opistartR_ptr;
1338 pthread_mutex_t *opistartR_mutex;
1339 qpms_iri_t iri;
1340 complex double *target_packed;
1343 static void *qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg)
1345 const struct qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1346 *a = arg;
1347 const qpms_scatsys_at_omega_t *ssw = a->ssw;
1348 const complex double k = ssw->wavenumber;
1349 const qpms_scatsys_t *ss = ssw->ss;
1350 const qpms_iri_t iri = a->iri;
1351 const size_t packedlen = ss->saecv_sizes[iri];
1353 // some of the following workspaces are probably redundant; TODO optimize later.
1355 // workspaces for the uncompressed particle<-particle tranlation matrix block
1356 // and the result of multiplying with a T-matrix (times -1)
1357 complex double *Sblock, *TSblock;
1358 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1359 QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
1361 // Workspace for the intermediate particle-orbit matrix result
1362 complex double *tmp;
1363 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1365 const complex double one = 1, zero = 0, minusone = -1;
1367 while(1) {
1368 // In the beginning, pick a target (row) orbit for this thread
1369 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a->opistartR_mutex));
1370 if(*(a->opistartR_ptr) >= ss->p_count) {// Everything is already done, end
1371 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1372 break;
1374 const qpms_ss_pi_t opistartR = *(a->opistartR_ptr);
1375 // Now increment it for another thread:
1376 *(a->opistartR_ptr) += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size;
1377 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1379 // Orbit picked (defined by opistartR), do the work.
1380 const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
1381 const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
1382 const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
1383 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1384 const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
1385 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1387 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1388 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1389 const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[orbitstartpiR].tmatrix_id]->spec;
1390 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1391 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1392 // Orbit coeff vector's full size:
1393 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1394 // This is where the orbit starts in the "packed" vector:
1395 const size_t packed_orbit_offsetR =
1396 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1397 + osnR * otR->irbase_sizes[iri];
1398 for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
1399 qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
1400 assert(opiR == ss->p_orbitinfo[piR].p);
1401 assert(otiR == ss->p_orbitinfo[piR].t);
1402 assert(ss->p_orbitinfo[piR].osn == osnR);
1403 const cart3_t posR = ss->p[piR].pos;
1404 // dest particle T-matrix
1405 const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
1406 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1407 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1408 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1409 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1410 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1411 // This is where the particle's orbit starts in the "packed" vector:
1412 const size_t packed_orbit_offsetC =
1413 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1414 + osnC * otC->irbase_sizes[iri];
1415 const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
1416 // Orbit coeff vector's full size:
1417 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1418 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1419 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1420 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1421 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1423 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1424 if(piC != piR) { // non-diagonal, calculate TS
1425 const cart3_t posC = ss->p[piC].pos;
1426 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1427 Sblock, // Sblock is S(piR->piC)
1428 bspecR, bspecC->n, bspecC, 1,
1429 k, posR, posC, QPMS_HANKEL_PLUS));
1431 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1432 bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
1433 &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
1434 Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
1435 TSblock /*c*/, bspecC->n /*ldc*/);
1436 } else { // diagonal, fill with diagonal +1
1437 for (size_t row = 0; row < bspecR->n; ++row)
1438 for (size_t col = 0; col < bspecC->n; ++col)
1439 TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
1442 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1443 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1444 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1445 &one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
1446 omC + opiC*particle_fullsizeC /*B*/,
1447 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1448 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1450 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1451 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1452 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1453 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1454 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1455 a->target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1456 packedlen /*ldC*/);
1465 free(tmp);
1466 free(Sblock);
1467 free(TSblock);
1468 return NULL;
1471 // this differs from the ...build_modeproblem_matrix... only by the `J`
1472 // maybe I should use this one there as well to save lines... TODO
1473 struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg{
1474 const qpms_scatsys_t *ss;
1475 qpms_ss_pi_t *opistartR_ptr;
1476 pthread_mutex_t *opistartR_mutex;
1477 qpms_iri_t iri;
1478 complex double *target_packed;
1479 complex double k;
1480 qpms_bessel_t J;
1483 static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread(void *arg)
1485 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1486 *a = arg;
1487 const qpms_scatsys_t *ss = a->ss;
1488 const qpms_iri_t iri = a->iri;
1489 const size_t packedlen = ss->saecv_sizes[iri];
1490 const qpms_bessel_t J = a->J;
1492 // some of the following workspaces are probably redundant; TODO optimize later.
1494 // workspace for the uncompressed particle<-particle tranlation matrix block
1495 complex double *Sblock;
1496 QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
1498 // Workspace for the intermediate particle-orbit matrix result
1499 complex double *tmp;
1500 QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
1502 const complex double one = 1, zero = 0;
1504 while(1) {
1505 // In the beginning, pick a target (row) orbit for this thread
1506 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a->opistartR_mutex));
1507 if(*(a->opistartR_ptr) >= ss->p_count) {// Everything is already done, end
1508 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1509 break;
1511 const qpms_ss_pi_t opistartR = *(a->opistartR_ptr);
1512 // Now increment it for another thread:
1513 *(a->opistartR_ptr) += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size;
1514 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
1516 // Orbit picked (defined by opistartR), do the work.
1517 const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
1518 const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
1519 const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
1520 const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
1521 const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
1522 const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
1524 if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
1525 const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
1526 const qpms_vswf_set_spec_t *bspecR = qpms_ss_bspec_pi(ss, orbitstartpiR);
1527 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1528 const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
1529 // Orbit coeff vector's full size:
1530 const size_t orbit_fullsizeR = otR->size * otR->bspecn;
1531 // This is where the orbit starts in the "packed" vector:
1532 const size_t packed_orbit_offsetR =
1533 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
1534 + osnR * otR->irbase_sizes[iri];
1535 for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
1536 qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
1537 assert(opiR == ss->p_orbitinfo[piR].p);
1538 assert(otiR == ss->p_orbitinfo[piR].t);
1539 assert(ss->p_orbitinfo[piR].osn == osnR);
1540 const cart3_t posR = ss->p[piR].pos;
1541 for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
1542 const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
1543 const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
1544 const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
1545 const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
1546 // This is where the particle's orbit starts in the "packed" vector:
1547 const size_t packed_orbit_offsetC =
1548 ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
1549 + osnC * otC->irbase_sizes[iri];
1550 const qpms_vswf_set_spec_t *bspecC = qpms_ss_bspec_pi(ss, piC);
1551 // Orbit coeff vector's full size:
1552 const size_t orbit_fullsizeC = otC->size * otC->bspecn;
1553 const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
1554 const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
1555 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1556 const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
1558 if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
1559 // THIS IS THE MAIN PART DIFFERENT FROM ...modeproblem...() TODO unify
1560 // somehow to save lines
1561 if(piC != piR) { // non-diagonal, calculate S
1562 const cart3_t posC = ss->p[piC].pos;
1563 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
1564 Sblock, // Sblock is S(piR->piC)
1565 bspecR, bspecC->n, bspecC, 1,
1566 a->k, posR, posC, J));
1567 } else { // diagonal, fill with zeros; TODO does this make sense?
1568 // would unit matrix be better? or unit only for QPMS_BESSEL_REGULAR?
1569 for (size_t row = 0; row < bspecR->n; ++row)
1570 for (size_t col = 0; col < bspecC->n; ++col)
1571 Sblock[row * bspecC->n + col] = 0; //(row == col)? 1 : 0;
1574 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1575 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasConjTrans,
1576 particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
1577 &one /*alpha*/, Sblock/*A*/, particle_fullsizeC/*ldA*/,
1578 omC + opiC*particle_fullsizeC /*B*/,
1579 orbit_fullsizeC/*ldB*/, &zero /*beta*/,
1580 tmp /*C*/, orbit_packedsizeC /*LDC*/);
1582 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1583 SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
1584 orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
1585 &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
1586 tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
1587 a->target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
1588 packedlen /*ldC*/);
1597 free(tmp);
1598 free(Sblock);
1599 return NULL;
1602 // Almost the same as ...build_modeproblem_matrix_...parallelR
1603 // --> TODO write this in a more generic way to save LoC.
1604 complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
1605 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1606 complex double *target_packed,
1607 const qpms_scatsys_t *ss,
1608 qpms_iri_t iri,
1609 const complex double k,
1610 qpms_bessel_t J
1613 QPMS_UNTESTED;
1614 const size_t packedlen = ss->saecv_sizes[iri];
1615 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1616 return target_packed;
1617 if (target_packed == NULL)
1618 QPMS_CRASHING_MALLOC(target_packed, SQ(packedlen)*sizeof(complex double));
1619 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1621 qpms_ss_pi_t opistartR = 0;
1622 pthread_mutex_t opistartR_mutex;
1623 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex, NULL));
1624 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1625 arg = {ss, &opistartR, &opistartR_mutex, iri, target_packed, J};
1627 // FIXME THIS IS NOT PORTABLE:
1628 long nthreads;
1629 if (qpms_scatsystem_nthreads_override > 0) {
1630 nthreads = qpms_scatsystem_nthreads_override;
1631 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "Using overriding value of %ld thread(s).",
1632 nthreads);
1633 } else {
1634 nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1635 if (nthreads < 1) {
1636 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1637 nthreads, qpms_scatsystem_nthreads_default);
1638 nthreads = qpms_scatsystem_nthreads_default;
1639 } else {
1640 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads);
1643 pthread_t thread_ids[nthreads];
1644 for(long thi = 0; thi < nthreads; ++thi)
1645 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids + thi, NULL,
1646 qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread,
1647 (void *) &arg));
1648 for(long thi = 0; thi < nthreads; ++thi) {
1649 void *retval;
1650 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids[thi], &retval));
1653 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex));
1654 return target_packed;
1658 // Parallel implementation, now default
1659 complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed(
1660 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1661 complex double *target_packed,
1662 const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri
1665 const size_t packedlen = ssw->ss->saecv_sizes[iri];
1666 if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1667 return target_packed;
1668 if (target_packed == NULL)
1669 QPMS_CRASHING_MALLOC(target_packed,SQ(packedlen)*sizeof(complex double));
1670 memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
1672 qpms_ss_pi_t opistartR = 0;
1673 pthread_mutex_t opistartR_mutex;
1674 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex, NULL));
1675 const struct qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1676 arg = {ssw, &opistartR, &opistartR_mutex, iri, target_packed};
1678 // FIXME THIS IS NOT PORTABLE:
1679 long nthreads;
1680 if (qpms_scatsystem_nthreads_override > 0) {
1681 nthreads = qpms_scatsystem_nthreads_override;
1682 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "Using overriding value of %ld thread(s).",
1683 nthreads);
1684 } else {
1685 nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1686 if (nthreads < 1) {
1687 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1688 nthreads, qpms_scatsystem_nthreads_default);
1689 nthreads = qpms_scatsystem_nthreads_default;
1690 } else {
1691 QPMS_DEBUG(QPMS_DBGMSG_THREADS, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads);
1694 pthread_t thread_ids[nthreads];
1695 for(long thi = 0; thi < nthreads; ++thi)
1696 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids + thi, NULL,
1697 qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread,
1698 (void *) &arg));
1699 for(long thi = 0; thi < nthreads; ++thi) {
1700 void *retval;
1701 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids[thi], &retval));
1704 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex));
1705 return target_packed;
1709 complex double *qpms_scatsys_incident_field_vector_full(
1710 complex double *target_full, const qpms_scatsys_t *ss,
1711 qpms_incfield_t f, const void *args, bool add ) {
1712 QPMS_UNTESTED;
1713 if (!target_full) QPMS_CRASHING_CALLOC(target_full, ss->fecv_size,
1714 sizeof(complex double));
1715 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
1716 complex double *ptarget = target_full + ss->fecv_pstarts[pi];
1717 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_pi(ss, pi);
1718 const cart3_t pos = ss->p[pi].pos;
1719 QPMS_ENSURE_SUCCESS(f(ptarget, bspec, pos, args, add));
1721 return target_full;
1725 #if 0
1726 complex double *qpms_scatsys_incident_field_vector_irrep_packed(
1727 complex double *target_full, const qpms_scatsys_t *ss,
1728 const qpms_iri_t iri, qpms_incfield_t f,
1729 const void *args, bool add) {
1730 TODO;
1732 #endif
1735 complex double *qpms_scatsys_apply_Tmatrices_full(
1736 complex double *target_full, const complex double *inc_full,
1737 const qpms_scatsys_at_omega_t *ssw) {
1738 QPMS_UNTESTED;
1739 const qpms_scatsys_t *ss = ssw->ss;
1740 if (!target_full) QPMS_CRASHING_CALLOC(target_full, ss->fecv_size,
1741 sizeof(complex double));
1742 for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
1743 complex double *ptarget = target_full + ss->fecv_pstarts[pi];
1744 const complex double *psrc = inc_full + ss->fecv_pstarts[pi];
1745 // TODO check whether T-matrix is non-virtual after virtual t-matrices are implemented.
1746 const qpms_tmatrix_t *T = ssw->tm[ss->p[pi].tmatrix_id];
1747 qpms_apply_tmatrix(ptarget, psrc, T);
1749 return target_full;
1754 ccart3_t qpms_scatsys_eval_E(const qpms_scatsys_t *ss,
1755 const complex double *cvf, const cart3_t where,
1756 const complex double k) {
1757 QPMS_UNTESTED;
1758 ccart3_t res = {0,0,0};
1759 ccart3_t res_kc = {0,0,0}; // kahan sum compensation
1761 for (qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
1762 const qpms_vswf_set_spec_t *bspec = qpms_ss_bspec_pi(ss, pi);
1763 const cart3_t particle_pos = ss->p[pi].pos;
1764 const complex double *particle_cv = cvf + ss->fecv_pstarts[pi];
1766 const csph_t kr = sph_cscale(k, cart2sph(
1767 cart3_substract(where, particle_pos)));
1768 const csphvec_t E_sph = qpms_eval_uvswf(bspec, particle_cv, kr,
1769 QPMS_HANKEL_PLUS);
1770 const ccart3_t E_cart = csphvec2ccart_csph(E_sph, kr);
1771 ckahanadd(&(res.x), &(res_kc.x), E_cart.x);
1772 ckahanadd(&(res.y), &(res_kc.y), E_cart.y);
1773 ckahanadd(&(res.z), &(res_kc.z), E_cart.z);
1775 return res;
1778 #if 0
1779 ccart3_t qpms_scatsys_eval_E_irrep(const qpms_scatsys_t *ss,
1780 qpms_iri_t iri, const complex double *cvr, cart3_t where) {
1781 TODO;
1783 #endif
1785 void qpms_ss_LU_free(qpms_ss_LU lu) {
1786 free(lu.a);
1787 free(lu.ipiv);
1790 qpms_ss_LU qpms_scatsysw_modeproblem_matrix_full_factorise(complex double *mpmatrix_full,
1791 int *target_piv, const qpms_scatsys_at_omega_t *ssw) {
1792 const qpms_scatsys_t *ss = ssw->ss;
1793 QPMS_ENSURE(mpmatrix_full, "A non-NULL pointer to the pre-calculated mode matrix is required");
1794 if (!target_piv) QPMS_CRASHING_MALLOC(target_piv, ss->fecv_size * sizeof(int));
1795 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR, ss->fecv_size, ss->fecv_size,
1796 mpmatrix_full, ss->fecv_size, target_piv));
1797 qpms_ss_LU lu;
1798 lu.a = mpmatrix_full;
1799 lu.ipiv = target_piv;
1800 lu.ssw = ssw;
1801 lu.full = true;
1802 lu.iri = -1;
1803 return lu;
1806 qpms_ss_LU qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(complex double *mpmatrix_packed,
1807 int *target_piv, const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri) {
1808 QPMS_ENSURE(mpmatrix_packed, "A non-NULL pointer to the pre-calculated mode matrix is required");
1809 size_t n = ssw->ss->saecv_sizes[iri];
1810 if (!target_piv) QPMS_CRASHING_MALLOC(target_piv, n * sizeof(int));
1811 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR, n, n,
1812 mpmatrix_packed, n, target_piv));
1813 qpms_ss_LU lu;
1814 lu.a = mpmatrix_packed;
1815 lu.ipiv = target_piv;
1816 lu.ssw = ssw;
1817 lu.full = false;
1818 lu.iri = iri;
1819 return lu;
1822 qpms_ss_LU qpms_scatsysw_build_modeproblem_matrix_full_LU(
1823 complex double *target, int *target_piv,
1824 const qpms_scatsys_at_omega_t *ssw){
1825 target = qpms_scatsysw_build_modeproblem_matrix_full(target, ssw);
1826 return qpms_scatsysw_modeproblem_matrix_full_factorise(target, target_piv, ssw);
1829 qpms_ss_LU qpms_scatsys_build_modeproblem_matrix_irrep_packed_LU(
1830 complex double *target, int *target_piv,
1831 const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri){
1832 target = qpms_scatsys_build_modeproblem_matrix_irrep_packed(target, ssw, iri);
1833 return qpms_scatsys_modeproblem_matrix_irrep_packed_factorise(target, target_piv, ssw, iri);
1836 complex double *qpms_scatsys_scatter_solve(
1837 complex double *f, const complex double *a_inc, qpms_ss_LU lu) {
1838 const size_t n = lu.full ? lu.ssw->ss->fecv_size : lu.ssw->ss->saecv_sizes[lu.iri];
1839 if (!f) QPMS_CRASHING_MALLOC(f, n * sizeof(complex double));
1840 memcpy(f, a_inc, n*sizeof(complex double)); // It will be rewritten by zgetrs
1841 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR, 'N' /*trans*/, n /*n*/, 1 /*nrhs number of right hand sides*/,
1842 lu.a /*a*/, n /*lda*/, lu.ipiv /*ipiv*/, f/*b*/, 1 /*ldb; CHECKME*/));
1843 return f;