3 #define lapack_complex_double complex double
4 #define lapack_complex_double_real(z) (creal(z))
5 #define lapack_complex_double_imag(z) (cimag(z))
9 #include "scatsystem.h"
13 #include "symmetries.h"
17 #include "quaternions.h"
19 #include "qpms_error.h"
20 #include "translations.h"
21 #include "tmatrices.h"
24 #include "tolerances.h"
26 #ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
28 #define SERIAL_ZGEMM qpms_zgemm
30 #define SERIAL_ZGEMM cblas_zgemm
33 #define SQ(x) ((x)*(x))
34 #define QPMS_SCATSYS_LEN_RTOL 1e-13
35 #define QPMS_SCATSYS_TMATRIX_ATOL 1e-12
36 #define QPMS_SCATSYS_TMATRIX_RTOL 1e-12
38 long qpms_scatsystem_nthreads_default
= 4;
39 long qpms_scatsystem_nthreads_override
= 0;
41 void qpms_scatsystem_set_nthreads(long n
) {
42 qpms_scatsystem_nthreads_override
= n
;
45 // ------------ Stupid implementation of qpms_scatsys_apply_symmetry() -------------
47 #define MIN(x,y) (((x)<(y))?(x):(y))
48 #define MAX(x,y) (((x)>(y))?(x):(y))
50 // The following functions are just to make qpms_scatsys_apply_symmetry more readable.
51 // They are not to be used elsewhere, as they do not perform any array capacity checks etc.
53 /// Compare two orbit types in a scattering system.
54 static bool orbit_types_equal(const qpms_ss_orbit_type_t
*a
, const qpms_ss_orbit_type_t
*b
) {
55 if (a
->size
!= b
->size
) return false;
56 if (memcmp(a
->action
, b
->action
, a
->size
*sizeof(qpms_ss_orbit_pi_t
))) return false;
57 if (memcmp(a
->tmatrices
, b
->tmatrices
, a
->size
*sizeof(qpms_ss_tmi_t
))) return false;
61 // Extend the action to all particles in orbit if only the action on the 0th
62 // particle has been filled.
63 static void extend_orbit_action(qpms_scatsys_t
*ss
, qpms_ss_orbit_type_t
*ot
) {
64 for(qpms_ss_orbit_pi_t src
= 1; src
< ot
->size
; ++src
) {
65 // find any element g that sends 0 to src:
67 for (g
= 0; g
< ss
->sym
->order
; ++g
)
68 if (ot
->action
[g
] == src
) break;
69 assert(g
< ss
->sym
->order
);
70 // invg sends src to 0
71 qpms_gmi_t invg
= qpms_finite_group_inv(ss
->sym
, g
);
72 for (qpms_gmi_t f
= 0; f
< ss
->sym
->order
; ++f
)
73 // if f sends 0 to dest, then f * invg sends src to dest
74 ot
->action
[src
* ss
->sym
->order
+
75 qpms_finite_group_mul(ss
->sym
,f
,invg
)] = ot
->action
[f
];
79 //Add orbit type to a scattering system, updating the ss->otspace_end pointer accordingly
80 static void add_orbit_type(qpms_scatsys_t
*ss
, const qpms_ss_orbit_type_t
*ot_current
) {
81 qpms_ss_orbit_type_t
* const ot_new
= & (ss
->orbit_types
[ss
->orbit_type_count
]);
82 ot_new
->size
= ot_current
->size
;
84 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_tmi(ss
, ot_current
->tmatrices
[0]);
85 const size_t bspecn
= bspec
->n
;
86 ot_new
->bspecn
= bspecn
;
88 const size_t actionsiz
= sizeof(ot_current
->action
[0]) * ot_current
->size
90 ot_new
->action
= (void *) (ss
->otspace_end
);
91 memcpy(ot_new
->action
, ot_current
->action
, actionsiz
);
92 // N.B. we copied mostly garbage ^^^, most of it is initialized just now:
93 extend_orbit_action(ss
, ot_new
);
94 #ifdef DUMP_ORBIT_ACTION
95 fprintf(stderr
, "Orbit action:\n");
96 for (qpms_gmi_t gmi
= 0; gmi
< ss
->sym
->order
; ++gmi
) {
97 const qpms_quat4d_t q
= qpms_quat_4d_from_2c(ss
->sym
->rep3d
[gmi
].rot
);
98 fprintf(stderr
, "%+d[%g %g %g %g] ", (int)ss
->sym
->rep3d
[gmi
].det
,
99 q
.c1
, q
.ci
, q
.cj
, q
.ck
);
100 fprintf(stderr
, "%s\t", (ss
->sym
->permrep
&& ss
->sym
->permrep
[gmi
])?
101 ss
->sym
->permrep
[gmi
] : "");
102 for (qpms_ss_orbit_pi_t pi
= 0; pi
< ot_new
->size
; ++pi
)
103 fprintf(stderr
, "%d\t", (int) ot_new
->action
[gmi
+ pi
*ss
->sym
->order
]);
104 fprintf(stderr
, "\n");
107 ss
->otspace_end
+= actionsiz
;
109 const size_t tmsiz
= sizeof(ot_current
->tmatrices
[0]) * ot_current
->size
;
110 ot_new
->tmatrices
= (void *) (ss
->otspace_end
);
111 memcpy(ot_new
->tmatrices
, ot_current
->tmatrices
, tmsiz
);
112 ss
->otspace_end
+= tmsiz
;
114 const size_t irbase_sizes_siz
= sizeof(ot_new
->irbase_sizes
[0]) * ss
->sym
->nirreps
;
115 ot_new
->irbase_sizes
= (void *) (ss
->otspace_end
);
116 ss
->otspace_end
+= irbase_sizes_siz
;
117 ot_new
->irbase_cumsizes
= (void *) (ss
->otspace_end
);
118 ss
->otspace_end
+= irbase_sizes_siz
;
119 ot_new
->irbase_offsets
= (void *) (ss
->otspace_end
);
120 ss
->otspace_end
+= irbase_sizes_siz
;
122 const size_t irbases_siz
= sizeof(ot_new
->irbases
[0]) * SQ(ot_new
->size
* bspecn
);
123 ot_new
->irbases
= (void *) (ss
->otspace_end
);
124 ss
->otspace_end
+= irbases_siz
;
126 size_t lastbs
, bs_cumsum
= 0;
127 for(qpms_iri_t iri
= 0; iri
< ss
->sym
->nirreps
; ++iri
) {
128 ot_new
->irbase_offsets
[iri
] = bs_cumsum
* bspecn
* ot_new
->size
;
129 qpms_orbit_irrep_basis(&lastbs
,
130 ot_new
->irbases
+ bs_cumsum
*ot_new
->size
*bspecn
,
131 ot_new
, bspec
, ss
->sym
, iri
);
132 ot_new
->irbase_sizes
[iri
] = lastbs
;
134 ot_new
->irbase_cumsizes
[iri
] = bs_cumsum
;
136 if(bs_cumsum
!= ot_new
->size
* bspecn
)
137 qpms_pr_error_at_flf(__FILE__
, __LINE__
, __func__
,
138 "The cumulative size of the symmetry-adapted bases is wrong; "
139 "expected %d = %d * %d, got %d.",
140 ot_new
->size
* bspecn
, ot_new
->size
, bspecn
, bs_cumsum
);
141 ot_new
->instance_count
= 0;
142 ss
->orbit_type_count
++;
146 // Almost 200 lines. This whole thing deserves a rewrite!
147 qpms_scatsys_at_omega_t
*qpms_scatsys_apply_symmetry(const qpms_scatsys_t
*orig
,
148 const qpms_finite_group_t
*sym
, complex double omega
,
149 const qpms_tolerance_spec_t
*tol
) {
150 // TODO check data sanity
152 qpms_l_t lMax
= 0; // the overall lMax of all base specs.
153 qpms_normalisation_t normalisation
= QPMS_NORMALISATION_UNDEF
;
155 // First, determine the rough radius of the array; it should be nonzero
156 // in order to particle position equivalence work correctly
159 double minx
= +INFINITY
, miny
= +INFINITY
, minz
= +INFINITY
;
160 double maxx
= -INFINITY
, maxy
= -INFINITY
, maxz
= -INFINITY
;
161 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
) {
162 minx
= MIN(minx
,orig
->p
[i
].pos
.x
);
163 miny
= MIN(miny
,orig
->p
[i
].pos
.y
);
164 minz
= MIN(minz
,orig
->p
[i
].pos
.z
);
165 maxx
= MAX(maxx
,orig
->p
[i
].pos
.x
);
166 maxy
= MAX(maxy
,orig
->p
[i
].pos
.y
);
167 maxz
= MAX(maxz
,orig
->p
[i
].pos
.z
);
169 lenscale
= (fabs(maxx
)+fabs(maxy
)+fabs(maxz
)+(maxx
-minx
)+(maxy
-miny
)+(maxz
-minz
)) / 3;
172 // Second, check that there are no duplicit positions in the input system.
173 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
)
174 for (qpms_ss_pi_t j
= 0; j
< i
; ++j
)
175 assert(!cart3_isclose(orig
->p
[i
].pos
, orig
->p
[j
].pos
, 0, QPMS_SCATSYS_LEN_RTOL
* lenscale
));
178 // Allocate T-matrix, particle and particle orbit info arrays
180 QPMS_CRASHING_MALLOC(ss
, sizeof(qpms_scatsys_t
));
181 ss
->lenscale
= lenscale
;
184 // Copy the qpms_tmatrix_fuction_t from orig
185 ss
->tmg_count
= orig
->tmg_count
;
186 QPMS_CRASHING_MALLOC(ss
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
187 memcpy(ss
->tmg
, orig
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
189 ss
->tm_capacity
= sym
->order
* orig
->tm_count
;
190 QPMS_CRASHING_MALLOC(ss
->tm
, ss
->tm_capacity
* sizeof(*(ss
->tm
)));
192 ss
->p_capacity
= sym
->order
* orig
->p_count
;
193 QPMS_CRASHING_MALLOC(ss
->p
, ss
->p_capacity
* sizeof(qpms_particle_tid_t
));
194 QPMS_CRASHING_MALLOC(ss
->p_orbitinfo
, ss
->p_capacity
* sizeof(qpms_ss_particle_orbitinfo_t
));
195 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_capacity
; ++pi
) {
196 ss
->p_orbitinfo
[pi
].t
= QPMS_SS_P_ORBITINFO_UNDEF
;
197 ss
->p_orbitinfo
[pi
].p
= QPMS_SS_P_ORBITINFO_UNDEF
;
200 // Evaluate the original T-matrices at omega
201 qpms_tmatrix_t
**tm_orig_omega
;
202 QPMS_CRASHING_MALLOC(tm_orig_omega
, orig
->tmg_count
* sizeof(*tm_orig_omega
));
203 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
204 tm_orig_omega
[i
] = qpms_tmatrix_init_from_function(orig
->tmg
[i
], omega
);
206 // Evaluate the medium and derived T-matrices at omega.
207 qpms_scatsys_at_omega_t
*ssw
;
208 QPMS_CRASHING_MALLOC(ssw
, sizeof(*ssw
)); // returned
211 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
212 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
213 // we will be using ss->tm_capacity also for ssw->tm
214 QPMS_CRASHING_MALLOC(ssw
->tm
, ss
->tm_capacity
* sizeof(*(ssw
->tm
))); // returned
216 // Evaluate T-matrices at omega; checking for duplicities
218 ss
->max_bspecn
= 0; // We'll need it later.for memory alloc estimates.
220 qpms_ss_tmi_t tm_dupl_remap
[ss
->tm_capacity
]; // Auxilliary array to label remapping the indices after ignoring t-matrix duplicities; VLA!
222 for (qpms_ss_tmi_t i
= 0; i
< orig
->tm_count
; ++i
) {
223 qpms_tmatrix_t
*ti
= qpms_tmatrix_apply_operation(&(orig
->tm
[i
].op
), tm_orig_omega
[orig
->tm
[i
].tmgi
]);
225 for (j
= 0; j
< ss
->tm_count
; ++j
)
226 if (qpms_tmatrix_isclose(ssw
->tm
[i
], ssw
->tm
[j
], tol
->rtol
, tol
->atol
)) {
229 if (j
== ss
->tm_count
) { // duplicity not found, save both the "abstract" and "at omega" T-matrices
230 qpms_tmatrix_operation_copy(&ss
->tm
[j
].op
, &orig
->tm
[j
].op
);
231 ss
->tm
[j
].tmgi
= orig
->tm
[j
].tmgi
; // T-matrix functions are preserved.
233 ss
->max_bspecn
= MAX(ssw
->tm
[j
]->spec
->n
, ss
->max_bspecn
);
234 lMax
= MAX(lMax
, ssw
->tm
[j
]->spec
->lMax
);
237 else qpms_tmatrix_free(ti
);
238 tm_dupl_remap
[i
] = j
;
239 if (normalisation
== QPMS_NORMALISATION_UNDEF
)
240 normalisation
= ssw
->tm
[i
]->spec
->norm
;
241 // We expect all bspec norms to be the same.
242 else QPMS_ENSURE(normalisation
== ssw
->tm
[j
]->spec
->norm
,
243 "Normalisation convention must be the same for all T-matrices."
244 " %d != %d\n", normalisation
, ssw
->tm
[j
]->spec
->norm
);
247 // Free the original T-matrices at omega
248 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
249 qpms_tmatrix_free(tm_orig_omega
[i
]);
252 // Copy particles, remapping the t-matrix indices
253 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++(i
)) {
254 ss
->p
[i
] = orig
->p
[i
];
255 ss
->p
[i
].tmatrix_id
= tm_dupl_remap
[ss
->p
[i
].tmatrix_id
];
257 ss
->p_count
= orig
->p_count
;
259 // allocate t-matrix symmetry map
260 QPMS_CRASHING_MALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* sym
->order
* ss
->tm_count
);
262 // Extend the T-matrices list by the symmetry operations
263 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
264 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
){
265 const size_t d
= ssw
->tm
[tmi
]->spec
->n
;
267 QPMS_CRASHING_MALLOC(m
, d
*d
*sizeof(complex double)); // ownership passed to ss->tm[ss->tm_count].op
268 qpms_irot3_uvswfi_dense(m
, ssw
->tm
[tmi
]->spec
, sym
->rep3d
[gmi
]);
269 qpms_tmatrix_t
*transformed
= qpms_tmatrix_apply_symop(ssw
->tm
[tmi
], m
);
271 for (tmj
= 0; tmj
< ss
->tm_count
; ++tmj
)
272 if (qpms_tmatrix_isclose(transformed
, ssw
->tm
[tmj
], tol
->rtol
, tol
->atol
))
274 if (tmj
< ss
->tm_count
) { // HIT, transformed T-matrix already exists
275 //TODO some "rounding error cleanup" symmetrisation could be performed here?
276 qpms_tmatrix_free(transformed
);
277 } else { // MISS, save the matrix (also the "abstract" one)
278 ssw
->tm
[ss
->tm_count
] = transformed
;
279 qpms_tmatrix_operation_compose_chain_init(&(ss
->tm
[ss
->tm_count
].op
), 2, 1);
280 struct qpms_tmatrix_operation_compose_chain
* const o
= &(ss
->tm
[ss
->tm_count
].op
.op
.compose_chain
);
281 o
->ops
[0] = & ss
->tm
[tmj
].op
; // Let's just borrow this
282 o
->ops_owned
[0] = false;
283 o
->opmem
[0].typ
= QPMS_TMATRIX_OPERATION_LRMATRIX
;
284 o
->opmem
[0].op
.lrmatrix
.m
= m
;
285 o
->opmem
[0].op
.lrmatrix
.m_size
= d
* d
;
286 o
->ops
[1] = o
->opmem
;
287 o
->ops_owned
[1] = true;
290 ss
->tm_sym_map
[gmi
+ tmi
* sym
->order
] = tmj
; // In any case, tmj now indexes the correct transformed matrix
292 // Possibly free some space using the new ss->tm_count instead of (old) ss->tm_count*sym->order
293 QPMS_CRASHING_REALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* ss
->tm_count
);
294 // tm could be realloc'd as well, but those are just pointers, not likely many.
297 // allocate particle symmetry map
298 QPMS_CRASHING_MALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* sym
->order
* ss
->p_count
);
299 // allocate orbit type array (TODO realloc later if too long)
300 ss
->orbit_type_count
= 0;
301 QPMS_CRASHING_CALLOC(ss
->orbit_types
, ss
->p_count
, sizeof(qpms_ss_orbit_type_t
));
303 QPMS_CRASHING_MALLOC(ss
->otspace
, // reallocate later
304 (sizeof(qpms_ss_orbit_pi_t
) * sym
->order
* sym
->order
305 + sizeof(qpms_ss_tmi_t
) * sym
->order
306 + 3*sizeof(size_t) * sym
->nirreps
307 + sizeof(complex double) * SQ(sym
->order
* ss
->max_bspecn
)) * ss
->p_count
309 ss
->otspace_end
= ss
->otspace
;
311 // Workspace for the orbit type determination
312 qpms_ss_orbit_type_t ot_current
;
313 qpms_ss_orbit_pi_t ot_current_action
[sym
->order
* sym
->order
];
314 qpms_ss_tmi_t ot_current_tmatrices
[sym
->order
];
316 qpms_ss_pi_t current_orbit
[sym
->order
];
317 ot_current
.action
= ot_current_action
;
318 ot_current
.tmatrices
= ot_current_tmatrices
;
321 // Extend the particle list by the symmetry operations, check that particles mapped by symmetry ops on themselves
322 // have the correct symmetry
323 // TODO this could be sped up to O(npart * log(npart)); let's see later whether needed.
324 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
325 const bool new_orbit
= (ss
->p_orbitinfo
[pi
].t
== QPMS_SS_P_ORBITINFO_UNDEF
); // TODO build the orbit!!!
327 current_orbit
[0] = pi
;
329 ot_current
.tmatrices
[0] = ss
->p
[pi
].tmatrix_id
;
330 #ifdef DUMP_PARTICLE_POSITIONS
331 cart3_t pos
= ss
->p
[pi
].pos
;
332 fprintf(stderr
, "An orbit [%.4g, %.4g, %.4g] => ", pos
.x
, pos
.y
, pos
.z
);
336 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
) {
337 cart3_t newpoint
= qpms_irot3_apply_cart3(sym
->rep3d
[gmi
], ss
->p
[pi
].pos
);
338 qpms_ss_tmi_t new_tmi
= ss
->tm_sym_map
[gmi
+ ss
->p
[pi
].tmatrix_id
* sym
->order
]; // transformed t-matrix index
340 for (pj
= 0; pj
< ss
->p_count
; ++pj
)
341 if (cart3_isclose(newpoint
, ss
->p
[pj
].pos
, 0, ss
->lenscale
* QPMS_SCATSYS_LEN_RTOL
)) {
342 if (new_tmi
!= ss
->p
[pj
].tmatrix_id
)
343 qpms_pr_error("The %d. particle with coords (%lg, %lg, %lg) "
344 "is mapped to %d. another (or itself) with cords (%lg, %lg, %lg) "
345 "without having the required symmetry", (int)pi
,
346 ss
->p
[pi
].pos
.x
, ss
->p
[pi
].pos
.y
, ss
->p
[pi
].pos
.z
,
347 (int)pj
, ss
->p
[pj
].pos
.x
, ss
->p
[pj
].pos
.y
, ss
->p
[pj
].pos
.z
);
350 if (pj
< ss
->p_count
) { // HIT, the particle is transformed to an "existing" one.
352 } else { // MISS, the symmetry transforms the particle to a new location, so add it.
353 qpms_particle_tid_t newparticle
= {newpoint
, new_tmi
};
354 ss
->p
[ss
->p_count
] = newparticle
;
356 #ifdef DUMP_PARTICLE_POSITIONS
358 fprintf(stderr
, "[%.4g, %.4g, %.4g] ", newpoint
.x
, newpoint
.y
, newpoint
.z
);
361 ss
->p_sym_map
[gmi
+ pi
* sym
->order
] = pj
;
364 // Now check whether the particle (result of the symmetry op) is already in the current orbit
365 qpms_ss_orbit_pi_t opj
;
366 for (opj
= 0; opj
< ot_current
.size
; ++opj
)
367 if (current_orbit
[opj
] == pj
) break; // HIT, pj already on current orbit
368 if (opj
== ot_current
.size
) { // MISS, pj is new on the orbit, extend the size and set the T-matrix id
369 current_orbit
[opj
] = pj
;
371 ot_current
.tmatrices
[opj
] = ss
->p
[pj
].tmatrix_id
;
373 ot_current
.action
[gmi
] = opj
;
376 if (new_orbit
) { // Now compare if the new orbit corresponds to some of the existing types.
377 #ifdef DUMP_PARTICLE_POSITIONS
381 for(oti
= 0; oti
< ss
->orbit_type_count
; ++oti
)
382 if (orbit_types_equal(&ot_current
, &(ss
->orbit_types
[oti
]))) break; // HIT, orbit type already exists
383 assert(0 == sym
->order
% ot_current
.size
);
384 if (oti
== ss
->orbit_type_count
) // MISS, add the new orbit type
385 add_orbit_type(ss
, &ot_current
);
387 // Walk through the orbit again and set up the orbit info of the particles
388 for (qpms_ss_orbit_pi_t opi
= 0; opi
< ot_current
.size
; ++opi
) {
389 const qpms_ss_pi_t pi_opi
= current_orbit
[opi
];
390 ss
->p_orbitinfo
[pi_opi
].t
= oti
;
391 ss
->p_orbitinfo
[pi_opi
].p
= opi
;
392 ss
->p_orbitinfo
[pi_opi
].osn
= ss
->orbit_types
[oti
].instance_count
;
394 ss
->orbit_types
[oti
].instance_count
++;
397 // Possibly free some space using the new ss->p_count instead of (old) ss->p_count*sym->order
398 QPMS_CRASHING_REALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* ss
->p_count
);
399 QPMS_CRASHING_REALLOC(ss
->p
, sizeof(qpms_particle_tid_t
) * ss
->p_count
);
400 QPMS_CRASHING_REALLOC(ss
->p_orbitinfo
, sizeof(qpms_ss_particle_orbitinfo_t
)*ss
->p_count
);
401 ss
->p_capacity
= ss
->p_count
;
403 { // Reallocate the orbit type data space and update the pointers if needed.
404 size_t otspace_sz
= ss
->otspace_end
- ss
->otspace
;
405 char *old_otspace
= ss
->otspace
;
406 QPMS_CRASHING_REALLOC(ss
->otspace
, otspace_sz
);
407 ptrdiff_t shift
= ss
->otspace
- old_otspace
;
409 for (size_t oi
= 0; oi
< ss
->orbit_type_count
; ++oi
) {
410 ss
->orbit_types
[oi
].action
= (void *)(((char *) (ss
->orbit_types
[oi
].action
)) + shift
);
411 ss
->orbit_types
[oi
].tmatrices
= (void *)(((char *) (ss
->orbit_types
[oi
].tmatrices
)) + shift
);
412 ss
->orbit_types
[oi
].irbase_sizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_sizes
)) + shift
);
413 ss
->orbit_types
[oi
].irbase_cumsizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_cumsizes
)) + shift
);
414 ss
->orbit_types
[oi
].irbase_offsets
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_offsets
)) + shift
);
415 ss
->orbit_types
[oi
].irbases
= (void *)(((char *) (ss
->orbit_types
[oi
].irbases
)) + shift
);
417 ss
->otspace_end
+= shift
;
421 // Set ss->fecv_size and ss->fecv_pstarts
423 QPMS_CRASHING_MALLOC(ss
->fecv_pstarts
, ss
->p_count
* sizeof(size_t));
424 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
425 ss
->fecv_pstarts
[pi
] = ss
->fecv_size
;
426 ss
->fecv_size
+= ssw
->tm
[ss
->p
[pi
].tmatrix_id
]->spec
->n
; // That's a lot of dereferencing!
429 QPMS_CRASHING_MALLOC(ss
->saecv_sizes
, sizeof(size_t) * sym
->nirreps
);
430 QPMS_CRASHING_MALLOC(ss
->saecv_ot_offsets
, sizeof(size_t) * sym
->nirreps
* ss
->orbit_type_count
);
431 for(qpms_iri_t iri
= 0; iri
< sym
->nirreps
; ++iri
) {
432 ss
->saecv_sizes
[iri
] = 0;
433 for(qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
434 ss
->saecv_ot_offsets
[iri
* ss
->orbit_type_count
+ oti
] = ss
->saecv_sizes
[iri
];
435 const qpms_ss_orbit_type_t
*ot
= &(ss
->orbit_types
[oti
]);
436 ss
->saecv_sizes
[iri
] += ot
->instance_count
* ot
->irbase_sizes
[iri
];
440 qpms_ss_pi_t p_ot_cumsum
= 0;
441 for (qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
442 qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
443 ot
->p_offset
= p_ot_cumsum
;
444 p_ot_cumsum
+= ot
->size
* ot
->instance_count
;
447 // Set ss->p_by_orbit[]
448 QPMS_CRASHING_MALLOC(ss
->p_by_orbit
, sizeof(qpms_ss_pi_t
) * ss
->p_count
);
449 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
450 const qpms_ss_particle_orbitinfo_t
*oi
= ss
->p_orbitinfo
+ pi
;
451 const qpms_ss_oti_t oti
= oi
->t
;
452 const qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
453 ss
->p_by_orbit
[ot
->p_offset
+ ot
->size
* oi
->osn
+ oi
->p
] = pi
;
456 ss
->c
= qpms_trans_calculator_init(lMax
, normalisation
);
461 void qpms_scatsys_free(qpms_scatsys_t
*ss
) {
466 free(ss
->fecv_pstarts
);
467 free(ss
->tm_sym_map
);
470 free(ss
->p_orbitinfo
);
471 free(ss
->orbit_types
);
472 free(ss
->saecv_sizes
);
473 free(ss
->p_by_orbit
);
474 qpms_trans_calculator_free(ss
->c
);
481 // (copypasta from symmetries.c)
482 // TODO at some point, maybe support also other norms.
483 // (perhaps just use qpms_normalisation_t_factor() at the right places)
484 static inline void check_norm_compat(const qpms_vswf_set_spec_t
*s
)
486 switch (s
->norm
& QPMS_NORMALISATION_NORM_BITS
) {
487 case QPMS_NORMALISATION_NORM_POWER
:
489 case QPMS_NORMALISATION_NORM_SPHARM
:
492 QPMS_WTF
; // Only SPHARM and POWER norms are supported right now.
496 complex double *qpms_orbit_action_matrix(complex double *target
,
497 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
498 const qpms_finite_group_t
*sym
, const qpms_gmi_t g
) {
499 assert(sym
); assert(g
< sym
->order
);
501 assert(ot
); assert(ot
->size
> 0);
502 // check_norm_compat(bspec); not needed here, the qpms_irot3_uvswfi_dense should complain if necessary
503 const size_t n
= bspec
->n
;
504 const qpms_gmi_t N
= ot
->size
;
506 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
507 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
508 complex double tmp
[n
][n
]; // this is the 'per-particle' action
509 qpms_irot3_uvswfi_dense(tmp
[0], bspec
, sym
->rep3d
[g
]);
510 for(qpms_ss_orbit_pi_t Col
= 0; Col
< ot
->size
; ++Col
) {
511 // Row is the 'destination' of the symmetry operation, Col is the 'source'
512 const qpms_ss_orbit_pi_t Row
= ot
->action
[sym
->order
* Col
+ g
];
513 for(size_t row
= 0; row
< bspec
->n
; ++row
)
514 for(size_t col
= 0; col
< bspec
->n
; ++col
)
515 target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
] = conj(tmp
[row
][col
]); //CHECKCONJ
517 #ifdef DUMP_ACTIONMATRIX
518 fprintf(stderr
,"%d: %s\n",
519 (int) g
, (sym
->permrep
&& sym
->permrep
[g
])?
520 sym
->permrep
[g
] : "");
521 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
522 fprintf(stderr
, "--------------------------\n");
523 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
524 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
525 fprintf(stderr
, "| ");
526 for (size_t col
= 0; col
< bspec
->n
; ++col
)
527 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
529 fprintf(stderr
, "|\n");
532 fprintf(stderr
, "-------------------------------\n\n");
538 complex double *qpms_orbit_irrep_projector_matrix(complex double *target
,
539 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
540 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
543 assert(ot
); assert(ot
->size
> 0);
544 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
545 // check_norm_compat(bspec); // probably not needed here, let the called functions complain if necessary, but CHEKME
546 const size_t n
= bspec
->n
;
547 const qpms_gmi_t N
= ot
->size
;
549 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
550 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
551 // Workspace for the orbit group action matrices
552 complex double *tmp
= malloc(n
*n
*N
*N
*sizeof(complex double));
553 const int d
= sym
->irreps
[iri
].dim
;
554 double prefac
= d
/ (double) sym
->order
;
555 for(int partner
= 0; partner
< d
; ++partner
) {
556 for(qpms_gmi_t g
= 0; g
< sym
->order
; ++g
) {
557 // We use the diagonal elements of D_g
558 complex double D_g_conj
= sym
->irreps
[iri
].m
[g
*d
*d
+ partner
*d
+ partner
];
559 #ifdef DUMP_ACTIONMATRIX
560 fprintf(stderr
,"(factor %+g%+gj) ", creal(D_g_conj
), cimag(D_g_conj
));
562 qpms_orbit_action_matrix(tmp
, ot
, bspec
, sym
, g
);
564 for(size_t i
= 0; i
< n
*n
*N
*N
; ++i
)
565 target
[i
] += prefac
* D_g_conj
* tmp
[i
];
569 #ifdef DUMP_PROJECTORMATRIX
570 fprintf(stderr
,"Projector %d (%s):\n", (int) iri
,
571 sym
->irreps
[iri
].name
?sym
->irreps
[iri
].name
:"");
572 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
573 fprintf(stderr
, "--------------------------\n");
574 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
575 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
576 fprintf(stderr
, "| ");
577 for (size_t col
= 0; col
< bspec
->n
; ++col
)
578 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
580 fprintf(stderr
, "|\n");
583 fprintf(stderr
, "-------------------------------\n\n");
588 #define SVD_ATOL 1e-8
590 complex double *qpms_orbit_irrep_basis(size_t *basis_size
,
591 complex double *target
,
592 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
593 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
596 assert(ot
); assert(ot
->size
> 0);
597 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
598 check_norm_compat(bspec
); // Here I'm not sure; CHECKME
599 const size_t n
= bspec
->n
;
600 const qpms_gmi_t N
= ot
->size
;
601 const bool newtarget
= (target
== NULL
);
603 QPMS_CRASHING_MALLOC(target
,n
*n
*N
*N
*sizeof(complex double));
604 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
606 // Get the projector (also workspace for right sg. vect.)
607 complex double *projector
= qpms_orbit_irrep_projector_matrix(NULL
,
608 ot
, bspec
, sym
, iri
);
609 if(!projector
) abort();
610 // Workspace for the right singular vectors.
611 complex double *V_H
; QPMS_CRASHING_MALLOC(V_H
, n
*n
*N
*N
*sizeof(complex double));
612 // THIS SHOULD NOT BE NECESSARY
613 complex double *U
; QPMS_CRASHING_MALLOC(U
, n
*n
*N
*N
*sizeof(complex double));
614 double *s
; QPMS_CRASHING_MALLOC(s
, n
*N
*sizeof(double));
616 int info
= LAPACKE_zgesdd(LAPACK_ROW_MAJOR
,
617 'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
618 n
*N
/* m */, n
*N
/* n */, projector
/* a */, n
*N
/* lda */,
619 s
/* s */, U
/* u */, n
*N
/* ldu, irrelev. */, V_H
/* vt */,
621 if (info
) qpms_pr_error_at_flf(__FILE__
, __LINE__
, __func__
,
622 "Something went wrong with the SVD.");
626 for(bs
= 0; bs
< n
*N
; ++bs
) {
628 qpms_pr_debug_at_flf(__FILE__
, __LINE__
, __func__
,
629 "%d. irrep, %zd. SV: %.16lf", (int) iri
, bs
, s
[bs
]);
631 if(s
[bs
] > 1 + SVD_ATOL
) qpms_pr_error_at_flf(__FILE__
, __LINE__
, __func__
,
632 "%zd. SV too large: %.16lf", bs
, s
[bs
]);
633 if(s
[bs
] > SVD_ATOL
&& fabs(1-s
[bs
]) > SVD_ATOL
)
634 qpms_pr_error_at_flf(__FILE__
, __LINE__
, __func__
,
635 "%zd. SV in the 'wrong' interval: %.16lf", bs
, s
[bs
]);
636 if(s
[bs
] < SVD_ATOL
) break;
639 memcpy(target
, V_H
, bs
*N
*n
*sizeof(complex double));
640 if(newtarget
) QPMS_CRASHING_REALLOC(target
, bs
*N
*n
*sizeof(complex double));
641 if(basis_size
) *basis_size
= bs
;
649 complex double *qpms_scatsys_irrep_transform_matrix(complex double *U
,
650 const qpms_scatsys_t
*ss
, qpms_iri_t iri
) {
651 const size_t packedlen
= ss
->saecv_sizes
[iri
];
652 const size_t full_len
= ss
->fecv_size
;
654 QPMS_CRASHING_MALLOC(U
,full_len
* packedlen
* sizeof(complex double));
655 memset(U
, 0, full_len
* packedlen
* sizeof(complex double));
657 size_t fullvec_offset
= 0;
659 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
660 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
661 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
662 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
663 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
664 // This is where the particle's orbit starts in the "packed" vector:
665 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
666 + osn
* ot
->irbase_sizes
[iri
];
667 // Orbit coeff vector's full size:
668 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
669 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
670 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
671 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
673 for (size_t prow
= 0; prow
< orbit_packedsize
; ++prow
)
674 for (size_t pcol
= 0; pcol
< ot
->bspecn
; ++pcol
)
675 U
[full_len
* (packed_orbit_offset
+ prow
) + (fullvec_offset
+ pcol
)]
676 = om
[orbit_fullsize
* prow
+ (opi
* ot
->bspecn
+ pcol
)];
677 fullvec_offset
+= ot
->bspecn
;
683 complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed
,
684 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
686 const size_t packedlen
= ss
->saecv_sizes
[iri
];
687 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
688 return target_packed
;
689 const size_t full_len
= ss
->fecv_size
;
690 if (target_packed
== NULL
)
691 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
692 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
694 // Workspace for the intermediate matrix
696 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
698 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
700 const complex double one
= 1, zero
= 0;
704 CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
705 full_len
/*M*/, packedlen
/*N*/, full_len
/*K*/,
706 &one
/*alpha*/, orig_full
/*A*/, full_len
/*ldA*/,
707 U
/*B*/, full_len
/*ldB*/,
708 &zero
/*beta*/, tmp
/*C*/, packedlen
/*LDC*/);
710 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
711 packedlen
/*M*/, packedlen
/*N*/, full_len
/*K*/,
712 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
713 tmp
/*B*/, packedlen
/*ldB*/, &zero
/*beta*/,
714 target_packed
/*C*/, packedlen
/*ldC*/);
718 return target_packed
;
721 /// Transforms a big "packed" matrix into the full basis (trivial implementation).
722 complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full
,
723 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
724 qpms_iri_t iri
, bool add
){
725 const size_t packedlen
= ss
->saecv_sizes
[iri
];
726 const size_t full_len
= ss
->fecv_size
;
727 if (target_full
== NULL
)
728 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
729 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
731 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
733 // Workspace for the intermediate matrix result
735 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
737 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
739 const complex double one
= 1, zero
= 0;
743 CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
744 packedlen
/*M*/, full_len
/*N*/, packedlen
/*K*/,
745 &one
/*alpha*/, orig_packed
/*A*/, packedlen
/*ldA*/,
746 U
/*B*/, full_len
/*ldB*/,
747 &zero
/*beta*/, tmp
/*C*/, full_len
/*LDC*/);
749 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
750 full_len
/*M*/, full_len
/*N*/, packedlen
/*K*/,
751 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
752 tmp
/*B*/, full_len
/*ldB*/, &one
/*beta*/,
753 target_full
/*C*/, full_len
/*ldC*/);
759 complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed
,
760 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
762 const size_t packedlen
= ss
->saecv_sizes
[iri
];
763 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
764 return target_packed
;
765 const size_t full_len
= ss
->fecv_size
;
766 if (target_packed
== NULL
)
767 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
768 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
770 // Workspace for the intermediate particle-orbit matrix result
772 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
774 const complex double one
= 1, zero
= 0;
776 size_t fullvec_offsetR
= 0;
777 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
778 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
779 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
780 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
781 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
782 // This is where the particle's orbit starts in the "packed" vector:
783 const size_t packed_orbit_offsetR
=
784 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
785 + osnR
* otR
->irbase_sizes
[iri
];
786 // Orbit coeff vector's full size:
787 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
788 const size_t particle_fullsizeR
= otR
->bspecn
;
789 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
790 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
791 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
793 size_t fullvec_offsetC
= 0;
794 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
795 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
796 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
797 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
798 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
799 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
800 // This is where the particle's orbit starts in the "packed" vector:
801 const size_t packed_orbit_offsetC
=
802 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
803 + osnC
* otC
->irbase_sizes
[iri
];
804 // Orbit coeff vector's full size:
805 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
806 const size_t particle_fullsizeC
= otC
->bspecn
;
807 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
808 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
809 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
811 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
812 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
813 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
814 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
815 &one
/*alpha*/, orig_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*A*/,
817 omC
+ opiC
*particle_fullsizeC
/*B*/,
818 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
819 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
821 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
822 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
823 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
824 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
825 orbit_fullsizeR
/*ldA*/,
826 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
827 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
830 fullvec_offsetC
+= otC
->bspecn
;
833 fullvec_offsetR
+= otR
->bspecn
;
836 return target_packed
;
840 /// Transforms a big "packed" matrix into the full basis.
842 complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full
,
843 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
844 qpms_iri_t iri
, bool add
){
845 const size_t packedlen
= ss
->saecv_sizes
[iri
];
846 const size_t full_len
= ss
->fecv_size
;
847 if (target_full
== NULL
)
848 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
849 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
851 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
853 // Workspace for the intermediate particle-orbit matrix result
855 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
857 const complex double one
= 1, zero
= 0;
859 size_t fullvec_offsetR
= 0;
860 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
861 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
862 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
863 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
864 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
865 // This is where the particle's orbit starts in the "packed" vector:
866 const size_t packed_orbit_offsetR
=
867 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
868 + osnR
* otR
->irbase_sizes
[iri
];
869 // Orbit coeff vector's full size:
870 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
871 const size_t particle_fullsizeR
= otR
->bspecn
;
872 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
873 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
874 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
876 size_t fullvec_offsetC
= 0;
877 if (orbit_packedsizeR
) // avoid crash on empty irrep
878 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
879 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
880 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
881 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
882 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
883 // This is where the particle's orbit starts in the "packed" vector:
884 const size_t packed_orbit_offsetC
=
885 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
886 + osnC
* otC
->irbase_sizes
[iri
];
887 // Orbit coeff vector's full size:
888 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
889 const size_t particle_fullsizeC
= otC
->bspecn
;
890 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
891 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
892 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
893 if (orbit_packedsizeC
) { // avoid crash on empty irrep
895 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC]
896 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
897 orbit_packedsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeC
/*K*/,
898 &one
/*alpha*/, orig_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*A*/,
900 omC
+ opiC
*particle_fullsizeC
/*B*/,
901 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
902 tmp
/*C*/, particle_fullsizeC
/*LDC*/);
904 // target[oiR|piR,oiC|piC] += U*[...] tmp[...]
905 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
906 particle_fullsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeR
/*K*/,
907 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
908 orbit_fullsizeR
/*ldA*/,
909 tmp
/*B*/, particle_fullsizeC
/*ldB*/, &one
/*beta*/,
910 target_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*C*/,
913 fullvec_offsetC
+= otC
->bspecn
;
915 fullvec_offsetR
+= otR
->bspecn
;
922 /// Projects a "big" vector onto an irrep.
924 complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed
,
925 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
926 const qpms_iri_t iri
) {
927 const size_t packedlen
= ss
->saecv_sizes
[iri
];
928 if (!packedlen
) return target_packed
; // Empty irrep
929 if (target_packed
== NULL
)
930 QPMS_CRASHING_MALLOC(target_packed
, packedlen
*sizeof(complex double));
931 memset(target_packed
, 0, packedlen
*sizeof(complex double));
933 const complex double one
= 1;
935 size_t fullvec_offset
= 0;
936 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
937 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
938 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
939 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
940 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
941 // This is where the particle's orbit starts in the "packed" vector:
942 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
943 + osn
* ot
->irbase_sizes
[iri
];
944 // Orbit coeff vector's full size:
945 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
946 const size_t particle_fullsize
= ot
->bspecn
;
947 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
948 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
949 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
950 if (orbit_packedsize
) // avoid crash on empty irrep
951 cblas_zgemv(CblasRowMajor
/*order*/, CblasNoTrans
/*transA*/,
952 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/,
953 om
+ opi
*particle_fullsize
/*A*/, orbit_fullsize
/*lda*/,
954 orig_full
+fullvec_offset
/*X*/, 1/*incX*/,
955 &one
/*beta*/, target_packed
+packed_orbit_offset
/*Y*/, 1/*incY*/);
956 fullvec_offset
+= ot
->bspecn
;
958 return target_packed
;
961 /// Transforms a big "packed" vector into the full basis.
963 complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full
,
964 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
965 const qpms_iri_t iri
, bool add
) {
966 const size_t full_len
= ss
->fecv_size
;
967 if (target_full
== NULL
)
968 QPMS_CRASHING_MALLOC(target_full
, full_len
*sizeof(complex double));
969 if (!add
) memset(target_full
, 0, full_len
*sizeof(complex double));
971 const complex double one
= 1;
972 const size_t packedlen
= ss
->saecv_sizes
[iri
];
973 if (!packedlen
) return target_full
; // Completely empty irrep
975 size_t fullvec_offset
= 0;
976 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
977 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
978 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
979 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
980 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
981 // This is where the particle's orbit starts in the "packed" vector:
982 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
983 + osn
* ot
->irbase_sizes
[iri
];
984 // Orbit coeff vector's full size:
985 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
986 const size_t particle_fullsize
= ot
->bspecn
;
987 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
988 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
989 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
991 if (orbit_packedsize
) // empty irrep, avoid zgemv crashing.
992 cblas_zgemv(CblasRowMajor
/*order*/, CblasConjTrans
/*transA*/,
993 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/, om
+ opi
*particle_fullsize
/*A*/,
994 orbit_fullsize
/*lda*/, orig_packed
+packed_orbit_offset
/*X*/, 1/*incX*/, &one
/*beta*/,
995 target_full
+fullvec_offset
/*Y*/, 1/*incY*/);
997 fullvec_offset
+= ot
->bspecn
;
1002 complex double *qpms_scatsys_build_translation_matrix_full(
1003 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1004 complex double *target
,
1005 const qpms_scatsys_t
*ss
,
1006 complex double k
///< Wave number to use in the translation matrix.
1009 return qpms_scatsys_build_translation_matrix_e_full(
1010 target
, ss
, k
, QPMS_HANKEL_PLUS
);
1013 complex double *qpms_scatsys_build_translation_matrix_e_full(
1014 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1015 complex double *target
,
1016 const qpms_scatsys_t
*ss
,
1017 complex double k
, ///< Wave number to use in the translation matrix.
1018 qpms_bessel_t J
///< Bessel function type.
1021 const size_t full_len
= ss
->fecv_size
;
1023 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1024 memset(target
, 0, SQ(full_len
) * sizeof(complex double)); //unnecessary?
1025 { // Non-diagonal part; M[piR, piC] = T[piR] S(piR<-piC)
1026 size_t fullvec_offsetR
= 0;
1027 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1028 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, piR
);
1029 const cart3_t posR
= ss
->p
[piR
].pos
;
1030 size_t fullvec_offsetC
= 0;
1031 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1032 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1033 if(piC
!= piR
) { // The diagonal will be dealt with later.
1034 const cart3_t posC
= ss
->p
[piC
].pos
;
1035 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1036 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
,
1037 bspecR
, full_len
, bspecC
, 1,
1040 fullvec_offsetC
+= bspecC
->n
;
1042 assert(fullvec_offsetC
= full_len
);
1043 fullvec_offsetR
+= bspecR
->n
;
1045 assert(fullvec_offsetR
== full_len
);
1053 complex double *qpms_scatsys_at_omega_build_modeproblem_matrix_full(
1054 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1055 complex double *target
,
1056 const qpms_scatsys_at_omega_t
*ssw
1059 const complex double k
= ssw
->wavenumber
;
1060 const qpms_scatsys_t
*ss
= ssw
->ss
;
1061 const size_t full_len
= ss
->fecv_size
;
1063 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1064 complex double *tmp
;
1065 QPMS_CRASHING_MALLOC(tmp
, SQ(ss
->max_bspecn
) * sizeof(complex double));
1066 memset(target
, 0, SQ(full_len
) * sizeof(complex double)); //unnecessary?
1067 const complex double zero
= 0, minusone
= -1;
1068 { // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
1069 size_t fullvec_offsetR
= 0;
1070 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1071 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1072 const cart3_t posR
= ss
->p
[piR
].pos
;
1073 size_t fullvec_offsetC
= 0;
1074 // dest particle T-matrix
1075 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1076 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1077 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1078 if(piC
!= piR
) { // The diagonal will be dealt with later.
1079 const cart3_t posC
= ss
->p
[piC
].pos
;
1080 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1081 tmp
, // tmp is S(piR<-piC)
1082 bspecR
, bspecC
->n
, bspecC
, 1,
1083 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1084 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1085 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1086 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1087 tmp
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1088 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
/*c*/,
1091 fullvec_offsetC
+= bspecC
->n
;
1093 fullvec_offsetR
+= bspecR
->n
;
1096 // diagonal part M[pi,pi] = +1
1097 for (size_t i
= 0; i
< full_len
; ++i
) target
[full_len
* i
+ i
] = +1;
1103 // Serial reference implementation.
1104 complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_serial(
1105 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1106 complex double *target_packed
,
1107 const qpms_scatsys_at_omega_t
*ssw
,
1111 const qpms_scatsys_t
*ss
= ssw
->ss
;
1112 const complex double k
= ssw
->wavenumber
;
1113 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1114 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1115 return target_packed
;
1116 if (target_packed
== NULL
)
1117 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1118 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1120 // some of the following workspaces are probably redundant; TODO optimize later.
1122 // workspaces for the uncompressed particle<-particle tranlation matrix block
1123 // and the result of multiplying with a T-matrix (times -1)
1124 complex double *Sblock
, *TSblock
;
1125 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1126 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1128 // Workspace for the intermediate particle-orbit matrix result
1129 complex double *tmp
;
1130 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1132 const complex double one
= 1, zero
= 0, minusone
= -1;
1134 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
1135 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
1136 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1137 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
1138 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
1139 // This is where the particle's orbit starts in the "packed" vector:
1140 const size_t packed_orbit_offsetR
=
1141 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1142 + osnR
* otR
->irbase_sizes
[iri
];
1143 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1144 // Orbit coeff vector's full size:
1145 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1146 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1147 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1148 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1149 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1150 const cart3_t posR
= ss
->p
[piR
].pos
;
1151 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1152 // dest particle T-matrix
1153 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1154 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1155 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1156 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1157 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1158 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1159 // This is where the particle's orbit starts in the "packed" vector:
1160 const size_t packed_orbit_offsetC
=
1161 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1162 + osnC
* otC
->irbase_sizes
[iri
];
1163 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1164 // Orbit coeff vector's full size:
1165 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1166 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1167 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1168 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1169 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1171 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1172 if(piC
!= piR
) { // non-diagonal, calculate TS
1173 const cart3_t posC
= ss
->p
[piC
].pos
;
1174 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1175 Sblock
, // Sblock is S(piR->piC)
1176 bspecR
, bspecC
->n
, bspecC
, 1,
1177 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1179 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1180 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1181 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1182 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1183 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1184 } else { // diagonal, fill with diagonal +1
1185 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1186 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1187 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1190 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1191 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1192 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1193 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1194 omC
+ opiC
*particle_fullsizeC
/*B*/,
1195 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1196 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1198 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1199 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1200 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1201 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1202 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1203 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1212 return target_packed
;
1215 complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR(
1216 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1217 complex double *target_packed
,
1218 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1221 const qpms_scatsys_t
*ss
= ssw
->ss
;
1222 const complex double k
= ssw
->wavenumber
;
1223 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1224 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1225 return target_packed
;
1226 if (target_packed
== NULL
)
1227 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1228 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1230 // some of the following workspaces are probably redundant; TODO optimize later.
1232 // workspaces for the uncompressed particle<-particle tranlation matrix block
1233 // and the result of multiplying with a T-matrix (times -1)
1234 complex double *Sblock
, *TSblock
;
1235 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1236 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1238 // Workspace for the intermediate particle-orbit matrix result
1239 complex double *tmp
;
1240 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1242 const complex double one
= 1, zero
= 0, minusone
= -1;
1244 for(qpms_ss_pi_t opistartR
= 0; opistartR
< ss
->p_count
;
1245 opistartR
+= ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
//orbit_p_countR; might write a while() instead
1247 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1248 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1249 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1250 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1251 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1252 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1254 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1255 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1256 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1257 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1258 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1259 // Orbit coeff vector's full size:
1260 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1261 // This is where the orbit starts in the "packed" vector:
1262 const size_t packed_orbit_offsetR
=
1263 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1264 + osnR
* otR
->irbase_sizes
[iri
];
1265 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1266 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1267 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1268 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1269 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1270 const cart3_t posR
= ss
->p
[piR
].pos
;
1271 // dest particle T-matrix
1272 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1273 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1274 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1275 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1276 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1277 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1278 // This is where the particle's orbit starts in the "packed" vector:
1279 const size_t packed_orbit_offsetC
=
1280 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1281 + osnC
* otC
->irbase_sizes
[iri
];
1282 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1283 // Orbit coeff vector's full size:
1284 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1285 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1286 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1287 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1288 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1290 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1291 if(piC
!= piR
) { // non-diagonal, calculate TS
1292 const cart3_t posC
= ss
->p
[piC
].pos
;
1293 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1294 Sblock
, // Sblock is S(piR->piC)
1295 bspecR
, bspecC
->n
, bspecC
, 1,
1296 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1298 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1299 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1300 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1301 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1302 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1303 } else { // diagonal, fill with diagonal +1
1304 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1305 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1306 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1309 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1310 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1311 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1312 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1313 omC
+ opiC
*particle_fullsizeC
/*B*/,
1314 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1315 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1317 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1318 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1319 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1320 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1321 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1322 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1332 return target_packed
;
1335 struct qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
{
1336 const qpms_scatsys_at_omega_t
*ssw
;
1337 qpms_ss_pi_t
*opistartR_ptr
;
1338 pthread_mutex_t
*opistartR_mutex
;
1340 complex double *target_packed
;
1343 static void *qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg
)
1345 const struct qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1347 const qpms_scatsys_at_omega_t
*ssw
= a
->ssw
;
1348 const complex double k
= ssw
->wavenumber
;
1349 const qpms_scatsys_t
*ss
= ssw
->ss
;
1350 const qpms_iri_t iri
= a
->iri
;
1351 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1353 // some of the following workspaces are probably redundant; TODO optimize later.
1355 // workspaces for the uncompressed particle<-particle tranlation matrix block
1356 // and the result of multiplying with a T-matrix (times -1)
1357 complex double *Sblock
, *TSblock
;
1358 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1359 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1361 // Workspace for the intermediate particle-orbit matrix result
1362 complex double *tmp
;
1363 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1365 const complex double one
= 1, zero
= 0, minusone
= -1;
1368 // In the beginning, pick a target (row) orbit for this thread
1369 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1370 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1371 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1374 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1375 // Now increment it for another thread:
1376 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1377 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1379 // Orbit picked (defined by opistartR), do the work.
1380 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1381 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1382 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1383 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1384 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1385 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1387 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1388 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1389 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1390 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1391 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1392 // Orbit coeff vector's full size:
1393 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1394 // This is where the orbit starts in the "packed" vector:
1395 const size_t packed_orbit_offsetR
=
1396 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1397 + osnR
* otR
->irbase_sizes
[iri
];
1398 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1399 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1400 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1401 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1402 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1403 const cart3_t posR
= ss
->p
[piR
].pos
;
1404 // dest particle T-matrix
1405 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1406 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1407 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1408 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1409 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1410 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1411 // This is where the particle's orbit starts in the "packed" vector:
1412 const size_t packed_orbit_offsetC
=
1413 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1414 + osnC
* otC
->irbase_sizes
[iri
];
1415 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1416 // Orbit coeff vector's full size:
1417 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1418 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1419 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1420 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1421 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1423 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1424 if(piC
!= piR
) { // non-diagonal, calculate TS
1425 const cart3_t posC
= ss
->p
[piC
].pos
;
1426 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1427 Sblock
, // Sblock is S(piR->piC)
1428 bspecR
, bspecC
->n
, bspecC
, 1,
1429 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1431 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1432 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1433 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1434 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1435 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1436 } else { // diagonal, fill with diagonal +1
1437 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1438 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1439 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1442 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1443 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1444 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1445 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1446 omC
+ opiC
*particle_fullsizeC
/*B*/,
1447 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1448 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1450 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1451 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1452 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1453 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1454 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1455 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1471 // this differs from the ...build_modeproblem_matrix... only by the `J`
1472 // maybe I should use this one there as well to save lines... TODO
1473 struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
{
1474 const qpms_scatsys_t
*ss
;
1475 qpms_ss_pi_t
*opistartR_ptr
;
1476 pthread_mutex_t
*opistartR_mutex
;
1478 complex double *target_packed
;
1483 static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread(void *arg
)
1485 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1487 const qpms_scatsys_t
*ss
= a
->ss
;
1488 const qpms_iri_t iri
= a
->iri
;
1489 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1490 const qpms_bessel_t J
= a
->J
;
1492 // some of the following workspaces are probably redundant; TODO optimize later.
1494 // workspace for the uncompressed particle<-particle tranlation matrix block
1495 complex double *Sblock
;
1496 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1498 // Workspace for the intermediate particle-orbit matrix result
1499 complex double *tmp
;
1500 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1502 const complex double one
= 1, zero
= 0;
1505 // In the beginning, pick a target (row) orbit for this thread
1506 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1507 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1508 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1511 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1512 // Now increment it for another thread:
1513 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1514 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1516 // Orbit picked (defined by opistartR), do the work.
1517 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1518 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1519 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1520 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1521 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1522 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1524 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1525 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1526 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, orbitstartpiR
);
1527 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1528 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1529 // Orbit coeff vector's full size:
1530 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1531 // This is where the orbit starts in the "packed" vector:
1532 const size_t packed_orbit_offsetR
=
1533 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1534 + osnR
* otR
->irbase_sizes
[iri
];
1535 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1536 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1537 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1538 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1539 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1540 const cart3_t posR
= ss
->p
[piR
].pos
;
1541 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1542 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1543 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1544 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1545 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1546 // This is where the particle's orbit starts in the "packed" vector:
1547 const size_t packed_orbit_offsetC
=
1548 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1549 + osnC
* otC
->irbase_sizes
[iri
];
1550 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1551 // Orbit coeff vector's full size:
1552 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1553 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1554 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1555 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1556 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1558 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1559 // THIS IS THE MAIN PART DIFFERENT FROM ...modeproblem...() TODO unify
1560 // somehow to save lines
1561 if(piC
!= piR
) { // non-diagonal, calculate S
1562 const cart3_t posC
= ss
->p
[piC
].pos
;
1563 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1564 Sblock
, // Sblock is S(piR->piC)
1565 bspecR
, bspecC
->n
, bspecC
, 1,
1566 a
->k
, posR
, posC
, J
));
1567 } else { // diagonal, fill with zeros; TODO does this make sense?
1568 // would unit matrix be better? or unit only for QPMS_BESSEL_REGULAR?
1569 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1570 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1571 Sblock
[row
* bspecC
->n
+ col
] = 0; //(row == col)? 1 : 0;
1574 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1575 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1576 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1577 &one
/*alpha*/, Sblock
/*A*/, particle_fullsizeC
/*ldA*/,
1578 omC
+ opiC
*particle_fullsizeC
/*B*/,
1579 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1580 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1582 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1583 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1584 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1585 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1586 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1587 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1602 // Almost the same as ...build_modeproblem_matrix_...parallelR
1603 // --> TODO write this in a more generic way to save LoC.
1604 complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
1605 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1606 complex double *target_packed
,
1607 const qpms_scatsys_t
*ss
,
1609 const complex double k
,
1614 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1615 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1616 return target_packed
;
1617 if (target_packed
== NULL
)
1618 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1619 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1621 qpms_ss_pi_t opistartR
= 0;
1622 pthread_mutex_t opistartR_mutex
;
1623 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1624 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1625 arg
= {ss
, &opistartR
, &opistartR_mutex
, iri
, target_packed
, J
};
1627 // FIXME THIS IS NOT PORTABLE:
1629 if (qpms_scatsystem_nthreads_override
> 0) {
1630 nthreads
= qpms_scatsystem_nthreads_override
;
1631 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1634 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1636 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1637 nthreads
, qpms_scatsystem_nthreads_default
);
1638 nthreads
= qpms_scatsystem_nthreads_default
;
1640 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1643 pthread_t thread_ids
[nthreads
];
1644 for(long thi
= 0; thi
< nthreads
; ++thi
)
1645 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1646 qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread
,
1648 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1650 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1653 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1654 return target_packed
;
1658 // Parallel implementation, now default
1659 complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed(
1660 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1661 complex double *target_packed
,
1662 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1665 const size_t packedlen
= ssw
->ss
->saecv_sizes
[iri
];
1666 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1667 return target_packed
;
1668 if (target_packed
== NULL
)
1669 QPMS_CRASHING_MALLOC(target_packed
,SQ(packedlen
)*sizeof(complex double));
1670 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1672 qpms_ss_pi_t opistartR
= 0;
1673 pthread_mutex_t opistartR_mutex
;
1674 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1675 const struct qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1676 arg
= {ssw
, &opistartR
, &opistartR_mutex
, iri
, target_packed
};
1678 // FIXME THIS IS NOT PORTABLE:
1680 if (qpms_scatsystem_nthreads_override
> 0) {
1681 nthreads
= qpms_scatsystem_nthreads_override
;
1682 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1685 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1687 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1688 nthreads
, qpms_scatsystem_nthreads_default
);
1689 nthreads
= qpms_scatsystem_nthreads_default
;
1691 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1694 pthread_t thread_ids
[nthreads
];
1695 for(long thi
= 0; thi
< nthreads
; ++thi
)
1696 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1697 qpms_scatsys_build_modeproblem_matrix_irrep_packed_parallelR_thread
,
1699 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1701 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1704 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1705 return target_packed
;
1709 complex double *qpms_scatsys_incident_field_vector_full(
1710 complex double *target_full
, const qpms_scatsys_t
*ss
,
1711 qpms_incfield_t f
, const void *args
, bool add
) {
1713 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
1714 sizeof(complex double));
1715 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1716 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
1717 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
1718 const cart3_t pos
= ss
->p
[pi
].pos
;
1719 QPMS_ENSURE_SUCCESS(f(ptarget
, bspec
, pos
, args
, add
));
1726 complex double *qpms_scatsys_incident_field_vector_irrep_packed(
1727 complex double *target_full
, const qpms_scatsys_t
*ss
,
1728 const qpms_iri_t iri
, qpms_incfield_t f
,
1729 const void *args
, bool add
) {
1735 complex double *qpms_scatsys_apply_Tmatrices_full(
1736 complex double *target_full
, const complex double *inc_full
,
1737 const qpms_scatsys_at_omega_t
*ssw
) {
1739 const qpms_scatsys_t
*ss
= ssw
->ss
;
1740 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
1741 sizeof(complex double));
1742 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1743 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
1744 const complex double *psrc
= inc_full
+ ss
->fecv_pstarts
[pi
];
1745 // TODO check whether T-matrix is non-virtual after virtual t-matrices are implemented.
1746 const qpms_tmatrix_t
*T
= ssw
->tm
[ss
->p
[pi
].tmatrix_id
];
1747 qpms_apply_tmatrix(ptarget
, psrc
, T
);
1754 ccart3_t
qpms_scatsys_eval_E(const qpms_scatsys_t
*ss
,
1755 const complex double *cvf
, const cart3_t where
,
1756 const complex double k
) {
1758 ccart3_t res
= {0,0,0};
1759 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
1761 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1762 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
1763 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
1764 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
1766 const csph_t kr
= sph_cscale(k
, cart2sph(
1767 cart3_substract(where
, particle_pos
)));
1768 const csphvec_t E_sph
= qpms_eval_uvswf(bspec
, particle_cv
, kr
,
1770 const ccart3_t E_cart
= csphvec2ccart_csph(E_sph
, kr
);
1771 ckahanadd(&(res
.x
), &(res_kc
.x
), E_cart
.x
);
1772 ckahanadd(&(res
.y
), &(res_kc
.y
), E_cart
.y
);
1773 ckahanadd(&(res
.z
), &(res_kc
.z
), E_cart
.z
);
1779 ccart3_t
qpms_scatsys_eval_E_irrep(const qpms_scatsys_t
*ss
,
1780 qpms_iri_t iri
, const complex double *cvr
, cart3_t where
) {
1785 void qpms_ss_LU_free(qpms_ss_LU lu
) {
1790 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_full_factorise(complex double *mpmatrix_full
,
1791 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
) {
1792 const qpms_scatsys_t
*ss
= ssw
->ss
;
1793 QPMS_ENSURE(mpmatrix_full
, "A non-NULL pointer to the pre-calculated mode matrix is required");
1794 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, ss
->fecv_size
* sizeof(int));
1795 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, ss
->fecv_size
, ss
->fecv_size
,
1796 mpmatrix_full
, ss
->fecv_size
, target_piv
));
1798 lu
.a
= mpmatrix_full
;
1799 lu
.ipiv
= target_piv
;
1806 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(complex double *mpmatrix_packed
,
1807 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
) {
1808 QPMS_ENSURE(mpmatrix_packed
, "A non-NULL pointer to the pre-calculated mode matrix is required");
1809 size_t n
= ssw
->ss
->saecv_sizes
[iri
];
1810 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, n
* sizeof(int));
1811 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, n
, n
,
1812 mpmatrix_packed
, n
, target_piv
));
1814 lu
.a
= mpmatrix_packed
;
1815 lu
.ipiv
= target_piv
;
1822 qpms_ss_LU
qpms_scatsysw_build_modeproblem_matrix_full_LU(
1823 complex double *target
, int *target_piv
,
1824 const qpms_scatsys_at_omega_t
*ssw
){
1825 target
= qpms_scatsysw_build_modeproblem_matrix_full(target
, ssw
);
1826 return qpms_scatsysw_modeproblem_matrix_full_factorise(target
, target_piv
, ssw
);
1829 qpms_ss_LU
qpms_scatsys_build_modeproblem_matrix_irrep_packed_LU(
1830 complex double *target
, int *target_piv
,
1831 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
){
1832 target
= qpms_scatsys_build_modeproblem_matrix_irrep_packed(target
, ssw
, iri
);
1833 return qpms_scatsys_modeproblem_matrix_irrep_packed_factorise(target
, target_piv
, ssw
, iri
);
1836 complex double *qpms_scatsys_scatter_solve(
1837 complex double *f
, const complex double *a_inc
, qpms_ss_LU lu
) {
1838 const size_t n
= lu
.full
? lu
.ssw
->ss
->fecv_size
: lu
.ssw
->ss
->saecv_sizes
[lu
.iri
];
1839 if (!f
) QPMS_CRASHING_MALLOC(f
, n
* sizeof(complex double));
1840 memcpy(f
, a_inc
, n
*sizeof(complex double)); // It will be rewritten by zgetrs
1841 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR
, 'N' /*trans*/, n
/*n*/, 1 /*nrhs number of right hand sides*/,
1842 lu
.a
/*a*/, n
/*lda*/, lu
.ipiv
/*ipiv*/, f
/*b*/, 1 /*ldb; CHECKME*/));