3 #define lapack_complex_double complex double
4 #define lapack_complex_double_real(z) (creal(z))
5 #define lapack_complex_double_imag(z) (cimag(z))
9 #include "scatsystem.h"
13 #include "symmetries.h"
17 #include "quaternions.h"
19 #include "qpms_error.h"
20 #include "translations.h"
21 #include "tmatrices.h"
24 #include "tolerances.h"
26 #include "tiny_inlines.h"
28 #ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
30 #define SERIAL_ZGEMM qpms_zgemm
32 #define SERIAL_ZGEMM cblas_zgemm
35 #define QPMS_SCATSYS_LEN_RTOL 1e-13
36 #define QPMS_SCATSYS_TMATRIX_ATOL 1e-12
37 #define QPMS_SCATSYS_TMATRIX_RTOL 1e-12
39 // This is used in adjustment of Ewald parameter to avoid high frequency breakdown.
40 // Very roughly, the value of 16 should lead to allowing terms containing incomplete Gamma
41 // functions with magnitudes around exp(16) == 8.9e6
42 static const double QPMS_SCATSYS_EWALD_MAX_EXPONENT
= 16.;
44 long qpms_scatsystem_nthreads_default
= 4;
45 long qpms_scatsystem_nthreads_override
= 0;
47 void qpms_scatsystem_set_nthreads(long n
) {
48 qpms_scatsystem_nthreads_override
= n
;
51 static inline void qpms_ss_ensure_periodic(const qpms_scatsys_t
*ss
) {
52 QPMS_ENSURE(ss
->lattice_dimension
> 0, "This method is applicable only to periodic systems.");
55 static inline void qpms_ss_ensure_periodic_a(const qpms_scatsys_t
*ss
, const char *s
) {
56 QPMS_ENSURE(ss
->lattice_dimension
> 0, "This method is applicable only to periodic systems. Use %s instead.", s
);
59 static inline void qpms_ss_ensure_nonperiodic(const qpms_scatsys_t
*ss
) {
60 QPMS_ENSURE(ss
->lattice_dimension
== 0, "This method is applicable only to nonperiodic systems.");
63 static inline void qpms_ss_ensure_nonperiodic_a(const qpms_scatsys_t
*ss
, const char *s
) {
64 QPMS_ENSURE(ss
->lattice_dimension
== 0, "This method is applicable only to nonperiodic systems. Use %s instead.", s
);
67 // Adjust Ewald parameter to avoid high-frequency breakdown
68 double qpms_ss_adjusted_eta(const qpms_scatsys_t
*ss
, complex double wavenumber
, const double k
[3]) {
69 qpms_ss_ensure_periodic(ss
);
70 const double eta_default
= ss
->per
.eta
;
71 // FIXME here we silently assume that k lies in the first Brillioun zone, we should ensure that.
72 const double k2
= k
[0]*k
[0] + k
[1]*k
[1] + k
[2] * k
[2];
73 const double kappa2
= SQ(cabs(wavenumber
)); // maybe creal would be enough
74 if(kappa2
< k2
) // This should happen only for pretty low frequencies
76 const qpms_l_t maxj
= ss
->c
->lMax
; // Based on ewald.c:301, note that VSWF (c's) lMax is already half of corresponding translation matrix Ewald factors' (c->e3c's) lMax
77 const double mina
= 0.5 * (ss
->lattice_dimension
- 1) - maxj
; // minimum incomplete Gamma first argument, based on ewald.c:301; CHECKME whether this is fine also for 3D lattice
78 const double eta_min
= sqrt(fabs((kappa2
- k2
) * (mina
- 1.) / QPMS_SCATSYS_EWALD_MAX_EXPONENT
));
79 return MAX(eta_default
, eta_min
);
82 // ------------ Stupid implementation of qpms_scatsys_apply_symmetry() -------------
84 // The following functions are just to make qpms_scatsys_apply_symmetry more readable.
85 // They are not to be used elsewhere, as they do not perform any array capacity checks etc.
87 /// Compare two orbit types in a scattering system.
88 static bool orbit_types_equal(const qpms_ss_orbit_type_t
*a
, const qpms_ss_orbit_type_t
*b
) {
89 if (a
->size
!= b
->size
) return false;
90 if (memcmp(a
->action
, b
->action
, a
->size
*sizeof(qpms_ss_orbit_pi_t
))) return false;
91 if (memcmp(a
->tmatrices
, b
->tmatrices
, a
->size
*sizeof(qpms_ss_tmi_t
))) return false;
95 // Extend the action to all particles in orbit if only the action on the 0th
96 // particle has been filled.
97 static void extend_orbit_action(qpms_scatsys_t
*ss
, qpms_ss_orbit_type_t
*ot
) {
98 for(qpms_ss_orbit_pi_t src
= 1; src
< ot
->size
; ++src
) {
99 // find any element g that sends 0 to src:
101 for (g
= 0; g
< ss
->sym
->order
; ++g
)
102 if (ot
->action
[g
] == src
) break;
103 assert(g
< ss
->sym
->order
);
104 // invg sends src to 0
105 qpms_gmi_t invg
= qpms_finite_group_inv(ss
->sym
, g
);
106 for (qpms_gmi_t f
= 0; f
< ss
->sym
->order
; ++f
)
107 // if f sends 0 to dest, then f * invg sends src to dest
108 ot
->action
[src
* ss
->sym
->order
+
109 qpms_finite_group_mul(ss
->sym
,f
,invg
)] = ot
->action
[f
];
113 //Add orbit type to a scattering system, updating the ss->otspace_end pointer accordingly
114 static void add_orbit_type(qpms_scatsys_t
*ss
, const qpms_ss_orbit_type_t
*ot_current
) {
115 qpms_ss_orbit_type_t
* const ot_new
= & (ss
->orbit_types
[ss
->orbit_type_count
]);
116 ot_new
->size
= ot_current
->size
;
118 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_tmi(ss
, ot_current
->tmatrices
[0]);
119 const size_t bspecn
= bspec
->n
;
120 ot_new
->bspecn
= bspecn
;
122 const size_t actionsiz
= sizeof(ot_current
->action
[0]) * ot_current
->size
124 ot_new
->action
= (void *) (ss
->otspace_end
);
125 memcpy(ot_new
->action
, ot_current
->action
, actionsiz
);
126 // N.B. we copied mostly garbage ^^^, most of it is initialized just now:
127 extend_orbit_action(ss
, ot_new
);
128 #ifdef DUMP_ORBIT_ACTION
129 fprintf(stderr
, "Orbit action:\n");
130 for (qpms_gmi_t gmi
= 0; gmi
< ss
->sym
->order
; ++gmi
) {
131 const qpms_quat4d_t q
= qpms_quat_4d_from_2c(ss
->sym
->rep3d
[gmi
].rot
);
132 fprintf(stderr
, "%+d[%g %g %g %g] ", (int)ss
->sym
->rep3d
[gmi
].det
,
133 q
.c1
, q
.ci
, q
.cj
, q
.ck
);
134 fprintf(stderr
, "%s\t", (ss
->sym
->permrep
&& ss
->sym
->permrep
[gmi
])?
135 ss
->sym
->permrep
[gmi
] : "");
136 for (qpms_ss_orbit_pi_t pi
= 0; pi
< ot_new
->size
; ++pi
)
137 fprintf(stderr
, "%d\t", (int) ot_new
->action
[gmi
+ pi
*ss
->sym
->order
]);
138 fprintf(stderr
, "\n");
141 ss
->otspace_end
+= actionsiz
;
143 const size_t tmsiz
= sizeof(ot_current
->tmatrices
[0]) * ot_current
->size
;
144 ot_new
->tmatrices
= (void *) (ss
->otspace_end
);
145 memcpy(ot_new
->tmatrices
, ot_current
->tmatrices
, tmsiz
);
146 ss
->otspace_end
+= tmsiz
;
148 const size_t irbase_sizes_siz
= sizeof(ot_new
->irbase_sizes
[0]) * ss
->sym
->nirreps
;
149 ot_new
->irbase_sizes
= (void *) (ss
->otspace_end
);
150 ss
->otspace_end
+= irbase_sizes_siz
;
151 ot_new
->irbase_cumsizes
= (void *) (ss
->otspace_end
);
152 ss
->otspace_end
+= irbase_sizes_siz
;
153 ot_new
->irbase_offsets
= (void *) (ss
->otspace_end
);
154 ss
->otspace_end
+= irbase_sizes_siz
;
156 const size_t irbases_siz
= sizeof(ot_new
->irbases
[0]) * SQ(ot_new
->size
* bspecn
);
157 ot_new
->irbases
= (void *) (ss
->otspace_end
);
158 ss
->otspace_end
+= irbases_siz
;
160 size_t lastbs
, bs_cumsum
= 0;
161 for(qpms_iri_t iri
= 0; iri
< ss
->sym
->nirreps
; ++iri
) {
162 ot_new
->irbase_offsets
[iri
] = bs_cumsum
* bspecn
* ot_new
->size
;
163 qpms_orbit_irrep_basis(&lastbs
,
164 ot_new
->irbases
+ bs_cumsum
*ot_new
->size
*bspecn
,
165 ot_new
, bspec
, ss
->sym
, iri
);
166 ot_new
->irbase_sizes
[iri
] = lastbs
;
168 ot_new
->irbase_cumsizes
[iri
] = bs_cumsum
;
170 QPMS_ENSURE(bs_cumsum
== ot_new
->size
* bspecn
,
171 "The cumulative size of the symmetry-adapted bases is wrong; "
172 "expected %d = %d * %d, got %d.",
173 ot_new
->size
* bspecn
, ot_new
->size
, bspecn
, bs_cumsum
);
174 ot_new
->instance_count
= 0;
175 ss
->orbit_type_count
++;
178 // Standardises the lattice base vectors, and fills the other contents of ss->per[0].
179 // LPTODO split the functionality in smaller functions, these might be useful elsewhere.
180 static void process_lattice_bases(qpms_scatsys_t
*ss
, const qpms_tolerance_spec_t
*tol
) {
181 switch(ss
->lattice_dimension
) {
184 normsq
= cart3_normsq(ss
->per
.lattice_basis
[0]);
185 ss
->per
.unitcell_volume
= sqrt(normsq
);
186 ss
->per
.reciprocal_basis
[0] = cart3_divscale(ss
->per
.lattice_basis
[0], normsq
);
187 ss
->per
.eta
= 2. / M_2_SQRTPI
/ ss
->per
.unitcell_volume
;
190 // (I) Create orthonormal basis of the plane in which the basis vectors lie
192 double norm0
= cart3norm(ss
->per
.lattice_basis
[0]);
193 // 0: Just normalised 0. basis vector
194 onbasis
[0] = cart3_divscale(ss
->per
.lattice_basis
[0], norm0
);
195 // 1: Gram-Schmidt complement of the other basis vector
196 const double b0norm_dot_b1
= cart3_dot(onbasis
[0], ss
->per
.lattice_basis
[1]);
197 onbasis
[1] = cart3_substract(ss
->per
.lattice_basis
[1],
198 cart3_scale(b0norm_dot_b1
, onbasis
[0]));
199 onbasis
[1] = cart3_divscale(onbasis
[1], cart3norm(onbasis
[1]));
200 // (II) Express the lattice basis in terms of the new 2d plane vector basis onbasis
201 cart2_t b2d
[2] = {{.x
= norm0
, .y
= 0},
202 {.x
= b0norm_dot_b1
, .y
= cart3_dot(onbasis
[1], ss
->per
.lattice_basis
[1])}};
203 // (III) Reduce lattice basis and get reciprocal bases in the 2D plane
204 l2d_reduceBasis(b2d
[0], b2d
[1], &b2d
[0], &b2d
[1]);
205 ss
->per
.unitcell_volume
= l2d_unitcell_area(b2d
[0], b2d
[1]);
206 ss
->per
.eta
= 2. / M_2_SQRTPI
/ sqrt(ss
->per
.unitcell_volume
);
208 QPMS_ENSURE_SUCCESS(l2d_reciprocalBasis1(b2d
[0], b2d
[1], &r2d
[0], &r2d
[1]));
209 // (IV) Rotate everything back to the original 3D space.
210 for(int i
= 0; i
< 2; ++i
) {
211 ss
->per
.lattice_basis
[i
] = cart3_add(
212 cart3_scale(b2d
[i
].x
, onbasis
[0]),
213 cart3_scale(b2d
[i
].y
, onbasis
[1]));
214 ss
->per
.reciprocal_basis
[i
] = cart3_add(
215 cart3_scale(r2d
[i
].x
, onbasis
[0]),
216 cart3_scale(r2d
[i
].y
, onbasis
[1]));
220 l3d_reduceBasis(ss
->per
.lattice_basis
, ss
->per
.lattice_basis
);
221 ss
->per
.unitcell_volume
= fabs(l3d_reciprocalBasis1(ss
->per
.lattice_basis
,
222 ss
->per
.reciprocal_basis
));
223 ss
->per
.eta
= 2. / M_2_SQRTPI
/ cbrt(ss
->per
.unitcell_volume
);
224 // TODO check unitcell_volume for sanity w.r.t tolerance
232 // Almost 200 lines. This whole thing deserves a rewrite!
233 qpms_scatsys_at_omega_t
*qpms_scatsys_apply_symmetry(const qpms_scatsys_t
*orig
,
234 const qpms_finite_group_t
*sym
, complex double omega
,
235 const qpms_tolerance_spec_t
*tol
) {
237 QPMS_WARN("No symmetry group pointer provided, assuming trivial symmetry"
238 " (this is currently the only option for periodic systems).");
239 sym
= &QPMS_FINITE_GROUP_TRIVIAL
;
242 // TODO check data sanity
244 qpms_l_t lMax
= 0; // the overall lMax of all base specs.
245 qpms_normalisation_t normalisation
= QPMS_NORMALISATION_UNDEF
;
247 // First, determine the rough radius of the array; it should be nonzero
248 // in order to particle position equivalence work correctly
251 double minx
= +INFINITY
, miny
= +INFINITY
, minz
= +INFINITY
;
252 double maxx
= -INFINITY
, maxy
= -INFINITY
, maxz
= -INFINITY
;
253 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
) {
254 minx
= MIN(minx
,orig
->p
[i
].pos
.x
);
255 miny
= MIN(miny
,orig
->p
[i
].pos
.y
);
256 minz
= MIN(minz
,orig
->p
[i
].pos
.z
);
257 maxx
= MAX(maxx
,orig
->p
[i
].pos
.x
);
258 maxy
= MAX(maxy
,orig
->p
[i
].pos
.y
);
259 maxz
= MAX(maxz
,orig
->p
[i
].pos
.z
);
261 lenscale
= (fabs(maxx
)+fabs(maxy
)+fabs(maxz
)+(maxx
-minx
)+(maxy
-miny
)+(maxz
-minz
)) / 3;
264 // Second, check that there are no duplicit positions in the input system.
265 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
)
266 for (qpms_ss_pi_t j
= 0; j
< i
; ++j
)
267 assert(!cart3_isclose(orig
->p
[i
].pos
, orig
->p
[j
].pos
, 0, QPMS_SCATSYS_LEN_RTOL
* lenscale
));
270 // Allocate T-matrix, particle and particle orbit info arrays
272 QPMS_CRASHING_MALLOC(ss
, sizeof(*ss
));
273 ss
->lattice_dimension
= orig
->lattice_dimension
;
274 // TODO basic periodic lattices related stuff here.
275 if (ss
->lattice_dimension
) {
277 process_lattice_bases(ss
, tol
);
279 for(int i
= 0; i
< ss
->lattice_dimension
; ++i
) // extend lenscale by basis vectors
280 lenscale
= MAX(lenscale
, cart3norm(ss
->per
.lattice_basis
[i
]));
281 ss
->lenscale
= lenscale
;
283 ss
->medium
= orig
->medium
;
285 // Copy the qpms_tmatrix_fuction_t from orig
286 ss
->tmg_count
= orig
->tmg_count
;
287 QPMS_CRASHING_MALLOC(ss
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
288 memcpy(ss
->tmg
, orig
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
290 ss
->tm_capacity
= sym
->order
* orig
->tm_count
;
291 QPMS_CRASHING_MALLOC(ss
->tm
, ss
->tm_capacity
* sizeof(*(ss
->tm
)));
293 ss
->p_capacity
= sym
->order
* orig
->p_count
;
294 QPMS_CRASHING_MALLOC(ss
->p
, ss
->p_capacity
* sizeof(qpms_particle_tid_t
));
295 QPMS_CRASHING_MALLOC(ss
->p_orbitinfo
, ss
->p_capacity
* sizeof(qpms_ss_particle_orbitinfo_t
));
296 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_capacity
; ++pi
) {
297 ss
->p_orbitinfo
[pi
].t
= QPMS_SS_P_ORBITINFO_UNDEF
;
298 ss
->p_orbitinfo
[pi
].p
= QPMS_SS_P_ORBITINFO_UNDEF
;
301 // Evaluate the original T-matrices at omega
302 qpms_tmatrix_t
**tm_orig_omega
;
303 QPMS_CRASHING_MALLOC(tm_orig_omega
, orig
->tmg_count
* sizeof(*tm_orig_omega
));
304 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
305 tm_orig_omega
[i
] = qpms_tmatrix_init_from_function(orig
->tmg
[i
], omega
);
307 // Evaluate the medium and derived T-matrices at omega.
308 qpms_scatsys_at_omega_t
*ssw
;
309 QPMS_CRASHING_MALLOC(ssw
, sizeof(*ssw
)); // returned
312 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
313 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
314 // we will be using ss->tm_capacity also for ssw->tm
315 QPMS_CRASHING_MALLOC(ssw
->tm
, ss
->tm_capacity
* sizeof(*(ssw
->tm
))); // returned
317 // Evaluate T-matrices at omega; checking for duplicities
319 ss
->max_bspecn
= 0; // We'll need it later.for memory alloc estimates.
321 qpms_ss_tmi_t tm_dupl_remap
[ss
->tm_capacity
]; // Auxilliary array to label remapping the indices after ignoring t-matrix duplicities; VLA!
323 for (qpms_ss_tmi_t i
= 0; i
< orig
->tm_count
; ++i
) {
324 qpms_tmatrix_t
*ti
= qpms_tmatrix_apply_operation(&(orig
->tm
[i
].op
), tm_orig_omega
[orig
->tm
[i
].tmgi
]);
326 for (j
= 0; j
< ss
->tm_count
; ++j
)
327 if (qpms_tmatrix_isclose(ti
, ssw
->tm
[j
], tol
->rtol
, tol
->atol
)) {
330 if (j
== ss
->tm_count
) { // duplicity not found, save both the "abstract" and "at omega" T-matrices
331 qpms_tmatrix_operation_copy(&ss
->tm
[j
].op
, &orig
->tm
[i
].op
);
332 ss
->tm
[j
].tmgi
= orig
->tm
[i
].tmgi
; // T-matrix functions are preserved.
334 ss
->max_bspecn
= MAX(ssw
->tm
[j
]->spec
->n
, ss
->max_bspecn
);
335 lMax
= MAX(lMax
, ssw
->tm
[j
]->spec
->lMax
);
338 else qpms_tmatrix_free(ti
);
339 tm_dupl_remap
[i
] = j
;
340 if (normalisation
== QPMS_NORMALISATION_UNDEF
)
341 normalisation
= ssw
->tm
[i
]->spec
->norm
;
342 // We expect all bspec norms to be the same.
343 else QPMS_ENSURE(normalisation
== ssw
->tm
[j
]->spec
->norm
,
344 "Normalisation convention must be the same for all T-matrices."
345 " %d != %d\n", normalisation
, ssw
->tm
[j
]->spec
->norm
);
348 // Free the original T-matrices at omega
349 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
350 qpms_tmatrix_free(tm_orig_omega
[i
]);
353 // Copy particles, remapping the t-matrix indices
354 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++(i
)) {
355 ss
->p
[i
] = orig
->p
[i
];
356 ss
->p
[i
].tmatrix_id
= tm_dupl_remap
[ss
->p
[i
].tmatrix_id
];
358 ss
->p_count
= orig
->p_count
;
360 // allocate t-matrix symmetry map
361 QPMS_CRASHING_MALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* sym
->order
* ss
->tm_count
);
363 // Extend the T-matrices list by the symmetry operations
364 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
365 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
){
366 const size_t d
= ssw
->tm
[tmi
]->spec
->n
;
368 QPMS_CRASHING_MALLOC(m
, d
*d
*sizeof(complex double)); // ownership passed to ss->tm[ss->tm_count].op
369 qpms_irot3_uvswfi_dense(m
, ssw
->tm
[tmi
]->spec
, sym
->rep3d
[gmi
]);
370 qpms_tmatrix_t
*transformed
= qpms_tmatrix_apply_symop(ssw
->tm
[tmi
], m
);
372 for (tmj
= 0; tmj
< ss
->tm_count
; ++tmj
)
373 if (qpms_tmatrix_isclose(transformed
, ssw
->tm
[tmj
], tol
->rtol
, tol
->atol
))
375 if (tmj
< ss
->tm_count
) { // HIT, transformed T-matrix already exists
376 //TODO some "rounding error cleanup" symmetrisation could be performed here?
377 qpms_tmatrix_free(transformed
);
379 } else { // MISS, save the matrix (also the "abstract" one)
380 ssw
->tm
[ss
->tm_count
] = transformed
;
381 ss
->tm
[ss
->tm_count
].tmgi
= ss
->tm
[tmi
].tmgi
;
382 qpms_tmatrix_operation_compose_chain_init(&(ss
->tm
[ss
->tm_count
].op
), 2, 1); // FIXME MEMLEAK
383 struct qpms_tmatrix_operation_compose_chain
* const o
= &(ss
->tm
[ss
->tm_count
].op
.op
.compose_chain
);
384 o
->ops
[0] = & ss
->tm
[tmi
].op
; // Let's just borrow this
385 o
->ops_owned
[0] = false;
386 o
->opmem
[0].typ
= QPMS_TMATRIX_OPERATION_LRMATRIX
;
387 o
->opmem
[0].op
.lrmatrix
.m
= m
;
388 o
->opmem
[0].op
.lrmatrix
.owns_m
= true;
389 o
->opmem
[0].op
.lrmatrix
.m_size
= d
* d
;
390 o
->ops
[1] = o
->opmem
;
391 o
->ops_owned
[1] = true;
394 ss
->tm_sym_map
[gmi
+ tmi
* sym
->order
] = tmj
; // In any case, tmj now indexes the correct transformed matrix
396 // Possibly free some space using the new ss->tm_count instead of (old) ss->tm_count*sym->order
397 QPMS_CRASHING_REALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* ss
->tm_count
);
398 // tm could be realloc'd as well, but those are just pointers, not likely many.
401 // allocate particle symmetry map
402 QPMS_CRASHING_MALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* sym
->order
* ss
->p_count
);
403 // allocate orbit type array (TODO realloc later if too long)
404 ss
->orbit_type_count
= 0;
405 QPMS_CRASHING_CALLOC(ss
->orbit_types
, ss
->p_count
, sizeof(qpms_ss_orbit_type_t
));
407 QPMS_CRASHING_MALLOC(ss
->otspace
, // reallocate later
408 (sizeof(qpms_ss_orbit_pi_t
) * sym
->order
* sym
->order
409 + sizeof(qpms_ss_tmi_t
) * sym
->order
410 + 3*sizeof(size_t) * sym
->nirreps
411 + sizeof(complex double) * SQ(sym
->order
* ss
->max_bspecn
)) * ss
->p_count
413 ss
->otspace_end
= ss
->otspace
;
415 // Workspace for the orbit type determination
416 qpms_ss_orbit_type_t ot_current
;
417 qpms_ss_orbit_pi_t ot_current_action
[sym
->order
* sym
->order
];
418 qpms_ss_tmi_t ot_current_tmatrices
[sym
->order
];
420 qpms_ss_pi_t current_orbit
[sym
->order
];
421 ot_current
.action
= ot_current_action
;
422 ot_current
.tmatrices
= ot_current_tmatrices
;
425 // Extend the particle list by the symmetry operations, check that particles mapped by symmetry ops on themselves
426 // have the correct symmetry
427 // TODO this could be sped up to O(npart * log(npart)); let's see later whether needed.
428 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
429 const bool new_orbit
= (ss
->p_orbitinfo
[pi
].t
== QPMS_SS_P_ORBITINFO_UNDEF
); // TODO build the orbit!!!
431 current_orbit
[0] = pi
;
433 ot_current
.tmatrices
[0] = ss
->p
[pi
].tmatrix_id
;
434 #ifdef DUMP_PARTICLE_POSITIONS
435 cart3_t pos
= ss
->p
[pi
].pos
;
436 fprintf(stderr
, "An orbit [%.4g, %.4g, %.4g] => ", pos
.x
, pos
.y
, pos
.z
);
440 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
) {
441 cart3_t newpoint
= qpms_irot3_apply_cart3(sym
->rep3d
[gmi
], ss
->p
[pi
].pos
);
442 qpms_ss_tmi_t new_tmi
= ss
->tm_sym_map
[gmi
+ ss
->p
[pi
].tmatrix_id
* sym
->order
]; // transformed t-matrix index
444 for (pj
= 0; pj
< ss
->p_count
; ++pj
)
445 if (cart3_isclose(newpoint
, ss
->p
[pj
].pos
, 0, ss
->lenscale
* QPMS_SCATSYS_LEN_RTOL
)) {
446 if (new_tmi
!= ss
->p
[pj
].tmatrix_id
)
447 qpms_pr_error("The %d. particle with coords (%lg, %lg, %lg) "
448 "is mapped to %d. another (or itself) with cords (%lg, %lg, %lg) "
449 "without having the required symmetry", (int)pi
,
450 ss
->p
[pi
].pos
.x
, ss
->p
[pi
].pos
.y
, ss
->p
[pi
].pos
.z
,
451 (int)pj
, ss
->p
[pj
].pos
.x
, ss
->p
[pj
].pos
.y
, ss
->p
[pj
].pos
.z
);
454 if (pj
< ss
->p_count
) { // HIT, the particle is transformed to an "existing" one.
456 } else { // MISS, the symmetry transforms the particle to a new location, so add it.
457 qpms_particle_tid_t newparticle
= {newpoint
, new_tmi
};
458 ss
->p
[ss
->p_count
] = newparticle
;
460 #ifdef DUMP_PARTICLE_POSITIONS
462 fprintf(stderr
, "[%.4g, %.4g, %.4g] ", newpoint
.x
, newpoint
.y
, newpoint
.z
);
465 ss
->p_sym_map
[gmi
+ pi
* sym
->order
] = pj
;
468 // Now check whether the particle (result of the symmetry op) is already in the current orbit
469 qpms_ss_orbit_pi_t opj
;
470 for (opj
= 0; opj
< ot_current
.size
; ++opj
)
471 if (current_orbit
[opj
] == pj
) break; // HIT, pj already on current orbit
472 if (opj
== ot_current
.size
) { // MISS, pj is new on the orbit, extend the size and set the T-matrix id
473 current_orbit
[opj
] = pj
;
475 ot_current
.tmatrices
[opj
] = ss
->p
[pj
].tmatrix_id
;
477 ot_current
.action
[gmi
] = opj
;
480 if (new_orbit
) { // Now compare if the new orbit corresponds to some of the existing types.
481 #ifdef DUMP_PARTICLE_POSITIONS
485 for(oti
= 0; oti
< ss
->orbit_type_count
; ++oti
)
486 if (orbit_types_equal(&ot_current
, &(ss
->orbit_types
[oti
]))) break; // HIT, orbit type already exists
487 assert(0 == sym
->order
% ot_current
.size
);
488 if (oti
== ss
->orbit_type_count
) // MISS, add the new orbit type
489 add_orbit_type(ss
, &ot_current
);
491 // Walk through the orbit again and set up the orbit info of the particles
492 for (qpms_ss_orbit_pi_t opi
= 0; opi
< ot_current
.size
; ++opi
) {
493 const qpms_ss_pi_t pi_opi
= current_orbit
[opi
];
494 ss
->p_orbitinfo
[pi_opi
].t
= oti
;
495 ss
->p_orbitinfo
[pi_opi
].p
= opi
;
496 ss
->p_orbitinfo
[pi_opi
].osn
= ss
->orbit_types
[oti
].instance_count
;
498 ss
->orbit_types
[oti
].instance_count
++;
501 // Possibly free some space using the new ss->p_count instead of (old) ss->p_count*sym->order
502 QPMS_CRASHING_REALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* ss
->p_count
);
503 QPMS_CRASHING_REALLOC(ss
->p
, sizeof(qpms_particle_tid_t
) * ss
->p_count
);
504 QPMS_CRASHING_REALLOC(ss
->p_orbitinfo
, sizeof(qpms_ss_particle_orbitinfo_t
)*ss
->p_count
);
505 ss
->p_capacity
= ss
->p_count
;
507 { // Reallocate the orbit type data space and update the pointers if needed.
508 size_t otspace_sz
= ss
->otspace_end
- ss
->otspace
;
509 char *old_otspace
= ss
->otspace
;
510 QPMS_CRASHING_REALLOC(ss
->otspace
, otspace_sz
);
511 ptrdiff_t shift
= ss
->otspace
- old_otspace
;
513 for (size_t oi
= 0; oi
< ss
->orbit_type_count
; ++oi
) {
514 ss
->orbit_types
[oi
].action
= (void *)(((char *) (ss
->orbit_types
[oi
].action
)) + shift
);
515 ss
->orbit_types
[oi
].tmatrices
= (void *)(((char *) (ss
->orbit_types
[oi
].tmatrices
)) + shift
);
516 ss
->orbit_types
[oi
].irbase_sizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_sizes
)) + shift
);
517 ss
->orbit_types
[oi
].irbase_cumsizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_cumsizes
)) + shift
);
518 ss
->orbit_types
[oi
].irbase_offsets
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_offsets
)) + shift
);
519 ss
->orbit_types
[oi
].irbases
= (void *)(((char *) (ss
->orbit_types
[oi
].irbases
)) + shift
);
521 ss
->otspace_end
+= shift
;
525 // Set ss->fecv_size and ss->fecv_pstarts
527 QPMS_CRASHING_MALLOC(ss
->fecv_pstarts
, ss
->p_count
* sizeof(size_t));
528 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
529 ss
->fecv_pstarts
[pi
] = ss
->fecv_size
;
530 ss
->fecv_size
+= ssw
->tm
[ss
->p
[pi
].tmatrix_id
]->spec
->n
; // That's a lot of dereferencing!
533 QPMS_CRASHING_MALLOC(ss
->saecv_sizes
, sizeof(size_t) * sym
->nirreps
);
534 QPMS_CRASHING_MALLOC(ss
->saecv_ot_offsets
, sizeof(size_t) * sym
->nirreps
* ss
->orbit_type_count
);
535 for(qpms_iri_t iri
= 0; iri
< sym
->nirreps
; ++iri
) {
536 ss
->saecv_sizes
[iri
] = 0;
537 for(qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
538 ss
->saecv_ot_offsets
[iri
* ss
->orbit_type_count
+ oti
] = ss
->saecv_sizes
[iri
];
539 const qpms_ss_orbit_type_t
*ot
= &(ss
->orbit_types
[oti
]);
540 ss
->saecv_sizes
[iri
] += ot
->instance_count
* ot
->irbase_sizes
[iri
];
544 qpms_ss_pi_t p_ot_cumsum
= 0;
545 for (qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
546 qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
547 ot
->p_offset
= p_ot_cumsum
;
548 p_ot_cumsum
+= ot
->size
* ot
->instance_count
;
551 // Set ss->p_by_orbit[]
552 QPMS_CRASHING_MALLOC(ss
->p_by_orbit
, sizeof(qpms_ss_pi_t
) * ss
->p_count
);
553 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
554 const qpms_ss_particle_orbitinfo_t
*oi
= ss
->p_orbitinfo
+ pi
;
555 const qpms_ss_oti_t oti
= oi
->t
;
556 const qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
557 ss
->p_by_orbit
[ot
->p_offset
+ ot
->size
* oi
->osn
+ oi
->p
] = pi
;
560 ss
->c
= qpms_trans_calculator_init(lMax
, normalisation
);
566 void qpms_scatsys_free(qpms_scatsys_t
*ss
) {
569 for(qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
570 qpms_tmatrix_operation_clear(&ss
->tm
[tmi
].op
);
574 free(ss
->fecv_pstarts
);
575 free(ss
->tm_sym_map
);
578 free(ss
->p_orbitinfo
);
579 free(ss
->orbit_types
);
580 free(ss
->saecv_ot_offsets
);
581 free(ss
->saecv_sizes
);
582 free(ss
->p_by_orbit
);
583 qpms_trans_calculator_free(ss
->c
);
588 void qpms_scatsys_at_omega_refill(qpms_scatsys_at_omega_t
*ssw
,
589 complex double omega
) {
590 const qpms_scatsys_t
* const ss
= ssw
->ss
;
592 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
593 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
594 qpms_tmatrix_t
**tmatrices_preop
;
595 QPMS_CRASHING_CALLOC(tmatrices_preop
, ss
->tmg_count
, sizeof(*tmatrices_preop
));
596 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
597 tmatrices_preop
[tmgi
] = qpms_tmatrix_init_from_function(ss
->tmg
[tmgi
], omega
);
598 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
599 qpms_tmatrix_apply_operation_replace(ssw
->tm
[tmi
], &ss
->tm
[tmi
].op
,
600 tmatrices_preop
[ss
->tm
[tmi
].tmgi
]);
601 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
602 qpms_tmatrix_free(tmatrices_preop
[tmgi
]);
603 free(tmatrices_preop
);
606 qpms_scatsys_at_omega_t
*qpms_scatsys_at_omega(const qpms_scatsys_t
*ss
,
607 complex double omega
) {
609 qpms_scatsys_at_omega_t
*ssw
;
610 QPMS_CRASHING_MALLOC(ssw
, sizeof(*ssw
));
613 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
614 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
615 QPMS_CRASHING_CALLOC(ssw
->tm
, ss
->tm_count
, sizeof(*ssw
->tm
));
616 qpms_tmatrix_t
**tmatrices_preop
;
617 QPMS_CRASHING_CALLOC(tmatrices_preop
, ss
->tmg_count
, sizeof(*tmatrices_preop
));
618 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
619 tmatrices_preop
[tmgi
] = qpms_tmatrix_init_from_function(ss
->tmg
[tmgi
], omega
);
620 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
) {
621 ssw
->tm
[tmi
] = qpms_tmatrix_apply_operation(&ss
->tm
[tmi
].op
,
622 tmatrices_preop
[ss
->tm
[tmi
].tmgi
]); //<- main difference to .._refill()
623 QPMS_ENSURE(ssw
->tm
[tmi
],
624 "Got NULL pointer from qpms_tmatrix_apply_operation");
626 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
627 qpms_tmatrix_free(tmatrices_preop
[tmgi
]);
628 free(tmatrices_preop
);
632 void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t
*ssw
) {
635 for(qpms_ss_tmi_t i
= 0; i
< ssw
->ss
->tm_count
; ++i
)
636 qpms_tmatrix_free(ssw
->tm
[i
]);
642 // (copypasta from symmetries.c)
643 // TODO at some point, maybe support also other norms.
644 // (perhaps just use qpms_normalisation_t_factor() at the right places)
645 static inline void check_norm_compat(const qpms_vswf_set_spec_t
*s
)
647 switch (s
->norm
& QPMS_NORMALISATION_NORM_BITS
) {
648 case QPMS_NORMALISATION_NORM_POWER
:
650 case QPMS_NORMALISATION_NORM_SPHARM
:
653 QPMS_WTF
; // Only SPHARM and POWER norms are supported right now.
657 complex double *qpms_orbit_action_matrix(complex double *target
,
658 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
659 const qpms_finite_group_t
*sym
, const qpms_gmi_t g
) {
660 assert(sym
); assert(g
< sym
->order
);
662 assert(ot
); assert(ot
->size
> 0);
663 // check_norm_compat(bspec); not needed here, the qpms_irot3_uvswfi_dense should complain if necessary
664 const size_t n
= bspec
->n
;
665 const qpms_gmi_t N
= ot
->size
;
667 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
668 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
669 complex double tmp
[n
][n
]; // this is the 'per-particle' action
670 qpms_irot3_uvswfi_dense(tmp
[0], bspec
, sym
->rep3d
[g
]);
671 for(qpms_ss_orbit_pi_t Col
= 0; Col
< ot
->size
; ++Col
) {
672 // Row is the 'destination' of the symmetry operation, Col is the 'source'
673 const qpms_ss_orbit_pi_t Row
= ot
->action
[sym
->order
* Col
+ g
];
674 for(size_t row
= 0; row
< bspec
->n
; ++row
)
675 for(size_t col
= 0; col
< bspec
->n
; ++col
)
676 target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
] = conj(tmp
[row
][col
]); //CHECKCONJ
678 #ifdef DUMP_ACTIONMATRIX
679 fprintf(stderr
,"%d: %s\n",
680 (int) g
, (sym
->permrep
&& sym
->permrep
[g
])?
681 sym
->permrep
[g
] : "");
682 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
683 fprintf(stderr
, "--------------------------\n");
684 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
685 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
686 fprintf(stderr
, "| ");
687 for (size_t col
= 0; col
< bspec
->n
; ++col
)
688 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
690 fprintf(stderr
, "|\n");
693 fprintf(stderr
, "-------------------------------\n\n");
699 complex double *qpms_orbit_irrep_projector_matrix(complex double *target
,
700 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
701 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
704 assert(ot
); assert(ot
->size
> 0);
705 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
706 // check_norm_compat(bspec); // probably not needed here, let the called functions complain if necessary, but CHEKME
707 const size_t n
= bspec
->n
;
708 const qpms_gmi_t N
= ot
->size
;
710 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
711 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
712 // Workspace for the orbit group action matrices
713 complex double *tmp
= malloc(n
*n
*N
*N
*sizeof(complex double));
714 const int d
= sym
->irreps
[iri
].dim
;
715 double prefac
= d
/ (double) sym
->order
;
716 for(int partner
= 0; partner
< d
; ++partner
) {
717 for(qpms_gmi_t g
= 0; g
< sym
->order
; ++g
) {
718 // We use the diagonal elements of D_g
719 complex double D_g_conj
= sym
->irreps
[iri
].m
[g
*d
*d
+ partner
*d
+ partner
];
720 #ifdef DUMP_ACTIONMATRIX
721 fprintf(stderr
,"(factor %+g%+gj) ", creal(D_g_conj
), cimag(D_g_conj
));
723 qpms_orbit_action_matrix(tmp
, ot
, bspec
, sym
, g
);
725 for(size_t i
= 0; i
< n
*n
*N
*N
; ++i
)
726 target
[i
] += prefac
* D_g_conj
* tmp
[i
];
730 #ifdef DUMP_PROJECTORMATRIX
731 fprintf(stderr
,"Projector %d (%s):\n", (int) iri
,
732 sym
->irreps
[iri
].name
?sym
->irreps
[iri
].name
:"");
733 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
734 fprintf(stderr
, "--------------------------\n");
735 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
736 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
737 fprintf(stderr
, "| ");
738 for (size_t col
= 0; col
< bspec
->n
; ++col
)
739 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
741 fprintf(stderr
, "|\n");
744 fprintf(stderr
, "-------------------------------\n\n");
749 #define SVD_ATOL 1e-8
751 complex double *qpms_orbit_irrep_basis(size_t *basis_size
,
752 complex double *target
,
753 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
754 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
757 assert(ot
); assert(ot
->size
> 0);
758 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
759 check_norm_compat(bspec
); // Here I'm not sure; CHECKME
760 const size_t n
= bspec
->n
;
761 const qpms_gmi_t N
= ot
->size
;
762 const bool newtarget
= (target
== NULL
);
764 QPMS_CRASHING_MALLOC(target
,n
*n
*N
*N
*sizeof(complex double));
765 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
767 // Get the projector (also workspace for right sg. vect.)
768 complex double *projector
= qpms_orbit_irrep_projector_matrix(NULL
,
769 ot
, bspec
, sym
, iri
);
770 QPMS_ENSURE(projector
!= NULL
, "got NULL from qpms_orbit_irrep_projector_matrix()");
771 // Workspace for the right singular vectors.
772 complex double *V_H
; QPMS_CRASHING_MALLOC(V_H
, n
*n
*N
*N
*sizeof(complex double));
773 // THIS SHOULD NOT BE NECESSARY
774 complex double *U
; QPMS_CRASHING_MALLOC(U
, n
*n
*N
*N
*sizeof(complex double));
775 double *s
; QPMS_CRASHING_MALLOC(s
, n
*N
*sizeof(double));
777 QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR
,
778 'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
779 n
*N
/* m */, n
*N
/* n */, projector
/* a */, n
*N
/* lda */,
780 s
/* s */, U
/* u */, n
*N
/* ldu, irrelev. */, V_H
/* vt */,
784 for(bs
= 0; bs
< n
*N
; ++bs
) {
785 QPMS_ENSURE(s
[bs
] <= 1 + SVD_ATOL
, "%zd. SV too large: %.16lf", bs
, s
[bs
]);
786 QPMS_ENSURE(!(s
[bs
] > SVD_ATOL
&& fabs(1-s
[bs
]) > SVD_ATOL
),
787 "%zd. SV in the 'wrong' interval: %.16lf", bs
, s
[bs
]);
788 if(s
[bs
] < SVD_ATOL
) break;
791 memcpy(target
, V_H
, bs
*N
*n
*sizeof(complex double));
792 if(newtarget
) QPMS_CRASHING_REALLOC(target
, bs
*N
*n
*sizeof(complex double));
793 if(basis_size
) *basis_size
= bs
;
802 complex double *qpms_scatsys_irrep_transform_matrix(complex double *U
,
803 const qpms_scatsys_t
*ss
, qpms_iri_t iri
) {
804 const size_t packedlen
= ss
->saecv_sizes
[iri
];
805 const size_t full_len
= ss
->fecv_size
;
807 QPMS_CRASHING_MALLOC(U
,full_len
* packedlen
* sizeof(complex double));
808 memset(U
, 0, full_len
* packedlen
* sizeof(complex double));
810 size_t fullvec_offset
= 0;
812 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
813 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
814 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
815 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
816 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
817 // This is where the particle's orbit starts in the "packed" vector:
818 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
819 + osn
* ot
->irbase_sizes
[iri
];
820 // Orbit coeff vector's full size:
821 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
822 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
823 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
824 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
826 for (size_t prow
= 0; prow
< orbit_packedsize
; ++prow
)
827 for (size_t pcol
= 0; pcol
< ot
->bspecn
; ++pcol
)
828 U
[full_len
* (packed_orbit_offset
+ prow
) + (fullvec_offset
+ pcol
)]
829 = om
[orbit_fullsize
* prow
+ (opi
* ot
->bspecn
+ pcol
)];
830 fullvec_offset
+= ot
->bspecn
;
836 complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed
,
837 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
839 const size_t packedlen
= ss
->saecv_sizes
[iri
];
840 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
841 return target_packed
;
842 const size_t full_len
= ss
->fecv_size
;
843 if (target_packed
== NULL
)
844 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
845 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
847 // Workspace for the intermediate matrix
849 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
851 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
853 const complex double one
= 1, zero
= 0;
857 CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
858 full_len
/*M*/, packedlen
/*N*/, full_len
/*K*/,
859 &one
/*alpha*/, orig_full
/*A*/, full_len
/*ldA*/,
860 U
/*B*/, full_len
/*ldB*/,
861 &zero
/*beta*/, tmp
/*C*/, packedlen
/*LDC*/);
863 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
864 packedlen
/*M*/, packedlen
/*N*/, full_len
/*K*/,
865 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
866 tmp
/*B*/, packedlen
/*ldB*/, &zero
/*beta*/,
867 target_packed
/*C*/, packedlen
/*ldC*/);
871 return target_packed
;
874 /// Transforms a big "packed" matrix into the full basis (trivial implementation).
875 complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full
,
876 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
877 qpms_iri_t iri
, bool add
){
878 const size_t packedlen
= ss
->saecv_sizes
[iri
];
879 const size_t full_len
= ss
->fecv_size
;
880 if (target_full
== NULL
)
881 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
882 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
884 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
886 // Workspace for the intermediate matrix result
888 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
890 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
892 const complex double one
= 1, zero
= 0;
896 CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
897 packedlen
/*M*/, full_len
/*N*/, packedlen
/*K*/,
898 &one
/*alpha*/, orig_packed
/*A*/, packedlen
/*ldA*/,
899 U
/*B*/, full_len
/*ldB*/,
900 &zero
/*beta*/, tmp
/*C*/, full_len
/*LDC*/);
902 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
903 full_len
/*M*/, full_len
/*N*/, packedlen
/*K*/,
904 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
905 tmp
/*B*/, full_len
/*ldB*/, &one
/*beta*/,
906 target_full
/*C*/, full_len
/*ldC*/);
912 complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed
,
913 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
915 const size_t packedlen
= ss
->saecv_sizes
[iri
];
916 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
917 return target_packed
;
918 const size_t full_len
= ss
->fecv_size
;
919 if (target_packed
== NULL
)
920 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
921 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
923 // Workspace for the intermediate particle-orbit matrix result
925 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
927 const complex double one
= 1, zero
= 0;
929 size_t fullvec_offsetR
= 0;
930 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
931 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
932 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
933 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
934 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
935 // This is where the particle's orbit starts in the "packed" vector:
936 const size_t packed_orbit_offsetR
=
937 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
938 + osnR
* otR
->irbase_sizes
[iri
];
939 // Orbit coeff vector's full size:
940 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
941 const size_t particle_fullsizeR
= otR
->bspecn
;
942 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
943 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
944 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
946 size_t fullvec_offsetC
= 0;
947 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
948 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
949 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
950 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
951 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
952 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
953 // This is where the particle's orbit starts in the "packed" vector:
954 const size_t packed_orbit_offsetC
=
955 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
956 + osnC
* otC
->irbase_sizes
[iri
];
957 // Orbit coeff vector's full size:
958 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
959 const size_t particle_fullsizeC
= otC
->bspecn
;
960 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
961 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
962 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
964 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
965 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
966 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
967 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
968 &one
/*alpha*/, orig_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*A*/,
970 omC
+ opiC
*particle_fullsizeC
/*B*/,
971 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
972 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
974 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
975 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
976 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
977 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
978 orbit_fullsizeR
/*ldA*/,
979 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
980 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
983 fullvec_offsetC
+= otC
->bspecn
;
986 fullvec_offsetR
+= otR
->bspecn
;
989 return target_packed
;
993 /// Transforms a big "packed" matrix into the full basis.
995 complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full
,
996 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
997 qpms_iri_t iri
, bool add
){
998 const size_t packedlen
= ss
->saecv_sizes
[iri
];
999 const size_t full_len
= ss
->fecv_size
;
1000 if (target_full
== NULL
)
1001 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
1002 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
1004 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
1006 // Workspace for the intermediate particle-orbit matrix result
1007 complex double *tmp
;
1008 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1010 const complex double one
= 1, zero
= 0;
1012 size_t fullvec_offsetR
= 0;
1013 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
1014 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
1015 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1016 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
1017 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
1018 // This is where the particle's orbit starts in the "packed" vector:
1019 const size_t packed_orbit_offsetR
=
1020 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1021 + osnR
* otR
->irbase_sizes
[iri
];
1022 // Orbit coeff vector's full size:
1023 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1024 const size_t particle_fullsizeR
= otR
->bspecn
;
1025 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1026 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1027 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1029 size_t fullvec_offsetC
= 0;
1030 if (orbit_packedsizeR
) // avoid crash on empty irrep
1031 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1032 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1033 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1034 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1035 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1036 // This is where the particle's orbit starts in the "packed" vector:
1037 const size_t packed_orbit_offsetC
=
1038 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1039 + osnC
* otC
->irbase_sizes
[iri
];
1040 // Orbit coeff vector's full size:
1041 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1042 const size_t particle_fullsizeC
= otC
->bspecn
;
1043 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1044 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1045 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1046 if (orbit_packedsizeC
) { // avoid crash on empty irrep
1048 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC]
1049 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1050 orbit_packedsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeC
/*K*/,
1051 &one
/*alpha*/, orig_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*A*/,
1053 omC
+ opiC
*particle_fullsizeC
/*B*/,
1054 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1055 tmp
/*C*/, particle_fullsizeC
/*LDC*/);
1057 // target[oiR|piR,oiC|piC] += U*[...] tmp[...]
1058 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
1059 particle_fullsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeR
/*K*/,
1060 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
1061 orbit_fullsizeR
/*ldA*/,
1062 tmp
/*B*/, particle_fullsizeC
/*ldB*/, &one
/*beta*/,
1063 target_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*C*/,
1066 fullvec_offsetC
+= otC
->bspecn
;
1068 fullvec_offsetR
+= otR
->bspecn
;
1075 /// Projects a "big" vector onto an irrep.
1077 complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed
,
1078 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
1079 const qpms_iri_t iri
) {
1080 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1081 if (!packedlen
) return target_packed
; // Empty irrep
1082 if (target_packed
== NULL
)
1083 QPMS_CRASHING_MALLOC(target_packed
, packedlen
*sizeof(complex double));
1084 memset(target_packed
, 0, packedlen
*sizeof(complex double));
1086 const complex double one
= 1;
1088 size_t fullvec_offset
= 0;
1089 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1090 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
1091 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
1092 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
1093 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
1094 // This is where the particle's orbit starts in the "packed" vector:
1095 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
1096 + osn
* ot
->irbase_sizes
[iri
];
1097 // Orbit coeff vector's full size:
1098 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
1099 const size_t particle_fullsize
= ot
->bspecn
;
1100 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
1101 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1102 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
1103 if (orbit_packedsize
) // avoid crash on empty irrep
1104 cblas_zgemv(CblasRowMajor
/*order*/, CblasNoTrans
/*transA*/,
1105 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/,
1106 om
+ opi
*particle_fullsize
/*A*/, orbit_fullsize
/*lda*/,
1107 orig_full
+fullvec_offset
/*X*/, 1/*incX*/,
1108 &one
/*beta*/, target_packed
+packed_orbit_offset
/*Y*/, 1/*incY*/);
1109 fullvec_offset
+= ot
->bspecn
;
1111 return target_packed
;
1114 /// Transforms a big "packed" vector into the full basis.
1116 complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full
,
1117 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
1118 const qpms_iri_t iri
, bool add
) {
1119 const size_t full_len
= ss
->fecv_size
;
1120 if (target_full
== NULL
)
1121 QPMS_CRASHING_MALLOC(target_full
, full_len
*sizeof(complex double));
1122 if (!add
) memset(target_full
, 0, full_len
*sizeof(complex double));
1124 const complex double one
= 1;
1125 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1126 if (!packedlen
) return target_full
; // Completely empty irrep
1128 size_t fullvec_offset
= 0;
1129 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1130 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
1131 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
1132 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
1133 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
1134 // This is where the particle's orbit starts in the "packed" vector:
1135 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
1136 + osn
* ot
->irbase_sizes
[iri
];
1137 // Orbit coeff vector's full size:
1138 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
1139 const size_t particle_fullsize
= ot
->bspecn
;
1140 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
1141 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1142 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
1144 if (orbit_packedsize
) // empty irrep, avoid zgemv crashing.
1145 cblas_zgemv(CblasRowMajor
/*order*/, CblasConjTrans
/*transA*/,
1146 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/, om
+ opi
*particle_fullsize
/*A*/,
1147 orbit_fullsize
/*lda*/, orig_packed
+packed_orbit_offset
/*X*/, 1/*incX*/, &one
/*beta*/,
1148 target_full
+fullvec_offset
/*Y*/, 1/*incY*/);
1150 fullvec_offset
+= ot
->bspecn
;
1155 complex double *qpms_scatsys_build_translation_matrix_full(
1156 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1157 complex double *target
,
1158 const qpms_scatsys_t
*ss
,
1159 complex double k
///< Wave number to use in the translation matrix.
1162 return qpms_scatsys_build_translation_matrix_e_full(
1163 target
, ss
, k
, QPMS_HANKEL_PLUS
);
1166 complex double *qpms_scatsyswk_build_translation_matrix_full(
1167 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1168 complex double *target
,
1169 const qpms_scatsys_at_omega_k_t
*sswk
1172 const qpms_scatsys_at_omega_t
*ssw
= sswk
->ssw
;
1173 const complex double wavenumber
= ssw
->wavenumber
;
1174 const qpms_scatsys_t
*ss
= ssw
->ss
;
1175 qpms_ss_ensure_periodic(ss
);
1176 const cart3_t k_cart3
= cart3_from_double_array(sswk
->k
);
1177 return qpms_scatsys_periodic_build_translation_matrix_full(target
, ss
, wavenumber
, &k_cart3
, sswk
->eta
);
1180 complex double *qpms_scatsys_build_translation_matrix_e_full(
1181 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1182 complex double *target
,
1183 const qpms_scatsys_t
*ss
,
1184 complex double k
, ///< Wave number to use in the translation matrix.
1185 qpms_bessel_t J
///< Bessel function type.
1188 qpms_ss_ensure_nonperiodic(ss
);
1189 const size_t full_len
= ss
->fecv_size
;
1191 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1192 memset(target
, 0, SQ(full_len
) * sizeof(complex double)); //unnecessary?
1193 { // Non-diagonal part; M[piR, piC] = T[piR] S(piR<-piC)
1194 size_t fullvec_offsetR
= 0;
1195 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1196 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, piR
);
1197 const cart3_t posR
= ss
->p
[piR
].pos
;
1198 size_t fullvec_offsetC
= 0;
1199 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1200 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1201 if(piC
!= piR
) { // The diagonal will be dealt with later.
1202 const cart3_t posC
= ss
->p
[piC
].pos
;
1203 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1204 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
,
1205 bspecR
, full_len
, bspecC
, 1,
1208 fullvec_offsetC
+= bspecC
->n
;
1210 assert(fullvec_offsetC
== full_len
);
1211 fullvec_offsetR
+= bspecR
->n
;
1213 assert(fullvec_offsetR
== full_len
);
1219 static inline int qpms_ss_ppair_W32xy(const qpms_scatsys_t
*ss
,
1220 qpms_ss_pi_t pdest
, qpms_ss_pi_t psrc
, complex double wavenumber
, const cart2_t kvector
,
1221 complex double *target
, const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
1222 qpms_ewald_part parts
, double eta
) {
1223 const qpms_vswf_set_spec_t
*srcspec
= qpms_ss_bspec_pi(ss
, psrc
);
1224 const qpms_vswf_set_spec_t
*destspec
= qpms_ss_bspec_pi(ss
, pdest
);
1226 // This might be a bit arbitrary, roughly "copied" from Unitcell constructor. TODO review.
1227 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
1228 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
1230 return qpms_trans_calculator_get_trans_array_e32_e(ss
->c
,
1231 target
, NULL
/*err*/, destspec
, deststride
, srcspec
, srcstride
,
1233 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
1235 cart3_substract(ss
->p
[pdest
].pos
, ss
->p
[psrc
].pos
),
1239 static inline int qpms_ss_ppair_W(const qpms_scatsys_t
*ss
,
1240 qpms_ss_pi_t pdest
, qpms_ss_pi_t psrc
, complex double wavenumber
, const double wavevector
[],
1241 complex double *target
, const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
1242 qpms_ewald_part parts
, double eta
) {
1243 if(ss
->lattice_dimension
== 2 && // Currently, we can only the xy-plane
1244 !ss
->per
.lattice_basis
[0].z
&& !ss
->per
.lattice_basis
[1].z
&&
1246 return qpms_ss_ppair_W32xy(ss
, pdest
, psrc
, wavenumber
, cart2_from_double_array(wavevector
),
1247 target
, deststride
, srcstride
, parts
, eta
);
1249 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1252 complex double *qpms_scatsys_periodic_build_translation_matrix_full(
1253 complex double *target
, const qpms_scatsys_t
*ss
,
1254 complex double wavenumber
, const cart3_t
*wavevector
, double eta
) {
1256 qpms_ss_ensure_periodic(ss
);
1257 if (eta
== 0 || isnan(eta
)) {
1259 cart3_to_double_array(tmp
, *wavevector
);
1260 eta
= qpms_ss_adjusted_eta(ss
, wavenumber
, tmp
);
1263 const size_t full_len
= ss
->fecv_size
;
1265 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1266 const ptrdiff_t deststride
= ss
->fecv_size
, srcstride
= 1;
1267 // We have some limitations in the current implementation
1268 if(ss
->lattice_dimension
== 2 && // Currently, we can only the xy-plane
1269 !ss
->per
.lattice_basis
[0].z
&& !ss
->per
.lattice_basis
[1].z
&&
1271 for (qpms_ss_pi_t pd
= 0; pd
< ss
->p_count
; ++pd
)
1272 for (qpms_ss_pi_t ps
= 0; ps
< ss
->p_count
; ++ps
) {
1273 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W32xy(ss
, pd
, ps
, wavenumber
, cart3xy2cart2(*wavevector
),
1274 target
+ deststride
* ss
->fecv_pstarts
[pd
] + srcstride
* ss
->fecv_pstarts
[ps
],
1275 deststride
, srcstride
, QPMS_EWALD_FULL
, eta
));
1278 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1282 // Common implementation of qpms_scatsysw[k]_build_modeproblem_matrix_full
1283 static inline complex double *qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1284 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1285 complex double *target
,
1286 const qpms_scatsys_at_omega_t
*ssw
,
1287 const double k
[], // NULL if non-periodic
1288 const double eta
// ignored if non-periodic
1291 const complex double wavenumber
= ssw
->wavenumber
;
1292 const qpms_scatsys_t
*ss
= ssw
->ss
;
1293 qpms_ss_ensure_periodic(ss
);
1294 const size_t full_len
= ss
->fecv_size
;
1296 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1297 complex double *tmp
; // translation matrix, S or W
1298 QPMS_CRASHING_MALLOC(tmp
, SQ(ss
->max_bspecn
) * sizeof(complex double));
1299 memset(target
, 0, SQ(full_len
) * sizeof(complex double));
1300 const complex double zero
= 0, minusone
= -1;
1301 { // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
1302 size_t fullvec_offsetR
= 0;
1303 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1304 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1305 const cart3_t posR
= ss
->p
[piR
].pos
;
1306 size_t fullvec_offsetC
= 0;
1307 // dest particle T-matrix
1308 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1309 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1310 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1311 if (k
== NULL
) { // non-periodic case
1312 if(piC
!= piR
) { // No "self-interaction" in non-periodic case
1313 const cart3_t posC
= ss
->p
[piC
].pos
;
1314 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1315 tmp
, // tmp is S(piR<-piC)
1316 bspecR
, bspecC
->n
, bspecC
, 1,
1317 wavenumber
, posR
, posC
, QPMS_HANKEL_PLUS
));
1319 } else { // periodic case
1320 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W(ss
, piR
, piC
, wavenumber
, k
,
1321 tmp
/*target*/, bspecC
->n
/*deststride*/, 1 /*srcstride*/,
1322 QPMS_EWALD_FULL
, eta
));
1325 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1326 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1327 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1328 tmp
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1329 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
/*c*/,
1332 fullvec_offsetC
+= bspecC
->n
;
1334 fullvec_offsetR
+= bspecR
->n
;
1338 // Add the identity, diagonal part M[pi,pi] += 1
1339 for (size_t i
= 0; i
< full_len
; ++i
) target
[full_len
* i
+ i
] += 1;
1345 complex double *qpms_scatsysw_build_modeproblem_matrix_full(
1346 complex double *target
, const qpms_scatsys_at_omega_t
*ssw
) {
1347 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_build_modeproblem_matrix_full()");
1348 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1349 target
, ssw
, NULL
, NAN
);
1352 complex double *qpms_scatsyswk_build_modeproblem_matrix_full(
1353 complex double *target
, const qpms_scatsys_at_omega_k_t
*sswk
)
1355 qpms_ss_ensure_periodic_a(sswk
->ssw
->ss
, "qpms_scatsysw_build_modeproblem_matrix_full()");
1356 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(target
, sswk
->ssw
, sswk
->k
, sswk
->eta
);
1360 // Serial reference implementation.
1361 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_serial(
1362 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1363 complex double *target_packed
,
1364 const qpms_scatsys_at_omega_t
*ssw
,
1368 const qpms_scatsys_t
*ss
= ssw
->ss
;
1369 qpms_ss_ensure_nonperiodic(ss
);
1370 const complex double k
= ssw
->wavenumber
;
1371 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1372 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1373 return target_packed
;
1374 if (target_packed
== NULL
)
1375 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1376 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1378 // some of the following workspaces are probably redundant; TODO optimize later.
1380 // workspaces for the uncompressed particle<-particle tranlation matrix block
1381 // and the result of multiplying with a T-matrix (times -1)
1382 complex double *Sblock
, *TSblock
;
1383 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1384 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1386 // Workspace for the intermediate particle-orbit matrix result
1387 complex double *tmp
;
1388 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1390 const complex double one
= 1, zero
= 0, minusone
= -1;
1392 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
1393 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
1394 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1395 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
1396 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
1397 // This is where the particle's orbit starts in the "packed" vector:
1398 const size_t packed_orbit_offsetR
=
1399 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1400 + osnR
* otR
->irbase_sizes
[iri
];
1401 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1402 // Orbit coeff vector's full size:
1403 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1404 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1405 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1406 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1407 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1408 const cart3_t posR
= ss
->p
[piR
].pos
;
1409 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1410 // dest particle T-matrix
1411 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1412 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1413 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1414 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1415 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1416 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1417 // This is where the particle's orbit starts in the "packed" vector:
1418 const size_t packed_orbit_offsetC
=
1419 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1420 + osnC
* otC
->irbase_sizes
[iri
];
1421 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1422 // Orbit coeff vector's full size:
1423 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1424 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1425 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1426 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1427 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1429 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1430 if(piC
!= piR
) { // non-diagonal, calculate TS
1431 const cart3_t posC
= ss
->p
[piC
].pos
;
1432 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1433 Sblock
, // Sblock is S(piR->piC)
1434 bspecR
, bspecC
->n
, bspecC
, 1,
1435 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1437 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1438 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1439 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1440 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1441 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1442 } else { // diagonal, fill with diagonal +1
1443 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1444 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1445 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1448 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1449 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1450 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1451 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1452 omC
+ opiC
*particle_fullsizeC
/*B*/,
1453 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1454 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1456 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1457 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1458 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1459 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1460 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1461 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1470 return target_packed
;
1473 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
1474 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1475 complex double *target_packed
,
1476 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1479 const qpms_scatsys_t
*ss
= ssw
->ss
;
1480 qpms_ss_ensure_nonperiodic(ss
);
1481 const complex double k
= ssw
->wavenumber
;
1482 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1483 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1484 return target_packed
;
1485 if (target_packed
== NULL
)
1486 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1487 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1489 // some of the following workspaces are probably redundant; TODO optimize later.
1491 // workspaces for the uncompressed particle<-particle tranlation matrix block
1492 // and the result of multiplying with a T-matrix (times -1)
1493 complex double *Sblock
, *TSblock
;
1494 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1495 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1497 // Workspace for the intermediate particle-orbit matrix result
1498 complex double *tmp
;
1499 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1501 const complex double one
= 1, zero
= 0, minusone
= -1;
1503 for(qpms_ss_pi_t opistartR
= 0; opistartR
< ss
->p_count
;
1504 opistartR
+= ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
//orbit_p_countR; might write a while() instead
1506 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1507 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1508 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1509 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1510 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1511 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1513 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1514 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1515 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1516 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1517 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1518 // Orbit coeff vector's full size:
1519 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1520 // This is where the orbit starts in the "packed" vector:
1521 const size_t packed_orbit_offsetR
=
1522 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1523 + osnR
* otR
->irbase_sizes
[iri
];
1524 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1525 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1526 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1527 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1528 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1529 const cart3_t posR
= ss
->p
[piR
].pos
;
1530 // dest particle T-matrix
1531 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1532 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1533 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1534 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1535 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1536 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1537 // This is where the particle's orbit starts in the "packed" vector:
1538 const size_t packed_orbit_offsetC
=
1539 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1540 + osnC
* otC
->irbase_sizes
[iri
];
1541 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1542 // Orbit coeff vector's full size:
1543 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1544 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1545 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1546 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1547 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1549 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1550 if(piC
!= piR
) { // non-diagonal, calculate TS
1551 const cart3_t posC
= ss
->p
[piC
].pos
;
1552 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1553 Sblock
, // Sblock is S(piR->piC)
1554 bspecR
, bspecC
->n
, bspecC
, 1,
1555 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1557 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1558 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1559 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1560 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1561 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1562 } else { // diagonal, fill with diagonal +1
1563 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1564 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1565 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1568 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1569 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1570 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1571 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1572 omC
+ opiC
*particle_fullsizeC
/*B*/,
1573 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1574 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1576 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1577 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1578 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1579 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1580 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1581 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1591 return target_packed
;
1594 struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
{
1595 const qpms_scatsys_at_omega_t
*ssw
;
1596 qpms_ss_pi_t
*opistartR_ptr
;
1597 pthread_mutex_t
*opistartR_mutex
;
1599 complex double *target_packed
;
1602 static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg
)
1604 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1606 const qpms_scatsys_at_omega_t
*ssw
= a
->ssw
;
1607 const complex double k
= ssw
->wavenumber
;
1608 const qpms_scatsys_t
*ss
= ssw
->ss
;
1609 const qpms_iri_t iri
= a
->iri
;
1610 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1612 // some of the following workspaces are probably redundant; TODO optimize later.
1614 // workspaces for the uncompressed particle<-particle tranlation matrix block
1615 // and the result of multiplying with a T-matrix (times -1)
1616 complex double *Sblock
, *TSblock
;
1617 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1618 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1620 // Workspace for the intermediate particle-orbit matrix result
1621 complex double *tmp
;
1622 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1624 const complex double one
= 1, zero
= 0, minusone
= -1;
1627 // In the beginning, pick a target (row) orbit for this thread
1628 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1629 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1630 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1633 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1634 // Now increment it for another thread:
1635 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1636 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1638 // Orbit picked (defined by opistartR), do the work.
1639 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1640 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1641 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1642 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1643 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1644 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1646 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1647 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1648 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1649 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1650 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1651 // Orbit coeff vector's full size:
1652 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1653 // This is where the orbit starts in the "packed" vector:
1654 const size_t packed_orbit_offsetR
=
1655 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1656 + osnR
* otR
->irbase_sizes
[iri
];
1657 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1658 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1659 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1660 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1661 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1662 const cart3_t posR
= ss
->p
[piR
].pos
;
1663 // dest particle T-matrix
1664 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1665 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1666 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1667 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1668 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1669 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1670 // This is where the particle's orbit starts in the "packed" vector:
1671 const size_t packed_orbit_offsetC
=
1672 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1673 + osnC
* otC
->irbase_sizes
[iri
];
1674 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1675 // Orbit coeff vector's full size:
1676 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1677 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1678 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1679 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1680 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1682 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1683 if(piC
!= piR
) { // non-diagonal, calculate TS
1684 const cart3_t posC
= ss
->p
[piC
].pos
;
1685 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1686 Sblock
, // Sblock is S(piR->piC)
1687 bspecR
, bspecC
->n
, bspecC
, 1,
1688 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1690 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1691 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1692 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1693 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1694 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1695 } else { // diagonal, fill with diagonal +1
1696 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1697 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1698 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1701 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1702 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1703 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1704 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1705 omC
+ opiC
*particle_fullsizeC
/*B*/,
1706 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1707 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1709 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1710 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1711 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1712 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1713 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1714 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1730 // this differs from the ...build_modeproblem_matrix... only by the `J`
1731 // maybe I should use this one there as well to save lines... TODO
1732 struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
{
1733 const qpms_scatsys_t
*ss
;
1734 qpms_ss_pi_t
*opistartR_ptr
;
1735 pthread_mutex_t
*opistartR_mutex
;
1737 complex double *target_packed
;
1742 static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread(void *arg
)
1744 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1746 const qpms_scatsys_t
*ss
= a
->ss
;
1747 const qpms_iri_t iri
= a
->iri
;
1748 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1749 const qpms_bessel_t J
= a
->J
;
1751 // some of the following workspaces are probably redundant; TODO optimize later.
1753 // workspace for the uncompressed particle<-particle tranlation matrix block
1754 complex double *Sblock
;
1755 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1757 // Workspace for the intermediate particle-orbit matrix result
1758 complex double *tmp
;
1759 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1761 const complex double one
= 1, zero
= 0;
1764 // In the beginning, pick a target (row) orbit for this thread
1765 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1766 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1767 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1770 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1771 // Now increment it for another thread:
1772 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1773 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1775 // Orbit picked (defined by opistartR), do the work.
1776 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1777 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1778 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1779 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1780 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1781 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1783 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1784 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1785 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, orbitstartpiR
);
1786 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1787 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1788 // Orbit coeff vector's full size:
1789 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1790 // This is where the orbit starts in the "packed" vector:
1791 const size_t packed_orbit_offsetR
=
1792 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1793 + osnR
* otR
->irbase_sizes
[iri
];
1794 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1795 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1796 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1797 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1798 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1799 const cart3_t posR
= ss
->p
[piR
].pos
;
1800 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1801 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1802 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1803 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1804 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1805 // This is where the particle's orbit starts in the "packed" vector:
1806 const size_t packed_orbit_offsetC
=
1807 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1808 + osnC
* otC
->irbase_sizes
[iri
];
1809 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1810 // Orbit coeff vector's full size:
1811 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1812 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1813 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1814 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1815 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1817 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1818 // THIS IS THE MAIN PART DIFFERENT FROM ...modeproblem...() TODO unify
1819 // somehow to save lines
1820 if(piC
!= piR
) { // non-diagonal, calculate S
1821 const cart3_t posC
= ss
->p
[piC
].pos
;
1822 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1823 Sblock
, // Sblock is S(piR->piC)
1824 bspecR
, bspecC
->n
, bspecC
, 1,
1825 a
->k
, posR
, posC
, J
));
1826 } else { // diagonal, fill with zeros; TODO does this make sense?
1827 // would unit matrix be better? or unit only for QPMS_BESSEL_REGULAR?
1828 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1829 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1830 Sblock
[row
* bspecC
->n
+ col
] = 0; //(row == col)? 1 : 0;
1833 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1834 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1835 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1836 &one
/*alpha*/, Sblock
/*A*/, particle_fullsizeC
/*ldA*/,
1837 omC
+ opiC
*particle_fullsizeC
/*B*/,
1838 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1839 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1841 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1842 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1843 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1844 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1845 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1846 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1861 // Almost the same as ...build_modeproblem_matrix_...parallelR
1862 // --> TODO write this in a more generic way to save LoC.
1863 complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
1864 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1865 complex double *target_packed
,
1866 const qpms_scatsys_t
*ss
,
1868 const complex double k
,
1872 qpms_ss_ensure_nonperiodic(ss
);
1874 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1875 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1876 return target_packed
;
1877 if (target_packed
== NULL
)
1878 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1879 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1881 qpms_ss_pi_t opistartR
= 0;
1882 pthread_mutex_t opistartR_mutex
;
1883 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1884 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1885 arg
= {ss
, &opistartR
, &opistartR_mutex
, iri
, target_packed
, k
, J
};
1887 // FIXME THIS IS NOT PORTABLE:
1889 if (qpms_scatsystem_nthreads_override
> 0) {
1890 nthreads
= qpms_scatsystem_nthreads_override
;
1891 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1894 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1896 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1897 nthreads
, qpms_scatsystem_nthreads_default
);
1898 nthreads
= qpms_scatsystem_nthreads_default
;
1900 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1903 pthread_t thread_ids
[nthreads
];
1904 for(long thi
= 0; thi
< nthreads
; ++thi
)
1905 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1906 qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread
,
1908 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1910 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1913 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1914 return target_packed
;
1918 // Parallel implementation, now default
1919 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
1920 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1921 complex double *target_packed
,
1922 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1925 qpms_ss_ensure_nonperiodic(ssw
->ss
);
1926 const size_t packedlen
= ssw
->ss
->saecv_sizes
[iri
];
1927 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1928 return target_packed
;
1929 if (target_packed
== NULL
)
1930 QPMS_CRASHING_MALLOC(target_packed
,SQ(packedlen
)*sizeof(complex double));
1931 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1933 qpms_ss_pi_t opistartR
= 0;
1934 pthread_mutex_t opistartR_mutex
;
1935 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1936 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1937 arg
= {ssw
, &opistartR
, &opistartR_mutex
, iri
, target_packed
};
1939 // FIXME THIS IS NOT PORTABLE:
1941 if (qpms_scatsystem_nthreads_override
> 0) {
1942 nthreads
= qpms_scatsystem_nthreads_override
;
1943 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1946 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1948 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1949 nthreads
, qpms_scatsystem_nthreads_default
);
1950 nthreads
= qpms_scatsystem_nthreads_default
;
1952 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1955 pthread_t thread_ids
[nthreads
];
1956 for(long thi
= 0; thi
< nthreads
; ++thi
)
1957 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1958 qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread
,
1960 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1962 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1965 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1966 return target_packed
;
1970 complex double *qpms_scatsys_incident_field_vector_full(
1971 complex double *target_full
, const qpms_scatsys_t
*ss
,
1972 qpms_incfield_t f
, const void *args
, bool add
) {
1974 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
1975 sizeof(complex double));
1976 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1977 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
1978 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
1979 const cart3_t pos
= ss
->p
[pi
].pos
;
1980 QPMS_ENSURE_SUCCESS(f(ptarget
, bspec
, pos
, args
, add
));
1987 complex double *qpms_scatsys_incident_field_vector_irrep_packed(
1988 complex double *target_full
, const qpms_scatsys_t
*ss
,
1989 const qpms_iri_t iri
, qpms_incfield_t f
,
1990 const void *args
, bool add
) {
1996 complex double *qpms_scatsysw_apply_Tmatrices_full(
1997 complex double *target_full
, const complex double *inc_full
,
1998 const qpms_scatsys_at_omega_t
*ssw
) {
2000 const qpms_scatsys_t
*ss
= ssw
->ss
;
2001 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
2002 sizeof(complex double));
2003 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2004 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
2005 const complex double *psrc
= inc_full
+ ss
->fecv_pstarts
[pi
];
2006 // TODO check whether T-matrix is non-virtual after virtual t-matrices are implemented.
2007 const qpms_tmatrix_t
*T
= ssw
->tm
[ss
->p
[pi
].tmatrix_id
];
2008 qpms_apply_tmatrix(ptarget
, psrc
, T
);
2013 ccart3_t
qpms_scatsys_scattered_E(const qpms_scatsys_t
*ss
,
2015 const complex double k
,
2016 const complex double *cvf
,
2020 ccart3_t res
= {0,0,0};
2021 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2023 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2024 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2025 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2026 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2028 const csph_t kr
= sph_cscale(k
, cart2sph(
2029 cart3_substract(where
, particle_pos
)));
2030 const csphvec_t E_sph
= qpms_eval_uvswf(bspec
, particle_cv
, kr
, btyp
);
2031 const ccart3_t E_cart
= csphvec2ccart_csph(E_sph
, kr
);
2032 ckahanadd(&(res
.x
), &(res_kc
.x
), E_cart
.x
);
2033 ckahanadd(&(res
.y
), &(res_kc
.y
), E_cart
.y
);
2034 ckahanadd(&(res
.z
), &(res_kc
.z
), E_cart
.z
);
2039 ccart3_t
qpms_scatsysw_scattered_E(const qpms_scatsys_at_omega_t
*ssw
,
2041 const complex double *cvf
, const cart3_t where
) {
2042 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_scattered_E()");
2043 return qpms_scatsys_scattered_E(ssw
->ss
, btyp
, ssw
->wavenumber
,
2047 // Alternative implementation, using translation operator and regular dipole waves at zero
2048 ccart3_t
qpms_scatsys_scattered_E__alt(const qpms_scatsys_t
*ss
,
2050 const complex double k
,
2051 const complex double *cvf
,
2055 qpms_ss_ensure_nonperiodic(ss
);
2056 ccart3_t res
= {0,0,0};
2057 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2059 static const int dipspecn
= 3; // We have three basis vectors
2060 // bspec containing only electric dipoles
2061 const qpms_vswf_set_spec_t dipspec
= {
2063 .ilist
= (qpms_uvswfi_t
[]){
2064 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, -1, 1),
2065 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, 0, 1),
2066 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, +1, 1),
2068 .lMax
=1, .lMax_M
=0, .lMax_N
=1, .lMax_L
=-1,
2070 .norm
= ss
->c
->normalisation
,
2073 ccart3_t regdipoles_0
[dipspecn
]; {
2074 const sph_t origin_sph
= {.r
= 0, .theta
= M_PI_2
, .phi
=0}; // Should work with any theta/phi (TESTWORTHY)
2075 csphvec_t regdipoles_0_sph
[dipspecn
];
2076 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph
, &dipspec
,
2077 sph2csph(origin_sph
), QPMS_BESSEL_REGULAR
));
2078 for(int i
= 0; i
< dipspecn
; ++i
)
2079 regdipoles_0
[i
] = csphvec2ccart(regdipoles_0_sph
[i
], origin_sph
);
2082 complex double *s
; // Translation matrix
2083 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2085 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2086 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2087 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2088 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2090 const cart3_t origin_cart
= {0, 0, 0};
2092 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(
2093 ss
->c
, s
, &dipspec
, 1, bspec
, dipspecn
, k
, particle_pos
, where
, btyp
));
2095 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2096 for(size_t j
= 0; j
< dipspecn
; ++j
){
2097 ccart3_t summand
= ccart3_scale(particle_cv
[i
] * s
[dipspecn
*i
+j
], regdipoles_0
[j
]);
2098 ckahanadd(&(res
.x
), &(res_kc
.x
), summand
.x
);
2099 ckahanadd(&(res
.y
), &(res_kc
.y
), summand
.y
);
2100 ckahanadd(&(res
.z
), &(res_kc
.z
), summand
.z
);
2107 ccart3_t
qpms_scatsysw_scattered_E__alt(const qpms_scatsys_at_omega_t
*ssw
,
2108 qpms_bessel_t btyp
, const complex double *cvf
, const cart3_t where
) {
2109 return qpms_scatsys_scattered_E__alt(ssw
->ss
, btyp
, ssw
->wavenumber
,
2113 // For periodic lattices, we use directly the "alternative" implementation,
2114 // using translation operator and regular dipole waves at zero
2115 ccart3_t
qpms_scatsyswk_scattered_E(const qpms_scatsys_at_omega_k_t
*sswk
,
2117 const complex double *cvf
,
2121 if (btyp
!= QPMS_HANKEL_PLUS
)
2122 QPMS_NOT_IMPLEMENTED("Only scattered field with first kind Hankel functions currently implemented.");
2123 const qpms_scatsys_t
*ss
= sswk
->ssw
->ss
;
2124 if (ss
->lattice_dimension
!= 2)
2125 QPMS_NOT_IMPLEMENTED("Only 2D-periodic lattices implemented");
2126 ccart3_t res
= {0,0,0};
2127 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2129 static const int dipspecn
= 3; // We have three basis vectors
2130 // bspec containing only electric dipoles
2131 const qpms_vswf_set_spec_t dipspec
= {
2133 .ilist
= (qpms_uvswfi_t
[]){
2134 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, -1, 1),
2135 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, 0, 1),
2136 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, +1, 1),
2138 .lMax
=1, .lMax_M
=0, .lMax_N
=1, .lMax_L
=-1,
2140 .norm
= ss
->c
->normalisation
,
2143 ccart3_t regdipoles_0
[dipspecn
]; {
2144 const sph_t origin_sph
= {.r
= 0, .theta
= M_PI_2
, .phi
=0}; // Should work with any theta/phi (TESTWORTHY)
2145 csphvec_t regdipoles_0_sph
[dipspecn
];
2146 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph
, &dipspec
,
2147 sph2csph(origin_sph
), QPMS_BESSEL_REGULAR
));
2148 for(int i
= 0; i
< dipspecn
; ++i
)
2149 regdipoles_0
[i
] = csphvec2ccart(regdipoles_0_sph
[i
], origin_sph
);
2152 complex double *s
; // Translation matrix
2153 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2155 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2156 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2157 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2158 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2160 const cart3_t origin_cart
= {0, 0, 0};
2162 QPMS_ASSERT(sswk
->k
[2] == 0); // At least not implemented now
2163 QPMS_ASSERT(ss
->per
.lattice_basis
[0].z
== 0);
2164 QPMS_ASSERT(ss
->per
.lattice_basis
[1].z
== 0);
2166 // Same choices as in qpms_ss_ppair_W32xy; TODO make it more dynamic
2167 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
2168 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
2170 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_e32(
2172 &dipspec
, 1, bspec
, dipspecn
,
2173 sswk
->eta
, sswk
->ssw
->wavenumber
,
2174 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
2175 cart2_from_double_array(sswk
->k
), cart3_substract(where
, particle_pos
) /*CHECKSIGN*/,
2178 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2179 for(size_t j
= 0; j
< dipspecn
; ++j
){
2180 ccart3_t summand
= ccart3_scale(particle_cv
[i
] * s
[dipspecn
*i
+j
], regdipoles_0
[j
]);
2181 ckahanadd(&(res
.x
), &(res_kc
.x
), summand
.x
);
2182 ckahanadd(&(res
.y
), &(res_kc
.y
), summand
.y
);
2183 ckahanadd(&(res
.z
), &(res_kc
.z
), summand
.z
);
2191 ccart3_t
qpms_scatsys_scattered_E_irrep(const qpms_scatsys_t
*ss
,
2192 qpms_iri_t iri
, const complex double *cvr
, cart3_t where
) {
2197 void qpms_ss_LU_free(qpms_ss_LU lu
) {
2202 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_full_factorise(complex double *mpmatrix_full
,
2203 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
, const qpms_scatsys_at_omega_k_t
*sswk
) {
2205 QPMS_ASSERT(sswk
->ssw
== ssw
|| !ssw
);
2207 QPMS_ASSERT(ssw
->ss
->lattice_dimension
> 0);
2209 QPMS_ASSERT(ssw
->ss
->lattice_dimension
== 0);
2211 const qpms_scatsys_t
*ss
= ssw
->ss
;
2212 QPMS_ENSURE(mpmatrix_full
, "A non-NULL pointer to the pre-calculated mode matrix is required");
2213 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, ss
->fecv_size
* sizeof(int));
2214 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, ss
->fecv_size
, ss
->fecv_size
,
2215 mpmatrix_full
, ss
->fecv_size
, target_piv
));
2217 lu
.a
= mpmatrix_full
;
2218 lu
.ipiv
= target_piv
;
2226 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(complex double *mpmatrix_packed
,
2227 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
) {
2228 QPMS_ENSURE(mpmatrix_packed
, "A non-NULL pointer to the pre-calculated mode matrix is required");
2229 qpms_ss_ensure_nonperiodic(ssw
->ss
);
2230 size_t n
= ssw
->ss
->saecv_sizes
[iri
];
2231 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, n
* sizeof(int));
2232 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, n
, n
,
2233 mpmatrix_packed
, n
, target_piv
));
2235 lu
.a
= mpmatrix_packed
;
2236 lu
.ipiv
= target_piv
;
2243 qpms_ss_LU
qpms_scatsysw_build_modeproblem_matrix_full_LU(
2244 complex double *target
, int *target_piv
,
2245 const qpms_scatsys_at_omega_t
*ssw
){
2246 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_build_modeproblem_matrix_full_LU()");
2247 target
= qpms_scatsysw_build_modeproblem_matrix_full(target
, ssw
);
2248 return qpms_scatsysw_modeproblem_matrix_full_factorise(target
, target_piv
, ssw
, NULL
);
2251 qpms_ss_LU
qpms_scatsyswk_build_modeproblem_matrix_full_LU(
2252 complex double *target
, int *target_piv
,
2253 const qpms_scatsys_at_omega_k_t
*sswk
){
2254 target
= qpms_scatsyswk_build_modeproblem_matrix_full(target
, sswk
);
2255 return qpms_scatsysw_modeproblem_matrix_full_factorise(target
, target_piv
, sswk
->ssw
, sswk
);
2258 qpms_ss_LU
qpms_scatsysw_build_modeproblem_matrix_irrep_packed_LU(
2259 complex double *target
, int *target_piv
,
2260 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
){
2261 target
= qpms_scatsysw_build_modeproblem_matrix_irrep_packed(target
, ssw
, iri
);
2262 return qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(target
, target_piv
, ssw
, iri
);
2265 complex double *qpms_scatsys_scatter_solve(
2266 complex double *f
, const complex double *a_inc
, qpms_ss_LU lu
) {
2267 const size_t n
= lu
.full
? lu
.ssw
->ss
->fecv_size
: lu
.ssw
->ss
->saecv_sizes
[lu
.iri
];
2268 if (!f
) QPMS_CRASHING_MALLOC(f
, n
* sizeof(complex double));
2269 memcpy(f
, a_inc
, n
*sizeof(complex double)); // It will be rewritten by zgetrs
2270 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR
, 'N' /*trans*/, n
/*n*/, 1 /*nrhs number of right hand sides*/,
2271 lu
.a
/*a*/, n
/*lda*/, lu
.ipiv
/*ipiv*/, f
/*b*/, 1 /*ldb; CHECKME*/));
2275 struct qpms_scatsys_finite_eval_Beyn_ImTS_param
{
2276 const qpms_scatsys_t
*ss
;
2280 /// Wrapper for Beyn algorithm (non-periodic system)
2281 static int qpms_scatsys_finite_eval_Beyn_ImTS(complex double *target
,
2282 size_t m
, complex double omega
, void *params
) {
2283 const struct qpms_scatsys_finite_eval_Beyn_ImTS_param
*p
= params
;
2284 qpms_scatsys_at_omega_t
*ssw
= qpms_scatsys_at_omega(p
->ss
, omega
);
2285 QPMS_ENSURE(ssw
!= NULL
, "qpms_scatsys_at_omega() returned NULL");
2286 if (p
->iri
== QPMS_NO_IRREP
) {
2287 QPMS_ASSERT(m
== p
->ss
->fecv_size
);
2288 QPMS_ENSURE(NULL
!= qpms_scatsysw_build_modeproblem_matrix_full(
2290 "qpms_scatsysw_build_modeproblem_matrix_full() returned NULL");
2292 QPMS_ASSERT(m
== p
->ss
->saecv_sizes
[p
->iri
]);
2293 QPMS_ENSURE(NULL
!= qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
2294 target
, ssw
, p
->iri
),
2295 "qpms_scatsysw_build_modeproblem_matrix_irrep_packed() returned NULL");
2297 qpms_scatsys_at_omega_free(ssw
);
2298 return QPMS_SUCCESS
;
2301 beyn_result_t
*qpms_scatsys_finite_find_eigenmodes(
2302 const qpms_scatsys_t
* const ss
, const qpms_iri_t iri
,
2303 complex double omega_centre
, double omega_rr
, double omega_ri
,
2304 size_t contour_npoints
,
2305 double rank_tol
, size_t rank_sel_min
, double res_tol
) {
2306 qpms_ss_ensure_nonperiodic_a(ss
, "qpms_scatsys_periodic_find_eigenmodes()");
2307 size_t n
; // matrix dimension
2308 if (qpms_iri_is_valid(ss
->sym
, iri
)) {
2309 n
= ss
->saecv_sizes
[iri
];
2310 } else if (iri
== QPMS_NO_IRREP
) {
2314 beyn_contour_t
*contour
= beyn_contour_ellipse(omega_centre
,
2315 omega_rr
, omega_ri
, contour_npoints
);
2317 struct qpms_scatsys_finite_eval_Beyn_ImTS_param p
= {ss
, iri
};
2318 beyn_result_t
*result
= beyn_solve(n
, n
/* possibly make smaller? */,
2319 qpms_scatsys_finite_eval_Beyn_ImTS
, NULL
, (void *) &p
,
2320 contour
, rank_tol
, rank_sel_min
, res_tol
);
2321 QPMS_ENSURE(result
!= NULL
, "beyn_solve() returned NULL");
2327 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param
{
2328 const qpms_scatsys_t
*ss
;
2329 const double *k
; ///< Wavevector in cartesian coordinates.
2332 /// Wrapper for Beyn algorithm (periodic system)
2333 static int qpms_scatsys_periodic_eval_Beyn_ImTW(complex double *target
,
2334 size_t m
, complex double omega
, void *params
){
2335 const struct qpms_scatsys_periodic_eval_Beyn_ImTW_param
*p
= params
;
2336 qpms_scatsys_at_omega_t
*ssw
= qpms_scatsys_at_omega(p
->ss
, omega
);
2337 QPMS_ENSURE(ssw
!= NULL
, "qpms_scatsys_at_omega() returned NULL");
2338 qpms_scatsys_at_omega_k_t sswk
= {
2340 .k
= {p
->k
[0], p
->k
[1], p
->k
[2]},
2341 .eta
= qpms_ss_adjusted_eta(p
->ss
, ssw
->wavenumber
, p
->k
)
2343 QPMS_ASSERT(m
== p
->ss
->fecv_size
);
2345 qpms_scatsyswk_build_modeproblem_matrix_full(target
, &sswk
),
2346 "qpms_scatsyswk_build_modeproblem_matrix_full() returned NULL");
2347 qpms_scatsys_at_omega_free(ssw
);
2348 return QPMS_SUCCESS
;
2351 beyn_result_t
*qpms_scatsys_periodic_find_eigenmodes(
2352 const qpms_scatsys_t
* const ss
, const double k
[3],
2353 complex double omega_centre
, double omega_rr
, double omega_ri
,
2354 size_t contour_npoints
,
2355 double rank_tol
, size_t rank_sel_min
, double res_tol
) {
2356 qpms_ss_ensure_periodic_a(ss
, "qpms_scatsys_finite_find_eigenmodes()");
2357 size_t n
= ss
->fecv_size
; // matrix dimension
2359 beyn_contour_t
*contour
= beyn_contour_ellipse(omega_centre
,
2360 omega_rr
, omega_ri
, contour_npoints
);
2362 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param p
= {ss
, k
};
2363 beyn_result_t
*result
= beyn_solve(n
, n
,
2364 qpms_scatsys_periodic_eval_Beyn_ImTW
, NULL
, (void *) &p
,
2365 contour
, rank_tol
, rank_sel_min
, res_tol
);
2366 QPMS_ENSURE(result
!= NULL
, "beyn_solve() returned NULL");