3 #define lapack_complex_double complex double
4 #define lapack_complex_double_real(z) (creal(z))
5 #define lapack_complex_double_imag(z) (cimag(z))
9 #include "scatsystem.h"
13 #include "symmetries.h"
17 #include "quaternions.h"
19 #include "qpms_error.h"
20 #include "translations.h"
21 #include "tmatrices.h"
24 #include "tolerances.h"
26 #include "tiny_inlines.h"
28 #ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
30 #define SERIAL_ZGEMM qpms_zgemm
32 #define SERIAL_ZGEMM cblas_zgemm
35 #define QPMS_SCATSYS_LEN_RTOL 1e-13
36 #define QPMS_SCATSYS_TMATRIX_ATOL 1e-12
37 #define QPMS_SCATSYS_TMATRIX_RTOL 1e-12
39 // This is used in adjustment of Ewald parameter to avoid high frequency breakdown.
40 // Very roughly, the value of 16 should lead to allowing terms containing incomplete Gamma
41 // functions with magnitudes around exp(16) == 8.9e6
42 static const double QPMS_SCATSYS_EWALD_MAX_EXPONENT
= 16.;
44 long qpms_scatsystem_nthreads_default
= 4;
45 long qpms_scatsystem_nthreads_override
= 0;
47 void qpms_scatsystem_set_nthreads(long n
) {
48 qpms_scatsystem_nthreads_override
= n
;
51 static inline void qpms_ss_ensure_periodic(const qpms_scatsys_t
*ss
) {
52 QPMS_ENSURE(ss
->lattice_dimension
> 0, "This method is applicable only to periodic systems.");
55 static inline void qpms_ss_ensure_periodic_a(const qpms_scatsys_t
*ss
, const char *s
) {
56 QPMS_ENSURE(ss
->lattice_dimension
> 0, "This method is applicable only to periodic systems. Use %s instead.", s
);
59 static inline void qpms_ss_ensure_nonperiodic(const qpms_scatsys_t
*ss
) {
60 QPMS_ENSURE(ss
->lattice_dimension
== 0, "This method is applicable only to nonperiodic systems.");
63 static inline void qpms_ss_ensure_nonperiodic_a(const qpms_scatsys_t
*ss
, const char *s
) {
64 QPMS_ENSURE(ss
->lattice_dimension
== 0, "This method is applicable only to nonperiodic systems. Use %s instead.", s
);
67 // Adjust Ewald parameter to avoid high-frequency breakdown
68 double qpms_ss_adjusted_eta(const qpms_scatsys_t
*ss
, complex double wavenumber
, const double k
[3]) {
69 qpms_ss_ensure_periodic(ss
);
70 const double eta_default
= ss
->per
.eta
;
71 // FIXME here we silently assume that k lies in the first Brillioun zone, we should ensure that.
72 const double k2
= k
[0]*k
[0] + k
[1]*k
[1] + k
[2] * k
[2];
73 const double kappa2
= SQ(cabs(wavenumber
)); // maybe creal would be enough
74 if(kappa2
< k2
) // This should happen only for pretty low frequencies
76 const qpms_l_t maxj
= ss
->c
->lMax
; // Based on ewald.c:301, note that VSWF (c's) lMax is already half of corresponding translation matrix Ewald factors' (c->e3c's) lMax
77 const double mina
= 0.5 * (ss
->lattice_dimension
- 1) - maxj
; // minimum incomplete Gamma first argument, based on ewald.c:301; CHECKME whether this is fine also for 3D lattice
78 const double eta_min
= sqrt(fabs((kappa2
- k2
) * (mina
- 1.) / QPMS_SCATSYS_EWALD_MAX_EXPONENT
));
79 return MAX(eta_default
, eta_min
);
82 // ------------ Stupid implementation of qpms_scatsys_apply_symmetry() -------------
84 // The following functions are just to make qpms_scatsys_apply_symmetry more readable.
85 // They are not to be used elsewhere, as they do not perform any array capacity checks etc.
87 /// Compare two orbit types in a scattering system.
88 static bool orbit_types_equal(const qpms_ss_orbit_type_t
*a
, const qpms_ss_orbit_type_t
*b
) {
89 if (a
->size
!= b
->size
) return false;
90 if (memcmp(a
->action
, b
->action
, a
->size
*sizeof(qpms_ss_orbit_pi_t
))) return false;
91 if (memcmp(a
->tmatrices
, b
->tmatrices
, a
->size
*sizeof(qpms_ss_tmi_t
))) return false;
95 // Extend the action to all particles in orbit if only the action on the 0th
96 // particle has been filled.
97 static void extend_orbit_action(qpms_scatsys_t
*ss
, qpms_ss_orbit_type_t
*ot
) {
98 for(qpms_ss_orbit_pi_t src
= 1; src
< ot
->size
; ++src
) {
99 // find any element g that sends 0 to src:
101 for (g
= 0; g
< ss
->sym
->order
; ++g
)
102 if (ot
->action
[g
] == src
) break;
103 assert(g
< ss
->sym
->order
);
104 // invg sends src to 0
105 qpms_gmi_t invg
= qpms_finite_group_inv(ss
->sym
, g
);
106 for (qpms_gmi_t f
= 0; f
< ss
->sym
->order
; ++f
)
107 // if f sends 0 to dest, then f * invg sends src to dest
108 ot
->action
[src
* ss
->sym
->order
+
109 qpms_finite_group_mul(ss
->sym
,f
,invg
)] = ot
->action
[f
];
113 //Add orbit type to a scattering system, updating the ss->otspace_end pointer accordingly
114 static void add_orbit_type(qpms_scatsys_t
*ss
, const qpms_ss_orbit_type_t
*ot_current
) {
115 qpms_ss_orbit_type_t
* const ot_new
= & (ss
->orbit_types
[ss
->orbit_type_count
]);
116 ot_new
->size
= ot_current
->size
;
118 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_tmi(ss
, ot_current
->tmatrices
[0]);
119 const size_t bspecn
= bspec
->n
;
120 ot_new
->bspecn
= bspecn
;
122 const size_t actionsiz
= sizeof(ot_current
->action
[0]) * ot_current
->size
124 ot_new
->action
= (void *) (ss
->otspace_end
);
125 memcpy(ot_new
->action
, ot_current
->action
, actionsiz
);
126 // N.B. we copied mostly garbage ^^^, most of it is initialized just now:
127 extend_orbit_action(ss
, ot_new
);
128 #ifdef DUMP_ORBIT_ACTION
129 fprintf(stderr
, "Orbit action:\n");
130 for (qpms_gmi_t gmi
= 0; gmi
< ss
->sym
->order
; ++gmi
) {
131 const qpms_quat4d_t q
= qpms_quat_4d_from_2c(ss
->sym
->rep3d
[gmi
].rot
);
132 fprintf(stderr
, "%+d[%g %g %g %g] ", (int)ss
->sym
->rep3d
[gmi
].det
,
133 q
.c1
, q
.ci
, q
.cj
, q
.ck
);
134 fprintf(stderr
, "%s\t", (ss
->sym
->permrep
&& ss
->sym
->permrep
[gmi
])?
135 ss
->sym
->permrep
[gmi
] : "");
136 for (qpms_ss_orbit_pi_t pi
= 0; pi
< ot_new
->size
; ++pi
)
137 fprintf(stderr
, "%d\t", (int) ot_new
->action
[gmi
+ pi
*ss
->sym
->order
]);
138 fprintf(stderr
, "\n");
141 ss
->otspace_end
+= actionsiz
;
143 const size_t tmsiz
= sizeof(ot_current
->tmatrices
[0]) * ot_current
->size
;
144 ot_new
->tmatrices
= (void *) (ss
->otspace_end
);
145 memcpy(ot_new
->tmatrices
, ot_current
->tmatrices
, tmsiz
);
146 ss
->otspace_end
+= tmsiz
;
148 const size_t irbase_sizes_siz
= sizeof(ot_new
->irbase_sizes
[0]) * ss
->sym
->nirreps
;
149 ot_new
->irbase_sizes
= (void *) (ss
->otspace_end
);
150 ss
->otspace_end
+= irbase_sizes_siz
;
151 ot_new
->irbase_cumsizes
= (void *) (ss
->otspace_end
);
152 ss
->otspace_end
+= irbase_sizes_siz
;
153 ot_new
->irbase_offsets
= (void *) (ss
->otspace_end
);
154 ss
->otspace_end
+= irbase_sizes_siz
;
156 const size_t irbases_siz
= sizeof(ot_new
->irbases
[0]) * SQ(ot_new
->size
* bspecn
);
157 ot_new
->irbases
= (void *) (ss
->otspace_end
);
158 ss
->otspace_end
+= irbases_siz
;
160 size_t lastbs
, bs_cumsum
= 0;
161 for(qpms_iri_t iri
= 0; iri
< ss
->sym
->nirreps
; ++iri
) {
162 ot_new
->irbase_offsets
[iri
] = bs_cumsum
* bspecn
* ot_new
->size
;
163 qpms_orbit_irrep_basis(&lastbs
,
164 ot_new
->irbases
+ bs_cumsum
*ot_new
->size
*bspecn
,
165 ot_new
, bspec
, ss
->sym
, iri
);
166 ot_new
->irbase_sizes
[iri
] = lastbs
;
168 ot_new
->irbase_cumsizes
[iri
] = bs_cumsum
;
170 QPMS_ENSURE(bs_cumsum
== ot_new
->size
* bspecn
,
171 "The cumulative size of the symmetry-adapted bases is wrong; "
172 "expected %d = %d * %d, got %d.",
173 ot_new
->size
* bspecn
, ot_new
->size
, bspecn
, bs_cumsum
);
174 ot_new
->instance_count
= 0;
175 ss
->orbit_type_count
++;
178 // Standardises the lattice base vectors, and fills the other contents of ss->per[0].
179 // LPTODO split the functionality in smaller functions, these might be useful elsewhere.
180 static void process_lattice_bases(qpms_scatsys_t
*ss
, const qpms_tolerance_spec_t
*tol
) {
181 switch(ss
->lattice_dimension
) {
184 normsq
= cart3_normsq(ss
->per
.lattice_basis
[0]);
185 ss
->per
.unitcell_volume
= sqrt(normsq
);
186 ss
->per
.reciprocal_basis
[0] = cart3_divscale(ss
->per
.lattice_basis
[0], normsq
);
187 ss
->per
.eta
= 2. / M_2_SQRTPI
/ ss
->per
.unitcell_volume
;
190 // (I) Create orthonormal basis of the plane in which the basis vectors lie
192 double norm0
= cart3norm(ss
->per
.lattice_basis
[0]);
193 // 0: Just normalised 0. basis vector
194 onbasis
[0] = cart3_divscale(ss
->per
.lattice_basis
[0], norm0
);
195 // 1: Gram-Schmidt complement of the other basis vector
196 const double b0norm_dot_b1
= cart3_dot(onbasis
[0], ss
->per
.lattice_basis
[1]);
197 onbasis
[1] = cart3_substract(ss
->per
.lattice_basis
[1],
198 cart3_scale(b0norm_dot_b1
, onbasis
[0]));
199 onbasis
[1] = cart3_divscale(onbasis
[1], cart3norm(onbasis
[1]));
200 // (II) Express the lattice basis in terms of the new 2d plane vector basis onbasis
201 cart2_t b2d
[2] = {{.x
= norm0
, .y
= 0},
202 {.x
= b0norm_dot_b1
, .y
= cart3_dot(onbasis
[1], ss
->per
.lattice_basis
[1])}};
203 // (III) Reduce lattice basis and get reciprocal bases in the 2D plane
204 l2d_reduceBasis(b2d
[0], b2d
[1], &b2d
[0], &b2d
[1]);
205 ss
->per
.unitcell_volume
= l2d_unitcell_area(b2d
[0], b2d
[1]);
206 ss
->per
.eta
= 2. / M_2_SQRTPI
/ sqrt(ss
->per
.unitcell_volume
);
208 QPMS_ENSURE_SUCCESS(l2d_reciprocalBasis1(b2d
[0], b2d
[1], &r2d
[0], &r2d
[1]));
209 // (IV) Rotate everything back to the original 3D space.
210 for(int i
= 0; i
< 2; ++i
) {
211 ss
->per
.lattice_basis
[i
] = cart3_add(
212 cart3_scale(b2d
[i
].x
, onbasis
[0]),
213 cart3_scale(b2d
[i
].y
, onbasis
[1]));
214 ss
->per
.reciprocal_basis
[i
] = cart3_add(
215 cart3_scale(r2d
[i
].x
, onbasis
[0]),
216 cart3_scale(r2d
[i
].y
, onbasis
[1]));
220 l3d_reduceBasis(ss
->per
.lattice_basis
, ss
->per
.lattice_basis
);
221 ss
->per
.unitcell_volume
= fabs(l3d_reciprocalBasis1(ss
->per
.lattice_basis
,
222 ss
->per
.reciprocal_basis
));
223 ss
->per
.eta
= 2. / M_2_SQRTPI
/ cbrt(ss
->per
.unitcell_volume
);
224 // TODO check unitcell_volume for sanity w.r.t tolerance
232 // Almost 200 lines. This whole thing deserves a rewrite!
233 qpms_scatsys_at_omega_t
*qpms_scatsys_apply_symmetry(const qpms_scatsys_t
*orig
,
234 const qpms_finite_group_t
*sym
, complex double omega
,
235 const qpms_tolerance_spec_t
*tol
) {
237 QPMS_WARN("No symmetry group pointer provided, assuming trivial symmetry"
238 " (this is currently the only option for periodic systems).");
239 sym
= &QPMS_FINITE_GROUP_TRIVIAL
;
242 // TODO check data sanity
244 qpms_l_t lMax
= 0; // the overall lMax of all base specs.
245 qpms_normalisation_t normalisation
= QPMS_NORMALISATION_UNDEF
;
247 // First, determine the rough radius of the array; it should be nonzero
248 // in order to particle position equivalence work correctly
251 double minx
= +INFINITY
, miny
= +INFINITY
, minz
= +INFINITY
;
252 double maxx
= -INFINITY
, maxy
= -INFINITY
, maxz
= -INFINITY
;
253 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
) {
254 minx
= MIN(minx
,orig
->p
[i
].pos
.x
);
255 miny
= MIN(miny
,orig
->p
[i
].pos
.y
);
256 minz
= MIN(minz
,orig
->p
[i
].pos
.z
);
257 maxx
= MAX(maxx
,orig
->p
[i
].pos
.x
);
258 maxy
= MAX(maxy
,orig
->p
[i
].pos
.y
);
259 maxz
= MAX(maxz
,orig
->p
[i
].pos
.z
);
261 lenscale
= (fabs(maxx
)+fabs(maxy
)+fabs(maxz
)+(maxx
-minx
)+(maxy
-miny
)+(maxz
-minz
)) / 3;
264 // Second, check that there are no duplicit positions in the input system.
265 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
)
266 for (qpms_ss_pi_t j
= 0; j
< i
; ++j
)
267 assert(!cart3_isclose(orig
->p
[i
].pos
, orig
->p
[j
].pos
, 0, QPMS_SCATSYS_LEN_RTOL
* lenscale
));
270 // Allocate T-matrix, particle and particle orbit info arrays
272 QPMS_CRASHING_MALLOC(ss
, sizeof(*ss
));
273 ss
->lattice_dimension
= orig
->lattice_dimension
;
274 // TODO basic periodic lattices related stuff here.
275 if (ss
->lattice_dimension
) {
277 process_lattice_bases(ss
, tol
);
279 for(int i
= 0; i
< ss
->lattice_dimension
; ++i
) // extend lenscale by basis vectors
280 lenscale
= MAX(lenscale
, cart3norm(ss
->per
.lattice_basis
[i
]));
281 ss
->lenscale
= lenscale
;
283 ss
->medium
= orig
->medium
;
285 // Copy the qpms_tmatrix_fuction_t from orig
286 ss
->tmg_count
= orig
->tmg_count
;
287 QPMS_CRASHING_MALLOC(ss
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
288 memcpy(ss
->tmg
, orig
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
290 ss
->tm_capacity
= sym
->order
* orig
->tm_count
;
291 QPMS_CRASHING_MALLOC(ss
->tm
, ss
->tm_capacity
* sizeof(*(ss
->tm
)));
293 ss
->p_capacity
= sym
->order
* orig
->p_count
;
294 QPMS_CRASHING_MALLOC(ss
->p
, ss
->p_capacity
* sizeof(qpms_particle_tid_t
));
295 QPMS_CRASHING_MALLOC(ss
->p_orbitinfo
, ss
->p_capacity
* sizeof(qpms_ss_particle_orbitinfo_t
));
296 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_capacity
; ++pi
) {
297 ss
->p_orbitinfo
[pi
].t
= QPMS_SS_P_ORBITINFO_UNDEF
;
298 ss
->p_orbitinfo
[pi
].p
= QPMS_SS_P_ORBITINFO_UNDEF
;
301 // Evaluate the original T-matrices at omega
302 qpms_tmatrix_t
**tm_orig_omega
;
303 QPMS_CRASHING_MALLOC(tm_orig_omega
, orig
->tmg_count
* sizeof(*tm_orig_omega
));
304 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
305 tm_orig_omega
[i
] = qpms_tmatrix_init_from_function(orig
->tmg
[i
], omega
);
307 // Evaluate the medium and derived T-matrices at omega.
308 qpms_scatsys_at_omega_t
*ssw
;
309 QPMS_CRASHING_MALLOC(ssw
, sizeof(*ssw
)); // returned
312 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
313 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
314 // we will be using ss->tm_capacity also for ssw->tm
315 QPMS_CRASHING_MALLOC(ssw
->tm
, ss
->tm_capacity
* sizeof(*(ssw
->tm
))); // returned
317 // Evaluate T-matrices at omega; checking for duplicities
319 ss
->max_bspecn
= 0; // We'll need it later.for memory alloc estimates.
321 qpms_ss_tmi_t tm_dupl_remap
[ss
->tm_capacity
]; // Auxilliary array to label remapping the indices after ignoring t-matrix duplicities; VLA!
323 for (qpms_ss_tmi_t i
= 0; i
< orig
->tm_count
; ++i
) {
324 qpms_tmatrix_t
*ti
= qpms_tmatrix_apply_operation(&(orig
->tm
[i
].op
), tm_orig_omega
[orig
->tm
[i
].tmgi
]);
326 for (j
= 0; j
< ss
->tm_count
; ++j
)
327 if (qpms_tmatrix_isclose(ti
, ssw
->tm
[j
], tol
->rtol
, tol
->atol
)) {
330 if (j
== ss
->tm_count
) { // duplicity not found, save both the "abstract" and "at omega" T-matrices
331 qpms_tmatrix_operation_copy(&ss
->tm
[j
].op
, &orig
->tm
[i
].op
);
332 ss
->tm
[j
].tmgi
= orig
->tm
[i
].tmgi
; // T-matrix functions are preserved.
334 ss
->max_bspecn
= MAX(ssw
->tm
[j
]->spec
->n
, ss
->max_bspecn
);
335 lMax
= MAX(lMax
, ssw
->tm
[j
]->spec
->lMax
);
338 else qpms_tmatrix_free(ti
);
339 tm_dupl_remap
[i
] = j
;
340 if (normalisation
== QPMS_NORMALISATION_UNDEF
)
341 normalisation
= ssw
->tm
[i
]->spec
->norm
;
342 // We expect all bspec norms to be the same.
343 else QPMS_ENSURE(normalisation
== ssw
->tm
[j
]->spec
->norm
,
344 "Normalisation convention must be the same for all T-matrices."
345 " %d != %d\n", normalisation
, ssw
->tm
[j
]->spec
->norm
);
348 // Free the original T-matrices at omega
349 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
350 qpms_tmatrix_free(tm_orig_omega
[i
]);
353 // Copy particles, remapping the t-matrix indices
354 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++(i
)) {
355 ss
->p
[i
] = orig
->p
[i
];
356 ss
->p
[i
].tmatrix_id
= tm_dupl_remap
[ss
->p
[i
].tmatrix_id
];
358 ss
->p_count
= orig
->p_count
;
360 // allocate t-matrix symmetry map
361 QPMS_CRASHING_MALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* sym
->order
* ss
->tm_count
);
363 // Extend the T-matrices list by the symmetry operations
364 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
365 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
){
366 const size_t d
= ssw
->tm
[tmi
]->spec
->n
;
368 QPMS_CRASHING_MALLOC(m
, d
*d
*sizeof(complex double)); // ownership passed to ss->tm[ss->tm_count].op
369 qpms_irot3_uvswfi_dense(m
, ssw
->tm
[tmi
]->spec
, sym
->rep3d
[gmi
]);
370 qpms_tmatrix_t
*transformed
= qpms_tmatrix_apply_symop(ssw
->tm
[tmi
], m
);
372 for (tmj
= 0; tmj
< ss
->tm_count
; ++tmj
)
373 if (qpms_tmatrix_isclose(transformed
, ssw
->tm
[tmj
], tol
->rtol
, tol
->atol
))
375 if (tmj
< ss
->tm_count
) { // HIT, transformed T-matrix already exists
376 //TODO some "rounding error cleanup" symmetrisation could be performed here?
377 qpms_tmatrix_free(transformed
);
379 } else { // MISS, save the matrix (also the "abstract" one)
380 ssw
->tm
[ss
->tm_count
] = transformed
;
381 ss
->tm
[ss
->tm_count
].tmgi
= ss
->tm
[tmi
].tmgi
;
382 qpms_tmatrix_operation_compose_chain_init(&(ss
->tm
[ss
->tm_count
].op
), 2, 1); // FIXME MEMLEAK
383 struct qpms_tmatrix_operation_compose_chain
* const o
= &(ss
->tm
[ss
->tm_count
].op
.op
.compose_chain
);
384 o
->ops
[0] = & ss
->tm
[tmi
].op
; // Let's just borrow this
385 o
->ops_owned
[0] = false;
386 o
->opmem
[0].typ
= QPMS_TMATRIX_OPERATION_LRMATRIX
;
387 o
->opmem
[0].op
.lrmatrix
.m
= m
;
388 o
->opmem
[0].op
.lrmatrix
.owns_m
= true;
389 o
->opmem
[0].op
.lrmatrix
.m_size
= d
* d
;
390 o
->ops
[1] = o
->opmem
;
391 o
->ops_owned
[1] = true;
394 ss
->tm_sym_map
[gmi
+ tmi
* sym
->order
] = tmj
; // In any case, tmj now indexes the correct transformed matrix
396 // Possibly free some space using the new ss->tm_count instead of (old) ss->tm_count*sym->order
397 QPMS_CRASHING_REALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* ss
->tm_count
);
398 // tm could be realloc'd as well, but those are just pointers, not likely many.
401 // allocate particle symmetry map
402 QPMS_CRASHING_MALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* sym
->order
* ss
->p_count
);
403 // allocate orbit type array (TODO realloc later if too long)
404 ss
->orbit_type_count
= 0;
405 QPMS_CRASHING_CALLOC(ss
->orbit_types
, ss
->p_count
, sizeof(qpms_ss_orbit_type_t
));
407 QPMS_CRASHING_MALLOC(ss
->otspace
, // reallocate later
408 (sizeof(qpms_ss_orbit_pi_t
) * sym
->order
* sym
->order
409 + sizeof(qpms_ss_tmi_t
) * sym
->order
410 + 3*sizeof(size_t) * sym
->nirreps
411 + sizeof(complex double) * SQ(sym
->order
* ss
->max_bspecn
)) * ss
->p_count
413 ss
->otspace_end
= ss
->otspace
;
415 // Workspace for the orbit type determination
416 qpms_ss_orbit_type_t ot_current
;
417 qpms_ss_orbit_pi_t ot_current_action
[sym
->order
* sym
->order
];
418 qpms_ss_tmi_t ot_current_tmatrices
[sym
->order
];
420 qpms_ss_pi_t current_orbit
[sym
->order
];
421 ot_current
.action
= ot_current_action
;
422 ot_current
.tmatrices
= ot_current_tmatrices
;
425 // Extend the particle list by the symmetry operations, check that particles mapped by symmetry ops on themselves
426 // have the correct symmetry
427 // TODO this could be sped up to O(npart * log(npart)); let's see later whether needed.
428 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
429 const bool new_orbit
= (ss
->p_orbitinfo
[pi
].t
== QPMS_SS_P_ORBITINFO_UNDEF
); // TODO build the orbit!!!
431 current_orbit
[0] = pi
;
433 ot_current
.tmatrices
[0] = ss
->p
[pi
].tmatrix_id
;
434 #ifdef DUMP_PARTICLE_POSITIONS
435 cart3_t pos
= ss
->p
[pi
].pos
;
436 fprintf(stderr
, "An orbit [%.4g, %.4g, %.4g] => ", pos
.x
, pos
.y
, pos
.z
);
440 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
) {
441 cart3_t newpoint
= qpms_irot3_apply_cart3(sym
->rep3d
[gmi
], ss
->p
[pi
].pos
);
442 qpms_ss_tmi_t new_tmi
= ss
->tm_sym_map
[gmi
+ ss
->p
[pi
].tmatrix_id
* sym
->order
]; // transformed t-matrix index
444 for (pj
= 0; pj
< ss
->p_count
; ++pj
)
445 if (cart3_isclose(newpoint
, ss
->p
[pj
].pos
, 0, ss
->lenscale
* QPMS_SCATSYS_LEN_RTOL
)) {
446 if (new_tmi
!= ss
->p
[pj
].tmatrix_id
)
447 qpms_pr_error("The %d. particle with coords (%lg, %lg, %lg) "
448 "is mapped to %d. another (or itself) with cords (%lg, %lg, %lg) "
449 "without having the required symmetry", (int)pi
,
450 ss
->p
[pi
].pos
.x
, ss
->p
[pi
].pos
.y
, ss
->p
[pi
].pos
.z
,
451 (int)pj
, ss
->p
[pj
].pos
.x
, ss
->p
[pj
].pos
.y
, ss
->p
[pj
].pos
.z
);
454 if (pj
< ss
->p_count
) { // HIT, the particle is transformed to an "existing" one.
456 } else { // MISS, the symmetry transforms the particle to a new location, so add it.
457 qpms_particle_tid_t newparticle
= {newpoint
, new_tmi
};
458 ss
->p
[ss
->p_count
] = newparticle
;
460 #ifdef DUMP_PARTICLE_POSITIONS
462 fprintf(stderr
, "[%.4g, %.4g, %.4g] ", newpoint
.x
, newpoint
.y
, newpoint
.z
);
465 ss
->p_sym_map
[gmi
+ pi
* sym
->order
] = pj
;
468 // Now check whether the particle (result of the symmetry op) is already in the current orbit
469 qpms_ss_orbit_pi_t opj
;
470 for (opj
= 0; opj
< ot_current
.size
; ++opj
)
471 if (current_orbit
[opj
] == pj
) break; // HIT, pj already on current orbit
472 if (opj
== ot_current
.size
) { // MISS, pj is new on the orbit, extend the size and set the T-matrix id
473 current_orbit
[opj
] = pj
;
475 ot_current
.tmatrices
[opj
] = ss
->p
[pj
].tmatrix_id
;
477 ot_current
.action
[gmi
] = opj
;
480 if (new_orbit
) { // Now compare if the new orbit corresponds to some of the existing types.
481 #ifdef DUMP_PARTICLE_POSITIONS
485 for(oti
= 0; oti
< ss
->orbit_type_count
; ++oti
)
486 if (orbit_types_equal(&ot_current
, &(ss
->orbit_types
[oti
]))) break; // HIT, orbit type already exists
487 assert(0 == sym
->order
% ot_current
.size
);
488 if (oti
== ss
->orbit_type_count
) // MISS, add the new orbit type
489 add_orbit_type(ss
, &ot_current
);
491 // Walk through the orbit again and set up the orbit info of the particles
492 for (qpms_ss_orbit_pi_t opi
= 0; opi
< ot_current
.size
; ++opi
) {
493 const qpms_ss_pi_t pi_opi
= current_orbit
[opi
];
494 ss
->p_orbitinfo
[pi_opi
].t
= oti
;
495 ss
->p_orbitinfo
[pi_opi
].p
= opi
;
496 ss
->p_orbitinfo
[pi_opi
].osn
= ss
->orbit_types
[oti
].instance_count
;
498 ss
->orbit_types
[oti
].instance_count
++;
501 // Possibly free some space using the new ss->p_count instead of (old) ss->p_count*sym->order
502 QPMS_CRASHING_REALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* ss
->p_count
);
503 QPMS_CRASHING_REALLOC(ss
->p
, sizeof(qpms_particle_tid_t
) * ss
->p_count
);
504 QPMS_CRASHING_REALLOC(ss
->p_orbitinfo
, sizeof(qpms_ss_particle_orbitinfo_t
)*ss
->p_count
);
505 ss
->p_capacity
= ss
->p_count
;
507 { // Reallocate the orbit type data space and update the pointers if needed.
508 size_t otspace_sz
= ss
->otspace_end
- ss
->otspace
;
509 char *old_otspace
= ss
->otspace
;
510 QPMS_CRASHING_REALLOC(ss
->otspace
, otspace_sz
);
511 ptrdiff_t shift
= ss
->otspace
- old_otspace
;
513 for (size_t oi
= 0; oi
< ss
->orbit_type_count
; ++oi
) {
514 ss
->orbit_types
[oi
].action
= (void *)(((char *) (ss
->orbit_types
[oi
].action
)) + shift
);
515 ss
->orbit_types
[oi
].tmatrices
= (void *)(((char *) (ss
->orbit_types
[oi
].tmatrices
)) + shift
);
516 ss
->orbit_types
[oi
].irbase_sizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_sizes
)) + shift
);
517 ss
->orbit_types
[oi
].irbase_cumsizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_cumsizes
)) + shift
);
518 ss
->orbit_types
[oi
].irbase_offsets
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_offsets
)) + shift
);
519 ss
->orbit_types
[oi
].irbases
= (void *)(((char *) (ss
->orbit_types
[oi
].irbases
)) + shift
);
521 ss
->otspace_end
+= shift
;
525 // Set ss->fecv_size and ss->fecv_pstarts
527 QPMS_CRASHING_MALLOC(ss
->fecv_pstarts
, ss
->p_count
* sizeof(size_t));
528 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
529 ss
->fecv_pstarts
[pi
] = ss
->fecv_size
;
530 ss
->fecv_size
+= ssw
->tm
[ss
->p
[pi
].tmatrix_id
]->spec
->n
; // That's a lot of dereferencing!
533 QPMS_CRASHING_MALLOC(ss
->saecv_sizes
, sizeof(size_t) * sym
->nirreps
);
534 QPMS_CRASHING_MALLOC(ss
->saecv_ot_offsets
, sizeof(size_t) * sym
->nirreps
* ss
->orbit_type_count
);
535 for(qpms_iri_t iri
= 0; iri
< sym
->nirreps
; ++iri
) {
536 ss
->saecv_sizes
[iri
] = 0;
537 for(qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
538 ss
->saecv_ot_offsets
[iri
* ss
->orbit_type_count
+ oti
] = ss
->saecv_sizes
[iri
];
539 const qpms_ss_orbit_type_t
*ot
= &(ss
->orbit_types
[oti
]);
540 ss
->saecv_sizes
[iri
] += ot
->instance_count
* ot
->irbase_sizes
[iri
];
544 qpms_ss_pi_t p_ot_cumsum
= 0;
545 for (qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
546 qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
547 ot
->p_offset
= p_ot_cumsum
;
548 p_ot_cumsum
+= ot
->size
* ot
->instance_count
;
551 // Set ss->p_by_orbit[]
552 QPMS_CRASHING_MALLOC(ss
->p_by_orbit
, sizeof(qpms_ss_pi_t
) * ss
->p_count
);
553 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
554 const qpms_ss_particle_orbitinfo_t
*oi
= ss
->p_orbitinfo
+ pi
;
555 const qpms_ss_oti_t oti
= oi
->t
;
556 const qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
557 ss
->p_by_orbit
[ot
->p_offset
+ ot
->size
* oi
->osn
+ oi
->p
] = pi
;
560 ss
->c
= qpms_trans_calculator_init(lMax
, normalisation
);
566 void qpms_scatsys_free(qpms_scatsys_t
*ss
) {
569 for(qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
570 qpms_tmatrix_operation_clear(&ss
->tm
[tmi
].op
);
574 free(ss
->fecv_pstarts
);
575 free(ss
->tm_sym_map
);
578 free(ss
->p_orbitinfo
);
579 free(ss
->orbit_types
);
580 free(ss
->saecv_ot_offsets
);
581 free(ss
->saecv_sizes
);
582 free(ss
->p_by_orbit
);
583 qpms_trans_calculator_free(ss
->c
);
588 void qpms_scatsys_at_omega_refill(qpms_scatsys_at_omega_t
*ssw
,
589 complex double omega
) {
590 const qpms_scatsys_t
* const ss
= ssw
->ss
;
592 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
593 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
594 qpms_tmatrix_t
**tmatrices_preop
;
595 QPMS_CRASHING_CALLOC(tmatrices_preop
, ss
->tmg_count
, sizeof(*tmatrices_preop
));
596 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
597 tmatrices_preop
[tmgi
] = qpms_tmatrix_init_from_function(ss
->tmg
[tmgi
], omega
);
598 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
599 qpms_tmatrix_apply_operation_replace(ssw
->tm
[tmi
], &ss
->tm
[tmi
].op
,
600 tmatrices_preop
[ss
->tm
[tmi
].tmgi
]);
601 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
602 qpms_tmatrix_free(tmatrices_preop
[tmgi
]);
603 free(tmatrices_preop
);
606 qpms_scatsys_at_omega_t
*qpms_scatsys_at_omega(const qpms_scatsys_t
*ss
,
607 complex double omega
) {
609 qpms_scatsys_at_omega_t
*ssw
;
610 QPMS_CRASHING_MALLOC(ssw
, sizeof(*ssw
));
613 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
614 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
615 QPMS_CRASHING_CALLOC(ssw
->tm
, ss
->tm_count
, sizeof(*ssw
->tm
));
616 qpms_tmatrix_t
**tmatrices_preop
;
617 QPMS_CRASHING_CALLOC(tmatrices_preop
, ss
->tmg_count
, sizeof(*tmatrices_preop
));
618 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
619 tmatrices_preop
[tmgi
] = qpms_tmatrix_init_from_function(ss
->tmg
[tmgi
], omega
);
620 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
) {
621 ssw
->tm
[tmi
] = qpms_tmatrix_apply_operation(&ss
->tm
[tmi
].op
,
622 tmatrices_preop
[ss
->tm
[tmi
].tmgi
]); //<- main difference to .._refill()
623 QPMS_ENSURE(ssw
->tm
[tmi
],
624 "Got NULL pointer from qpms_tmatrix_apply_operation");
626 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
627 qpms_tmatrix_free(tmatrices_preop
[tmgi
]);
628 free(tmatrices_preop
);
632 void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t
*ssw
) {
635 for(qpms_ss_tmi_t i
= 0; i
< ssw
->ss
->tm_count
; ++i
)
636 qpms_tmatrix_free(ssw
->tm
[i
]);
642 // (copypasta from symmetries.c)
643 // TODO at some point, maybe support also other norms.
644 // (perhaps just use qpms_normalisation_t_factor() at the right places)
645 static inline void check_norm_compat(const qpms_vswf_set_spec_t
*s
)
647 switch (s
->norm
& QPMS_NORMALISATION_NORM_BITS
) {
648 case QPMS_NORMALISATION_NORM_POWER
:
650 case QPMS_NORMALISATION_NORM_SPHARM
:
653 QPMS_WTF
; // Only SPHARM and POWER norms are supported right now.
657 complex double *qpms_orbit_action_matrix(complex double *target
,
658 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
659 const qpms_finite_group_t
*sym
, const qpms_gmi_t g
) {
660 assert(sym
); assert(g
< sym
->order
);
662 assert(ot
); assert(ot
->size
> 0);
663 // check_norm_compat(bspec); not needed here, the qpms_irot3_uvswfi_dense should complain if necessary
664 const size_t n
= bspec
->n
;
665 const qpms_gmi_t N
= ot
->size
;
667 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
668 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
669 complex double tmp
[n
][n
]; // this is the 'per-particle' action
670 qpms_irot3_uvswfi_dense(tmp
[0], bspec
, sym
->rep3d
[g
]);
671 for(qpms_ss_orbit_pi_t Col
= 0; Col
< ot
->size
; ++Col
) {
672 // Row is the 'destination' of the symmetry operation, Col is the 'source'
673 const qpms_ss_orbit_pi_t Row
= ot
->action
[sym
->order
* Col
+ g
];
674 for(size_t row
= 0; row
< bspec
->n
; ++row
)
675 for(size_t col
= 0; col
< bspec
->n
; ++col
)
676 target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
] = conj(tmp
[row
][col
]); //CHECKCONJ
678 #ifdef DUMP_ACTIONMATRIX
679 fprintf(stderr
,"%d: %s\n",
680 (int) g
, (sym
->permrep
&& sym
->permrep
[g
])?
681 sym
->permrep
[g
] : "");
682 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
683 fprintf(stderr
, "--------------------------\n");
684 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
685 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
686 fprintf(stderr
, "| ");
687 for (size_t col
= 0; col
< bspec
->n
; ++col
)
688 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
690 fprintf(stderr
, "|\n");
693 fprintf(stderr
, "-------------------------------\n\n");
699 complex double *qpms_orbit_irrep_projector_matrix(complex double *target
,
700 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
701 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
704 assert(ot
); assert(ot
->size
> 0);
705 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
706 // check_norm_compat(bspec); // probably not needed here, let the called functions complain if necessary, but CHEKME
707 const size_t n
= bspec
->n
;
708 const qpms_gmi_t N
= ot
->size
;
710 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
711 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
712 // Workspace for the orbit group action matrices
713 complex double *tmp
= malloc(n
*n
*N
*N
*sizeof(complex double));
714 const int d
= sym
->irreps
[iri
].dim
;
715 double prefac
= d
/ (double) sym
->order
;
716 for(int partner
= 0; partner
< d
; ++partner
) {
717 for(qpms_gmi_t g
= 0; g
< sym
->order
; ++g
) {
718 // We use the diagonal elements of D_g
719 complex double D_g_conj
= sym
->irreps
[iri
].m
[g
*d
*d
+ partner
*d
+ partner
];
720 #ifdef DUMP_ACTIONMATRIX
721 fprintf(stderr
,"(factor %+g%+gj) ", creal(D_g_conj
), cimag(D_g_conj
));
723 qpms_orbit_action_matrix(tmp
, ot
, bspec
, sym
, g
);
725 for(size_t i
= 0; i
< n
*n
*N
*N
; ++i
)
726 target
[i
] += prefac
* D_g_conj
* tmp
[i
];
730 #ifdef DUMP_PROJECTORMATRIX
731 fprintf(stderr
,"Projector %d (%s):\n", (int) iri
,
732 sym
->irreps
[iri
].name
?sym
->irreps
[iri
].name
:"");
733 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
734 fprintf(stderr
, "--------------------------\n");
735 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
736 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
737 fprintf(stderr
, "| ");
738 for (size_t col
= 0; col
< bspec
->n
; ++col
)
739 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
741 fprintf(stderr
, "|\n");
744 fprintf(stderr
, "-------------------------------\n\n");
749 #define SVD_ATOL 1e-8
751 complex double *qpms_orbit_irrep_basis(size_t *basis_size
,
752 complex double *target
,
753 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
754 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
757 assert(ot
); assert(ot
->size
> 0);
758 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
759 check_norm_compat(bspec
); // Here I'm not sure; CHECKME
760 const size_t n
= bspec
->n
;
761 const qpms_gmi_t N
= ot
->size
;
762 const bool newtarget
= (target
== NULL
);
764 QPMS_CRASHING_MALLOC(target
,n
*n
*N
*N
*sizeof(complex double));
765 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
767 // Get the projector (also workspace for right sg. vect.)
768 complex double *projector
= qpms_orbit_irrep_projector_matrix(NULL
,
769 ot
, bspec
, sym
, iri
);
770 QPMS_ENSURE(projector
!= NULL
, "got NULL from qpms_orbit_irrep_projector_matrix()");
771 // Workspace for the right singular vectors.
772 complex double *V_H
; QPMS_CRASHING_MALLOC(V_H
, n
*n
*N
*N
*sizeof(complex double));
773 // THIS SHOULD NOT BE NECESSARY
774 complex double *U
; QPMS_CRASHING_MALLOC(U
, n
*n
*N
*N
*sizeof(complex double));
775 double *s
; QPMS_CRASHING_MALLOC(s
, n
*N
*sizeof(double));
777 QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR
,
778 'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
779 n
*N
/* m */, n
*N
/* n */, projector
/* a */, n
*N
/* lda */,
780 s
/* s */, U
/* u */, n
*N
/* ldu, irrelev. */, V_H
/* vt */,
784 for(bs
= 0; bs
< n
*N
; ++bs
) {
785 QPMS_ENSURE(s
[bs
] <= 1 + SVD_ATOL
, "%zd. SV too large: %.16lf", bs
, s
[bs
]);
786 QPMS_ENSURE(!(s
[bs
] > SVD_ATOL
&& fabs(1-s
[bs
]) > SVD_ATOL
),
787 "%zd. SV in the 'wrong' interval: %.16lf", bs
, s
[bs
]);
788 if(s
[bs
] < SVD_ATOL
) break;
791 memcpy(target
, V_H
, bs
*N
*n
*sizeof(complex double));
792 if(newtarget
) QPMS_CRASHING_REALLOC(target
, bs
*N
*n
*sizeof(complex double));
793 if(basis_size
) *basis_size
= bs
;
802 complex double *qpms_scatsys_irrep_transform_matrix(complex double *U
,
803 const qpms_scatsys_t
*ss
, qpms_iri_t iri
) {
804 const size_t packedlen
= ss
->saecv_sizes
[iri
];
805 const size_t full_len
= ss
->fecv_size
;
807 QPMS_CRASHING_MALLOC(U
,full_len
* packedlen
* sizeof(complex double));
808 memset(U
, 0, full_len
* packedlen
* sizeof(complex double));
810 size_t fullvec_offset
= 0;
812 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
813 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
814 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
815 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
816 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
817 // This is where the particle's orbit starts in the "packed" vector:
818 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
819 + osn
* ot
->irbase_sizes
[iri
];
820 // Orbit coeff vector's full size:
821 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
822 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
823 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
824 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
826 for (size_t prow
= 0; prow
< orbit_packedsize
; ++prow
)
827 for (size_t pcol
= 0; pcol
< ot
->bspecn
; ++pcol
)
828 U
[full_len
* (packed_orbit_offset
+ prow
) + (fullvec_offset
+ pcol
)]
829 = om
[orbit_fullsize
* prow
+ (opi
* ot
->bspecn
+ pcol
)];
830 fullvec_offset
+= ot
->bspecn
;
836 complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed
,
837 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
839 const size_t packedlen
= ss
->saecv_sizes
[iri
];
840 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
841 return target_packed
;
842 const size_t full_len
= ss
->fecv_size
;
843 if (target_packed
== NULL
)
844 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
845 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
847 // Workspace for the intermediate matrix
849 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
851 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
853 const complex double one
= 1, zero
= 0;
857 CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
858 full_len
/*M*/, packedlen
/*N*/, full_len
/*K*/,
859 &one
/*alpha*/, orig_full
/*A*/, full_len
/*ldA*/,
860 U
/*B*/, full_len
/*ldB*/,
861 &zero
/*beta*/, tmp
/*C*/, packedlen
/*LDC*/);
863 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
864 packedlen
/*M*/, packedlen
/*N*/, full_len
/*K*/,
865 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
866 tmp
/*B*/, packedlen
/*ldB*/, &zero
/*beta*/,
867 target_packed
/*C*/, packedlen
/*ldC*/);
871 return target_packed
;
874 /// Transforms a big "packed" matrix into the full basis (trivial implementation).
875 complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full
,
876 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
877 qpms_iri_t iri
, bool add
){
878 const size_t packedlen
= ss
->saecv_sizes
[iri
];
879 const size_t full_len
= ss
->fecv_size
;
880 if (target_full
== NULL
)
881 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
882 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
884 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
886 // Workspace for the intermediate matrix result
888 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
890 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
892 const complex double one
= 1, zero
= 0;
896 CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
897 packedlen
/*M*/, full_len
/*N*/, packedlen
/*K*/,
898 &one
/*alpha*/, orig_packed
/*A*/, packedlen
/*ldA*/,
899 U
/*B*/, full_len
/*ldB*/,
900 &zero
/*beta*/, tmp
/*C*/, full_len
/*LDC*/);
902 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
903 full_len
/*M*/, full_len
/*N*/, packedlen
/*K*/,
904 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
905 tmp
/*B*/, full_len
/*ldB*/, &one
/*beta*/,
906 target_full
/*C*/, full_len
/*ldC*/);
912 complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed
,
913 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
915 const size_t packedlen
= ss
->saecv_sizes
[iri
];
916 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
917 return target_packed
;
918 const size_t full_len
= ss
->fecv_size
;
919 if (target_packed
== NULL
)
920 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
921 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
923 // Workspace for the intermediate particle-orbit matrix result
925 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
927 const complex double one
= 1, zero
= 0;
929 size_t fullvec_offsetR
= 0;
930 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
931 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
932 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
933 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
934 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
935 // This is where the particle's orbit starts in the "packed" vector:
936 const size_t packed_orbit_offsetR
=
937 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
938 + osnR
* otR
->irbase_sizes
[iri
];
939 // Orbit coeff vector's full size:
940 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
941 const size_t particle_fullsizeR
= otR
->bspecn
;
942 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
943 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
944 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
946 size_t fullvec_offsetC
= 0;
947 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
948 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
949 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
950 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
951 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
952 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
953 // This is where the particle's orbit starts in the "packed" vector:
954 const size_t packed_orbit_offsetC
=
955 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
956 + osnC
* otC
->irbase_sizes
[iri
];
957 // Orbit coeff vector's full size:
958 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
959 const size_t particle_fullsizeC
= otC
->bspecn
;
960 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
961 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
962 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
964 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
965 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
966 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
967 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
968 &one
/*alpha*/, orig_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*A*/,
970 omC
+ opiC
*particle_fullsizeC
/*B*/,
971 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
972 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
974 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
975 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
976 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
977 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
978 orbit_fullsizeR
/*ldA*/,
979 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
980 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
983 fullvec_offsetC
+= otC
->bspecn
;
986 fullvec_offsetR
+= otR
->bspecn
;
989 return target_packed
;
993 /// Transforms a big "packed" matrix into the full basis.
995 complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full
,
996 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
997 qpms_iri_t iri
, bool add
){
998 const size_t packedlen
= ss
->saecv_sizes
[iri
];
999 const size_t full_len
= ss
->fecv_size
;
1000 if (target_full
== NULL
)
1001 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
1002 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
1004 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
1006 // Workspace for the intermediate particle-orbit matrix result
1007 complex double *tmp
;
1008 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1010 const complex double one
= 1, zero
= 0;
1012 size_t fullvec_offsetR
= 0;
1013 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
1014 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
1015 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1016 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
1017 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
1018 // This is where the particle's orbit starts in the "packed" vector:
1019 const size_t packed_orbit_offsetR
=
1020 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1021 + osnR
* otR
->irbase_sizes
[iri
];
1022 // Orbit coeff vector's full size:
1023 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1024 const size_t particle_fullsizeR
= otR
->bspecn
;
1025 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1026 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1027 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1029 size_t fullvec_offsetC
= 0;
1030 if (orbit_packedsizeR
) // avoid crash on empty irrep
1031 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1032 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1033 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1034 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1035 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1036 // This is where the particle's orbit starts in the "packed" vector:
1037 const size_t packed_orbit_offsetC
=
1038 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1039 + osnC
* otC
->irbase_sizes
[iri
];
1040 // Orbit coeff vector's full size:
1041 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1042 const size_t particle_fullsizeC
= otC
->bspecn
;
1043 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1044 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1045 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1046 if (orbit_packedsizeC
) { // avoid crash on empty irrep
1048 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC]
1049 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1050 orbit_packedsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeC
/*K*/,
1051 &one
/*alpha*/, orig_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*A*/,
1053 omC
+ opiC
*particle_fullsizeC
/*B*/,
1054 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1055 tmp
/*C*/, particle_fullsizeC
/*LDC*/);
1057 // target[oiR|piR,oiC|piC] += U*[...] tmp[...]
1058 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
1059 particle_fullsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeR
/*K*/,
1060 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
1061 orbit_fullsizeR
/*ldA*/,
1062 tmp
/*B*/, particle_fullsizeC
/*ldB*/, &one
/*beta*/,
1063 target_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*C*/,
1066 fullvec_offsetC
+= otC
->bspecn
;
1068 fullvec_offsetR
+= otR
->bspecn
;
1075 /// Projects a "big" vector onto an irrep.
1077 complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed
,
1078 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
1079 const qpms_iri_t iri
) {
1080 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1081 if (!packedlen
) return target_packed
; // Empty irrep
1082 if (target_packed
== NULL
)
1083 QPMS_CRASHING_MALLOC(target_packed
, packedlen
*sizeof(complex double));
1084 memset(target_packed
, 0, packedlen
*sizeof(complex double));
1086 const complex double one
= 1;
1088 size_t fullvec_offset
= 0;
1089 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1090 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
1091 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
1092 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
1093 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
1094 // This is where the particle's orbit starts in the "packed" vector:
1095 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
1096 + osn
* ot
->irbase_sizes
[iri
];
1097 // Orbit coeff vector's full size:
1098 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
1099 const size_t particle_fullsize
= ot
->bspecn
;
1100 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
1101 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1102 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
1103 if (orbit_packedsize
) // avoid crash on empty irrep
1104 cblas_zgemv(CblasRowMajor
/*order*/, CblasNoTrans
/*transA*/,
1105 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/,
1106 om
+ opi
*particle_fullsize
/*A*/, orbit_fullsize
/*lda*/,
1107 orig_full
+fullvec_offset
/*X*/, 1/*incX*/,
1108 &one
/*beta*/, target_packed
+packed_orbit_offset
/*Y*/, 1/*incY*/);
1109 fullvec_offset
+= ot
->bspecn
;
1111 return target_packed
;
1114 /// Transforms a big "packed" vector into the full basis.
1116 complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full
,
1117 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
1118 const qpms_iri_t iri
, bool add
) {
1119 const size_t full_len
= ss
->fecv_size
;
1120 if (target_full
== NULL
)
1121 QPMS_CRASHING_MALLOC(target_full
, full_len
*sizeof(complex double));
1122 if (!add
) memset(target_full
, 0, full_len
*sizeof(complex double));
1124 const complex double one
= 1;
1125 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1126 if (!packedlen
) return target_full
; // Completely empty irrep
1128 size_t fullvec_offset
= 0;
1129 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1130 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
1131 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
1132 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
1133 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
1134 // This is where the particle's orbit starts in the "packed" vector:
1135 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
1136 + osn
* ot
->irbase_sizes
[iri
];
1137 // Orbit coeff vector's full size:
1138 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
1139 const size_t particle_fullsize
= ot
->bspecn
;
1140 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
1141 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1142 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
1144 if (orbit_packedsize
) // empty irrep, avoid zgemv crashing.
1145 cblas_zgemv(CblasRowMajor
/*order*/, CblasConjTrans
/*transA*/,
1146 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/, om
+ opi
*particle_fullsize
/*A*/,
1147 orbit_fullsize
/*lda*/, orig_packed
+packed_orbit_offset
/*X*/, 1/*incX*/, &one
/*beta*/,
1148 target_full
+fullvec_offset
/*Y*/, 1/*incY*/);
1150 fullvec_offset
+= ot
->bspecn
;
1155 complex double *qpms_scatsys_build_translation_matrix_full(
1156 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1157 complex double *target
,
1158 const qpms_scatsys_t
*ss
,
1159 complex double k
///< Wave number to use in the translation matrix.
1162 return qpms_scatsys_build_translation_matrix_e_full(
1163 target
, ss
, k
, QPMS_HANKEL_PLUS
);
1166 complex double *qpms_scatsyswk_build_translation_matrix_full(
1167 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1168 complex double *target
,
1169 const qpms_scatsys_at_omega_k_t
*sswk
1172 const qpms_scatsys_at_omega_t
*ssw
= sswk
->ssw
;
1173 const complex double wavenumber
= ssw
->wavenumber
;
1174 const qpms_scatsys_t
*ss
= ssw
->ss
;
1175 qpms_ss_ensure_periodic(ss
);
1176 const cart3_t k_cart3
= cart3_from_double_array(sswk
->k
);
1177 return qpms_scatsys_periodic_build_translation_matrix_full(target
, ss
, wavenumber
, &k_cart3
, sswk
->eta
);
1180 complex double *qpms_scatsys_build_translation_matrix_e_full(
1181 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1182 complex double *target
,
1183 const qpms_scatsys_t
*ss
,
1184 complex double k
, ///< Wave number to use in the translation matrix.
1185 qpms_bessel_t J
///< Bessel function type.
1188 qpms_ss_ensure_nonperiodic(ss
);
1189 const size_t full_len
= ss
->fecv_size
;
1191 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1192 memset(target
, 0, SQ(full_len
) * sizeof(complex double)); //unnecessary?
1193 { // Non-diagonal part; M[piR, piC] = T[piR] S(piR<-piC)
1194 size_t fullvec_offsetR
= 0;
1195 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1196 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, piR
);
1197 const cart3_t posR
= ss
->p
[piR
].pos
;
1198 size_t fullvec_offsetC
= 0;
1199 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1200 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1201 if(piC
!= piR
) { // The diagonal will be dealt with later.
1202 const cart3_t posC
= ss
->p
[piC
].pos
;
1203 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1204 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
,
1205 bspecR
, full_len
, bspecC
, 1,
1208 fullvec_offsetC
+= bspecC
->n
;
1210 assert(fullvec_offsetC
== full_len
);
1211 fullvec_offsetR
+= bspecR
->n
;
1213 assert(fullvec_offsetR
== full_len
);
1219 static inline int qpms_ss_ppair_W32xy(const qpms_scatsys_t
*ss
,
1220 qpms_ss_pi_t pdest
, qpms_ss_pi_t psrc
, complex double wavenumber
, const cart2_t kvector
,
1221 complex double *target
, const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
1222 qpms_ewald_part parts
, double eta
) {
1223 const qpms_vswf_set_spec_t
*srcspec
= qpms_ss_bspec_pi(ss
, psrc
);
1224 const qpms_vswf_set_spec_t
*destspec
= qpms_ss_bspec_pi(ss
, pdest
);
1226 // This might be a bit arbitrary, roughly "copied" from Unitcell constructor. TODO review.
1227 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
1228 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
1230 return qpms_trans_calculator_get_trans_array_e32_e(ss
->c
,
1231 target
, NULL
/*err*/, destspec
, deststride
, srcspec
, srcstride
,
1233 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
1235 cart3_substract(ss
->p
[pdest
].pos
, ss
->p
[psrc
].pos
),
1239 static inline int qpms_ss_ppair_W(const qpms_scatsys_t
*ss
,
1240 qpms_ss_pi_t pdest
, qpms_ss_pi_t psrc
, complex double wavenumber
, const double wavevector
[],
1241 complex double *target
, const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
1242 qpms_ewald_part parts
, double eta
) {
1243 if(ss
->lattice_dimension
== 2 && // Currently, we can only the xy-plane
1244 !ss
->per
.lattice_basis
[0].z
&& !ss
->per
.lattice_basis
[1].z
&&
1246 return qpms_ss_ppair_W32xy(ss
, pdest
, psrc
, wavenumber
, cart2_from_double_array(wavevector
),
1247 target
, deststride
, srcstride
, parts
, eta
);
1249 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1252 complex double *qpms_scatsys_periodic_build_translation_matrix_full(
1253 complex double *target
, const qpms_scatsys_t
*ss
,
1254 complex double wavenumber
, const cart3_t
*wavevector
, double eta
) {
1256 qpms_ss_ensure_periodic(ss
);
1257 if (eta
== 0 || isnan(eta
)) {
1259 cart3_to_double_array(tmp
, *wavevector
);
1260 eta
= qpms_ss_adjusted_eta(ss
, wavenumber
, tmp
);
1263 const size_t full_len
= ss
->fecv_size
;
1265 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1266 const ptrdiff_t deststride
= ss
->fecv_size
, srcstride
= 1;
1267 // We have some limitations in the current implementation
1268 if(ss
->lattice_dimension
== 2 && // Currently, we can only the xy-plane
1269 !ss
->per
.lattice_basis
[0].z
&& !ss
->per
.lattice_basis
[1].z
&&
1271 for (qpms_ss_pi_t pd
= 0; pd
< ss
->p_count
; ++pd
)
1272 for (qpms_ss_pi_t ps
= 0; ps
< ss
->p_count
; ++ps
) {
1273 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W32xy(ss
, pd
, ps
, wavenumber
, cart3xy2cart2(*wavevector
),
1274 target
+ deststride
* ss
->fecv_pstarts
[pd
] + srcstride
* ss
->fecv_pstarts
[ps
],
1275 deststride
, srcstride
, QPMS_EWALD_FULL
, eta
));
1278 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1282 // Common implementation of qpms_scatsysw[k]_build_modeproblem_matrix_full
1283 static inline complex double *qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1284 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1285 complex double *target
,
1286 const qpms_scatsys_at_omega_t
*ssw
,
1287 const double k
[], // NULL if non-periodic
1288 const double eta
// ignored if non-periodic
1291 const complex double wavenumber
= ssw
->wavenumber
;
1292 const qpms_scatsys_t
*ss
= ssw
->ss
;
1293 const size_t full_len
= ss
->fecv_size
;
1295 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1296 complex double *tmp
; // translation matrix, S or W
1297 QPMS_CRASHING_MALLOC(tmp
, SQ(ss
->max_bspecn
) * sizeof(complex double));
1298 memset(target
, 0, SQ(full_len
) * sizeof(complex double));
1299 const complex double zero
= 0, minusone
= -1;
1300 { // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
1301 size_t fullvec_offsetR
= 0;
1302 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1303 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1304 const cart3_t posR
= ss
->p
[piR
].pos
;
1305 size_t fullvec_offsetC
= 0;
1306 // dest particle T-matrix
1307 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1308 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1309 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1310 if (k
== NULL
) { // non-periodic case
1311 if(piC
!= piR
) { // No "self-interaction" in non-periodic case
1312 const cart3_t posC
= ss
->p
[piC
].pos
;
1313 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1314 tmp
, // tmp is S(piR<-piC)
1315 bspecR
, bspecC
->n
, bspecC
, 1,
1316 wavenumber
, posR
, posC
, QPMS_HANKEL_PLUS
));
1318 } else { // periodic case
1319 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W(ss
, piR
, piC
, wavenumber
, k
,
1320 tmp
/*target*/, bspecC
->n
/*deststride*/, 1 /*srcstride*/,
1321 QPMS_EWALD_FULL
, eta
));
1324 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1325 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1326 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1327 tmp
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1328 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
/*c*/,
1331 fullvec_offsetC
+= bspecC
->n
;
1333 fullvec_offsetR
+= bspecR
->n
;
1337 // Add the identity, diagonal part M[pi,pi] += 1
1338 for (size_t i
= 0; i
< full_len
; ++i
) target
[full_len
* i
+ i
] += 1;
1344 complex double *qpms_scatsysw_build_modeproblem_matrix_full(
1345 complex double *target
, const qpms_scatsys_at_omega_t
*ssw
) {
1346 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_build_modeproblem_matrix_full()");
1347 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1348 target
, ssw
, NULL
, NAN
);
1351 complex double *qpms_scatsyswk_build_modeproblem_matrix_full(
1352 complex double *target
, const qpms_scatsys_at_omega_k_t
*sswk
)
1354 qpms_ss_ensure_periodic_a(sswk
->ssw
->ss
, "qpms_scatsysw_build_modeproblem_matrix_full()");
1355 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(target
, sswk
->ssw
, sswk
->k
, sswk
->eta
);
1359 // Serial reference implementation.
1360 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_serial(
1361 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1362 complex double *target_packed
,
1363 const qpms_scatsys_at_omega_t
*ssw
,
1367 const qpms_scatsys_t
*ss
= ssw
->ss
;
1368 qpms_ss_ensure_nonperiodic(ss
);
1369 const complex double k
= ssw
->wavenumber
;
1370 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1371 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1372 return target_packed
;
1373 if (target_packed
== NULL
)
1374 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1375 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1377 // some of the following workspaces are probably redundant; TODO optimize later.
1379 // workspaces for the uncompressed particle<-particle tranlation matrix block
1380 // and the result of multiplying with a T-matrix (times -1)
1381 complex double *Sblock
, *TSblock
;
1382 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1383 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1385 // Workspace for the intermediate particle-orbit matrix result
1386 complex double *tmp
;
1387 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1389 const complex double one
= 1, zero
= 0, minusone
= -1;
1391 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
1392 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
1393 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1394 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
1395 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
1396 // This is where the particle's orbit starts in the "packed" vector:
1397 const size_t packed_orbit_offsetR
=
1398 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1399 + osnR
* otR
->irbase_sizes
[iri
];
1400 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1401 // Orbit coeff vector's full size:
1402 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1403 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1404 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1405 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1406 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1407 const cart3_t posR
= ss
->p
[piR
].pos
;
1408 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1409 // dest particle T-matrix
1410 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1411 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1412 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1413 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1414 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1415 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1416 // This is where the particle's orbit starts in the "packed" vector:
1417 const size_t packed_orbit_offsetC
=
1418 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1419 + osnC
* otC
->irbase_sizes
[iri
];
1420 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1421 // Orbit coeff vector's full size:
1422 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1423 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1424 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1425 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1426 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1428 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1429 if(piC
!= piR
) { // non-diagonal, calculate TS
1430 const cart3_t posC
= ss
->p
[piC
].pos
;
1431 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1432 Sblock
, // Sblock is S(piR->piC)
1433 bspecR
, bspecC
->n
, bspecC
, 1,
1434 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1436 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1437 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1438 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1439 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1440 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1441 } else { // diagonal, fill with diagonal +1
1442 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1443 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1444 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1447 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1448 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1449 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1450 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1451 omC
+ opiC
*particle_fullsizeC
/*B*/,
1452 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1453 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1455 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1456 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1457 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1458 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1459 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1460 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1469 return target_packed
;
1472 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
1473 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1474 complex double *target_packed
,
1475 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1478 const qpms_scatsys_t
*ss
= ssw
->ss
;
1479 qpms_ss_ensure_nonperiodic(ss
);
1480 const complex double k
= ssw
->wavenumber
;
1481 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1482 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1483 return target_packed
;
1484 if (target_packed
== NULL
)
1485 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1486 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1488 // some of the following workspaces are probably redundant; TODO optimize later.
1490 // workspaces for the uncompressed particle<-particle tranlation matrix block
1491 // and the result of multiplying with a T-matrix (times -1)
1492 complex double *Sblock
, *TSblock
;
1493 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1494 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1496 // Workspace for the intermediate particle-orbit matrix result
1497 complex double *tmp
;
1498 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1500 const complex double one
= 1, zero
= 0, minusone
= -1;
1502 for(qpms_ss_pi_t opistartR
= 0; opistartR
< ss
->p_count
;
1503 opistartR
+= ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
//orbit_p_countR; might write a while() instead
1505 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1506 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1507 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1508 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1509 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1510 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1512 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1513 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1514 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1515 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1516 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1517 // Orbit coeff vector's full size:
1518 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1519 // This is where the orbit starts in the "packed" vector:
1520 const size_t packed_orbit_offsetR
=
1521 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1522 + osnR
* otR
->irbase_sizes
[iri
];
1523 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1524 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1525 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1526 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1527 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1528 const cart3_t posR
= ss
->p
[piR
].pos
;
1529 // dest particle T-matrix
1530 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1531 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1532 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1533 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1534 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1535 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1536 // This is where the particle's orbit starts in the "packed" vector:
1537 const size_t packed_orbit_offsetC
=
1538 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1539 + osnC
* otC
->irbase_sizes
[iri
];
1540 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1541 // Orbit coeff vector's full size:
1542 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1543 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1544 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1545 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1546 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1548 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1549 if(piC
!= piR
) { // non-diagonal, calculate TS
1550 const cart3_t posC
= ss
->p
[piC
].pos
;
1551 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1552 Sblock
, // Sblock is S(piR->piC)
1553 bspecR
, bspecC
->n
, bspecC
, 1,
1554 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1556 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1557 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1558 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1559 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1560 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1561 } else { // diagonal, fill with diagonal +1
1562 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1563 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1564 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1567 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1568 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1569 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1570 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1571 omC
+ opiC
*particle_fullsizeC
/*B*/,
1572 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1573 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1575 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1576 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1577 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1578 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1579 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1580 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1590 return target_packed
;
1593 struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
{
1594 const qpms_scatsys_at_omega_t
*ssw
;
1595 qpms_ss_pi_t
*opistartR_ptr
;
1596 pthread_mutex_t
*opistartR_mutex
;
1598 complex double *target_packed
;
1601 static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg
)
1603 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1605 const qpms_scatsys_at_omega_t
*ssw
= a
->ssw
;
1606 const complex double k
= ssw
->wavenumber
;
1607 const qpms_scatsys_t
*ss
= ssw
->ss
;
1608 const qpms_iri_t iri
= a
->iri
;
1609 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1611 // some of the following workspaces are probably redundant; TODO optimize later.
1613 // workspaces for the uncompressed particle<-particle tranlation matrix block
1614 // and the result of multiplying with a T-matrix (times -1)
1615 complex double *Sblock
, *TSblock
;
1616 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1617 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1619 // Workspace for the intermediate particle-orbit matrix result
1620 complex double *tmp
;
1621 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1623 const complex double one
= 1, zero
= 0, minusone
= -1;
1626 // In the beginning, pick a target (row) orbit for this thread
1627 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1628 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1629 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1632 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1633 // Now increment it for another thread:
1634 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1635 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1637 // Orbit picked (defined by opistartR), do the work.
1638 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1639 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1640 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1641 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1642 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1643 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1645 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1646 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1647 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1648 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1649 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1650 // Orbit coeff vector's full size:
1651 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1652 // This is where the orbit starts in the "packed" vector:
1653 const size_t packed_orbit_offsetR
=
1654 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1655 + osnR
* otR
->irbase_sizes
[iri
];
1656 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1657 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1658 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1659 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1660 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1661 const cart3_t posR
= ss
->p
[piR
].pos
;
1662 // dest particle T-matrix
1663 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1664 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1665 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1666 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1667 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1668 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1669 // This is where the particle's orbit starts in the "packed" vector:
1670 const size_t packed_orbit_offsetC
=
1671 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1672 + osnC
* otC
->irbase_sizes
[iri
];
1673 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1674 // Orbit coeff vector's full size:
1675 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1676 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1677 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1678 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1679 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1681 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1682 if(piC
!= piR
) { // non-diagonal, calculate TS
1683 const cart3_t posC
= ss
->p
[piC
].pos
;
1684 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1685 Sblock
, // Sblock is S(piR->piC)
1686 bspecR
, bspecC
->n
, bspecC
, 1,
1687 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1689 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1690 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1691 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1692 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1693 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1694 } else { // diagonal, fill with diagonal +1
1695 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1696 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1697 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1700 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1701 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1702 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1703 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1704 omC
+ opiC
*particle_fullsizeC
/*B*/,
1705 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1706 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1708 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1709 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1710 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1711 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1712 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1713 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1729 // this differs from the ...build_modeproblem_matrix... only by the `J`
1730 // maybe I should use this one there as well to save lines... TODO
1731 struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
{
1732 const qpms_scatsys_t
*ss
;
1733 qpms_ss_pi_t
*opistartR_ptr
;
1734 pthread_mutex_t
*opistartR_mutex
;
1736 complex double *target_packed
;
1741 static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread(void *arg
)
1743 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1745 const qpms_scatsys_t
*ss
= a
->ss
;
1746 const qpms_iri_t iri
= a
->iri
;
1747 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1748 const qpms_bessel_t J
= a
->J
;
1750 // some of the following workspaces are probably redundant; TODO optimize later.
1752 // workspace for the uncompressed particle<-particle tranlation matrix block
1753 complex double *Sblock
;
1754 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1756 // Workspace for the intermediate particle-orbit matrix result
1757 complex double *tmp
;
1758 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1760 const complex double one
= 1, zero
= 0;
1763 // In the beginning, pick a target (row) orbit for this thread
1764 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1765 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1766 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1769 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1770 // Now increment it for another thread:
1771 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1772 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1774 // Orbit picked (defined by opistartR), do the work.
1775 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1776 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1777 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1778 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1779 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1780 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1782 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1783 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1784 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, orbitstartpiR
);
1785 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1786 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1787 // Orbit coeff vector's full size:
1788 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1789 // This is where the orbit starts in the "packed" vector:
1790 const size_t packed_orbit_offsetR
=
1791 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1792 + osnR
* otR
->irbase_sizes
[iri
];
1793 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1794 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1795 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1796 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1797 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1798 const cart3_t posR
= ss
->p
[piR
].pos
;
1799 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1800 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1801 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1802 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1803 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1804 // This is where the particle's orbit starts in the "packed" vector:
1805 const size_t packed_orbit_offsetC
=
1806 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1807 + osnC
* otC
->irbase_sizes
[iri
];
1808 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1809 // Orbit coeff vector's full size:
1810 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1811 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1812 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1813 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1814 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1816 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1817 // THIS IS THE MAIN PART DIFFERENT FROM ...modeproblem...() TODO unify
1818 // somehow to save lines
1819 if(piC
!= piR
) { // non-diagonal, calculate S
1820 const cart3_t posC
= ss
->p
[piC
].pos
;
1821 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1822 Sblock
, // Sblock is S(piR->piC)
1823 bspecR
, bspecC
->n
, bspecC
, 1,
1824 a
->k
, posR
, posC
, J
));
1825 } else { // diagonal, fill with zeros; TODO does this make sense?
1826 // would unit matrix be better? or unit only for QPMS_BESSEL_REGULAR?
1827 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1828 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1829 Sblock
[row
* bspecC
->n
+ col
] = 0; //(row == col)? 1 : 0;
1832 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1833 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1834 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1835 &one
/*alpha*/, Sblock
/*A*/, particle_fullsizeC
/*ldA*/,
1836 omC
+ opiC
*particle_fullsizeC
/*B*/,
1837 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1838 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1840 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1841 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1842 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1843 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1844 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1845 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1860 // Almost the same as ...build_modeproblem_matrix_...parallelR
1861 // --> TODO write this in a more generic way to save LoC.
1862 complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
1863 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1864 complex double *target_packed
,
1865 const qpms_scatsys_t
*ss
,
1867 const complex double k
,
1871 qpms_ss_ensure_nonperiodic(ss
);
1873 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1874 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1875 return target_packed
;
1876 if (target_packed
== NULL
)
1877 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1878 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1880 qpms_ss_pi_t opistartR
= 0;
1881 pthread_mutex_t opistartR_mutex
;
1882 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1883 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1884 arg
= {ss
, &opistartR
, &opistartR_mutex
, iri
, target_packed
, k
, J
};
1886 // FIXME THIS IS NOT PORTABLE:
1888 if (qpms_scatsystem_nthreads_override
> 0) {
1889 nthreads
= qpms_scatsystem_nthreads_override
;
1890 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1893 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1895 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1896 nthreads
, qpms_scatsystem_nthreads_default
);
1897 nthreads
= qpms_scatsystem_nthreads_default
;
1899 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1902 pthread_t thread_ids
[nthreads
];
1903 for(long thi
= 0; thi
< nthreads
; ++thi
)
1904 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1905 qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread
,
1907 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1909 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1912 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1913 return target_packed
;
1917 // Parallel implementation, now default
1918 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
1919 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1920 complex double *target_packed
,
1921 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1924 qpms_ss_ensure_nonperiodic(ssw
->ss
);
1925 const size_t packedlen
= ssw
->ss
->saecv_sizes
[iri
];
1926 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1927 return target_packed
;
1928 if (target_packed
== NULL
)
1929 QPMS_CRASHING_MALLOC(target_packed
,SQ(packedlen
)*sizeof(complex double));
1930 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1932 qpms_ss_pi_t opistartR
= 0;
1933 pthread_mutex_t opistartR_mutex
;
1934 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1935 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1936 arg
= {ssw
, &opistartR
, &opistartR_mutex
, iri
, target_packed
};
1938 // FIXME THIS IS NOT PORTABLE:
1940 if (qpms_scatsystem_nthreads_override
> 0) {
1941 nthreads
= qpms_scatsystem_nthreads_override
;
1942 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1945 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1947 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1948 nthreads
, qpms_scatsystem_nthreads_default
);
1949 nthreads
= qpms_scatsystem_nthreads_default
;
1951 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1954 pthread_t thread_ids
[nthreads
];
1955 for(long thi
= 0; thi
< nthreads
; ++thi
)
1956 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1957 qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread
,
1959 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1961 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1964 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1965 return target_packed
;
1969 complex double *qpms_scatsys_incident_field_vector_full(
1970 complex double *target_full
, const qpms_scatsys_t
*ss
,
1971 qpms_incfield_t f
, const void *args
, bool add
) {
1973 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
1974 sizeof(complex double));
1975 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1976 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
1977 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
1978 const cart3_t pos
= ss
->p
[pi
].pos
;
1979 QPMS_ENSURE_SUCCESS(f(ptarget
, bspec
, pos
, args
, add
));
1986 complex double *qpms_scatsys_incident_field_vector_irrep_packed(
1987 complex double *target_full
, const qpms_scatsys_t
*ss
,
1988 const qpms_iri_t iri
, qpms_incfield_t f
,
1989 const void *args
, bool add
) {
1995 complex double *qpms_scatsysw_apply_Tmatrices_full(
1996 complex double *target_full
, const complex double *inc_full
,
1997 const qpms_scatsys_at_omega_t
*ssw
) {
1999 const qpms_scatsys_t
*ss
= ssw
->ss
;
2000 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
2001 sizeof(complex double));
2002 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2003 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
2004 const complex double *psrc
= inc_full
+ ss
->fecv_pstarts
[pi
];
2005 // TODO check whether T-matrix is non-virtual after virtual t-matrices are implemented.
2006 const qpms_tmatrix_t
*T
= ssw
->tm
[ss
->p
[pi
].tmatrix_id
];
2007 qpms_apply_tmatrix(ptarget
, psrc
, T
);
2012 ccart3_t
qpms_scatsys_scattered_E(const qpms_scatsys_t
*ss
,
2014 const complex double k
,
2015 const complex double *cvf
,
2019 ccart3_t res
= {0,0,0};
2020 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2022 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2023 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2024 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2025 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2027 const csph_t kr
= sph_cscale(k
, cart2sph(
2028 cart3_substract(where
, particle_pos
)));
2029 const csphvec_t E_sph
= qpms_eval_uvswf(bspec
, particle_cv
, kr
, btyp
);
2030 const ccart3_t E_cart
= csphvec2ccart_csph(E_sph
, kr
);
2031 ckahanadd(&(res
.x
), &(res_kc
.x
), E_cart
.x
);
2032 ckahanadd(&(res
.y
), &(res_kc
.y
), E_cart
.y
);
2033 ckahanadd(&(res
.z
), &(res_kc
.z
), E_cart
.z
);
2038 ccart3_t
qpms_scatsysw_scattered_E(const qpms_scatsys_at_omega_t
*ssw
,
2040 const complex double *cvf
, const cart3_t where
) {
2041 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_scattered_E()");
2042 return qpms_scatsys_scattered_E(ssw
->ss
, btyp
, ssw
->wavenumber
,
2046 // Alternative implementation, using translation operator and regular dipole waves at zero
2047 ccart3_t
qpms_scatsys_scattered_E__alt(const qpms_scatsys_t
*ss
,
2049 const complex double k
,
2050 const complex double *cvf
,
2054 qpms_ss_ensure_nonperiodic(ss
);
2055 ccart3_t res
= {0,0,0};
2056 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2058 static const int dipspecn
= 3; // We have three basis vectors
2059 // bspec containing only electric dipoles
2060 const qpms_vswf_set_spec_t dipspec
= {
2062 .ilist
= (qpms_uvswfi_t
[]){
2063 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, -1, 1),
2064 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, 0, 1),
2065 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, +1, 1),
2067 .lMax
=1, .lMax_M
=0, .lMax_N
=1, .lMax_L
=-1,
2069 .norm
= ss
->c
->normalisation
,
2072 ccart3_t regdipoles_0
[dipspecn
]; {
2073 const sph_t origin_sph
= {.r
= 0, .theta
= M_PI_2
, .phi
=0}; // Should work with any theta/phi (TESTWORTHY)
2074 csphvec_t regdipoles_0_sph
[dipspecn
];
2075 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph
, &dipspec
,
2076 sph2csph(origin_sph
), QPMS_BESSEL_REGULAR
));
2077 for(int i
= 0; i
< dipspecn
; ++i
)
2078 regdipoles_0
[i
] = csphvec2ccart(regdipoles_0_sph
[i
], origin_sph
);
2081 complex double *s
; // Translation matrix
2082 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2084 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2085 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2086 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2087 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2089 const cart3_t origin_cart
= {0, 0, 0};
2091 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(
2092 ss
->c
, s
, &dipspec
, 1, bspec
, dipspecn
, k
, particle_pos
, where
, btyp
));
2094 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2095 for(size_t j
= 0; j
< dipspecn
; ++j
){
2096 ccart3_t summand
= ccart3_scale(particle_cv
[i
] * s
[dipspecn
*i
+j
], regdipoles_0
[j
]);
2097 ckahanadd(&(res
.x
), &(res_kc
.x
), summand
.x
);
2098 ckahanadd(&(res
.y
), &(res_kc
.y
), summand
.y
);
2099 ckahanadd(&(res
.z
), &(res_kc
.z
), summand
.z
);
2106 ccart3_t
qpms_scatsysw_scattered_E__alt(const qpms_scatsys_at_omega_t
*ssw
,
2107 qpms_bessel_t btyp
, const complex double *cvf
, const cart3_t where
) {
2108 return qpms_scatsys_scattered_E__alt(ssw
->ss
, btyp
, ssw
->wavenumber
,
2112 // For periodic lattices, we use directly the "alternative" implementation,
2113 // using translation operator and regular dipole waves at zero
2114 ccart3_t
qpms_scatsyswk_scattered_E(const qpms_scatsys_at_omega_k_t
*sswk
,
2116 const complex double *cvf
,
2120 if (btyp
!= QPMS_HANKEL_PLUS
)
2121 QPMS_NOT_IMPLEMENTED("Only scattered field with first kind Hankel functions currently implemented.");
2122 const qpms_scatsys_t
*ss
= sswk
->ssw
->ss
;
2123 if (ss
->lattice_dimension
!= 2)
2124 QPMS_NOT_IMPLEMENTED("Only 2D-periodic lattices implemented");
2125 ccart3_t res
= {0,0,0};
2126 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2128 static const int dipspecn
= 3; // We have three basis vectors
2129 // bspec containing only electric dipoles
2130 const qpms_vswf_set_spec_t dipspec
= {
2132 .ilist
= (qpms_uvswfi_t
[]){
2133 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, -1, 1),
2134 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, 0, 1),
2135 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, +1, 1),
2137 .lMax
=1, .lMax_M
=0, .lMax_N
=1, .lMax_L
=-1,
2139 .norm
= ss
->c
->normalisation
,
2142 ccart3_t regdipoles_0
[dipspecn
]; {
2143 const sph_t origin_sph
= {.r
= 0, .theta
= M_PI_2
, .phi
=0}; // Should work with any theta/phi (TESTWORTHY)
2144 csphvec_t regdipoles_0_sph
[dipspecn
];
2145 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph
, &dipspec
,
2146 sph2csph(origin_sph
), QPMS_BESSEL_REGULAR
));
2147 for(int i
= 0; i
< dipspecn
; ++i
)
2148 regdipoles_0
[i
] = csphvec2ccart(regdipoles_0_sph
[i
], origin_sph
);
2151 complex double *s
; // Translation matrix
2152 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2154 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2155 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2156 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2157 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2159 const cart3_t origin_cart
= {0, 0, 0};
2161 QPMS_ASSERT(sswk
->k
[2] == 0); // At least not implemented now
2162 QPMS_ASSERT(ss
->per
.lattice_basis
[0].z
== 0);
2163 QPMS_ASSERT(ss
->per
.lattice_basis
[1].z
== 0);
2165 // Same choices as in qpms_ss_ppair_W32xy; TODO make it more dynamic
2166 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
2167 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
2169 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_e32(
2171 &dipspec
, 1, bspec
, dipspecn
,
2172 sswk
->eta
, sswk
->ssw
->wavenumber
,
2173 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
2174 cart2_from_double_array(sswk
->k
), cart3_substract(where
, particle_pos
) /*CHECKSIGN*/,
2177 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2178 for(size_t j
= 0; j
< dipspecn
; ++j
){
2179 ccart3_t summand
= ccart3_scale(particle_cv
[i
] * s
[dipspecn
*i
+j
], regdipoles_0
[j
]);
2180 ckahanadd(&(res
.x
), &(res_kc
.x
), summand
.x
);
2181 ckahanadd(&(res
.y
), &(res_kc
.y
), summand
.y
);
2182 ckahanadd(&(res
.z
), &(res_kc
.z
), summand
.z
);
2189 qpms_errno_t
qpms_scatsyswk_scattered_field_basis(
2191 const qpms_scatsys_at_omega_k_t
*sswk
,
2192 const qpms_bessel_t btyp
,
2196 if (btyp
!= QPMS_HANKEL_PLUS
)
2197 QPMS_NOT_IMPLEMENTED("Only scattered field with first kind Hankel functions currently implemented.");
2198 const qpms_scatsys_t
*ss
= sswk
->ssw
->ss
;
2199 if (ss
->lattice_dimension
!= 2)
2200 QPMS_NOT_IMPLEMENTED("Only 2D-periodic lattices implemented");
2201 //ccart3_t res = {0,0,0};
2202 //ccart3_t res_kc = {0,0,0}; // kahan sum compensation
2204 static const int dipspecn
= 3; // We have three basis vectors
2205 // bspec containing only electric dipoles
2206 const qpms_vswf_set_spec_t dipspec
= {
2208 .ilist
= (qpms_uvswfi_t
[]){
2209 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, -1, 1),
2210 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, 0, 1),
2211 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, +1, 1),
2213 .lMax
=1, .lMax_M
=0, .lMax_N
=1, .lMax_L
=-1,
2215 .norm
= ss
->c
->normalisation
,
2218 ccart3_t regdipoles_0
[dipspecn
]; {
2219 const sph_t origin_sph
= {.r
= 0, .theta
= M_PI_2
, .phi
=0}; // Should work with any theta/phi (TESTWORTHY)
2220 csphvec_t regdipoles_0_sph
[dipspecn
];
2221 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph
, &dipspec
,
2222 sph2csph(origin_sph
), QPMS_BESSEL_REGULAR
));
2223 for(int i
= 0; i
< dipspecn
; ++i
)
2224 regdipoles_0
[i
] = csphvec2ccart(regdipoles_0_sph
[i
], origin_sph
);
2227 complex double *s
; // Translation matrix
2228 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2230 memset(target
, 0, ss
->fecv_size
* sizeof(*target
));
2232 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2233 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2234 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2235 //const complex double *particle_cv = cvf + ss->fecv_pstarts[pi];
2237 const cart3_t origin_cart
= {0, 0, 0};
2239 QPMS_ASSERT(sswk
->k
[2] == 0); // At least not implemented now
2240 QPMS_ASSERT(ss
->per
.lattice_basis
[0].z
== 0);
2241 QPMS_ASSERT(ss
->per
.lattice_basis
[1].z
== 0);
2243 // Same choices as in qpms_ss_ppair_W32xy; TODO make it more dynamic
2244 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
2245 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
2247 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_e32(
2249 &dipspec
, 1, bspec
, dipspecn
,
2250 sswk
->eta
, sswk
->ssw
->wavenumber
,
2251 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
2252 cart2_from_double_array(sswk
->k
), cart3_substract(where
, particle_pos
) /*CHECKSIGN*/,
2255 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2256 for(size_t j
= 0; j
< dipspecn
; ++j
){
2257 target
[ss
->fecv_pstarts
[pi
] + i
] = ccart3_add(target
[ss
->fecv_pstarts
[pi
] + i
],
2258 ccart3_scale(s
[dipspecn
*i
+j
], regdipoles_0
[j
]));
2259 //ccart3_t summand = ccart3_scale(particle_cv[i] * s[dipspecn*i+j], regdipoles_0[j]);
2260 //ckahanadd(&(res.x), &(res_kc.x), summand.x);
2261 //ckahanadd(&(res.y), &(res_kc.y), summand.y);
2262 //ckahanadd(&(res.z), &(res_kc.z), summand.z);
2266 return QPMS_SUCCESS
;
2270 ccart3_t
qpms_scatsys_scattered_E_irrep(const qpms_scatsys_t
*ss
,
2271 qpms_iri_t iri
, const complex double *cvr
, cart3_t where
) {
2276 void qpms_ss_LU_free(qpms_ss_LU lu
) {
2281 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_full_factorise(complex double *mpmatrix_full
,
2282 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
, const qpms_scatsys_at_omega_k_t
*sswk
) {
2284 QPMS_ASSERT(sswk
->ssw
== ssw
|| !ssw
);
2286 QPMS_ASSERT(ssw
->ss
->lattice_dimension
> 0);
2288 QPMS_ASSERT(ssw
->ss
->lattice_dimension
== 0);
2290 const qpms_scatsys_t
*ss
= ssw
->ss
;
2291 QPMS_ENSURE(mpmatrix_full
, "A non-NULL pointer to the pre-calculated mode matrix is required");
2292 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, ss
->fecv_size
* sizeof(int));
2293 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, ss
->fecv_size
, ss
->fecv_size
,
2294 mpmatrix_full
, ss
->fecv_size
, target_piv
));
2296 lu
.a
= mpmatrix_full
;
2297 lu
.ipiv
= target_piv
;
2305 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(complex double *mpmatrix_packed
,
2306 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
) {
2307 QPMS_ENSURE(mpmatrix_packed
, "A non-NULL pointer to the pre-calculated mode matrix is required");
2308 qpms_ss_ensure_nonperiodic(ssw
->ss
);
2309 size_t n
= ssw
->ss
->saecv_sizes
[iri
];
2310 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, n
* sizeof(int));
2311 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, n
, n
,
2312 mpmatrix_packed
, n
, target_piv
));
2314 lu
.a
= mpmatrix_packed
;
2315 lu
.ipiv
= target_piv
;
2322 qpms_ss_LU
qpms_scatsysw_build_modeproblem_matrix_full_LU(
2323 complex double *target
, int *target_piv
,
2324 const qpms_scatsys_at_omega_t
*ssw
){
2325 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_build_modeproblem_matrix_full_LU()");
2326 target
= qpms_scatsysw_build_modeproblem_matrix_full(target
, ssw
);
2327 return qpms_scatsysw_modeproblem_matrix_full_factorise(target
, target_piv
, ssw
, NULL
);
2330 qpms_ss_LU
qpms_scatsyswk_build_modeproblem_matrix_full_LU(
2331 complex double *target
, int *target_piv
,
2332 const qpms_scatsys_at_omega_k_t
*sswk
){
2333 target
= qpms_scatsyswk_build_modeproblem_matrix_full(target
, sswk
);
2334 return qpms_scatsysw_modeproblem_matrix_full_factorise(target
, target_piv
, sswk
->ssw
, sswk
);
2337 qpms_ss_LU
qpms_scatsysw_build_modeproblem_matrix_irrep_packed_LU(
2338 complex double *target
, int *target_piv
,
2339 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
){
2340 target
= qpms_scatsysw_build_modeproblem_matrix_irrep_packed(target
, ssw
, iri
);
2341 return qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(target
, target_piv
, ssw
, iri
);
2344 complex double *qpms_scatsys_scatter_solve(
2345 complex double *f
, const complex double *a_inc
, qpms_ss_LU lu
) {
2346 const size_t n
= lu
.full
? lu
.ssw
->ss
->fecv_size
: lu
.ssw
->ss
->saecv_sizes
[lu
.iri
];
2347 if (!f
) QPMS_CRASHING_MALLOC(f
, n
* sizeof(complex double));
2348 memcpy(f
, a_inc
, n
*sizeof(complex double)); // It will be rewritten by zgetrs
2349 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR
, 'N' /*trans*/, n
/*n*/, 1 /*nrhs number of right hand sides*/,
2350 lu
.a
/*a*/, n
/*lda*/, lu
.ipiv
/*ipiv*/, f
/*b*/, 1 /*ldb; CHECKME*/));
2354 struct qpms_scatsys_finite_eval_Beyn_ImTS_param
{
2355 const qpms_scatsys_t
*ss
;
2359 /// Wrapper for Beyn algorithm (non-periodic system)
2360 static int qpms_scatsys_finite_eval_Beyn_ImTS(complex double *target
,
2361 size_t m
, complex double omega
, void *params
) {
2362 const struct qpms_scatsys_finite_eval_Beyn_ImTS_param
*p
= params
;
2363 qpms_scatsys_at_omega_t
*ssw
= qpms_scatsys_at_omega(p
->ss
, omega
);
2364 QPMS_ENSURE(ssw
!= NULL
, "qpms_scatsys_at_omega() returned NULL");
2365 if (p
->iri
== QPMS_NO_IRREP
) {
2366 QPMS_ASSERT(m
== p
->ss
->fecv_size
);
2367 QPMS_ENSURE(NULL
!= qpms_scatsysw_build_modeproblem_matrix_full(
2369 "qpms_scatsysw_build_modeproblem_matrix_full() returned NULL");
2371 QPMS_ASSERT(m
== p
->ss
->saecv_sizes
[p
->iri
]);
2372 QPMS_ENSURE(NULL
!= qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
2373 target
, ssw
, p
->iri
),
2374 "qpms_scatsysw_build_modeproblem_matrix_irrep_packed() returned NULL");
2376 qpms_scatsys_at_omega_free(ssw
);
2377 return QPMS_SUCCESS
;
2380 beyn_result_t
*qpms_scatsys_finite_find_eigenmodes(
2381 const qpms_scatsys_t
* const ss
, const qpms_iri_t iri
,
2382 complex double omega_centre
, double omega_rr
, double omega_ri
,
2383 size_t contour_npoints
,
2384 double rank_tol
, size_t rank_sel_min
, double res_tol
) {
2385 qpms_ss_ensure_nonperiodic_a(ss
, "qpms_scatsys_periodic_find_eigenmodes()");
2386 size_t n
; // matrix dimension
2387 if (qpms_iri_is_valid(ss
->sym
, iri
)) {
2388 n
= ss
->saecv_sizes
[iri
];
2389 } else if (iri
== QPMS_NO_IRREP
) {
2393 beyn_contour_t
*contour
= beyn_contour_ellipse(omega_centre
,
2394 omega_rr
, omega_ri
, contour_npoints
);
2396 struct qpms_scatsys_finite_eval_Beyn_ImTS_param p
= {ss
, iri
};
2397 beyn_result_t
*result
= beyn_solve(n
, n
/* possibly make smaller? */,
2398 qpms_scatsys_finite_eval_Beyn_ImTS
, NULL
, (void *) &p
,
2399 contour
, rank_tol
, rank_sel_min
, res_tol
);
2400 QPMS_ENSURE(result
!= NULL
, "beyn_solve() returned NULL");
2406 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param
{
2407 const qpms_scatsys_t
*ss
;
2408 const double *k
; ///< Wavevector in cartesian coordinates.
2411 /// Wrapper for Beyn algorithm (periodic system)
2412 static int qpms_scatsys_periodic_eval_Beyn_ImTW(complex double *target
,
2413 size_t m
, complex double omega
, void *params
){
2414 const struct qpms_scatsys_periodic_eval_Beyn_ImTW_param
*p
= params
;
2415 qpms_scatsys_at_omega_t
*ssw
= qpms_scatsys_at_omega(p
->ss
, omega
);
2416 QPMS_ENSURE(ssw
!= NULL
, "qpms_scatsys_at_omega() returned NULL");
2417 qpms_scatsys_at_omega_k_t sswk
= {
2419 .k
= {p
->k
[0], p
->k
[1], p
->k
[2]},
2420 .eta
= qpms_ss_adjusted_eta(p
->ss
, ssw
->wavenumber
, p
->k
)
2422 QPMS_ASSERT(m
== p
->ss
->fecv_size
);
2424 qpms_scatsyswk_build_modeproblem_matrix_full(target
, &sswk
),
2425 "qpms_scatsyswk_build_modeproblem_matrix_full() returned NULL");
2426 qpms_scatsys_at_omega_free(ssw
);
2427 return QPMS_SUCCESS
;
2430 beyn_result_t
*qpms_scatsys_periodic_find_eigenmodes(
2431 const qpms_scatsys_t
* const ss
, const double k
[3],
2432 complex double omega_centre
, double omega_rr
, double omega_ri
,
2433 size_t contour_npoints
,
2434 double rank_tol
, size_t rank_sel_min
, double res_tol
) {
2435 qpms_ss_ensure_periodic_a(ss
, "qpms_scatsys_finite_find_eigenmodes()");
2436 size_t n
= ss
->fecv_size
; // matrix dimension
2438 beyn_contour_t
*contour
= beyn_contour_ellipse(omega_centre
,
2439 omega_rr
, omega_ri
, contour_npoints
);
2441 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param p
= {ss
, k
};
2442 beyn_result_t
*result
= beyn_solve(n
, n
,
2443 qpms_scatsys_periodic_eval_Beyn_ImTW
, NULL
, (void *) &p
,
2444 contour
, rank_tol
, rank_sel_min
, res_tol
);
2445 QPMS_ENSURE(result
!= NULL
, "beyn_solve() returned NULL");