3 #define lapack_complex_double complex double
4 #define lapack_complex_double_real(z) (creal(z))
5 #define lapack_complex_double_imag(z) (cimag(z))
9 #include "scatsystem.h"
13 #include "symmetries.h"
17 #include "quaternions.h"
19 #include "qpms_error.h"
20 #include "translations.h"
21 #include "tmatrices.h"
24 #include "tolerances.h"
26 #include "tiny_inlines.h"
28 #ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
30 #define SERIAL_ZGEMM qpms_zgemm
32 #define SERIAL_ZGEMM cblas_zgemm
35 #define QPMS_SCATSYS_LEN_RTOL 1e-13
36 #define QPMS_SCATSYS_TMATRIX_ATOL 1e-12
37 #define QPMS_SCATSYS_TMATRIX_RTOL 1e-12
39 // This is used in adjustment of Ewald parameter to avoid high frequency breakdown.
40 // Very roughly, the value of 16 should lead to allowing terms containing incomplete Gamma
41 // functions with magnitudes around exp(16) == 8.9e6
42 static const double QPMS_SCATSYS_EWALD_MAX_EXPONENT
= 16.;
44 long qpms_scatsystem_nthreads_default
= 4;
45 long qpms_scatsystem_nthreads_override
= 0;
47 void qpms_scatsystem_set_nthreads(long n
) {
48 qpms_scatsystem_nthreads_override
= n
;
51 static inline void qpms_ss_ensure_periodic(const qpms_scatsys_t
*ss
) {
52 QPMS_ENSURE(ss
->lattice_dimension
> 0, "This method is applicable only to periodic systems.");
55 static inline void qpms_ss_ensure_periodic_a(const qpms_scatsys_t
*ss
, const char *s
) {
56 QPMS_ENSURE(ss
->lattice_dimension
> 0, "This method is applicable only to periodic systems. Use %s instead.", s
);
59 static inline void qpms_ss_ensure_nonperiodic(const qpms_scatsys_t
*ss
) {
60 QPMS_ENSURE(ss
->lattice_dimension
== 0, "This method is applicable only to nonperiodic systems.");
63 static inline void qpms_ss_ensure_nonperiodic_a(const qpms_scatsys_t
*ss
, const char *s
) {
64 QPMS_ENSURE(ss
->lattice_dimension
== 0, "This method is applicable only to nonperiodic systems. Use %s instead.", s
);
67 // Adjust Ewald parameter to avoid high-frequency breakdown
68 double qpms_ss_adjusted_eta(const qpms_scatsys_t
*ss
, complex double wavenumber
, const double k
[3]) {
69 qpms_ss_ensure_periodic(ss
);
70 const double eta_default
= ss
->per
.eta
;
71 // FIXME here we silently assume that k lies in the first Brillioun zone, we should ensure that.
72 const double k2
= k
[0]*k
[0] + k
[1]*k
[1] + k
[2] * k
[2];
73 const double kappa2
= SQ(cabs(wavenumber
)); // maybe creal would be enough
74 if(kappa2
< k2
) // This should happen only for pretty low frequencies
76 const qpms_l_t maxj
= ss
->c
->lMax
; // Based on ewald.c:301, note that VSWF (c's) lMax is already half of corresponding translation matrix Ewald factors' (c->e3c's) lMax
77 const double mina
= 0.5 * (ss
->lattice_dimension
- 1) - maxj
; // minimum incomplete Gamma first argument, based on ewald.c:301; CHECKME whether this is fine also for 3D lattice
78 const double eta_min
= sqrt(fabs((kappa2
- k2
) * (mina
- 1.) / QPMS_SCATSYS_EWALD_MAX_EXPONENT
));
79 return MAX(eta_default
, eta_min
);
82 // ------------ Stupid implementation of qpms_scatsys_apply_symmetry() -------------
84 // The following functions are just to make qpms_scatsys_apply_symmetry more readable.
85 // They are not to be used elsewhere, as they do not perform any array capacity checks etc.
87 /// Compare two orbit types in a scattering system.
88 static bool orbit_types_equal(const qpms_ss_orbit_type_t
*a
, const qpms_ss_orbit_type_t
*b
) {
89 if (a
->size
!= b
->size
) return false;
90 if (memcmp(a
->action
, b
->action
, a
->size
*sizeof(qpms_ss_orbit_pi_t
))) return false;
91 if (memcmp(a
->tmatrices
, b
->tmatrices
, a
->size
*sizeof(qpms_ss_tmi_t
))) return false;
95 // Extend the action to all particles in orbit if only the action on the 0th
96 // particle has been filled.
97 static void extend_orbit_action(qpms_scatsys_t
*ss
, qpms_ss_orbit_type_t
*ot
) {
98 for(qpms_ss_orbit_pi_t src
= 1; src
< ot
->size
; ++src
) {
99 // find any element g that sends 0 to src:
101 for (g
= 0; g
< ss
->sym
->order
; ++g
)
102 if (ot
->action
[g
] == src
) break;
103 assert(g
< ss
->sym
->order
);
104 // invg sends src to 0
105 qpms_gmi_t invg
= qpms_finite_group_inv(ss
->sym
, g
);
106 for (qpms_gmi_t f
= 0; f
< ss
->sym
->order
; ++f
)
107 // if f sends 0 to dest, then f * invg sends src to dest
108 ot
->action
[src
* ss
->sym
->order
+
109 qpms_finite_group_mul(ss
->sym
,f
,invg
)] = ot
->action
[f
];
113 //Add orbit type to a scattering system, updating the ss->otspace_end pointer accordingly
114 static void add_orbit_type(qpms_scatsys_t
*ss
, const qpms_ss_orbit_type_t
*ot_current
) {
115 qpms_ss_orbit_type_t
* const ot_new
= & (ss
->orbit_types
[ss
->orbit_type_count
]);
116 ot_new
->size
= ot_current
->size
;
118 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_tmi(ss
, ot_current
->tmatrices
[0]);
119 const size_t bspecn
= bspec
->n
;
120 ot_new
->bspecn
= bspecn
;
122 const size_t actionsiz
= sizeof(ot_current
->action
[0]) * ot_current
->size
124 ot_new
->action
= (void *) (ss
->otspace_end
);
125 memcpy(ot_new
->action
, ot_current
->action
, actionsiz
);
126 // N.B. we copied mostly garbage ^^^, most of it is initialized just now:
127 extend_orbit_action(ss
, ot_new
);
128 #ifdef DUMP_ORBIT_ACTION
129 fprintf(stderr
, "Orbit action:\n");
130 for (qpms_gmi_t gmi
= 0; gmi
< ss
->sym
->order
; ++gmi
) {
131 const qpms_quat4d_t q
= qpms_quat_4d_from_2c(ss
->sym
->rep3d
[gmi
].rot
);
132 fprintf(stderr
, "%+d[%g %g %g %g] ", (int)ss
->sym
->rep3d
[gmi
].det
,
133 q
.c1
, q
.ci
, q
.cj
, q
.ck
);
134 fprintf(stderr
, "%s\t", (ss
->sym
->permrep
&& ss
->sym
->permrep
[gmi
])?
135 ss
->sym
->permrep
[gmi
] : "");
136 for (qpms_ss_orbit_pi_t pi
= 0; pi
< ot_new
->size
; ++pi
)
137 fprintf(stderr
, "%d\t", (int) ot_new
->action
[gmi
+ pi
*ss
->sym
->order
]);
138 fprintf(stderr
, "\n");
141 ss
->otspace_end
+= actionsiz
;
143 const size_t tmsiz
= sizeof(ot_current
->tmatrices
[0]) * ot_current
->size
;
144 ot_new
->tmatrices
= (void *) (ss
->otspace_end
);
145 memcpy(ot_new
->tmatrices
, ot_current
->tmatrices
, tmsiz
);
146 ss
->otspace_end
+= tmsiz
;
148 const size_t irbase_sizes_siz
= sizeof(ot_new
->irbase_sizes
[0]) * ss
->sym
->nirreps
;
149 ot_new
->irbase_sizes
= (void *) (ss
->otspace_end
);
150 ss
->otspace_end
+= irbase_sizes_siz
;
151 ot_new
->irbase_cumsizes
= (void *) (ss
->otspace_end
);
152 ss
->otspace_end
+= irbase_sizes_siz
;
153 ot_new
->irbase_offsets
= (void *) (ss
->otspace_end
);
154 ss
->otspace_end
+= irbase_sizes_siz
;
156 const size_t irbases_siz
= sizeof(ot_new
->irbases
[0]) * SQ(ot_new
->size
* bspecn
);
157 ot_new
->irbases
= (void *) (ss
->otspace_end
);
158 ss
->otspace_end
+= irbases_siz
;
160 size_t lastbs
, bs_cumsum
= 0;
161 for(qpms_iri_t iri
= 0; iri
< ss
->sym
->nirreps
; ++iri
) {
162 ot_new
->irbase_offsets
[iri
] = bs_cumsum
* bspecn
* ot_new
->size
;
163 qpms_orbit_irrep_basis(&lastbs
,
164 ot_new
->irbases
+ bs_cumsum
*ot_new
->size
*bspecn
,
165 ot_new
, bspec
, ss
->sym
, iri
);
166 ot_new
->irbase_sizes
[iri
] = lastbs
;
168 ot_new
->irbase_cumsizes
[iri
] = bs_cumsum
;
170 QPMS_ENSURE(bs_cumsum
== ot_new
->size
* bspecn
,
171 "The cumulative size of the symmetry-adapted bases is wrong; "
172 "expected %d = %d * %d, got %d.",
173 ot_new
->size
* bspecn
, ot_new
->size
, bspecn
, bs_cumsum
);
174 ot_new
->instance_count
= 0;
175 ss
->orbit_type_count
++;
178 // Standardises the lattice base vectors, and fills the other contents of ss->per[0].
179 // LPTODO split the functionality in smaller functions, these might be useful elsewhere.
180 static void process_lattice_bases(qpms_scatsys_t
*ss
, const qpms_tolerance_spec_t
*tol
) {
181 switch(ss
->lattice_dimension
) {
184 normsq
= cart3_normsq(ss
->per
.lattice_basis
[0]);
185 ss
->per
.unitcell_volume
= sqrt(normsq
);
186 ss
->per
.reciprocal_basis
[0] = cart3_divscale(ss
->per
.lattice_basis
[0], normsq
);
187 ss
->per
.eta
= 2. / M_2_SQRTPI
/ ss
->per
.unitcell_volume
;
190 // (I) Create orthonormal basis of the plane in which the basis vectors lie
192 double norm0
= cart3norm(ss
->per
.lattice_basis
[0]);
193 // 0: Just normalised 0. basis vector
194 onbasis
[0] = cart3_divscale(ss
->per
.lattice_basis
[0], norm0
);
195 // 1: Gram-Schmidt complement of the other basis vector
196 const double b0norm_dot_b1
= cart3_dot(onbasis
[0], ss
->per
.lattice_basis
[1]);
197 onbasis
[1] = cart3_substract(ss
->per
.lattice_basis
[1],
198 cart3_scale(b0norm_dot_b1
, onbasis
[0]));
199 onbasis
[1] = cart3_divscale(onbasis
[1], cart3norm(onbasis
[1]));
200 // (II) Express the lattice basis in terms of the new 2d plane vector basis onbasis
201 cart2_t b2d
[2] = {{.x
= norm0
, .y
= 0},
202 {.x
= b0norm_dot_b1
, .y
= cart3_dot(onbasis
[1], ss
->per
.lattice_basis
[1])}};
203 // (III) Reduce lattice basis and get reciprocal bases in the 2D plane
204 l2d_reduceBasis(b2d
[0], b2d
[1], &b2d
[0], &b2d
[1]);
205 ss
->per
.unitcell_volume
= l2d_unitcell_area(b2d
[0], b2d
[1]);
206 ss
->per
.eta
= 2. / M_2_SQRTPI
/ sqrt(ss
->per
.unitcell_volume
);
208 QPMS_ENSURE_SUCCESS(l2d_reciprocalBasis1(b2d
[0], b2d
[1], &r2d
[0], &r2d
[1]));
209 // (IV) Rotate everything back to the original 3D space.
210 for(int i
= 0; i
< 2; ++i
) {
211 ss
->per
.lattice_basis
[i
] = cart3_add(
212 cart3_scale(b2d
[i
].x
, onbasis
[0]),
213 cart3_scale(b2d
[i
].y
, onbasis
[1]));
214 ss
->per
.reciprocal_basis
[i
] = cart3_add(
215 cart3_scale(r2d
[i
].x
, onbasis
[0]),
216 cart3_scale(r2d
[i
].y
, onbasis
[1]));
220 l3d_reduceBasis(ss
->per
.lattice_basis
, ss
->per
.lattice_basis
);
221 ss
->per
.unitcell_volume
= fabs(l3d_reciprocalBasis1(ss
->per
.lattice_basis
,
222 ss
->per
.reciprocal_basis
));
223 ss
->per
.eta
= 2. / M_2_SQRTPI
/ cbrt(ss
->per
.unitcell_volume
);
224 // TODO check unitcell_volume for sanity w.r.t tolerance
232 // Almost 200 lines. This whole thing deserves a rewrite!
233 qpms_scatsys_at_omega_t
*qpms_scatsys_apply_symmetry(const qpms_scatsys_t
*orig
,
234 const qpms_finite_group_t
*sym
, complex double omega
,
235 const qpms_tolerance_spec_t
*tol
) {
237 QPMS_WARN("No symmetry group pointer provided, assuming trivial symmetry"
238 " (this is currently the only option for periodic systems).");
239 sym
= &QPMS_FINITE_GROUP_TRIVIAL
;
242 // TODO check data sanity
244 qpms_l_t lMax
= 0; // the overall lMax of all base specs.
245 qpms_normalisation_t normalisation
= QPMS_NORMALISATION_UNDEF
;
247 // First, determine the rough radius of the array; it should be nonzero
248 // in order to particle position equivalence work correctly
251 double minx
= +INFINITY
, miny
= +INFINITY
, minz
= +INFINITY
;
252 double maxx
= -INFINITY
, maxy
= -INFINITY
, maxz
= -INFINITY
;
253 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
) {
254 minx
= MIN(minx
,orig
->p
[i
].pos
.x
);
255 miny
= MIN(miny
,orig
->p
[i
].pos
.y
);
256 minz
= MIN(minz
,orig
->p
[i
].pos
.z
);
257 maxx
= MAX(maxx
,orig
->p
[i
].pos
.x
);
258 maxy
= MAX(maxy
,orig
->p
[i
].pos
.y
);
259 maxz
= MAX(maxz
,orig
->p
[i
].pos
.z
);
261 lenscale
= (fabs(maxx
)+fabs(maxy
)+fabs(maxz
)+(maxx
-minx
)+(maxy
-miny
)+(maxz
-minz
)) / 3;
264 // Second, check that there are no duplicit positions in the input system.
265 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++i
)
266 for (qpms_ss_pi_t j
= 0; j
< i
; ++j
)
267 assert(!cart3_isclose(orig
->p
[i
].pos
, orig
->p
[j
].pos
, 0, QPMS_SCATSYS_LEN_RTOL
* lenscale
));
270 // Allocate T-matrix, particle and particle orbit info arrays
272 QPMS_CRASHING_MALLOC(ss
, sizeof(*ss
));
273 ss
->lattice_dimension
= orig
->lattice_dimension
;
274 // TODO basic periodic lattices related stuff here.
275 if (ss
->lattice_dimension
) {
277 process_lattice_bases(ss
, tol
);
279 for(int i
= 0; i
< ss
->lattice_dimension
; ++i
) // extend lenscale by basis vectors
280 lenscale
= MAX(lenscale
, cart3norm(ss
->per
.lattice_basis
[i
]));
281 ss
->lenscale
= lenscale
;
283 ss
->medium
= orig
->medium
;
285 // Copy the qpms_tmatrix_fuction_t from orig
286 ss
->tmg_count
= orig
->tmg_count
;
287 QPMS_CRASHING_MALLOC(ss
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
288 memcpy(ss
->tmg
, orig
->tmg
, ss
->tmg_count
* sizeof(*(ss
->tmg
)));
290 ss
->tm_capacity
= sym
->order
* orig
->tm_count
;
291 QPMS_CRASHING_MALLOC(ss
->tm
, ss
->tm_capacity
* sizeof(*(ss
->tm
)));
293 ss
->p_capacity
= sym
->order
* orig
->p_count
;
294 QPMS_CRASHING_MALLOC(ss
->p
, ss
->p_capacity
* sizeof(qpms_particle_tid_t
));
295 QPMS_CRASHING_MALLOC(ss
->p_orbitinfo
, ss
->p_capacity
* sizeof(qpms_ss_particle_orbitinfo_t
));
296 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_capacity
; ++pi
) {
297 ss
->p_orbitinfo
[pi
].t
= QPMS_SS_P_ORBITINFO_UNDEF
;
298 ss
->p_orbitinfo
[pi
].p
= QPMS_SS_P_ORBITINFO_UNDEF
;
301 // Evaluate the original T-matrices at omega
302 qpms_tmatrix_t
**tm_orig_omega
;
303 QPMS_CRASHING_MALLOC(tm_orig_omega
, orig
->tmg_count
* sizeof(*tm_orig_omega
));
304 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
305 tm_orig_omega
[i
] = qpms_tmatrix_init_from_function(orig
->tmg
[i
], omega
);
307 // Evaluate the medium and derived T-matrices at omega.
308 qpms_scatsys_at_omega_t
*ssw
;
309 QPMS_CRASHING_MALLOC(ssw
, sizeof(*ssw
)); // returned
312 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
313 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
314 // we will be using ss->tm_capacity also for ssw->tm
315 QPMS_CRASHING_MALLOC(ssw
->tm
, ss
->tm_capacity
* sizeof(*(ssw
->tm
))); // returned
317 // Evaluate T-matrices at omega; checking for duplicities
319 ss
->max_bspecn
= 0; // We'll need it later.for memory alloc estimates.
321 qpms_ss_tmi_t tm_dupl_remap
[ss
->tm_capacity
]; // Auxilliary array to label remapping the indices after ignoring t-matrix duplicities; VLA!
323 for (qpms_ss_tmi_t i
= 0; i
< orig
->tm_count
; ++i
) {
324 qpms_tmatrix_t
*ti
= qpms_tmatrix_apply_operation(&(orig
->tm
[i
].op
), tm_orig_omega
[orig
->tm
[i
].tmgi
]);
326 for (j
= 0; j
< ss
->tm_count
; ++j
)
327 if (qpms_tmatrix_isclose(ti
, ssw
->tm
[j
], tol
->rtol
, tol
->atol
)) {
330 if (j
== ss
->tm_count
) { // duplicity not found, save both the "abstract" and "at omega" T-matrices
331 qpms_tmatrix_operation_copy(&ss
->tm
[j
].op
, &orig
->tm
[i
].op
);
332 ss
->tm
[j
].tmgi
= orig
->tm
[i
].tmgi
; // T-matrix functions are preserved.
334 ss
->max_bspecn
= MAX(ssw
->tm
[j
]->spec
->n
, ss
->max_bspecn
);
335 lMax
= MAX(lMax
, ssw
->tm
[j
]->spec
->lMax
);
338 else qpms_tmatrix_free(ti
);
339 tm_dupl_remap
[i
] = j
;
340 if (normalisation
== QPMS_NORMALISATION_UNDEF
)
341 normalisation
= ssw
->tm
[i
]->spec
->norm
;
342 // We expect all bspec norms to be the same.
343 else QPMS_ENSURE(normalisation
== ssw
->tm
[j
]->spec
->norm
,
344 "Normalisation convention must be the same for all T-matrices."
345 " %d != %d\n", normalisation
, ssw
->tm
[j
]->spec
->norm
);
348 // Free the original T-matrices at omega
349 for(qpms_ss_tmgi_t i
= 0; i
< orig
->tmg_count
; ++i
)
350 qpms_tmatrix_free(tm_orig_omega
[i
]);
353 // Copy particles, remapping the t-matrix indices
354 for (qpms_ss_pi_t i
= 0; i
< orig
->p_count
; ++(i
)) {
355 ss
->p
[i
] = orig
->p
[i
];
356 ss
->p
[i
].tmatrix_id
= tm_dupl_remap
[ss
->p
[i
].tmatrix_id
];
358 ss
->p_count
= orig
->p_count
;
360 // allocate t-matrix symmetry map
361 QPMS_CRASHING_MALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* sym
->order
* ss
->tm_count
);
363 // Extend the T-matrices list by the symmetry operations
364 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
365 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
){
366 const size_t d
= ssw
->tm
[tmi
]->spec
->n
;
368 QPMS_CRASHING_MALLOC(m
, d
*d
*sizeof(complex double)); // ownership passed to ss->tm[ss->tm_count].op
369 qpms_irot3_uvswfi_dense(m
, ssw
->tm
[tmi
]->spec
, sym
->rep3d
[gmi
]);
370 qpms_tmatrix_t
*transformed
= qpms_tmatrix_apply_symop(ssw
->tm
[tmi
], m
);
372 for (tmj
= 0; tmj
< ss
->tm_count
; ++tmj
)
373 if (qpms_tmatrix_isclose(transformed
, ssw
->tm
[tmj
], tol
->rtol
, tol
->atol
))
375 if (tmj
< ss
->tm_count
) { // HIT, transformed T-matrix already exists
376 //TODO some "rounding error cleanup" symmetrisation could be performed here?
377 qpms_tmatrix_free(transformed
);
379 } else { // MISS, save the matrix (also the "abstract" one)
380 ssw
->tm
[ss
->tm_count
] = transformed
;
381 ss
->tm
[ss
->tm_count
].tmgi
= ss
->tm
[tmi
].tmgi
;
382 qpms_tmatrix_operation_compose_chain_init(&(ss
->tm
[ss
->tm_count
].op
), 2, 1); // FIXME MEMLEAK
383 struct qpms_tmatrix_operation_compose_chain
* const o
= &(ss
->tm
[ss
->tm_count
].op
.op
.compose_chain
);
384 o
->ops
[0] = & ss
->tm
[tmi
].op
; // Let's just borrow this
385 o
->ops_owned
[0] = false;
386 o
->opmem
[0].typ
= QPMS_TMATRIX_OPERATION_LRMATRIX
;
387 o
->opmem
[0].op
.lrmatrix
.m
= m
;
388 o
->opmem
[0].op
.lrmatrix
.owns_m
= true;
389 o
->opmem
[0].op
.lrmatrix
.m_size
= d
* d
;
390 o
->ops
[1] = o
->opmem
;
391 o
->ops_owned
[1] = true;
394 ss
->tm_sym_map
[gmi
+ tmi
* sym
->order
] = tmj
; // In any case, tmj now indexes the correct transformed matrix
396 // Possibly free some space using the new ss->tm_count instead of (old) ss->tm_count*sym->order
397 QPMS_CRASHING_REALLOC(ss
->tm_sym_map
, sizeof(qpms_ss_tmi_t
) * sym
->order
* ss
->tm_count
);
398 // tm could be realloc'd as well, but those are just pointers, not likely many.
401 // allocate particle symmetry map
402 QPMS_CRASHING_MALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* sym
->order
* ss
->p_count
);
403 // allocate orbit type array (TODO realloc later if too long)
404 ss
->orbit_type_count
= 0;
405 QPMS_CRASHING_CALLOC(ss
->orbit_types
, ss
->p_count
, sizeof(qpms_ss_orbit_type_t
));
407 QPMS_CRASHING_MALLOC(ss
->otspace
, // reallocate later
408 (sizeof(qpms_ss_orbit_pi_t
) * sym
->order
* sym
->order
409 + sizeof(qpms_ss_tmi_t
) * sym
->order
410 + 3*sizeof(size_t) * sym
->nirreps
411 + sizeof(complex double) * SQ(sym
->order
* ss
->max_bspecn
)) * ss
->p_count
413 ss
->otspace_end
= ss
->otspace
;
415 // Workspace for the orbit type determination
416 qpms_ss_orbit_type_t ot_current
;
417 qpms_ss_orbit_pi_t ot_current_action
[sym
->order
* sym
->order
];
418 qpms_ss_tmi_t ot_current_tmatrices
[sym
->order
];
420 qpms_ss_pi_t current_orbit
[sym
->order
];
421 ot_current
.action
= ot_current_action
;
422 ot_current
.tmatrices
= ot_current_tmatrices
;
425 // Extend the particle list by the symmetry operations, check that particles mapped by symmetry ops on themselves
426 // have the correct symmetry
427 // TODO this could be sped up to O(npart * log(npart)); let's see later whether needed.
428 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
429 const bool new_orbit
= (ss
->p_orbitinfo
[pi
].t
== QPMS_SS_P_ORBITINFO_UNDEF
); // TODO build the orbit!!!
431 current_orbit
[0] = pi
;
433 ot_current
.tmatrices
[0] = ss
->p
[pi
].tmatrix_id
;
434 #ifdef DUMP_PARTICLE_POSITIONS
435 cart3_t pos
= ss
->p
[pi
].pos
;
436 fprintf(stderr
, "An orbit [%.4g, %.4g, %.4g] => ", pos
.x
, pos
.y
, pos
.z
);
440 for (qpms_gmi_t gmi
= 0; gmi
< sym
->order
; ++gmi
) {
441 cart3_t newpoint
= qpms_irot3_apply_cart3(sym
->rep3d
[gmi
], ss
->p
[pi
].pos
);
442 qpms_ss_tmi_t new_tmi
= ss
->tm_sym_map
[gmi
+ ss
->p
[pi
].tmatrix_id
* sym
->order
]; // transformed t-matrix index
444 for (pj
= 0; pj
< ss
->p_count
; ++pj
)
445 if (cart3_isclose(newpoint
, ss
->p
[pj
].pos
, 0, ss
->lenscale
* QPMS_SCATSYS_LEN_RTOL
)) {
446 if (new_tmi
!= ss
->p
[pj
].tmatrix_id
)
447 qpms_pr_error("The %d. particle with coords (%lg, %lg, %lg) "
448 "is mapped to %d. another (or itself) with cords (%lg, %lg, %lg) "
449 "without having the required symmetry", (int)pi
,
450 ss
->p
[pi
].pos
.x
, ss
->p
[pi
].pos
.y
, ss
->p
[pi
].pos
.z
,
451 (int)pj
, ss
->p
[pj
].pos
.x
, ss
->p
[pj
].pos
.y
, ss
->p
[pj
].pos
.z
);
454 if (pj
< ss
->p_count
) { // HIT, the particle is transformed to an "existing" one.
456 } else { // MISS, the symmetry transforms the particle to a new location, so add it.
457 qpms_particle_tid_t newparticle
= {newpoint
, new_tmi
};
458 ss
->p
[ss
->p_count
] = newparticle
;
460 #ifdef DUMP_PARTICLE_POSITIONS
462 fprintf(stderr
, "[%.4g, %.4g, %.4g] ", newpoint
.x
, newpoint
.y
, newpoint
.z
);
465 ss
->p_sym_map
[gmi
+ pi
* sym
->order
] = pj
;
468 // Now check whether the particle (result of the symmetry op) is already in the current orbit
469 qpms_ss_orbit_pi_t opj
;
470 for (opj
= 0; opj
< ot_current
.size
; ++opj
)
471 if (current_orbit
[opj
] == pj
) break; // HIT, pj already on current orbit
472 if (opj
== ot_current
.size
) { // MISS, pj is new on the orbit, extend the size and set the T-matrix id
473 current_orbit
[opj
] = pj
;
475 ot_current
.tmatrices
[opj
] = ss
->p
[pj
].tmatrix_id
;
477 ot_current
.action
[gmi
] = opj
;
480 if (new_orbit
) { // Now compare if the new orbit corresponds to some of the existing types.
481 #ifdef DUMP_PARTICLE_POSITIONS
485 for(oti
= 0; oti
< ss
->orbit_type_count
; ++oti
)
486 if (orbit_types_equal(&ot_current
, &(ss
->orbit_types
[oti
]))) break; // HIT, orbit type already exists
487 assert(0 == sym
->order
% ot_current
.size
);
488 if (oti
== ss
->orbit_type_count
) // MISS, add the new orbit type
489 add_orbit_type(ss
, &ot_current
);
491 // Walk through the orbit again and set up the orbit info of the particles
492 for (qpms_ss_orbit_pi_t opi
= 0; opi
< ot_current
.size
; ++opi
) {
493 const qpms_ss_pi_t pi_opi
= current_orbit
[opi
];
494 ss
->p_orbitinfo
[pi_opi
].t
= oti
;
495 ss
->p_orbitinfo
[pi_opi
].p
= opi
;
496 ss
->p_orbitinfo
[pi_opi
].osn
= ss
->orbit_types
[oti
].instance_count
;
498 ss
->orbit_types
[oti
].instance_count
++;
501 // Possibly free some space using the new ss->p_count instead of (old) ss->p_count*sym->order
502 QPMS_CRASHING_REALLOC(ss
->p_sym_map
, sizeof(qpms_ss_pi_t
) * sym
->order
* ss
->p_count
);
503 QPMS_CRASHING_REALLOC(ss
->p
, sizeof(qpms_particle_tid_t
) * ss
->p_count
);
504 QPMS_CRASHING_REALLOC(ss
->p_orbitinfo
, sizeof(qpms_ss_particle_orbitinfo_t
)*ss
->p_count
);
505 ss
->p_capacity
= ss
->p_count
;
507 { // Reallocate the orbit type data space and update the pointers if needed.
508 size_t otspace_sz
= ss
->otspace_end
- ss
->otspace
;
509 char *old_otspace
= ss
->otspace
;
510 QPMS_CRASHING_REALLOC(ss
->otspace
, otspace_sz
);
511 ptrdiff_t shift
= ss
->otspace
- old_otspace
;
513 for (size_t oi
= 0; oi
< ss
->orbit_type_count
; ++oi
) {
514 ss
->orbit_types
[oi
].action
= (void *)(((char *) (ss
->orbit_types
[oi
].action
)) + shift
);
515 ss
->orbit_types
[oi
].tmatrices
= (void *)(((char *) (ss
->orbit_types
[oi
].tmatrices
)) + shift
);
516 ss
->orbit_types
[oi
].irbase_sizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_sizes
)) + shift
);
517 ss
->orbit_types
[oi
].irbase_cumsizes
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_cumsizes
)) + shift
);
518 ss
->orbit_types
[oi
].irbase_offsets
= (void *)(((char *) (ss
->orbit_types
[oi
].irbase_offsets
)) + shift
);
519 ss
->orbit_types
[oi
].irbases
= (void *)(((char *) (ss
->orbit_types
[oi
].irbases
)) + shift
);
521 ss
->otspace_end
+= shift
;
525 // Set ss->fecv_size and ss->fecv_pstarts
527 QPMS_CRASHING_MALLOC(ss
->fecv_pstarts
, ss
->p_count
* sizeof(size_t));
528 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
529 ss
->fecv_pstarts
[pi
] = ss
->fecv_size
;
530 ss
->fecv_size
+= ssw
->tm
[ss
->p
[pi
].tmatrix_id
]->spec
->n
; // That's a lot of dereferencing!
533 QPMS_CRASHING_MALLOC(ss
->saecv_sizes
, sizeof(size_t) * sym
->nirreps
);
534 QPMS_CRASHING_MALLOC(ss
->saecv_ot_offsets
, sizeof(size_t) * sym
->nirreps
* ss
->orbit_type_count
);
535 for(qpms_iri_t iri
= 0; iri
< sym
->nirreps
; ++iri
) {
536 ss
->saecv_sizes
[iri
] = 0;
537 for(qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
538 ss
->saecv_ot_offsets
[iri
* ss
->orbit_type_count
+ oti
] = ss
->saecv_sizes
[iri
];
539 const qpms_ss_orbit_type_t
*ot
= &(ss
->orbit_types
[oti
]);
540 ss
->saecv_sizes
[iri
] += ot
->instance_count
* ot
->irbase_sizes
[iri
];
544 qpms_ss_pi_t p_ot_cumsum
= 0;
545 for (qpms_ss_oti_t oti
= 0; oti
< ss
->orbit_type_count
; ++oti
) {
546 qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
547 ot
->p_offset
= p_ot_cumsum
;
548 p_ot_cumsum
+= ot
->size
* ot
->instance_count
;
551 // Set ss->p_by_orbit[]
552 QPMS_CRASHING_MALLOC(ss
->p_by_orbit
, sizeof(qpms_ss_pi_t
) * ss
->p_count
);
553 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
554 const qpms_ss_particle_orbitinfo_t
*oi
= ss
->p_orbitinfo
+ pi
;
555 const qpms_ss_oti_t oti
= oi
->t
;
556 const qpms_ss_orbit_type_t
*ot
= ss
->orbit_types
+ oti
;
557 ss
->p_by_orbit
[ot
->p_offset
+ ot
->size
* oi
->osn
+ oi
->p
] = pi
;
560 ss
->c
= qpms_trans_calculator_init(lMax
, normalisation
);
566 void qpms_scatsys_free(qpms_scatsys_t
*ss
) {
569 for(qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
570 qpms_tmatrix_operation_clear(&ss
->tm
[tmi
].op
);
574 free(ss
->fecv_pstarts
);
575 free(ss
->tm_sym_map
);
578 free(ss
->p_orbitinfo
);
579 free(ss
->orbit_types
);
580 free(ss
->saecv_ot_offsets
);
581 free(ss
->saecv_sizes
);
582 free(ss
->p_by_orbit
);
583 qpms_trans_calculator_free(ss
->c
);
588 void qpms_scatsys_at_omega_refill(qpms_scatsys_at_omega_t
*ssw
,
589 complex double omega
) {
590 const qpms_scatsys_t
* const ss
= ssw
->ss
;
592 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
593 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
594 qpms_tmatrix_t
**tmatrices_preop
;
595 QPMS_CRASHING_CALLOC(tmatrices_preop
, ss
->tmg_count
, sizeof(*tmatrices_preop
));
596 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
597 tmatrices_preop
[tmgi
] = qpms_tmatrix_init_from_function(ss
->tmg
[tmgi
], omega
);
598 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
)
599 qpms_tmatrix_apply_operation_replace(ssw
->tm
[tmi
], &ss
->tm
[tmi
].op
,
600 tmatrices_preop
[ss
->tm
[tmi
].tmgi
]);
601 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
602 qpms_tmatrix_free(tmatrices_preop
[tmgi
]);
603 free(tmatrices_preop
);
606 qpms_scatsys_at_omega_t
*qpms_scatsys_at_omega(const qpms_scatsys_t
*ss
,
607 complex double omega
) {
609 qpms_scatsys_at_omega_t
*ssw
;
610 QPMS_CRASHING_MALLOC(ssw
, sizeof(*ssw
));
613 ssw
->medium
= qpms_epsmu_generator_eval(ss
->medium
, omega
);
614 ssw
->wavenumber
= qpms_wavenumber(omega
, ssw
->medium
);
615 QPMS_CRASHING_CALLOC(ssw
->tm
, ss
->tm_count
, sizeof(*ssw
->tm
));
616 qpms_tmatrix_t
**tmatrices_preop
;
617 QPMS_CRASHING_CALLOC(tmatrices_preop
, ss
->tmg_count
, sizeof(*tmatrices_preop
));
618 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
619 tmatrices_preop
[tmgi
] = qpms_tmatrix_init_from_function(ss
->tmg
[tmgi
], omega
);
620 for (qpms_ss_tmi_t tmi
= 0; tmi
< ss
->tm_count
; ++tmi
) {
621 ssw
->tm
[tmi
] = qpms_tmatrix_apply_operation(&ss
->tm
[tmi
].op
,
622 tmatrices_preop
[ss
->tm
[tmi
].tmgi
]); //<- main difference to .._refill()
623 QPMS_ENSURE(ssw
->tm
[tmi
],
624 "Got NULL pointer from qpms_tmatrix_apply_operation");
626 for (qpms_ss_tmgi_t tmgi
= 0; tmgi
< ss
->tmg_count
; ++tmgi
)
627 qpms_tmatrix_free(tmatrices_preop
[tmgi
]);
628 free(tmatrices_preop
);
632 void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t
*ssw
) {
635 for(qpms_ss_tmi_t i
= 0; i
< ssw
->ss
->tm_count
; ++i
)
636 qpms_tmatrix_free(ssw
->tm
[i
]);
642 // (copypasta from symmetries.c)
643 // TODO at some point, maybe support also other norms.
644 // (perhaps just use qpms_normalisation_t_factor() at the right places)
645 static inline void check_norm_compat(const qpms_vswf_set_spec_t
*s
)
647 switch (s
->norm
& QPMS_NORMALISATION_NORM_BITS
) {
648 case QPMS_NORMALISATION_NORM_POWER
:
650 case QPMS_NORMALISATION_NORM_SPHARM
:
653 QPMS_WTF
; // Only SPHARM and POWER norms are supported right now.
657 complex double *qpms_orbit_action_matrix(complex double *target
,
658 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
659 const qpms_finite_group_t
*sym
, const qpms_gmi_t g
) {
660 assert(sym
); assert(g
< sym
->order
);
662 assert(ot
); assert(ot
->size
> 0);
663 // check_norm_compat(bspec); not needed here, the qpms_irot3_uvswfi_dense should complain if necessary
664 const size_t n
= bspec
->n
;
665 const qpms_gmi_t N
= ot
->size
;
667 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
668 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
669 complex double tmp
[n
][n
]; // this is the 'per-particle' action
670 qpms_irot3_uvswfi_dense(tmp
[0], bspec
, sym
->rep3d
[g
]);
671 for(qpms_ss_orbit_pi_t Col
= 0; Col
< ot
->size
; ++Col
) {
672 // Row is the 'destination' of the symmetry operation, Col is the 'source'
673 const qpms_ss_orbit_pi_t Row
= ot
->action
[sym
->order
* Col
+ g
];
674 for(size_t row
= 0; row
< bspec
->n
; ++row
)
675 for(size_t col
= 0; col
< bspec
->n
; ++col
)
676 target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
] = conj(tmp
[row
][col
]); //CHECKCONJ
678 #ifdef DUMP_ACTIONMATRIX
679 fprintf(stderr
,"%d: %s\n",
680 (int) g
, (sym
->permrep
&& sym
->permrep
[g
])?
681 sym
->permrep
[g
] : "");
682 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
683 fprintf(stderr
, "--------------------------\n");
684 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
685 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
686 fprintf(stderr
, "| ");
687 for (size_t col
= 0; col
< bspec
->n
; ++col
)
688 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
690 fprintf(stderr
, "|\n");
693 fprintf(stderr
, "-------------------------------\n\n");
699 complex double *qpms_orbit_irrep_projector_matrix(complex double *target
,
700 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
701 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
704 assert(ot
); assert(ot
->size
> 0);
705 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
706 // check_norm_compat(bspec); // probably not needed here, let the called functions complain if necessary, but CHEKME
707 const size_t n
= bspec
->n
;
708 const qpms_gmi_t N
= ot
->size
;
710 QPMS_CRASHING_MALLOC(target
, n
*n
*N
*N
*sizeof(complex double));
711 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
712 // Workspace for the orbit group action matrices
713 complex double *tmp
= malloc(n
*n
*N
*N
*sizeof(complex double));
714 const int d
= sym
->irreps
[iri
].dim
;
715 double prefac
= d
/ (double) sym
->order
;
716 for(int partner
= 0; partner
< d
; ++partner
) {
717 for(qpms_gmi_t g
= 0; g
< sym
->order
; ++g
) {
718 // We use the diagonal elements of D_g
719 complex double D_g_conj
= sym
->irreps
[iri
].m
[g
*d
*d
+ partner
*d
+ partner
];
720 #ifdef DUMP_ACTIONMATRIX
721 fprintf(stderr
,"(factor %+g%+gj) ", creal(D_g_conj
), cimag(D_g_conj
));
723 qpms_orbit_action_matrix(tmp
, ot
, bspec
, sym
, g
);
725 for(size_t i
= 0; i
< n
*n
*N
*N
; ++i
)
726 target
[i
] += prefac
* D_g_conj
* tmp
[i
];
730 #ifdef DUMP_PROJECTORMATRIX
731 fprintf(stderr
,"Projector %d (%s):\n", (int) iri
,
732 sym
->irreps
[iri
].name
?sym
->irreps
[iri
].name
:"");
733 for (size_t Row
= 0; Row
< ot
->size
; ++Row
) {
734 fprintf(stderr
, "--------------------------\n");
735 for (size_t row
= 0; row
< bspec
->n
; ++row
) {
736 for (size_t Col
= 0; Col
< ot
->size
; ++Col
) {
737 fprintf(stderr
, "| ");
738 for (size_t col
= 0; col
< bspec
->n
; ++col
)
739 fprintf(stderr
, "%+2.3f%+2.3fj ", creal(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]),cimag(target
[n
*n
*N
*Row
+ n
*Col
+ n
*N
*row
+ col
]));
741 fprintf(stderr
, "|\n");
744 fprintf(stderr
, "-------------------------------\n\n");
749 #define SVD_ATOL 1e-8
751 complex double *qpms_orbit_irrep_basis(size_t *basis_size
,
752 complex double *target
,
753 const qpms_ss_orbit_type_t
*ot
, const qpms_vswf_set_spec_t
*bspec
,
754 const qpms_finite_group_t
*sym
, const qpms_iri_t iri
) {
757 assert(ot
); assert(ot
->size
> 0);
758 assert(iri
< sym
->nirreps
); assert(sym
->irreps
);
759 check_norm_compat(bspec
); // Here I'm not sure; CHECKME
760 const size_t n
= bspec
->n
;
761 const qpms_gmi_t N
= ot
->size
;
762 const bool newtarget
= (target
== NULL
);
764 QPMS_CRASHING_MALLOC(target
,n
*n
*N
*N
*sizeof(complex double));
765 memset(target
, 0, n
*n
*N
*N
*sizeof(complex double));
767 // Get the projector (also workspace for right sg. vect.)
768 complex double *projector
= qpms_orbit_irrep_projector_matrix(NULL
,
769 ot
, bspec
, sym
, iri
);
770 QPMS_ENSURE(projector
!= NULL
, "got NULL from qpms_orbit_irrep_projector_matrix()");
771 // Workspace for the right singular vectors.
772 complex double *V_H
; QPMS_CRASHING_MALLOC(V_H
, n
*n
*N
*N
*sizeof(complex double));
773 // THIS SHOULD NOT BE NECESSARY
774 complex double *U
; QPMS_CRASHING_MALLOC(U
, n
*n
*N
*N
*sizeof(complex double));
775 double *s
; QPMS_CRASHING_MALLOC(s
, n
*N
*sizeof(double));
777 QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR
,
778 'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
779 n
*N
/* m */, n
*N
/* n */, projector
/* a */, n
*N
/* lda */,
780 s
/* s */, U
/* u */, n
*N
/* ldu, irrelev. */, V_H
/* vt */,
784 for(bs
= 0; bs
< n
*N
; ++bs
) {
785 QPMS_ENSURE(s
[bs
] <= 1 + SVD_ATOL
, "%zd. SV too large: %.16lf", bs
, s
[bs
]);
786 QPMS_ENSURE(!(s
[bs
] > SVD_ATOL
&& fabs(1-s
[bs
]) > SVD_ATOL
),
787 "%zd. SV in the 'wrong' interval: %.16lf", bs
, s
[bs
]);
788 if(s
[bs
] < SVD_ATOL
) break;
791 memcpy(target
, V_H
, bs
*N
*n
*sizeof(complex double));
792 if(newtarget
) QPMS_CRASHING_REALLOC(target
, bs
*N
*n
*sizeof(complex double));
793 if(basis_size
) *basis_size
= bs
;
802 complex double *qpms_scatsys_irrep_transform_matrix(complex double *U
,
803 const qpms_scatsys_t
*ss
, qpms_iri_t iri
) {
804 const size_t packedlen
= ss
->saecv_sizes
[iri
];
805 const size_t full_len
= ss
->fecv_size
;
807 QPMS_CRASHING_MALLOC(U
,full_len
* packedlen
* sizeof(complex double));
808 memset(U
, 0, full_len
* packedlen
* sizeof(complex double));
810 size_t fullvec_offset
= 0;
812 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
813 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
814 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
815 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
816 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
817 // This is where the particle's orbit starts in the "packed" vector:
818 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
819 + osn
* ot
->irbase_sizes
[iri
];
820 // Orbit coeff vector's full size:
821 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
822 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
823 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
824 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
826 for (size_t prow
= 0; prow
< orbit_packedsize
; ++prow
)
827 for (size_t pcol
= 0; pcol
< ot
->bspecn
; ++pcol
)
828 U
[full_len
* (packed_orbit_offset
+ prow
) + (fullvec_offset
+ pcol
)]
829 = om
[orbit_fullsize
* prow
+ (opi
* ot
->bspecn
+ pcol
)];
830 fullvec_offset
+= ot
->bspecn
;
836 complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed
,
837 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
839 const size_t packedlen
= ss
->saecv_sizes
[iri
];
840 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
841 return target_packed
;
842 const size_t full_len
= ss
->fecv_size
;
843 if (target_packed
== NULL
)
844 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
845 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
847 // Workspace for the intermediate matrix
849 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
851 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
853 const complex double one
= 1, zero
= 0;
857 CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
858 full_len
/*M*/, packedlen
/*N*/, full_len
/*K*/,
859 &one
/*alpha*/, orig_full
/*A*/, full_len
/*ldA*/,
860 U
/*B*/, full_len
/*ldB*/,
861 &zero
/*beta*/, tmp
/*C*/, packedlen
/*LDC*/);
863 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
864 packedlen
/*M*/, packedlen
/*N*/, full_len
/*K*/,
865 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
866 tmp
/*B*/, packedlen
/*ldB*/, &zero
/*beta*/,
867 target_packed
/*C*/, packedlen
/*ldC*/);
871 return target_packed
;
874 /// Transforms a big "packed" matrix into the full basis (trivial implementation).
875 complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full
,
876 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
877 qpms_iri_t iri
, bool add
){
878 const size_t packedlen
= ss
->saecv_sizes
[iri
];
879 const size_t full_len
= ss
->fecv_size
;
880 if (target_full
== NULL
)
881 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
882 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
884 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
886 // Workspace for the intermediate matrix result
888 QPMS_CRASHING_MALLOC(tmp
, full_len
* packedlen
* sizeof(complex double));
890 complex double *U
= qpms_scatsys_irrep_transform_matrix(NULL
, ss
, iri
);
892 const complex double one
= 1, zero
= 0;
896 CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
897 packedlen
/*M*/, full_len
/*N*/, packedlen
/*K*/,
898 &one
/*alpha*/, orig_packed
/*A*/, packedlen
/*ldA*/,
899 U
/*B*/, full_len
/*ldB*/,
900 &zero
/*beta*/, tmp
/*C*/, full_len
/*LDC*/);
902 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
903 full_len
/*M*/, full_len
/*N*/, packedlen
/*K*/,
904 &one
/*alpha*/, U
/*A*/, full_len
/*ldA*/,
905 tmp
/*B*/, full_len
/*ldB*/, &one
/*beta*/,
906 target_full
/*C*/, full_len
/*ldC*/);
912 complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed
,
913 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
915 const size_t packedlen
= ss
->saecv_sizes
[iri
];
916 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
917 return target_packed
;
918 const size_t full_len
= ss
->fecv_size
;
919 if (target_packed
== NULL
)
920 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
921 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
923 // Workspace for the intermediate particle-orbit matrix result
925 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
927 const complex double one
= 1, zero
= 0;
929 size_t fullvec_offsetR
= 0;
930 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
931 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
932 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
933 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
934 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
935 // This is where the particle's orbit starts in the "packed" vector:
936 const size_t packed_orbit_offsetR
=
937 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
938 + osnR
* otR
->irbase_sizes
[iri
];
939 // Orbit coeff vector's full size:
940 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
941 const size_t particle_fullsizeR
= otR
->bspecn
;
942 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
943 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
944 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
946 size_t fullvec_offsetC
= 0;
947 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
948 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
949 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
950 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
951 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
952 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
953 // This is where the particle's orbit starts in the "packed" vector:
954 const size_t packed_orbit_offsetC
=
955 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
956 + osnC
* otC
->irbase_sizes
[iri
];
957 // Orbit coeff vector's full size:
958 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
959 const size_t particle_fullsizeC
= otC
->bspecn
;
960 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
961 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
962 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
964 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
965 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
966 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
967 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
968 &one
/*alpha*/, orig_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*A*/,
970 omC
+ opiC
*particle_fullsizeC
/*B*/,
971 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
972 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
974 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
975 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
976 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
977 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
978 orbit_fullsizeR
/*ldA*/,
979 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
980 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
983 fullvec_offsetC
+= otC
->bspecn
;
986 fullvec_offsetR
+= otR
->bspecn
;
989 return target_packed
;
993 /// Transforms a big "packed" matrix into the full basis.
995 complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full
,
996 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
997 qpms_iri_t iri
, bool add
){
998 const size_t packedlen
= ss
->saecv_sizes
[iri
];
999 const size_t full_len
= ss
->fecv_size
;
1000 if (target_full
== NULL
)
1001 QPMS_CRASHING_MALLOC(target_full
, SQ(full_len
)*sizeof(complex double));
1002 if(!add
) memset(target_full
, 0, SQ(full_len
)*sizeof(complex double));
1004 if(!packedlen
) return target_full
; // Empty irrep, do nothing.
1006 // Workspace for the intermediate particle-orbit matrix result
1007 complex double *tmp
;
1008 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1010 const complex double one
= 1, zero
= 0;
1012 size_t fullvec_offsetR
= 0;
1013 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
1014 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
1015 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1016 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
1017 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
1018 // This is where the particle's orbit starts in the "packed" vector:
1019 const size_t packed_orbit_offsetR
=
1020 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1021 + osnR
* otR
->irbase_sizes
[iri
];
1022 // Orbit coeff vector's full size:
1023 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1024 const size_t particle_fullsizeR
= otR
->bspecn
;
1025 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1026 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1027 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1029 size_t fullvec_offsetC
= 0;
1030 if (orbit_packedsizeR
) // avoid crash on empty irrep
1031 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1032 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1033 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1034 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1035 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1036 // This is where the particle's orbit starts in the "packed" vector:
1037 const size_t packed_orbit_offsetC
=
1038 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1039 + osnC
* otC
->irbase_sizes
[iri
];
1040 // Orbit coeff vector's full size:
1041 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1042 const size_t particle_fullsizeC
= otC
->bspecn
;
1043 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1044 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1045 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1046 if (orbit_packedsizeC
) { // avoid crash on empty irrep
1048 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC]
1049 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1050 orbit_packedsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeC
/*K*/,
1051 &one
/*alpha*/, orig_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*A*/,
1053 omC
+ opiC
*particle_fullsizeC
/*B*/,
1054 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1055 tmp
/*C*/, particle_fullsizeC
/*LDC*/);
1057 // target[oiR|piR,oiC|piC] += U*[...] tmp[...]
1058 cblas_zgemm(CblasRowMajor
, CblasConjTrans
, CblasNoTrans
,
1059 particle_fullsizeR
/*M*/, particle_fullsizeC
/*N*/, orbit_packedsizeR
/*K*/,
1060 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/,
1061 orbit_fullsizeR
/*ldA*/,
1062 tmp
/*B*/, particle_fullsizeC
/*ldB*/, &one
/*beta*/,
1063 target_full
+ full_len
*fullvec_offsetR
+ fullvec_offsetC
/*C*/,
1066 fullvec_offsetC
+= otC
->bspecn
;
1068 fullvec_offsetR
+= otR
->bspecn
;
1075 /// Projects a "big" vector onto an irrep.
1077 complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed
,
1078 const complex double *orig_full
, const qpms_scatsys_t
*ss
,
1079 const qpms_iri_t iri
) {
1080 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1081 if (!packedlen
) return target_packed
; // Empty irrep
1082 if (target_packed
== NULL
)
1083 QPMS_CRASHING_MALLOC(target_packed
, packedlen
*sizeof(complex double));
1084 memset(target_packed
, 0, packedlen
*sizeof(complex double));
1086 const complex double one
= 1;
1088 size_t fullvec_offset
= 0;
1089 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1090 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
1091 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
1092 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
1093 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
1094 // This is where the particle's orbit starts in the "packed" vector:
1095 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
1096 + osn
* ot
->irbase_sizes
[iri
];
1097 // Orbit coeff vector's full size:
1098 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
1099 const size_t particle_fullsize
= ot
->bspecn
;
1100 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
1101 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1102 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
1103 if (orbit_packedsize
) // avoid crash on empty irrep
1104 cblas_zgemv(CblasRowMajor
/*order*/, CblasNoTrans
/*transA*/,
1105 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/,
1106 om
+ opi
*particle_fullsize
/*A*/, orbit_fullsize
/*lda*/,
1107 orig_full
+fullvec_offset
/*X*/, 1/*incX*/,
1108 &one
/*beta*/, target_packed
+packed_orbit_offset
/*Y*/, 1/*incY*/);
1109 fullvec_offset
+= ot
->bspecn
;
1111 return target_packed
;
1114 /// Transforms a big "packed" vector into the full basis.
1116 complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full
,
1117 const complex double *orig_packed
, const qpms_scatsys_t
*ss
,
1118 const qpms_iri_t iri
, bool add
) {
1119 const size_t full_len
= ss
->fecv_size
;
1120 if (target_full
== NULL
)
1121 QPMS_CRASHING_MALLOC(target_full
, full_len
*sizeof(complex double));
1122 if (!add
) memset(target_full
, 0, full_len
*sizeof(complex double));
1124 const complex double one
= 1;
1125 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1126 if (!packedlen
) return target_full
; // Completely empty irrep
1128 size_t fullvec_offset
= 0;
1129 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1130 const qpms_ss_oti_t oti
= ss
->p_orbitinfo
[pi
].t
;
1131 const qpms_ss_orbit_type_t
*const ot
= ss
->orbit_types
+ oti
;
1132 const qpms_ss_osn_t osn
= ss
->p_orbitinfo
[pi
].osn
;
1133 const qpms_ss_orbit_pi_t opi
= ss
->p_orbitinfo
[pi
].p
;
1134 // This is where the particle's orbit starts in the "packed" vector:
1135 const size_t packed_orbit_offset
= ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ oti
]
1136 + osn
* ot
->irbase_sizes
[iri
];
1137 // Orbit coeff vector's full size:
1138 const size_t orbit_fullsize
= ot
->size
* ot
->bspecn
;
1139 const size_t particle_fullsize
= ot
->bspecn
;
1140 const size_t orbit_packedsize
= ot
->irbase_sizes
[iri
];
1141 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1142 const complex double *om
= ot
->irbases
+ ot
->irbase_offsets
[iri
];
1144 if (orbit_packedsize
) // empty irrep, avoid zgemv crashing.
1145 cblas_zgemv(CblasRowMajor
/*order*/, CblasConjTrans
/*transA*/,
1146 orbit_packedsize
/*M*/, particle_fullsize
/*N*/, &one
/*alpha*/, om
+ opi
*particle_fullsize
/*A*/,
1147 orbit_fullsize
/*lda*/, orig_packed
+packed_orbit_offset
/*X*/, 1/*incX*/, &one
/*beta*/,
1148 target_full
+fullvec_offset
/*Y*/, 1/*incY*/);
1150 fullvec_offset
+= ot
->bspecn
;
1155 complex double *qpms_scatsys_build_translation_matrix_full(
1156 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1157 complex double *target
,
1158 const qpms_scatsys_t
*ss
,
1159 complex double k
///< Wave number to use in the translation matrix.
1162 return qpms_scatsys_build_translation_matrix_e_full(
1163 target
, ss
, k
, QPMS_HANKEL_PLUS
);
1166 complex double *qpms_scatsyswk_build_translation_matrix_full(
1167 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1168 complex double *target
,
1169 const qpms_scatsys_at_omega_k_t
*sswk
1172 const qpms_scatsys_at_omega_t
*ssw
= sswk
->ssw
;
1173 const complex double wavenumber
= ssw
->wavenumber
;
1174 const qpms_scatsys_t
*ss
= ssw
->ss
;
1175 qpms_ss_ensure_periodic(ss
);
1176 const cart3_t k_cart3
= cart3_from_double_array(sswk
->k
);
1177 return qpms_scatsys_periodic_build_translation_matrix_full(target
, ss
, wavenumber
, &k_cart3
, sswk
->eta
);
1180 complex double *qpms_scatsys_build_translation_matrix_e_full(
1181 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1182 complex double *target
,
1183 const qpms_scatsys_t
*ss
,
1184 complex double k
, ///< Wave number to use in the translation matrix.
1185 qpms_bessel_t J
///< Bessel function type.
1188 qpms_ss_ensure_nonperiodic(ss
);
1189 const size_t full_len
= ss
->fecv_size
;
1191 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1192 memset(target
, 0, SQ(full_len
) * sizeof(complex double)); //unnecessary?
1193 { // Non-diagonal part; M[piR, piC] = T[piR] S(piR<-piC)
1194 size_t fullvec_offsetR
= 0;
1195 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1196 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, piR
);
1197 const cart3_t posR
= ss
->p
[piR
].pos
;
1198 size_t fullvec_offsetC
= 0;
1199 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1200 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1201 if(piC
!= piR
) { // The diagonal will be dealt with later.
1202 const cart3_t posC
= ss
->p
[piC
].pos
;
1203 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1204 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
,
1205 bspecR
, full_len
, bspecC
, 1,
1208 fullvec_offsetC
+= bspecC
->n
;
1210 assert(fullvec_offsetC
== full_len
);
1211 fullvec_offsetR
+= bspecR
->n
;
1213 assert(fullvec_offsetR
== full_len
);
1219 static inline int qpms_ss_ppair_W32xy(const qpms_scatsys_t
*ss
,
1220 qpms_ss_pi_t pdest
, qpms_ss_pi_t psrc
, complex double wavenumber
, const cart2_t kvector
,
1221 complex double *target
, const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
1222 qpms_ewald_part parts
, double eta
) {
1223 const qpms_vswf_set_spec_t
*srcspec
= qpms_ss_bspec_pi(ss
, psrc
);
1224 const qpms_vswf_set_spec_t
*destspec
= qpms_ss_bspec_pi(ss
, pdest
);
1226 // This might be a bit arbitrary, roughly "copied" from Unitcell constructor. TODO review.
1227 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
1228 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
1230 return qpms_trans_calculator_get_trans_array_e32_e(ss
->c
,
1231 target
, NULL
/*err*/, destspec
, deststride
, srcspec
, srcstride
,
1233 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
1235 cart3_substract(ss
->p
[pdest
].pos
, ss
->p
[psrc
].pos
),
1239 static inline int qpms_ss_ppair_W(const qpms_scatsys_t
*ss
,
1240 qpms_ss_pi_t pdest
, qpms_ss_pi_t psrc
, complex double wavenumber
, const double wavevector
[],
1241 complex double *target
, const ptrdiff_t deststride
, const ptrdiff_t srcstride
,
1242 qpms_ewald_part parts
, double eta
) {
1243 if(ss
->lattice_dimension
== 2 && // Currently, we can only the xy-plane
1244 !ss
->per
.lattice_basis
[0].z
&& !ss
->per
.lattice_basis
[1].z
&&
1246 return qpms_ss_ppair_W32xy(ss
, pdest
, psrc
, wavenumber
, cart2_from_double_array(wavevector
),
1247 target
, deststride
, srcstride
, parts
, eta
);
1249 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1252 complex double *qpms_scatsys_periodic_build_translation_matrix_full(
1253 complex double *target
, const qpms_scatsys_t
*ss
,
1254 complex double wavenumber
, const cart3_t
*wavevector
, double eta
) {
1256 qpms_ss_ensure_periodic(ss
);
1257 if (eta
== 0 || isnan(eta
)) {
1259 cart3_to_double_array(tmp
, *wavevector
);
1260 eta
= qpms_ss_adjusted_eta(ss
, wavenumber
, tmp
);
1263 const size_t full_len
= ss
->fecv_size
;
1265 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1266 const ptrdiff_t deststride
= ss
->fecv_size
, srcstride
= 1;
1267 // We have some limitations in the current implementation
1268 if(ss
->lattice_dimension
== 2 && // Currently, we can only the xy-plane
1269 !ss
->per
.lattice_basis
[0].z
&& !ss
->per
.lattice_basis
[1].z
&&
1271 for (qpms_ss_pi_t pd
= 0; pd
< ss
->p_count
; ++pd
)
1272 for (qpms_ss_pi_t ps
= 0; ps
< ss
->p_count
; ++ps
) {
1273 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W32xy(ss
, pd
, ps
, wavenumber
, cart3xy2cart2(*wavevector
),
1274 target
+ deststride
* ss
->fecv_pstarts
[pd
] + srcstride
* ss
->fecv_pstarts
[ps
],
1275 deststride
, srcstride
, QPMS_EWALD_FULL
, eta
));
1278 QPMS_NOT_IMPLEMENTED("Only 2D xy-lattices currently supported");
1282 // Common implementation of qpms_scatsysw[k]_build_modeproblem_matrix_full
1283 static inline complex double *qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1284 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1285 complex double *target
,
1286 const qpms_scatsys_at_omega_t
*ssw
,
1287 const double k
[], // NULL if non-periodic
1288 const double eta
// ignored if non-periodic
1291 const complex double wavenumber
= ssw
->wavenumber
;
1292 const qpms_scatsys_t
*ss
= ssw
->ss
;
1293 const size_t full_len
= ss
->fecv_size
;
1295 QPMS_CRASHING_MALLOC(target
, SQ(full_len
) * sizeof(complex double));
1296 complex double *tmp
; // translation matrix, S or W
1297 QPMS_CRASHING_MALLOC(tmp
, SQ(ss
->max_bspecn
) * sizeof(complex double));
1298 const complex double zero
= 0, minusone
= -1;
1299 { // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
1300 size_t fullvec_offsetR
= 0;
1301 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) {
1302 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1303 const cart3_t posR
= ss
->p
[piR
].pos
;
1304 size_t fullvec_offsetC
= 0;
1305 // dest particle T-matrix
1306 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1307 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) {
1308 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1309 if (k
== NULL
) { // non-periodic case
1310 if(piC
!= piR
) { // No "self-interaction" in non-periodic case
1311 const cart3_t posC
= ss
->p
[piC
].pos
;
1312 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1313 tmp
, // tmp is S(piR<-piC)
1314 bspecR
, bspecC
->n
, bspecC
, 1,
1315 wavenumber
, posR
, posC
, QPMS_HANKEL_PLUS
));
1317 } else { // periodic case
1318 QPMS_ENSURE_SUCCESS(qpms_ss_ppair_W(ss
, piR
, piC
, wavenumber
, k
,
1319 tmp
/*target*/, bspecC
->n
/*deststride*/, 1 /*srcstride*/,
1320 QPMS_EWALD_FULL
, eta
));
1323 if (k
== NULL
&& piC
== piR
) {
1324 // non-periodic case diagonal block: no "self-interaction", just
1325 // fill with zeroes (the ones on the diagonal are added in the very end)
1326 QPMS_ENSURE_SUCCESS(LAPACKE_zlaset(CblasRowMajor
, 'x',
1327 bspecR
->n
/*m*/, bspecC
->n
/*n*/, 0 /*alfa: offdiag*/, 0 /*beta: diag*/,
1328 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
/*a*/,
1330 } else cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1331 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1332 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1333 tmp
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1334 target
+ fullvec_offsetR
*full_len
+ fullvec_offsetC
/*c*/,
1337 fullvec_offsetC
+= bspecC
->n
;
1339 fullvec_offsetR
+= bspecR
->n
;
1343 // Add the identity, diagonal part M[pi,pi] += 1
1344 for (size_t i
= 0; i
< full_len
; ++i
) target
[full_len
* i
+ i
] += 1;
1350 complex double *qpms_scatsysw_build_modeproblem_matrix_full(
1351 complex double *target
, const qpms_scatsys_at_omega_t
*ssw
) {
1352 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_build_modeproblem_matrix_full()");
1353 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(
1354 target
, ssw
, NULL
, NAN
);
1357 complex double *qpms_scatsyswk_build_modeproblem_matrix_full(
1358 complex double *target
, const qpms_scatsys_at_omega_k_t
*sswk
)
1360 qpms_ss_ensure_periodic_a(sswk
->ssw
->ss
, "qpms_scatsysw_build_modeproblem_matrix_full()");
1361 return qpms_scatsysw_scatsyswk_build_modeproblem_matrix_full(target
, sswk
->ssw
, sswk
->k
, sswk
->eta
);
1365 // Serial reference implementation.
1366 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_serial(
1367 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1368 complex double *target_packed
,
1369 const qpms_scatsys_at_omega_t
*ssw
,
1373 const qpms_scatsys_t
*ss
= ssw
->ss
;
1374 qpms_ss_ensure_nonperiodic(ss
);
1375 const complex double k
= ssw
->wavenumber
;
1376 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1377 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1378 return target_packed
;
1379 if (target_packed
== NULL
)
1380 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1381 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1383 // some of the following workspaces are probably redundant; TODO optimize later.
1385 // workspaces for the uncompressed particle<-particle tranlation matrix block
1386 // and the result of multiplying with a T-matrix (times -1)
1387 complex double *Sblock
, *TSblock
;
1388 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1389 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1391 // Workspace for the intermediate particle-orbit matrix result
1392 complex double *tmp
;
1393 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1395 const complex double one
= 1, zero
= 0, minusone
= -1;
1397 for(qpms_ss_pi_t piR
= 0; piR
< ss
->p_count
; ++piR
) { //Row loop
1398 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[piR
].t
;
1399 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1400 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[piR
].osn
;
1401 const qpms_ss_orbit_pi_t opiR
= ss
->p_orbitinfo
[piR
].p
;
1402 // This is where the particle's orbit starts in the "packed" vector:
1403 const size_t packed_orbit_offsetR
=
1404 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1405 + osnR
* otR
->irbase_sizes
[iri
];
1406 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->spec
;
1407 // Orbit coeff vector's full size:
1408 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1409 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1410 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1411 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1412 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1413 const cart3_t posR
= ss
->p
[piR
].pos
;
1414 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1415 // dest particle T-matrix
1416 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1417 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1418 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1419 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1420 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1421 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1422 // This is where the particle's orbit starts in the "packed" vector:
1423 const size_t packed_orbit_offsetC
=
1424 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1425 + osnC
* otC
->irbase_sizes
[iri
];
1426 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1427 // Orbit coeff vector's full size:
1428 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1429 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1430 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1431 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1432 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1434 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1435 if(piC
!= piR
) { // non-diagonal, calculate TS
1436 const cart3_t posC
= ss
->p
[piC
].pos
;
1437 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1438 Sblock
, // Sblock is S(piR->piC)
1439 bspecR
, bspecC
->n
, bspecC
, 1,
1440 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1442 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1443 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1444 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1445 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1446 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1447 } else { // diagonal, fill with diagonal +1
1448 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1449 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1450 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1453 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1454 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1455 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1456 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1457 omC
+ opiC
*particle_fullsizeC
/*B*/,
1458 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1459 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1461 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1462 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1463 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1464 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1465 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1466 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1475 return target_packed
;
1478 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
1479 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1480 complex double *target_packed
,
1481 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1484 const qpms_scatsys_t
*ss
= ssw
->ss
;
1485 qpms_ss_ensure_nonperiodic(ss
);
1486 const complex double k
= ssw
->wavenumber
;
1487 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1488 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1489 return target_packed
;
1490 if (target_packed
== NULL
)
1491 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1492 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1494 // some of the following workspaces are probably redundant; TODO optimize later.
1496 // workspaces for the uncompressed particle<-particle tranlation matrix block
1497 // and the result of multiplying with a T-matrix (times -1)
1498 complex double *Sblock
, *TSblock
;
1499 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1500 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1502 // Workspace for the intermediate particle-orbit matrix result
1503 complex double *tmp
;
1504 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1506 const complex double one
= 1, zero
= 0, minusone
= -1;
1508 for(qpms_ss_pi_t opistartR
= 0; opistartR
< ss
->p_count
;
1509 opistartR
+= ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
//orbit_p_countR; might write a while() instead
1511 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1512 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1513 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1514 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1515 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1516 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1518 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1519 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1520 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1521 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1522 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1523 // Orbit coeff vector's full size:
1524 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1525 // This is where the orbit starts in the "packed" vector:
1526 const size_t packed_orbit_offsetR
=
1527 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1528 + osnR
* otR
->irbase_sizes
[iri
];
1529 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1530 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1531 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1532 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1533 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1534 const cart3_t posR
= ss
->p
[piR
].pos
;
1535 // dest particle T-matrix
1536 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1537 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1538 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1539 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1540 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1541 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1542 // This is where the particle's orbit starts in the "packed" vector:
1543 const size_t packed_orbit_offsetC
=
1544 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1545 + osnC
* otC
->irbase_sizes
[iri
];
1546 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1547 // Orbit coeff vector's full size:
1548 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1549 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1550 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1551 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1552 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1554 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1555 if(piC
!= piR
) { // non-diagonal, calculate TS
1556 const cart3_t posC
= ss
->p
[piC
].pos
;
1557 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1558 Sblock
, // Sblock is S(piR->piC)
1559 bspecR
, bspecC
->n
, bspecC
, 1,
1560 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1562 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1563 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1564 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1565 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1566 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1567 } else { // diagonal, fill with diagonal +1
1568 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1569 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1570 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1573 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1574 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1575 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1576 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1577 omC
+ opiC
*particle_fullsizeC
/*B*/,
1578 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1579 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1581 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1582 cblas_zgemm(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1583 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1584 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1585 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1586 target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1596 return target_packed
;
1599 struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
{
1600 const qpms_scatsys_at_omega_t
*ssw
;
1601 qpms_ss_pi_t
*opistartR_ptr
;
1602 pthread_mutex_t
*opistartR_mutex
;
1604 complex double *target_packed
;
1607 static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg
)
1609 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1611 const qpms_scatsys_at_omega_t
*ssw
= a
->ssw
;
1612 const complex double k
= ssw
->wavenumber
;
1613 const qpms_scatsys_t
*ss
= ssw
->ss
;
1614 const qpms_iri_t iri
= a
->iri
;
1615 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1617 // some of the following workspaces are probably redundant; TODO optimize later.
1619 // workspaces for the uncompressed particle<-particle tranlation matrix block
1620 // and the result of multiplying with a T-matrix (times -1)
1621 complex double *Sblock
, *TSblock
;
1622 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1623 QPMS_CRASHING_MALLOC(TSblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1625 // Workspace for the intermediate particle-orbit matrix result
1626 complex double *tmp
;
1627 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1629 const complex double one
= 1, zero
= 0, minusone
= -1;
1632 // In the beginning, pick a target (row) orbit for this thread
1633 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1634 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1635 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1638 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1639 // Now increment it for another thread:
1640 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1641 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1643 // Orbit picked (defined by opistartR), do the work.
1644 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1645 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1646 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1647 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1648 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1649 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1651 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1652 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1653 const qpms_vswf_set_spec_t
*bspecR
= ssw
->tm
[ss
->p
[orbitstartpiR
].tmatrix_id
]->spec
;
1654 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1655 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1656 // Orbit coeff vector's full size:
1657 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1658 // This is where the orbit starts in the "packed" vector:
1659 const size_t packed_orbit_offsetR
=
1660 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1661 + osnR
* otR
->irbase_sizes
[iri
];
1662 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1663 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1664 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1665 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1666 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1667 const cart3_t posR
= ss
->p
[piR
].pos
;
1668 // dest particle T-matrix
1669 const complex double *tmmR
= ssw
->tm
[ss
->p
[piR
].tmatrix_id
]->m
;
1670 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1671 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1672 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1673 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1674 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1675 // This is where the particle's orbit starts in the "packed" vector:
1676 const size_t packed_orbit_offsetC
=
1677 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1678 + osnC
* otC
->irbase_sizes
[iri
];
1679 const qpms_vswf_set_spec_t
*bspecC
= ssw
->tm
[ss
->p
[piC
].tmatrix_id
]->spec
;
1680 // Orbit coeff vector's full size:
1681 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1682 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1683 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1684 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1685 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1687 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1688 if(piC
!= piR
) { // non-diagonal, calculate TS
1689 const cart3_t posC
= ss
->p
[piC
].pos
;
1690 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1691 Sblock
, // Sblock is S(piR->piC)
1692 bspecR
, bspecC
->n
, bspecC
, 1,
1693 k
, posR
, posC
, QPMS_HANKEL_PLUS
));
1695 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1696 bspecR
->n
/*m*/, bspecC
->n
/*n*/, bspecR
->n
/*k*/,
1697 &minusone
/*alpha*/, tmmR
/*a*/, bspecR
->n
/*lda*/,
1698 Sblock
/*b*/, bspecC
->n
/*ldb*/, &zero
/*beta*/,
1699 TSblock
/*c*/, bspecC
->n
/*ldc*/);
1700 } else { // diagonal, fill with diagonal +1
1701 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1702 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1703 TSblock
[row
* bspecC
->n
+ col
] = (row
== col
)? +1 : 0;
1706 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1707 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1708 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1709 &one
/*alpha*/, TSblock
/*A*/, particle_fullsizeC
/*ldA*/,
1710 omC
+ opiC
*particle_fullsizeC
/*B*/,
1711 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1712 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1714 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1715 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1716 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1717 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1718 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1719 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1735 // this differs from the ...build_modeproblem_matrix... only by the `J`
1736 // maybe I should use this one there as well to save lines... TODO
1737 struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
{
1738 const qpms_scatsys_t
*ss
;
1739 qpms_ss_pi_t
*opistartR_ptr
;
1740 pthread_mutex_t
*opistartR_mutex
;
1742 complex double *target_packed
;
1747 static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread(void *arg
)
1749 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1751 const qpms_scatsys_t
*ss
= a
->ss
;
1752 const qpms_iri_t iri
= a
->iri
;
1753 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1754 const qpms_bessel_t J
= a
->J
;
1756 // some of the following workspaces are probably redundant; TODO optimize later.
1758 // workspace for the uncompressed particle<-particle tranlation matrix block
1759 complex double *Sblock
;
1760 QPMS_CRASHING_MALLOC(Sblock
, sizeof(complex double)*SQ(ss
->max_bspecn
));
1762 // Workspace for the intermediate particle-orbit matrix result
1763 complex double *tmp
;
1764 QPMS_CRASHING_MALLOC(tmp
, sizeof(complex double) * SQ(ss
->max_bspecn
) * ss
->sym
->order
);
1766 const complex double one
= 1, zero
= 0;
1769 // In the beginning, pick a target (row) orbit for this thread
1770 QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a
->opistartR_mutex
));
1771 if(*(a
->opistartR_ptr
) >= ss
->p_count
) {// Everything is already done, end
1772 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1775 const qpms_ss_pi_t opistartR
= *(a
->opistartR_ptr
);
1776 // Now increment it for another thread:
1777 *(a
->opistartR_ptr
) += ss
->orbit_types
[ss
->p_orbitinfo
[ss
->p_by_orbit
[opistartR
]].t
].size
;
1778 QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a
->opistartR_mutex
));
1780 // Orbit picked (defined by opistartR), do the work.
1781 const qpms_ss_pi_t orbitstartpiR
= ss
->p_by_orbit
[opistartR
];
1782 const qpms_ss_oti_t otiR
= ss
->p_orbitinfo
[orbitstartpiR
].t
;
1783 const qpms_ss_osn_t osnR
= ss
->p_orbitinfo
[orbitstartpiR
].osn
;
1784 const qpms_ss_orbit_type_t
*const otR
= ss
->orbit_types
+ otiR
;
1785 const qpms_ss_orbit_pi_t orbit_p_countR
= otR
->size
;
1786 const size_t orbit_packedsizeR
= otR
->irbase_sizes
[iri
];
1788 if(orbit_packedsizeR
) { // avoid zgemm crash on empty irrep
1789 const size_t particle_fullsizeR
= otR
->bspecn
; // == bspecR->n
1790 const qpms_vswf_set_spec_t
*bspecR
= qpms_ss_bspec_pi(ss
, orbitstartpiR
);
1791 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1792 const complex double *omR
= otR
->irbases
+ otR
->irbase_offsets
[iri
];
1793 // Orbit coeff vector's full size:
1794 const size_t orbit_fullsizeR
= otR
->size
* otR
->bspecn
;
1795 // This is where the orbit starts in the "packed" vector:
1796 const size_t packed_orbit_offsetR
=
1797 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiR
]
1798 + osnR
* otR
->irbase_sizes
[iri
];
1799 for(qpms_ss_orbit_pi_t opiR
= 0; opiR
< orbit_p_countR
; ++opiR
) {
1800 qpms_ss_pi_t piR
= ss
->p_by_orbit
[opistartR
+ opiR
];
1801 assert(opiR
== ss
->p_orbitinfo
[piR
].p
);
1802 assert(otiR
== ss
->p_orbitinfo
[piR
].t
);
1803 assert(ss
->p_orbitinfo
[piR
].osn
== osnR
);
1804 const cart3_t posR
= ss
->p
[piR
].pos
;
1805 for(qpms_ss_pi_t piC
= 0; piC
< ss
->p_count
; ++piC
) { //Column loop
1806 const qpms_ss_oti_t otiC
= ss
->p_orbitinfo
[piC
].t
;
1807 const qpms_ss_orbit_type_t
*const otC
= ss
->orbit_types
+ otiC
;
1808 const qpms_ss_osn_t osnC
= ss
->p_orbitinfo
[piC
].osn
;
1809 const qpms_ss_orbit_pi_t opiC
= ss
->p_orbitinfo
[piC
].p
;
1810 // This is where the particle's orbit starts in the "packed" vector:
1811 const size_t packed_orbit_offsetC
=
1812 ss
->saecv_ot_offsets
[iri
*ss
->orbit_type_count
+ otiC
]
1813 + osnC
* otC
->irbase_sizes
[iri
];
1814 const qpms_vswf_set_spec_t
*bspecC
= qpms_ss_bspec_pi(ss
, piC
);
1815 // Orbit coeff vector's full size:
1816 const size_t orbit_fullsizeC
= otC
->size
* otC
->bspecn
;
1817 const size_t particle_fullsizeC
= otC
->bspecn
; // == bspecC->n
1818 const size_t orbit_packedsizeC
= otC
->irbase_sizes
[iri
];
1819 // This is the orbit-level matrix projecting the whole orbit onto the irrep.
1820 const complex double *omC
= otC
->irbases
+ otC
->irbase_offsets
[iri
];
1822 if(orbit_packedsizeC
) { // avoid zgemm crash on empty irrep
1823 // THIS IS THE MAIN PART DIFFERENT FROM ...modeproblem...() TODO unify
1824 // somehow to save lines
1825 if(piC
!= piR
) { // non-diagonal, calculate S
1826 const cart3_t posC
= ss
->p
[piC
].pos
;
1827 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss
->c
,
1828 Sblock
, // Sblock is S(piR->piC)
1829 bspecR
, bspecC
->n
, bspecC
, 1,
1830 a
->k
, posR
, posC
, J
));
1831 } else { // diagonal, fill with zeros; TODO does this make sense?
1832 // would unit matrix be better? or unit only for QPMS_BESSEL_REGULAR?
1833 for (size_t row
= 0; row
< bspecR
->n
; ++row
)
1834 for (size_t col
= 0; col
< bspecC
->n
; ++col
)
1835 Sblock
[row
* bspecC
->n
+ col
] = 0; //(row == col)? 1 : 0;
1838 // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
1839 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasConjTrans
,
1840 particle_fullsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeC
/*K*/,
1841 &one
/*alpha*/, Sblock
/*A*/, particle_fullsizeC
/*ldA*/,
1842 omC
+ opiC
*particle_fullsizeC
/*B*/,
1843 orbit_fullsizeC
/*ldB*/, &zero
/*beta*/,
1844 tmp
/*C*/, orbit_packedsizeC
/*LDC*/);
1846 // target[oiR|piR,oiC|piC] += U[...] tmp[...]
1847 SERIAL_ZGEMM(CblasRowMajor
, CblasNoTrans
, CblasNoTrans
,
1848 orbit_packedsizeR
/*M*/, orbit_packedsizeC
/*N*/, particle_fullsizeR
/*K*/,
1849 &one
/*alpha*/, omR
+ opiR
*particle_fullsizeR
/*A*/, orbit_fullsizeR
/*ldA*/,
1850 tmp
/*B*/, orbit_packedsizeC
/*ldB*/, &one
/*beta*/,
1851 a
->target_packed
+ packedlen
*packed_orbit_offsetR
+ packed_orbit_offsetC
/*C*/,
1866 // Almost the same as ...build_modeproblem_matrix_...parallelR
1867 // --> TODO write this in a more generic way to save LoC.
1868 complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
1869 /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
1870 complex double *target_packed
,
1871 const qpms_scatsys_t
*ss
,
1873 const complex double k
,
1877 qpms_ss_ensure_nonperiodic(ss
);
1879 const size_t packedlen
= ss
->saecv_sizes
[iri
];
1880 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1881 return target_packed
;
1882 if (target_packed
== NULL
)
1883 QPMS_CRASHING_MALLOC(target_packed
, SQ(packedlen
)*sizeof(complex double));
1884 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1886 qpms_ss_pi_t opistartR
= 0;
1887 pthread_mutex_t opistartR_mutex
;
1888 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1889 const struct qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread_arg
1890 arg
= {ss
, &opistartR
, &opistartR_mutex
, iri
, target_packed
, k
, J
};
1892 // FIXME THIS IS NOT PORTABLE:
1894 if (qpms_scatsystem_nthreads_override
> 0) {
1895 nthreads
= qpms_scatsystem_nthreads_override
;
1896 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1899 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1901 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1902 nthreads
, qpms_scatsystem_nthreads_default
);
1903 nthreads
= qpms_scatsystem_nthreads_default
;
1905 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1908 pthread_t thread_ids
[nthreads
];
1909 for(long thi
= 0; thi
< nthreads
; ++thi
)
1910 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1911 qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thread
,
1913 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1915 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1918 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1919 return target_packed
;
1923 // Parallel implementation, now default
1924 complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
1925 /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
1926 complex double *target_packed
,
1927 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
1930 qpms_ss_ensure_nonperiodic(ssw
->ss
);
1931 const size_t packedlen
= ssw
->ss
->saecv_sizes
[iri
];
1932 if (!packedlen
) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
1933 return target_packed
;
1934 if (target_packed
== NULL
)
1935 QPMS_CRASHING_MALLOC(target_packed
,SQ(packedlen
)*sizeof(complex double));
1936 memset(target_packed
, 0, SQ(packedlen
)*sizeof(complex double));
1938 qpms_ss_pi_t opistartR
= 0;
1939 pthread_mutex_t opistartR_mutex
;
1940 QPMS_ENSURE_SUCCESS(pthread_mutex_init(&opistartR_mutex
, NULL
));
1941 const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
1942 arg
= {ssw
, &opistartR
, &opistartR_mutex
, iri
, target_packed
};
1944 // FIXME THIS IS NOT PORTABLE:
1946 if (qpms_scatsystem_nthreads_override
> 0) {
1947 nthreads
= qpms_scatsystem_nthreads_override
;
1948 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "Using overriding value of %ld thread(s).",
1951 nthreads
= sysconf(_SC_NPROCESSORS_ONLN
);
1953 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NPROCESSORS_ONLN returned %ld, using %ld thread(s) instead.",
1954 nthreads
, qpms_scatsystem_nthreads_default
);
1955 nthreads
= qpms_scatsystem_nthreads_default
;
1957 QPMS_DEBUG(QPMS_DBGMSG_THREADS
, "_SC_NRPOCESSORS_ONLN returned %ld.", nthreads
);
1960 pthread_t thread_ids
[nthreads
];
1961 for(long thi
= 0; thi
< nthreads
; ++thi
)
1962 QPMS_ENSURE_SUCCESS(pthread_create(thread_ids
+ thi
, NULL
,
1963 qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread
,
1965 for(long thi
= 0; thi
< nthreads
; ++thi
) {
1967 QPMS_ENSURE_SUCCESS(pthread_join(thread_ids
[thi
], &retval
));
1970 QPMS_ENSURE_SUCCESS(pthread_mutex_destroy(&opistartR_mutex
));
1971 return target_packed
;
1975 complex double *qpms_scatsys_incident_field_vector_full(
1976 complex double *target_full
, const qpms_scatsys_t
*ss
,
1977 qpms_incfield_t f
, const void *args
, bool add
) {
1979 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
1980 sizeof(complex double));
1981 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
1982 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
1983 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
1984 const cart3_t pos
= ss
->p
[pi
].pos
;
1985 QPMS_ENSURE_SUCCESS(f(ptarget
, bspec
, pos
, args
, add
));
1992 complex double *qpms_scatsys_incident_field_vector_irrep_packed(
1993 complex double *target_full
, const qpms_scatsys_t
*ss
,
1994 const qpms_iri_t iri
, qpms_incfield_t f
,
1995 const void *args
, bool add
) {
2001 complex double *qpms_scatsysw_apply_Tmatrices_full(
2002 complex double *target_full
, const complex double *inc_full
,
2003 const qpms_scatsys_at_omega_t
*ssw
) {
2005 const qpms_scatsys_t
*ss
= ssw
->ss
;
2006 if (!target_full
) QPMS_CRASHING_CALLOC(target_full
, ss
->fecv_size
,
2007 sizeof(complex double));
2008 for(qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2009 complex double *ptarget
= target_full
+ ss
->fecv_pstarts
[pi
];
2010 const complex double *psrc
= inc_full
+ ss
->fecv_pstarts
[pi
];
2011 // TODO check whether T-matrix is non-virtual after virtual t-matrices are implemented.
2012 const qpms_tmatrix_t
*T
= ssw
->tm
[ss
->p
[pi
].tmatrix_id
];
2013 qpms_apply_tmatrix(ptarget
, psrc
, T
);
2018 ccart3_t
qpms_scatsys_scattered_E(const qpms_scatsys_t
*ss
,
2020 const complex double k
,
2021 const complex double *cvf
,
2025 ccart3_t res
= {0,0,0};
2026 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2028 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2029 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2030 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2031 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2033 const csph_t kr
= sph_cscale(k
, cart2sph(
2034 cart3_substract(where
, particle_pos
)));
2035 const csphvec_t E_sph
= qpms_eval_uvswf(bspec
, particle_cv
, kr
, btyp
);
2036 const ccart3_t E_cart
= csphvec2ccart_csph(E_sph
, kr
);
2037 ckahanadd(&(res
.x
), &(res_kc
.x
), E_cart
.x
);
2038 ckahanadd(&(res
.y
), &(res_kc
.y
), E_cart
.y
);
2039 ckahanadd(&(res
.z
), &(res_kc
.z
), E_cart
.z
);
2044 ccart3_t
qpms_scatsysw_scattered_E(const qpms_scatsys_at_omega_t
*ssw
,
2046 const complex double *cvf
, const cart3_t where
) {
2047 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_scattered_E()");
2048 return qpms_scatsys_scattered_E(ssw
->ss
, btyp
, ssw
->wavenumber
,
2053 #define DIPSPECN 3 // We have three basis vectors
2054 // Evaluates the regular electric dipole waves in the origin. The returned
2055 // value is not to be freed as in the usual case.
2056 static inline const qpms_vswf_set_spec_t
qpms_fill_regdipoles_0(
2057 ccart3_t regdipoles_0
[DIPSPECN
], qpms_normalisation_t normalisation
) {
2058 // bspec containing only electric dipoles
2059 const qpms_vswf_set_spec_t dipspec
= {
2061 .ilist
= (qpms_uvswfi_t
[]){
2062 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, -1, 1),
2063 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, 0, 1),
2064 qpms_tmn2uvswfi(QPMS_VSWF_ELECTRIC
, +1, 1),
2066 .lMax
=1, .lMax_M
=0, .lMax_N
=1, .lMax_L
=-1,
2068 .norm
= normalisation
,
2071 const sph_t origin_sph
= {.r
= 0, .theta
= M_PI_2
, .phi
=0}; // Should work with any theta/phi (TESTWORTHY)
2072 csphvec_t regdipoles_0_sph
[DIPSPECN
];
2073 QPMS_ENSURE_SUCCESS(qpms_uvswf_fill(regdipoles_0_sph
, &dipspec
,
2074 sph2csph(origin_sph
), QPMS_BESSEL_REGULAR
));
2075 for(int i
= 0; i
< DIPSPECN
; ++i
)
2076 regdipoles_0
[i
] = csphvec2ccart(regdipoles_0_sph
[i
], origin_sph
);
2082 // Alternative implementation, using translation operator and regular dipole waves at zero
2083 ccart3_t
qpms_scatsys_scattered_E__alt(const qpms_scatsys_t
*ss
,
2085 const complex double k
,
2086 const complex double *cvf
,
2090 qpms_ss_ensure_nonperiodic(ss
);
2091 ccart3_t res
= {0,0,0};
2092 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2094 ccart3_t regdipoles_0
[DIPSPECN
];
2095 const qpms_vswf_set_spec_t dipspec
= qpms_fill_regdipoles_0(regdipoles_0
, ss
->c
->normalisation
);
2097 complex double *s
; // Translation matrix
2098 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2100 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2101 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2102 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2103 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2105 const cart3_t origin_cart
= {0, 0, 0};
2107 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(
2108 ss
->c
, s
, &dipspec
, 1, bspec
, dipspec
.n
, k
, particle_pos
, where
, btyp
));
2110 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2111 for(size_t j
= 0; j
< dipspec
.n
; ++j
){
2112 ccart3_t summand
= ccart3_scale(particle_cv
[i
] * s
[dipspec
.n
*i
+j
], regdipoles_0
[j
]);
2113 ckahanadd(&(res
.x
), &(res_kc
.x
), summand
.x
);
2114 ckahanadd(&(res
.y
), &(res_kc
.y
), summand
.y
);
2115 ckahanadd(&(res
.z
), &(res_kc
.z
), summand
.z
);
2122 ccart3_t
qpms_scatsysw_scattered_E__alt(const qpms_scatsys_at_omega_t
*ssw
,
2123 qpms_bessel_t btyp
, const complex double *cvf
, const cart3_t where
) {
2124 return qpms_scatsys_scattered_E__alt(ssw
->ss
, btyp
, ssw
->wavenumber
,
2128 // For periodic lattices, we use directly the "alternative" implementation,
2129 // using translation operator and regular dipole waves at zero
2130 ccart3_t
qpms_scatsyswk_scattered_E(const qpms_scatsys_at_omega_k_t
*sswk
,
2132 const complex double *cvf
,
2136 if (btyp
!= QPMS_HANKEL_PLUS
)
2137 QPMS_NOT_IMPLEMENTED("Only scattered field with first kind Hankel functions currently implemented.");
2138 const qpms_scatsys_t
*ss
= sswk
->ssw
->ss
;
2139 if (ss
->lattice_dimension
!= 2)
2140 QPMS_NOT_IMPLEMENTED("Only 2D-periodic lattices implemented");
2141 ccart3_t res
= {0,0,0};
2142 ccart3_t res_kc
= {0,0,0}; // kahan sum compensation
2144 ccart3_t regdipoles_0
[DIPSPECN
];
2145 const qpms_vswf_set_spec_t dipspec
= qpms_fill_regdipoles_0(regdipoles_0
, ss
->c
->normalisation
);
2147 complex double *s
; // Translation matrix
2148 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2150 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2151 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2152 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2153 const complex double *particle_cv
= cvf
+ ss
->fecv_pstarts
[pi
];
2155 const cart3_t origin_cart
= {0, 0, 0};
2157 QPMS_ASSERT(sswk
->k
[2] == 0); // At least not implemented now
2158 QPMS_ASSERT(ss
->per
.lattice_basis
[0].z
== 0);
2159 QPMS_ASSERT(ss
->per
.lattice_basis
[1].z
== 0);
2161 // Same choices as in qpms_ss_ppair_W32xy; TODO make it more dynamic
2162 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
2163 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
2165 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_e32(
2167 &dipspec
, 1, bspec
, dipspec
.n
,
2168 sswk
->eta
, sswk
->ssw
->wavenumber
,
2169 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
2170 cart2_from_double_array(sswk
->k
), cart3_substract(where
, particle_pos
) /*CHECKSIGN*/,
2173 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2174 for(size_t j
= 0; j
< dipspec
.n
; ++j
){
2175 ccart3_t summand
= ccart3_scale(particle_cv
[i
] * s
[dipspec
.n
*i
+j
], regdipoles_0
[j
]);
2176 ckahanadd(&(res
.x
), &(res_kc
.x
), summand
.x
);
2177 ckahanadd(&(res
.y
), &(res_kc
.y
), summand
.y
);
2178 ckahanadd(&(res
.z
), &(res_kc
.z
), summand
.z
);
2185 qpms_errno_t
qpms_scatsyswk_scattered_field_basis(
2187 const qpms_scatsys_at_omega_k_t
*sswk
,
2188 const qpms_bessel_t btyp
,
2192 if (btyp
!= QPMS_HANKEL_PLUS
)
2193 QPMS_NOT_IMPLEMENTED("Only scattered field with first kind Hankel functions currently implemented.");
2194 const qpms_scatsys_t
*ss
= sswk
->ssw
->ss
;
2195 if (ss
->lattice_dimension
!= 2)
2196 QPMS_NOT_IMPLEMENTED("Only 2D-periodic lattices implemented");
2197 //ccart3_t res = {0,0,0};
2198 //ccart3_t res_kc = {0,0,0}; // kahan sum compensation
2200 ccart3_t regdipoles_0
[DIPSPECN
];
2201 const qpms_vswf_set_spec_t dipspec
= qpms_fill_regdipoles_0(regdipoles_0
, ss
->c
->normalisation
);
2203 complex double *s
; // Translation matrix
2204 QPMS_CRASHING_MALLOC(s
, ss
->max_bspecn
* sizeof(*s
) * dipspec
.n
);
2206 memset(target
, 0, ss
->fecv_size
* sizeof(*target
));
2208 for (qpms_ss_pi_t pi
= 0; pi
< ss
->p_count
; ++pi
) {
2209 const qpms_vswf_set_spec_t
*bspec
= qpms_ss_bspec_pi(ss
, pi
);
2210 const cart3_t particle_pos
= ss
->p
[pi
].pos
;
2211 //const complex double *particle_cv = cvf + ss->fecv_pstarts[pi];
2213 const cart3_t origin_cart
= {0, 0, 0};
2215 QPMS_ASSERT(sswk
->k
[2] == 0); // At least not implemented now
2216 QPMS_ASSERT(ss
->per
.lattice_basis
[0].z
== 0);
2217 QPMS_ASSERT(ss
->per
.lattice_basis
[1].z
== 0);
2219 // Same choices as in qpms_ss_ppair_W32xy; TODO make it more dynamic
2220 const double maxR
= sqrt(ss
->per
.unitcell_volume
) * 64;
2221 const double maxK
= 2048 * 2 * M_PI
/ maxR
;
2223 QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_e32(
2225 &dipspec
, 1, bspec
, dipspec
.n
,
2226 sswk
->eta
, sswk
->ssw
->wavenumber
,
2227 cart3xy2cart2(ss
->per
.lattice_basis
[0]), cart3xy2cart2(ss
->per
.lattice_basis
[1]),
2228 cart2_from_double_array(sswk
->k
), cart3_substract(where
, particle_pos
) /*CHECKSIGN*/,
2231 for(size_t i
= 0; i
< bspec
->n
; ++i
)
2232 for(size_t j
= 0; j
< dipspec
.n
; ++j
){
2233 target
[ss
->fecv_pstarts
[pi
] + i
] = ccart3_add(target
[ss
->fecv_pstarts
[pi
] + i
],
2234 ccart3_scale(s
[dipspec
.n
*i
+j
], regdipoles_0
[j
]));
2235 //ccart3_t summand = ccart3_scale(particle_cv[i] * s[dipspec.n*i+j], regdipoles_0[j]);
2236 //ckahanadd(&(res.x), &(res_kc.x), summand.x);
2237 //ckahanadd(&(res.y), &(res_kc.y), summand.y);
2238 //ckahanadd(&(res.z), &(res_kc.z), summand.z);
2242 return QPMS_SUCCESS
;
2246 ccart3_t
qpms_scatsys_scattered_E_irrep(const qpms_scatsys_t
*ss
,
2247 qpms_iri_t iri
, const complex double *cvr
, cart3_t where
) {
2252 void qpms_ss_LU_free(qpms_ss_LU lu
) {
2257 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_full_factorise(complex double *mpmatrix_full
,
2258 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
, const qpms_scatsys_at_omega_k_t
*sswk
) {
2260 QPMS_ASSERT(sswk
->ssw
== ssw
|| !ssw
);
2262 QPMS_ASSERT(ssw
->ss
->lattice_dimension
> 0);
2264 QPMS_ASSERT(ssw
->ss
->lattice_dimension
== 0);
2266 const qpms_scatsys_t
*ss
= ssw
->ss
;
2267 QPMS_ENSURE(mpmatrix_full
, "A non-NULL pointer to the pre-calculated mode matrix is required");
2268 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, ss
->fecv_size
* sizeof(int));
2269 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, ss
->fecv_size
, ss
->fecv_size
,
2270 mpmatrix_full
, ss
->fecv_size
, target_piv
));
2272 lu
.a
= mpmatrix_full
;
2273 lu
.ipiv
= target_piv
;
2281 qpms_ss_LU
qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(complex double *mpmatrix_packed
,
2282 int *target_piv
, const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
) {
2283 QPMS_ENSURE(mpmatrix_packed
, "A non-NULL pointer to the pre-calculated mode matrix is required");
2284 qpms_ss_ensure_nonperiodic(ssw
->ss
);
2285 size_t n
= ssw
->ss
->saecv_sizes
[iri
];
2286 if (!target_piv
) QPMS_CRASHING_MALLOC(target_piv
, n
* sizeof(int));
2287 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR
, n
, n
,
2288 mpmatrix_packed
, n
, target_piv
));
2290 lu
.a
= mpmatrix_packed
;
2291 lu
.ipiv
= target_piv
;
2298 qpms_ss_LU
qpms_scatsysw_build_modeproblem_matrix_full_LU(
2299 complex double *target
, int *target_piv
,
2300 const qpms_scatsys_at_omega_t
*ssw
){
2301 qpms_ss_ensure_nonperiodic_a(ssw
->ss
, "qpms_scatsyswk_build_modeproblem_matrix_full_LU()");
2302 target
= qpms_scatsysw_build_modeproblem_matrix_full(target
, ssw
);
2303 return qpms_scatsysw_modeproblem_matrix_full_factorise(target
, target_piv
, ssw
, NULL
);
2306 qpms_ss_LU
qpms_scatsyswk_build_modeproblem_matrix_full_LU(
2307 complex double *target
, int *target_piv
,
2308 const qpms_scatsys_at_omega_k_t
*sswk
){
2309 target
= qpms_scatsyswk_build_modeproblem_matrix_full(target
, sswk
);
2310 return qpms_scatsysw_modeproblem_matrix_full_factorise(target
, target_piv
, sswk
->ssw
, sswk
);
2313 qpms_ss_LU
qpms_scatsysw_build_modeproblem_matrix_irrep_packed_LU(
2314 complex double *target
, int *target_piv
,
2315 const qpms_scatsys_at_omega_t
*ssw
, qpms_iri_t iri
){
2316 target
= qpms_scatsysw_build_modeproblem_matrix_irrep_packed(target
, ssw
, iri
);
2317 return qpms_scatsysw_modeproblem_matrix_irrep_packed_factorise(target
, target_piv
, ssw
, iri
);
2320 complex double *qpms_scatsys_scatter_solve(
2321 complex double *f
, const complex double *a_inc
, qpms_ss_LU lu
) {
2322 const size_t n
= lu
.full
? lu
.ssw
->ss
->fecv_size
: lu
.ssw
->ss
->saecv_sizes
[lu
.iri
];
2323 if (!f
) QPMS_CRASHING_MALLOC(f
, n
* sizeof(complex double));
2324 memcpy(f
, a_inc
, n
*sizeof(complex double)); // It will be rewritten by zgetrs
2325 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR
, 'N' /*trans*/, n
/*n*/, 1 /*nrhs number of right hand sides*/,
2326 lu
.a
/*a*/, n
/*lda*/, lu
.ipiv
/*ipiv*/, f
/*b*/, 1 /*ldb; CHECKME*/));
2330 struct qpms_scatsys_finite_eval_Beyn_ImTS_param
{
2331 const qpms_scatsys_t
*ss
;
2335 /// Wrapper for Beyn algorithm (non-periodic system)
2336 static int qpms_scatsys_finite_eval_Beyn_ImTS(complex double *target
,
2337 size_t m
, complex double omega
, void *params
) {
2338 const struct qpms_scatsys_finite_eval_Beyn_ImTS_param
*p
= params
;
2339 qpms_scatsys_at_omega_t
*ssw
= qpms_scatsys_at_omega(p
->ss
, omega
);
2340 QPMS_ENSURE(ssw
!= NULL
, "qpms_scatsys_at_omega() returned NULL");
2341 if (p
->iri
== QPMS_NO_IRREP
) {
2342 QPMS_ASSERT(m
== p
->ss
->fecv_size
);
2343 QPMS_ENSURE(NULL
!= qpms_scatsysw_build_modeproblem_matrix_full(
2345 "qpms_scatsysw_build_modeproblem_matrix_full() returned NULL");
2347 QPMS_ASSERT(m
== p
->ss
->saecv_sizes
[p
->iri
]);
2348 QPMS_ENSURE(NULL
!= qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
2349 target
, ssw
, p
->iri
),
2350 "qpms_scatsysw_build_modeproblem_matrix_irrep_packed() returned NULL");
2352 qpms_scatsys_at_omega_free(ssw
);
2353 return QPMS_SUCCESS
;
2356 beyn_result_t
*qpms_scatsys_finite_find_eigenmodes(
2357 const qpms_scatsys_t
* const ss
, const qpms_iri_t iri
,
2358 complex double omega_centre
, double omega_rr
, double omega_ri
,
2359 size_t contour_npoints
,
2360 double rank_tol
, size_t rank_sel_min
, double res_tol
) {
2361 qpms_ss_ensure_nonperiodic_a(ss
, "qpms_scatsys_periodic_find_eigenmodes()");
2362 size_t n
; // matrix dimension
2363 if (qpms_iri_is_valid(ss
->sym
, iri
)) {
2364 n
= ss
->saecv_sizes
[iri
];
2365 } else if (iri
== QPMS_NO_IRREP
) {
2369 beyn_contour_t
*contour
= beyn_contour_ellipse(omega_centre
,
2370 omega_rr
, omega_ri
, contour_npoints
);
2372 struct qpms_scatsys_finite_eval_Beyn_ImTS_param p
= {ss
, iri
};
2373 beyn_result_t
*result
= beyn_solve(n
, n
/* possibly make smaller? */,
2374 qpms_scatsys_finite_eval_Beyn_ImTS
, NULL
, (void *) &p
,
2375 contour
, rank_tol
, rank_sel_min
, res_tol
);
2376 QPMS_ENSURE(result
!= NULL
, "beyn_solve() returned NULL");
2382 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param
{
2383 const qpms_scatsys_t
*ss
;
2384 const double *k
; ///< Wavevector in cartesian coordinates.
2387 /// Wrapper for Beyn algorithm (periodic system)
2388 static int qpms_scatsys_periodic_eval_Beyn_ImTW(complex double *target
,
2389 size_t m
, complex double omega
, void *params
){
2390 const struct qpms_scatsys_periodic_eval_Beyn_ImTW_param
*p
= params
;
2391 qpms_scatsys_at_omega_t
*ssw
= qpms_scatsys_at_omega(p
->ss
, omega
);
2392 QPMS_ENSURE(ssw
!= NULL
, "qpms_scatsys_at_omega() returned NULL");
2393 qpms_scatsys_at_omega_k_t sswk
= {
2395 .k
= {p
->k
[0], p
->k
[1], p
->k
[2]},
2396 .eta
= qpms_ss_adjusted_eta(p
->ss
, ssw
->wavenumber
, p
->k
)
2398 QPMS_ASSERT(m
== p
->ss
->fecv_size
);
2400 qpms_scatsyswk_build_modeproblem_matrix_full(target
, &sswk
),
2401 "qpms_scatsyswk_build_modeproblem_matrix_full() returned NULL");
2402 qpms_scatsys_at_omega_free(ssw
);
2403 return QPMS_SUCCESS
;
2406 beyn_result_t
*qpms_scatsys_periodic_find_eigenmodes(
2407 const qpms_scatsys_t
* const ss
, const double k
[3],
2408 complex double omega_centre
, double omega_rr
, double omega_ri
,
2409 size_t contour_npoints
,
2410 double rank_tol
, size_t rank_sel_min
, double res_tol
) {
2411 qpms_ss_ensure_periodic_a(ss
, "qpms_scatsys_finite_find_eigenmodes()");
2412 size_t n
= ss
->fecv_size
; // matrix dimension
2414 beyn_contour_t
*contour
= beyn_contour_ellipse(omega_centre
,
2415 omega_rr
, omega_ri
, contour_npoints
);
2417 struct qpms_scatsys_periodic_eval_Beyn_ImTW_param p
= {ss
, k
};
2418 beyn_result_t
*result
= beyn_solve(n
, n
,
2419 qpms_scatsys_periodic_eval_Beyn_ImTW
, NULL
, (void *) &p
,
2420 contour
, rank_tol
, rank_sel_min
, res_tol
);
2421 QPMS_ENSURE(result
!= NULL
, "beyn_solve() returned NULL");