#include "qpms_types.h"
#include "qpms_specfunc.h"
#include "translations.h"
#include "indexing.h" // TODO replace size_t and int with own index types here
#include <complex.h>
#include <string.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_bessel.h>
#include "tiny_inlines.h"
#include "assert_cython_workaround.h"
#include <gsl/gsl_sf_coupling.h>
#include "qpms_error.h"
#include "normalisation.h"
//#ifdef QPMS_COMPILE_PYTHON_EXTENSIONS
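/* Broadcast loop of the Cython wrapper around qpms_trans_calculator_get_AB_arrays_ext():
 * it walks the (already broadcast) numpy arrays r, theta, phi and r_ge_d element by
 * element and fills the corresponding slices of the A and B translation coefficient
 * arrays, whose destination and source multipole axes are daxis and saxis.
 * (npy_intp and npy_bool come from the NumPy C headers.)
 */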
int qpms_cython_trans_calculator_get_AB_arrays_loop(
    const qpms_trans_calculator *c, const qpms_bessel_t J, const int resnd,
    const int daxis, const int saxis,
    char *A_data, const npy_intp *A_shape, const npy_intp *A_strides,
    char *B_data, const npy_intp *B_shape, const npy_intp *B_strides,
    const char *r_data, const npy_intp *r_shape, const npy_intp *r_strides,
    const char *theta_data, const npy_intp *theta_shape, const npy_intp *theta_strides,
    const char *phi_data, const npy_intp *phi_shape, const npy_intp *phi_strides,
    const char *r_ge_d_data, const npy_intp *r_ge_d_shape, const npy_intp *r_ge_d_strides){
  assert(daxis != saxis);
  int errval = 0;
  const npy_intp *resultshape = A_shape, *resultstrides = A_strides;
  // TODO put some restrict's everywhere?

  // Find the longest of the axes that are neither daxis nor saxis;
  // that one will be iterated in the outer (to-be-parallelized) loop.
  int longest_axis = 0;
  npy_intp longestshape = 0;
  for (int ax = 0; ax < resnd; ++ax){
    assert(A_shape[ax] == B_shape[ax]);
    assert(A_strides[ax] == B_strides[ax]);
    if (daxis == ax || saxis == ax) continue;
    if (A_shape[ax] > longestshape) {
      longest_axis = ax;
      longestshape = A_shape[ax];
    }
  }
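  // E.g. for resultshape {5, 7, nd, ns} with daxis == 2 and saxis == 3,
  // longest_axis ends up as 1 and longestshape as 7. (Illustrative values only.)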
  const npy_intp longlen = resultshape[longest_axis];
  npy_intp innerloop_shape[resnd];
  for (int ax = 0; ax < resnd; ++ax) {
    innerloop_shape[ax] = resultshape[ax];
  }
  /* The longest axis will be iterated in the outer (parallelized) loop.
   * Therefore the longest axis, together with saxis and daxis,
   * will not be iterated in the inner loop:
   */
  innerloop_shape[longest_axis] = 1;
  innerloop_shape[daxis] = 1;
  innerloop_shape[saxis] = 1;
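  // Continuing the example above: innerloop_shape would become {5, 1, 1, 1},
  // so the inner loop visits the 5 elements of axis 0 while the outer loop
  // covers the 7 elements of axis 1.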
  // These are the 'strides' passed to the qpms_trans_calculator_get_AB_arrays_ext
  // function, which counts strides in complex double elements, not in bytes
  // as the numpy 'char *' strides do.
  const npy_intp dstride = resultstrides[daxis] / sizeof(complex double);
  const npy_intp sstride = resultstrides[saxis] / sizeof(complex double);
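  // E.g. if saxis is the last axis of a C-contiguous complex128 array,
  // resultstrides[saxis] equals sizeof(complex double) (typically 16 bytes),
  // so sstride comes out as 1.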
  // TODO here start parallelisation
  //#pragma omp parallel
  {
    npy_intp local_indices[resnd];
    memset(local_indices, 0, sizeof(local_indices));
    int errval_local = 0;
    npy_intp longi;
    for (longi = 0; longi < longlen; ++longi) {
      // This might be done also in the inverse order, but this is the more
      // 'C-contiguous' way of incrementing the indices.
      int ax = resnd - 1;
      while (ax >= 0) {
        /* Calculate the correct index/pointer for each array used.
         * This can be further optimized from O(resnd * total size of
         * the result array) to O(total size of the result array),
         * but that is left as a TODO. */
        const char *r_p = r_data + r_strides[longest_axis] * longi;
        const char *theta_p = theta_data + theta_strides[longest_axis] * longi;
        const char *phi_p = phi_data + phi_strides[longest_axis] * longi;
        const char *r_ge_d_p = r_ge_d_data + r_ge_d_strides[longest_axis] * longi;
        char *A_p = A_data + A_strides[longest_axis] * longi;
        char *B_p = B_data + B_strides[longest_axis] * longi;
        for (int i = 0; i < resnd; ++i) {
          // The following two lines are probably not needed, as innerloop_shape
          // is 1 there anyway, so local_indices[i] is zero whenever i is daxis,
          // saxis, or longest_axis.
          if (i == longest_axis) continue;
          if (daxis == i || saxis == i) continue;
          r_p += r_strides[i] * local_indices[i];
          theta_p += theta_strides[i] * local_indices[i];
          phi_p += phi_strides[i] * local_indices[i];
          A_p += A_strides[i] * local_indices[i];
          B_p += B_strides[i] * local_indices[i];
        }
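        // Note: the numpy strides are byte offsets, which is why all of the
        // pointer arithmetic above is done on char pointers and the results
        // are only cast to double / complex double at the point of use.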
        // perform the actual task here
        errval_local |= qpms_trans_calculator_get_AB_arrays_ext(c,
            (complex double *) A_p, (complex double *) B_p,
            dstride, sstride,
            // FIXME change all the _ext function types to npy_... so that
            // these casts are not needed
            *((double *) r_p), *((double *) theta_p), *((double *) phi_p),
            (int)(*((npy_bool *) r_ge_d_p)), J);
        QPMS_ENSURE_SUCCESS(errval_local);
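        // QPMS_ENSURE_SUCCESS() aborts on a nonzero status, so in the current
        // serial version the errval accumulation below is effectively redundant;
        // it is presumably kept for the parallel version, where aborting inside
        // a worker would be undesirable.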
        // increment the last index 'digit' (ax is now resnd-1; we don't have a do-while loop in Python)
        ++local_indices[ax];
        while (ax >= 0 && local_indices[ax] == innerloop_shape[ax]) {
          // Overflow to the next digit, but stop once we get past the first one.
          // (ax has to be checked before local_indices[ax] is read, otherwise the
          // array would be read out of bounds after the first digit overflows.)
          local_indices[ax] = 0;
          //local_indices[--ax]++; // decrementing the index below zero and the subsequent incrementation corrupts memory FIXME
          --ax;
          if (ax >= 0) local_indices[ax]++;
        }
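        // The increment above plus the overflow loop implement a mixed-radix
        // counter over innerloop_shape: e.g. with innerloop_shape {1, 2, 3},
        // successive passes visit (0,0,0), (0,0,1), (0,0,2), (0,1,0), (0,1,1)
        // and (0,1,2), after which ax drops below zero and the outer
        // while (ax >= 0) loop terminates.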
        if (ax >= 0) // did not overflow, get back to the lowest index
          ax = resnd - 1;
      } // while (ax >= 0)
    } // for (longi ...)
    errval |= errval_local;
  }
  // FIXME when parallelizing
  // TODO Here end parallelisation
  return errval;
}

//#endif // QPMS_COMPILE_PYTHON_EXTENSIONS