/*! \file
 * \brief Naïve implementation of BLAS functions to work around OpenBLAS bugs.
 *
 * There is a bug in certain versions of OpenBLAS causing crashes if many
 * BLAS functions are run from different threads in parallel.
 * Typically, this happens when one calculates some large matrix (such as the
 * global translation matrix) in several threads and cblas_zgemm()
 * is called on relatively small submatrices.
 * Because the submatrices are small, we can use a naïve,
 * serial implementation of cblas_zgemm() as a workaround without a substantial
 * impact on performance.
 *
 * If included together with <cblas.h>, this must be included _afterwards_
 * because of the typedefs!
 */
/// Integer type used for the dimension (M, N, K) and leading-dimension (lda, ldb, ldc) arguments of qpms_zgemm().
#define QPMS_BLAS_INDEX_T long long int
/// Matrix storage-order selector (passed as the `Order` argument of qpms_zgemm()).
/** Declared here under the same name as the corresponding <cblas.h> type. */
typedef enum {
	CblasRowMajor = 101,
	CblasColMajor = 102
} CBLAS_LAYOUT;
/// Transposition selector (passed as the `TransA` / `TransB` arguments of qpms_zgemm()).
/** Declared here under the same name as the corresponding <cblas.h> type. */
typedef enum {
	CblasNoTrans   = 111,
	CblasTrans     = 112,
	CblasConjTrans = 113
} CBLAS_TRANSPOSE;
/// Upper/lower selector constants.
/**
 * Declared here under the same name as the corresponding <cblas.h> type;
 * not used by any declaration visible in this part of the header.
 */
typedef enum {
	CblasUpper = 121,
	CblasLower = 122
} CBLAS_UPLO;
/// Unit/non-unit selector constants.
/**
 * Declared here under the same name as the corresponding <cblas.h> type;
 * not used by any declaration visible in this part of the header.
 */
typedef enum {
	CblasNonUnit = 131,
	CblasUnit    = 132
} CBLAS_DIAG;
/// Left/right selector constants.
/**
 * Declared here under the same name as the corresponding <cblas.h> type;
 * not used by any declaration visible in this part of the header.
 */
typedef enum {
	CblasLeft  = 141,
	CblasRight = 142
} CBLAS_SIDE;
27 /// Naïve serial reimplementation of cblas_zgemm.
28 void qpms_zgemm(CBLAS_LAYOUT Order
, CBLAS_TRANSPOSE TransA
, CBLAS_TRANSPOSE TransB
,
29 const QPMS_BLAS_INDEX_T M
, const QPMS_BLAS_INDEX_T N
, const QPMS_BLAS_INDEX_T K
,
30 const _Complex
double *alpha
, const _Complex
double *A
, const QPMS_BLAS_INDEX_T lda
,
31 const _Complex
double *B
, const QPMS_BLAS_INDEX_T ldb
,
32 const _Complex
double *beta
, _Complex
double *C
, const QPMS_BLAS_INDEX_T ldc
);