1 // This file is part of Eigen, a lightweight C++ template library
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
14 // first thing Eigen does: stop the compiler from committing suicide
15 #include "src/Core/util/DisableStupidWarnings.h"
17 // Handle NVCC/CUDA/SYCL
18 #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
19 // Do not try asserts on CUDA and SYCL!
20 #ifndef EIGEN_NO_DEBUG
21 #define EIGEN_NO_DEBUG
24 #ifdef EIGEN_INTERNAL_DEBUGGING
25 #undef EIGEN_INTERNAL_DEBUGGING
28 #ifdef EIGEN_EXCEPTIONS
29 #undef EIGEN_EXCEPTIONS
32 // All functions callable from CUDA code must be qualified with __device__
34 // Do not try to vectorize on CUDA and SYCL!
35 #ifndef EIGEN_DONT_VECTORIZE
36 #define EIGEN_DONT_VECTORIZE
39 #define EIGEN_DEVICE_FUNC __host__ __device__
40 // We need math_functions.hpp to ensure that the EIGEN_USING_STD_MATH macro
41 // works properly on the device side
42 #include <math_functions.hpp>
44 #define EIGEN_DEVICE_FUNC
48 #define EIGEN_DEVICE_FUNC
52 // When compiling CUDA device code with NVCC, pull in math functions from the
53 // global namespace. In host mode, and when device code is compiled with clang, use the
55 #if defined(__CUDA_ARCH__) && defined(__NVCC__)
56 #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
58 #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
61 #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
62 #define EIGEN_EXCEPTIONS
65 #ifdef EIGEN_EXCEPTIONS
69 // then include this file where all our macros are defined. It's really important to do it first because
70 // it's where we do all the alignment settings (platform detection and honoring the user's will if he
71 // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
72 #include "src/Core/util/Macros.h"
74 // Disable the ipa-cp-clone optimization flag with MinGW GCC 4.6 or newer (enabled by default with -O3)
75 // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
76 #if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
77 #pragma GCC optimize ("-fno-ipa-cp-clone")
82 // this include file manages BLAS and MKL related macros
83 // and inclusion of their respective header files
84 #include "src/Core/util/MKL_support.h"
86 // if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
87 // account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
88 #if EIGEN_MAX_ALIGN_BYTES==0
89 #ifndef EIGEN_DONT_VECTORIZE
90 #define EIGEN_DONT_VECTORIZE
95 #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
96 #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
97 // Remember that usage of defined() in a #define is undefined by the standard.
98 // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
99 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
100 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
104 // Remember that usage of defined() in a #define is undefined by the standard
105 #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
106 #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
110 #ifndef EIGEN_DONT_VECTORIZE
112 #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
114 // Defines symbols for compile-time detection of which instructions are
116 // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
117 #define EIGEN_VECTORIZE
118 #define EIGEN_VECTORIZE_SSE
119 #define EIGEN_VECTORIZE_SSE2
121 // Detect sse3/ssse3/sse4:
122 // gcc and icc define __SSE3__, ...
123 // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
124 // want to force the use of those instructions with msvc.
126 #define EIGEN_VECTORIZE_SSE3
129 #define EIGEN_VECTORIZE_SSSE3
132 #define EIGEN_VECTORIZE_SSE4_1
135 #define EIGEN_VECTORIZE_SSE4_2
138 #define EIGEN_VECTORIZE_AVX
139 #define EIGEN_VECTORIZE_SSE3
140 #define EIGEN_VECTORIZE_SSSE3
141 #define EIGEN_VECTORIZE_SSE4_1
142 #define EIGEN_VECTORIZE_SSE4_2
145 #define EIGEN_VECTORIZE_AVX2
148 #define EIGEN_VECTORIZE_FMA
150 #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
151 #define EIGEN_VECTORIZE_AVX512
152 #define EIGEN_VECTORIZE_AVX2
153 #define EIGEN_VECTORIZE_AVX
154 #define EIGEN_VECTORIZE_FMA
156 #define EIGEN_VECTORIZE_AVX512DQ
162 // This extern "C" works around a MINGW-w64 compilation issue
163 // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
164 // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
165 // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
166 // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
167 // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
168 // notice that since these are C headers, the extern "C" is theoretically needed anyways.
170 // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
171 // Doing so triggers some issues with ICC. However, old gcc versions seem to not have this file, thus:
172 #if EIGEN_COMP_ICC >= 1110
173 #include <immintrin.h>
175 #include <mmintrin.h>
176 #include <emmintrin.h>
177 #include <xmmintrin.h>
178 #ifdef EIGEN_VECTORIZE_SSE3
179 #include <pmmintrin.h>
181 #ifdef EIGEN_VECTORIZE_SSSE3
182 #include <tmmintrin.h>
184 #ifdef EIGEN_VECTORIZE_SSE4_1
185 #include <smmintrin.h>
187 #ifdef EIGEN_VECTORIZE_SSE4_2
188 #include <nmmintrin.h>
190 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
191 #include <immintrin.h>
195 #elif defined __VSX__
196 #define EIGEN_VECTORIZE
197 #define EIGEN_VECTORIZE_VSX
199 // We need to #undef all these ugly tokens defined in <altivec.h>
200 // => use __vector instead of vector
204 #elif defined __ALTIVEC__
205 #define EIGEN_VECTORIZE
206 #define EIGEN_VECTORIZE_ALTIVEC
208 // We need to #undef all these ugly tokens defined in <altivec.h>
209 // => use __vector instead of vector
213 #elif (defined __ARM_NEON) || (defined __ARM_NEON__)
214 #define EIGEN_VECTORIZE
215 #define EIGEN_VECTORIZE_NEON
216 #include <arm_neon.h>
217 #elif (defined __s390x__ && defined __VEC__)
218 #define EIGEN_VECTORIZE
219 #define EIGEN_VECTORIZE_ZVECTOR
220 #include <vecintrin.h>
224 #if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
225 // We can use the optimized fp16 to float and float to fp16 conversion routines
226 #define EIGEN_HAS_FP16_C
229 #if defined __CUDACC__
230 #define EIGEN_VECTORIZE_CUDA
231 #include <vector_types.h>
232 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
233 #define EIGEN_HAS_CUDA_FP16
237 #if defined EIGEN_HAS_CUDA_FP16
238 #include <host_defines.h>
239 #include <cuda_fp16.h>
242 #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
243 #define EIGEN_HAS_OPENMP
246 #ifdef EIGEN_HAS_OPENMP
250 // MSVC for windows mobile does not have the errno.h file
251 #if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
252 #define EIGEN_HAS_ERRNO
255 #ifdef EIGEN_HAS_ERRNO
262 #include <functional>
267 #include <climits> // for CHAR_BIT
271 // for std::is_nothrow_move_assignable
272 #ifdef EIGEN_INCLUDE_TYPE_TRAITS
273 #include <type_traits>
276 // for outputting debug info
277 #ifdef EIGEN_DEBUG_ASSIGN
281 // required for __cpuid, needs to be included after cmath
282 #if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
286 /** \brief Namespace containing all symbols from the %Eigen library. */
289 inline static const char *SimdInstructionSetsInUse(void) {
290 #if defined(EIGEN_VECTORIZE_AVX512)
291 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
292 #elif defined(EIGEN_VECTORIZE_AVX)
293 return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
294 #elif defined(EIGEN_VECTORIZE_SSE4_2)
295 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
296 #elif defined(EIGEN_VECTORIZE_SSE4_1)
297 return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
298 #elif defined(EIGEN_VECTORIZE_SSSE3)
299 return "SSE, SSE2, SSE3, SSSE3";
300 #elif defined(EIGEN_VECTORIZE_SSE3)
301 return "SSE, SSE2, SSE3";
302 #elif defined(EIGEN_VECTORIZE_SSE2)
304 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
306 #elif defined(EIGEN_VECTORIZE_VSX)
308 #elif defined(EIGEN_VECTORIZE_NEON)
310 #elif defined(EIGEN_VECTORIZE_ZVECTOR)
311 return "S390X ZVECTOR";
317 } // end namespace Eigen
319 #if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
320 // This will generate an error message:
321 #error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
326 // we use size_t frequently and we'll never remember to prepend it with std:: every time just to
327 // ensure QNX/QCC support
329 // gcc 4.6.0 wants std:: for ptrdiff_t
330 using std::ptrdiff_t;
334 /** \defgroup Core_Module Core module
335 * This is the main module of Eigen providing dense matrix and vector support
336 * (both fixed and dynamic size) with all the features corresponding to a BLAS library
340 * #include <Eigen/Core>
344 #include "src/Core/util/Constants.h"
345 #include "src/Core/util/Meta.h"
346 #include "src/Core/util/ForwardDeclarations.h"
347 #include "src/Core/util/StaticAssert.h"
348 #include "src/Core/util/XprHelper.h"
349 #include "src/Core/util/Memory.h"
351 #include "src/Core/NumTraits.h"
352 #include "src/Core/MathFunctions.h"
353 #include "src/Core/GenericPacketMath.h"
354 #include "src/Core/MathFunctionsImpl.h"
356 #if defined EIGEN_VECTORIZE_AVX512
357 #include "src/Core/arch/SSE/PacketMath.h"
358 #include "src/Core/arch/AVX/PacketMath.h"
359 #include "src/Core/arch/AVX512/PacketMath.h"
360 #include "src/Core/arch/AVX512/MathFunctions.h"
361 #elif defined EIGEN_VECTORIZE_AVX
362 // Use AVX for floats and doubles, SSE for integers
363 #include "src/Core/arch/SSE/PacketMath.h"
364 #include "src/Core/arch/SSE/Complex.h"
365 #include "src/Core/arch/SSE/MathFunctions.h"
366 #include "src/Core/arch/AVX/PacketMath.h"
367 #include "src/Core/arch/AVX/MathFunctions.h"
368 #include "src/Core/arch/AVX/Complex.h"
369 #include "src/Core/arch/AVX/TypeCasting.h"
370 #elif defined EIGEN_VECTORIZE_SSE
371 #include "src/Core/arch/SSE/PacketMath.h"
372 #include "src/Core/arch/SSE/MathFunctions.h"
373 #include "src/Core/arch/SSE/Complex.h"
374 #include "src/Core/arch/SSE/TypeCasting.h"
375 #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
376 #include "src/Core/arch/AltiVec/PacketMath.h"
377 #include "src/Core/arch/AltiVec/MathFunctions.h"
378 #include "src/Core/arch/AltiVec/Complex.h"
379 #elif defined EIGEN_VECTORIZE_NEON
380 #include "src/Core/arch/NEON/PacketMath.h"
381 #include "src/Core/arch/NEON/MathFunctions.h"
382 #include "src/Core/arch/NEON/Complex.h"
383 #elif defined EIGEN_VECTORIZE_ZVECTOR
384 #include "src/Core/arch/ZVector/PacketMath.h"
385 #include "src/Core/arch/ZVector/MathFunctions.h"
386 #include "src/Core/arch/ZVector/Complex.h"
389 // Half float support
390 #include "src/Core/arch/CUDA/Half.h"
391 #include "src/Core/arch/CUDA/PacketMathHalf.h"
392 #include "src/Core/arch/CUDA/TypeCasting.h"
394 #if defined EIGEN_VECTORIZE_CUDA
395 #include "src/Core/arch/CUDA/PacketMath.h"
396 #include "src/Core/arch/CUDA/MathFunctions.h"
399 #include "src/Core/arch/Default/Settings.h"
401 #include "src/Core/functors/TernaryFunctors.h"
402 #include "src/Core/functors/BinaryFunctors.h"
403 #include "src/Core/functors/UnaryFunctors.h"
404 #include "src/Core/functors/NullaryFunctors.h"
405 #include "src/Core/functors/StlFunctors.h"
406 #include "src/Core/functors/AssignmentFunctors.h"
408 // Specialized functors to enable the processing of complex numbers
410 #include "src/Core/arch/CUDA/Complex.h"
412 #include "src/Core/IO.h"
413 #include "src/Core/DenseCoeffsBase.h"
414 #include "src/Core/DenseBase.h"
415 #include "src/Core/MatrixBase.h"
416 #include "src/Core/EigenBase.h"
418 #include "src/Core/Product.h"
419 #include "src/Core/CoreEvaluators.h"
420 #include "src/Core/AssignEvaluator.h"
422 #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
423 // at least confirmed with Doxygen 1.5.5 and 1.5.6
424 #include "src/Core/Assign.h"
427 #include "src/Core/ArrayBase.h"
428 #include "src/Core/util/BlasUtil.h"
429 #include "src/Core/DenseStorage.h"
430 #include "src/Core/NestByValue.h"
432 // #include "src/Core/ForceAlignedAccess.h"
434 #include "src/Core/ReturnByValue.h"
435 #include "src/Core/NoAlias.h"
436 #include "src/Core/PlainObjectBase.h"
437 #include "src/Core/Matrix.h"
438 #include "src/Core/Array.h"
439 #include "src/Core/CwiseTernaryOp.h"
440 #include "src/Core/CwiseBinaryOp.h"
441 #include "src/Core/CwiseUnaryOp.h"
442 #include "src/Core/CwiseNullaryOp.h"
443 #include "src/Core/CwiseUnaryView.h"
444 #include "src/Core/SelfCwiseBinaryOp.h"
445 #include "src/Core/Dot.h"
446 #include "src/Core/StableNorm.h"
447 #include "src/Core/Stride.h"
448 #include "src/Core/MapBase.h"
449 #include "src/Core/Map.h"
450 #include "src/Core/Ref.h"
451 #include "src/Core/Block.h"
452 #include "src/Core/VectorBlock.h"
453 #include "src/Core/Transpose.h"
454 #include "src/Core/DiagonalMatrix.h"
455 #include "src/Core/Diagonal.h"
456 #include "src/Core/DiagonalProduct.h"
457 #include "src/Core/Redux.h"
458 #include "src/Core/Visitor.h"
459 #include "src/Core/Fuzzy.h"
460 #include "src/Core/Swap.h"
461 #include "src/Core/CommaInitializer.h"
462 #include "src/Core/GeneralProduct.h"
463 #include "src/Core/Solve.h"
464 #include "src/Core/Inverse.h"
465 #include "src/Core/SolverBase.h"
466 #include "src/Core/PermutationMatrix.h"
467 #include "src/Core/Transpositions.h"
468 #include "src/Core/TriangularMatrix.h"
469 #include "src/Core/SelfAdjointView.h"
470 #include "src/Core/products/GeneralBlockPanelKernel.h"
471 #include "src/Core/products/Parallelizer.h"
472 #include "src/Core/ProductEvaluators.h"
473 #include "src/Core/products/GeneralMatrixVector.h"
474 #include "src/Core/products/GeneralMatrixMatrix.h"
475 #include "src/Core/SolveTriangular.h"
476 #include "src/Core/products/GeneralMatrixMatrixTriangular.h"
477 #include "src/Core/products/SelfadjointMatrixVector.h"
478 #include "src/Core/products/SelfadjointMatrixMatrix.h"
479 #include "src/Core/products/SelfadjointProduct.h"
480 #include "src/Core/products/SelfadjointRank2Update.h"
481 #include "src/Core/products/TriangularMatrixVector.h"
482 #include "src/Core/products/TriangularMatrixMatrix.h"
483 #include "src/Core/products/TriangularSolverMatrix.h"
484 #include "src/Core/products/TriangularSolverVector.h"
485 #include "src/Core/BandMatrix.h"
486 #include "src/Core/CoreIterators.h"
487 #include "src/Core/ConditionEstimator.h"
489 #include "src/Core/BooleanRedux.h"
490 #include "src/Core/Select.h"
491 #include "src/Core/VectorwiseOp.h"
492 #include "src/Core/Random.h"
493 #include "src/Core/Replicate.h"
494 #include "src/Core/Reverse.h"
495 #include "src/Core/ArrayWrapper.h"
497 #ifdef EIGEN_USE_BLAS
498 #include "src/Core/products/GeneralMatrixMatrix_BLAS.h"
499 #include "src/Core/products/GeneralMatrixVector_BLAS.h"
500 #include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h"
501 #include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h"
502 #include "src/Core/products/SelfadjointMatrixVector_BLAS.h"
503 #include "src/Core/products/TriangularMatrixMatrix_BLAS.h"
504 #include "src/Core/products/TriangularMatrixVector_BLAS.h"
505 #include "src/Core/products/TriangularSolverMatrix_BLAS.h"
506 #endif // EIGEN_USE_BLAS
508 #ifdef EIGEN_USE_MKL_VML
509 #include "src/Core/Assign_MKL.h"
512 #include "src/Core/GlobalFunctions.h"
514 #include "src/Core/util/ReenableStupidWarnings.h"
516 #endif // EIGEN_CORE_H