1 /*************************************************************************
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
21 *************************************************************************/
24 * LDLT solving related code of ThreadedEquationSolverLDLT
25 * Copyright (c) 2017-2022 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
28 #include <ode/common.h>
29 #include <ode/matrix.h>
30 #include <ode/matrix_coop.h>
32 #include "threaded_solver_ldlt.h"
33 #include "threading_base.h"
34 #include "resource_control.h"
36 #include "fastldltsolve_impl.h"
40 void ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(
41 dxResourceRequirementDescriptor
*summaryRequirementsDescriptor
,
42 unsigned allowedThreadCount
, unsigned rowCount
)
44 unsigned stageBlockCountSifficiencyMask
;
45 dxThreadingBase
*threading
= summaryRequirementsDescriptor
->getrelatedThreading();
46 unsigned limitedThreadCount
= restrictSolvingLDLTAllowedThreadCount(threading
, allowedThreadCount
, rowCount
, stageBlockCountSifficiencyMask
);
48 if (limitedThreadCount
> 1)
50 if ((stageBlockCountSifficiencyMask
& (1U << SLDLTS_SOLVING_STRAIGHT
)) != 0)
52 doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(summaryRequirementsDescriptor
, allowedThreadCount
, rowCount
);
55 if ((stageBlockCountSifficiencyMask
& (1U << SLDLTS_SCALING_VECTOR
)) != 0)
57 doEstimateCooperativeScalingVectorResourceRequirementsValidated(summaryRequirementsDescriptor
, allowedThreadCount
, rowCount
);
60 if ((stageBlockCountSifficiencyMask
& (1U << SLDLTS_SOLVING_TRANSPOSED
)) == 0)
62 doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(summaryRequirementsDescriptor
, allowedThreadCount
, rowCount
);
68 void ThreadedEquationSolverLDLT::cooperativelySolveLDLT(
69 dxRequiredResourceContainer
*resourceContainer
, unsigned allowedThreadCount
,
70 const dReal
*L
, const dReal
*d
, dReal
*b
, unsigned rowCount
, unsigned rowSkip
)
72 dAASSERT(rowCount
!= 0);
74 unsigned stageBlockCountSifficiencyMask
;
76 dxThreadingBase
*threading
= resourceContainer
->getThreadingInstance();
77 unsigned limitedThreadCount
= restrictSolvingLDLTAllowedThreadCount(threading
, allowedThreadCount
, rowCount
, stageBlockCountSifficiencyMask
);
79 if (limitedThreadCount
<= 1)
81 solveEquationSystemWithLDLT
<SLDLT_D_STRIDE
, SLDLT_B_STRIDE
>(L
, d
, b
, rowCount
, rowSkip
);
85 doCooperativelySolveLDLTValidated(resourceContainer
, limitedThreadCount
, stageBlockCountSifficiencyMask
, L
, d
, b
, rowCount
, rowSkip
);
90 unsigned ThreadedEquationSolverLDLT::restrictSolvingLDLTAllowedThreadCount(
91 dxThreadingBase
*threading
, unsigned allowedThreadCount
, unsigned rowCount
, unsigned &out_stageBlockCountSifficiencyMask
)
93 unsigned limitedThreadCount
= 1;
94 unsigned stageBlockCountSifficiencyMask
= 0;
96 #if dCOOPERATIVE_ENABLED
98 const unsigned int blockStep
= SL1S_BLOCK_SIZE
; // Required by the implementation
99 unsigned solvingStraightBlockCount
= deriveSolvingL1StraightBlockCount(rowCount
, blockStep
);
100 dIASSERT(deriveSolvingL1StraightThreadCount(SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM
, 2) > 1);
102 if (solvingStraightBlockCount
>= SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM
)
104 stageBlockCountSifficiencyMask
|= 1U << SLDLTS_SOLVING_STRAIGHT
;
109 const unsigned int blockStep
= SV_BLOCK_SIZE
; // Required by the implementation
110 unsigned scalingBlockCount
= deriveScalingVectorBlockCount(rowCount
, blockStep
);
111 dIASSERT(deriveScalingVectorThreadCount(SV_COOPERATIVE_BLOCK_COUNT_MINIMUM
- 1, 2) > 1);
113 if (scalingBlockCount
>= SV_COOPERATIVE_BLOCK_COUNT_MINIMUM
)
115 stageBlockCountSifficiencyMask
|= 1U << SLDLTS_SCALING_VECTOR
;
120 const unsigned int blockStep
= SL1T_BLOCK_SIZE
; // Required by the implementation
121 unsigned solvingTransposedBlockCount
= deriveSolvingL1TransposedBlockCount(rowCount
, blockStep
);
122 dIASSERT(deriveSolvingL1TransposedThreadCount(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM
, 2) > 1);
124 if (solvingTransposedBlockCount
>= SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM
)
126 stageBlockCountSifficiencyMask
|= 1U << SLDLTS_SOLVING_TRANSPOSED
;
130 if (stageBlockCountSifficiencyMask
!= 0)
132 limitedThreadCount
= threading
->calculateThreadingLimitedThreadCount(allowedThreadCount
, true);
134 #endif // #if dCOOPERATIVE_ENABLED
136 out_stageBlockCountSifficiencyMask
= stageBlockCountSifficiencyMask
;
137 return limitedThreadCount
;
142 void ThreadedEquationSolverLDLT::doCooperativelySolveLDLTValidated(
143 dxRequiredResourceContainer
*resourceContainer
, unsigned allowedThreadCount
, unsigned stageBlockCountSifficiencyMask
,
144 const dReal
*L
, const dReal
*d
, dReal
*b
, unsigned rowCount
, unsigned rowSkip
)
146 dIASSERT(allowedThreadCount
> 1);
148 if ((stageBlockCountSifficiencyMask
& (1U << SLDLTS_SOLVING_STRAIGHT
)) == 0)
150 solveL1Straight
<SLDLT_B_STRIDE
>(L
, b
, rowCount
, rowSkip
);
154 dSASSERT(SLDLT_B_STRIDE
+ 0 == SL1S_B_STRIDE
);
156 doCooperativelySolveL1StraightValidated(resourceContainer
, allowedThreadCount
, L
, b
, rowCount
, rowSkip
);
159 if ((stageBlockCountSifficiencyMask
& (1U << SLDLTS_SCALING_VECTOR
)) == 0)
161 scaleLargeVector
<SLDLT_B_STRIDE
, SLDLT_D_STRIDE
>(b
, d
, rowCount
);
165 dSASSERT(SLDLT_B_STRIDE
+ 0 == SV_A_STRIDE
);
166 dSASSERT(SLDLT_D_STRIDE
+ 0 == SV_D_STRIDE
);
168 doCooperativelyScaleVectorValidated(resourceContainer
, allowedThreadCount
, b
, d
, rowCount
);
171 if ((stageBlockCountSifficiencyMask
& (1U << SLDLTS_SOLVING_TRANSPOSED
)) == 0)
173 solveL1Transposed
<SLDLT_B_STRIDE
>(L
, b
, rowCount
, rowSkip
);
177 dSASSERT(SLDLT_B_STRIDE
+ 0 == SL1T_B_STRIDE
);
179 doCooperativelySolveL1TransposedValidated(resourceContainer
, allowedThreadCount
, L
, b
, rowCount
, rowSkip
);
184 //////////////////////////////////////////////////////////////////////////
185 // Public interface functions
188 void dSolveLDLT(const dReal
*L
, const dReal
*d
, dReal
*b
, int n
, int nskip
)
198 solveEquationSystemWithLDLT
<1, 1>(L
, d
, b
, n
, nskip
);
204 void dEstimateCooperativelySolveLDLTResourceRequirements(dResourceRequirementsID requirements
,
205 unsigned maximalAllowedThreadCount
, unsigned maximalRowCount
)
207 dAASSERT(requirements
!= NULL
);
209 dxResourceRequirementDescriptor
*requirementsDescriptor
= (dxResourceRequirementDescriptor
*)requirements
;
210 ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(requirementsDescriptor
, maximalAllowedThreadCount
, maximalRowCount
);
214 void dCooperativelySolveLDLT(dResourceContainerID resources
, unsigned allowedThreadCount
,
215 const dReal
*L
, const dReal
*d
, dReal
*b
, unsigned rowCount
, unsigned rowSkip
)
217 dAASSERT(resources
!= NULL
);
219 dxRequiredResourceContainer
*resourceContainer
= (dxRequiredResourceContainer
*)resources
;
220 ThreadedEquationSolverLDLT::cooperativelySolveLDLT(resourceContainer
, allowedThreadCount
, L
, d
, b
, rowCount
, rowSkip
);