Cosmetic: Copyright years were updated
[ode.git] / ode / src / fastldltsolve.cpp
blobebe9c1c4b35f775e8bd681b23755818052f4ad73
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
/*
 * LDLT solving related code of ThreadedEquationSolverLDLT
 * Copyright (c) 2017-2022 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
 */
28 #include <ode/common.h>
29 #include <ode/matrix.h>
30 #include <ode/matrix_coop.h>
31 #include "config.h"
32 #include "threaded_solver_ldlt.h"
33 #include "threading_base.h"
34 #include "resource_control.h"
36 #include "fastldltsolve_impl.h"
39 /*static */
40 void ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(
41 dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
42 unsigned allowedThreadCount, unsigned rowCount)
44 unsigned stageBlockCountSifficiencyMask;
45 dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
46 unsigned limitedThreadCount = restrictSolvingLDLTAllowedThreadCount(threading, allowedThreadCount, rowCount, stageBlockCountSifficiencyMask);
48 if (limitedThreadCount > 1)
50 if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_STRAIGHT)) != 0)
52 doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
55 if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SCALING_VECTOR)) != 0)
57 doEstimateCooperativeScalingVectorResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
60 if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_TRANSPOSED)) == 0)
62 doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
67 /*static */
68 void ThreadedEquationSolverLDLT::cooperativelySolveLDLT(
69 dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
70 const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
72 dAASSERT(rowCount != 0);
74 unsigned stageBlockCountSifficiencyMask;
76 dxThreadingBase *threading = resourceContainer->getThreadingInstance();
77 unsigned limitedThreadCount = restrictSolvingLDLTAllowedThreadCount(threading, allowedThreadCount, rowCount, stageBlockCountSifficiencyMask);
79 if (limitedThreadCount <= 1)
81 solveEquationSystemWithLDLT<SLDLT_D_STRIDE, SLDLT_B_STRIDE>(L, d, b, rowCount, rowSkip);
83 else
85 doCooperativelySolveLDLTValidated(resourceContainer, limitedThreadCount, stageBlockCountSifficiencyMask, L, d, b, rowCount, rowSkip);
89 /*static */
90 unsigned ThreadedEquationSolverLDLT::restrictSolvingLDLTAllowedThreadCount(
91 dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount, unsigned &out_stageBlockCountSifficiencyMask)
93 unsigned limitedThreadCount = 1;
94 unsigned stageBlockCountSifficiencyMask = 0;
96 #if dCOOPERATIVE_ENABLED
98 const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
99 unsigned solvingStraightBlockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
100 dIASSERT(deriveSolvingL1StraightThreadCount(SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
102 if (solvingStraightBlockCount >= SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM)
104 stageBlockCountSifficiencyMask |= 1U << SLDLTS_SOLVING_STRAIGHT;
109 const unsigned int blockStep = SV_BLOCK_SIZE; // Required by the implementation
110 unsigned scalingBlockCount = deriveScalingVectorBlockCount(rowCount, blockStep);
111 dIASSERT(deriveScalingVectorThreadCount(SV_COOPERATIVE_BLOCK_COUNT_MINIMUM - 1, 2) > 1);
113 if (scalingBlockCount >= SV_COOPERATIVE_BLOCK_COUNT_MINIMUM)
115 stageBlockCountSifficiencyMask |= 1U << SLDLTS_SCALING_VECTOR;
120 const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
121 unsigned solvingTransposedBlockCount = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
122 dIASSERT(deriveSolvingL1TransposedThreadCount(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
124 if (solvingTransposedBlockCount >= SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM)
126 stageBlockCountSifficiencyMask |= 1U << SLDLTS_SOLVING_TRANSPOSED;
130 if (stageBlockCountSifficiencyMask != 0)
132 limitedThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
134 #endif // #if dCOOPERATIVE_ENABLED
136 out_stageBlockCountSifficiencyMask = stageBlockCountSifficiencyMask;
137 return limitedThreadCount;
141 /*static */
142 void ThreadedEquationSolverLDLT::doCooperativelySolveLDLTValidated(
143 dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, unsigned stageBlockCountSifficiencyMask,
144 const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
146 dIASSERT(allowedThreadCount > 1);
148 if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_STRAIGHT)) == 0)
150 solveL1Straight<SLDLT_B_STRIDE>(L, b, rowCount, rowSkip);
152 else
154 dSASSERT(SLDLT_B_STRIDE + 0 == SL1S_B_STRIDE);
156 doCooperativelySolveL1StraightValidated(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);
159 if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SCALING_VECTOR)) == 0)
161 scaleLargeVector<SLDLT_B_STRIDE, SLDLT_D_STRIDE>(b, d, rowCount);
163 else
165 dSASSERT(SLDLT_B_STRIDE + 0 == SV_A_STRIDE);
166 dSASSERT(SLDLT_D_STRIDE + 0 == SV_D_STRIDE);
168 doCooperativelyScaleVectorValidated(resourceContainer, allowedThreadCount, b, d, rowCount);
171 if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_TRANSPOSED)) == 0)
173 solveL1Transposed<SLDLT_B_STRIDE>(L, b, rowCount, rowSkip);
175 else
177 dSASSERT(SLDLT_B_STRIDE + 0 == SL1T_B_STRIDE);
179 doCooperativelySolveL1TransposedValidated(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);
184 //////////////////////////////////////////////////////////////////////////
185 // Public interface functions
187 /*extern ODE_API */
188 void dSolveLDLT(const dReal *L, const dReal *d, dReal *b, int n, int nskip)
190 dAASSERT(n != 0);
192 if (n != 0)
194 dAASSERT(L != NULL);
195 dAASSERT(d != NULL);
196 dAASSERT(b != NULL);
198 solveEquationSystemWithLDLT<1, 1>(L, d, b, n, nskip);
203 /*extern ODE_API */
204 void dEstimateCooperativelySolveLDLTResourceRequirements(dResourceRequirementsID requirements,
205 unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
207 dAASSERT(requirements != NULL);
209 dxResourceRequirementDescriptor *requirementsDescriptor = (dxResourceRequirementDescriptor *)requirements;
210 ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(requirementsDescriptor, maximalAllowedThreadCount, maximalRowCount);
213 /*extern ODE_API */
214 void dCooperativelySolveLDLT(dResourceContainerID resources, unsigned allowedThreadCount,
215 const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
217 dAASSERT(resources != NULL);
219 dxRequiredResourceContainer *resourceContainer = (dxRequiredResourceContainer *)resources;
220 ThreadedEquationSolverLDLT::cooperativelySolveLDLT(resourceContainer, allowedThreadCount, L, d, b, rowCount, rowSkip);