Cosmetic: Copyright years were updated
[ode.git] / ode / src / fastltsolve.cpp
blob3f1e6b180d947d70657a89e9cd37fa6df9a57c07
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
24 * L1Transposed Equation Solving Routines
25 * Copyright (c) 2017-2024 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
28 #include <ode/common.h>
29 #include <ode/matrix.h>
30 #include <ode/matrix_coop.h>
31 #include "config.h"
32 #include "threaded_solver_ldlt.h"
33 #include "threading_base.h"
34 #include "resource_control.h"
35 #include "error.h"
37 #include "fastltsolve_impl.h"
40 /*static */
41 void ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements(
42 dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
43 unsigned allowedThreadCount, unsigned rowCount)
45 dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
46 unsigned limitedThreadCount = restrictSolvingL1TransposedAllowedThreadCount(threading, allowedThreadCount, rowCount);
48 if (limitedThreadCount > 1)
50 doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
54 /*static */
55 void ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed(
56 dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
57 const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
59 dIASSERT(rowCount != 0);
61 dxThreadingBase *threading = resourceContainer->getThreadingInstance();
62 unsigned limitedThreadCount = restrictSolvingL1TransposedAllowedThreadCount(threading, allowedThreadCount, rowCount);
64 if (limitedThreadCount <= 1)
66 solveL1Transposed<SL1T_B_STRIDE>(L, b, rowCount, rowSkip);
68 else
70 doCooperativelySolveL1TransposedValidated(resourceContainer, limitedThreadCount, L, b, rowCount, rowSkip);
75 /*static */
76 unsigned ThreadedEquationSolverLDLT::restrictSolvingL1TransposedAllowedThreadCount(
77 dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount)
79 unsigned limitedThreadCount = 1;
81 #if dCOOPERATIVE_ENABLED
82 const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
83 unsigned solvingBlockCount = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
84 dIASSERT(deriveSolvingL1TransposedThreadCount(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
86 if (solvingBlockCount >= SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM)
88 limitedThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
90 #endif // #if dCOOPERATIVE_ENABLED
92 return limitedThreadCount;
95 /*static */
96 void ThreadedEquationSolverLDLT::doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(
97 dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
98 unsigned allowedThreadCount, unsigned rowCount)
100 const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
101 unsigned blockCount = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
102 dIASSERT(blockCount >= 1);
104 unsigned threadCountToUse = deriveSolvingL1TransposedThreadCount(blockCount, allowedThreadCount);
105 dIASSERT(threadCountToUse > 1);
107 unsigned simultaneousCallCount = 1 + (threadCountToUse - 1);
109 SolvingL1TransposedMemoryEstimates solvingMemoryEstimates;
110 sizeint solvingMemoryRequired = estimateCooperativelySolvingL1TransposedMemoryRequirement<blockStep>(rowCount, solvingMemoryEstimates);
111 const unsigned solvingAlignmentRequired = ALLOCATION_DEFAULT_ALIGNMENT;
113 unsigned featureRequirement = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
114 summaryRequirementsDescriptor->mergeAnotherDescriptorIn(solvingMemoryRequired, solvingAlignmentRequired, simultaneousCallCount, featureRequirement);
117 /*static */
118 void ThreadedEquationSolverLDLT::doCooperativelySolveL1TransposedValidated(
119 dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
120 const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
122 dIASSERT(allowedThreadCount > 1);
124 const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
125 unsigned blockCount = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
126 dIASSERT(blockCount >= 1);
128 unsigned threadCountToUse = deriveSolvingL1TransposedThreadCount(blockCount, allowedThreadCount);
129 dIASSERT(threadCountToUse > 1);
131 dCallWaitID completionWait = resourceContainer->getStockCallWait();
132 dAASSERT(completionWait != NULL);
134 atomicord32 blockCompletionProgress;
135 cellindexint *blockProgressDescriptors;
136 SolveL1TransposedCellContext *cellContexts;
138 SolvingL1TransposedMemoryEstimates solvingMemoryEstimates;
139 sizeint solvingMemoryRequired = estimateCooperativelySolvingL1TransposedMemoryRequirement<blockStep>(rowCount, solvingMemoryEstimates);
140 dIASSERT(solvingMemoryRequired <= resourceContainer->getMemoryBufferSize());
142 void *bufferAllocated = resourceContainer->getMemoryBufferPointer();
143 dIASSERT(bufferAllocated != NULL);
144 dIASSERT(dALIGN_PTR(bufferAllocated, ALLOCATION_DEFAULT_ALIGNMENT) == bufferAllocated);
146 void *bufferCurrentLocation = bufferAllocated;
147 bufferCurrentLocation = markCooperativelySolvingL1TransposedMemoryStructuresOut(bufferCurrentLocation, solvingMemoryEstimates, blockProgressDescriptors, cellContexts);
148 dIVERIFY(bufferCurrentLocation <= (uint8 *)bufferAllocated + solvingMemoryRequired);
150 initializeCooperativelySolveL1TransposedMemoryStructures<blockStep>(rowCount, blockCompletionProgress, blockProgressDescriptors, cellContexts);
152 dCallReleaseeID calculationFinishReleasee;
153 SolveL1TransposedWorkerContext workerContext; // The variable must exist in the outer scope
155 workerContext.init(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts);
157 dxThreadingBase *threading = resourceContainer->getThreadingInstance();
158 threading->PostThreadedCall(NULL, &calculationFinishReleasee, threadCountToUse - 1, NULL, completionWait, &solveL1Transposed_completion_callback, NULL, 0, "SolveL1Transposed Completion");
159 threading->PostThreadedCallsGroup(NULL, threadCountToUse - 1, calculationFinishReleasee, &solveL1Transposed_worker_callback, &workerContext, "SolveL1Transposed Work");
161 participateSolvingL1Transposed<blockStep, SL1T_B_STRIDE>(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts, threadCountToUse - 1);
163 threading->WaitThreadedCallExclusively(NULL, completionWait, NULL, "SolveL1Transposed End Wait");
166 /*static */
167 int ThreadedEquationSolverLDLT::solveL1Transposed_worker_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
169 SolveL1TransposedWorkerContext *ptrContext = (SolveL1TransposedWorkerContext *)callContext;
171 solveL1Transposed_worker(*ptrContext, dCAST_TO_SMALLER(unsigned, callInstanceIndex));
173 return 1;
176 /*static */
177 void ThreadedEquationSolverLDLT::solveL1Transposed_worker(SolveL1TransposedWorkerContext &ref_context, unsigned ownThreadIndex)
179 const unsigned blockStep = SL1T_BLOCK_SIZE;
180 participateSolvingL1Transposed<blockStep, SL1T_B_STRIDE>(ref_context.m_L, ref_context.m_b, ref_context.m_rowCount, ref_context.m_rowSkip,
181 *ref_context.m_ptrBlockCompletionProgress, ref_context.m_blockProgressDescriptors, ref_context.m_cellContexts, ownThreadIndex);
184 /*static */
185 int ThreadedEquationSolverLDLT::solveL1Transposed_completion_callback(void *dUNUSED(callContext), dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
187 return 1;
192 //////////////////////////////////////////////////////////////////////////
193 // Public interface functions
195 /*extern ODE_API */
196 void dSolveL1T(const dReal *L, dReal *B, int rowCount, int rowSkip)
198 dAASSERT(rowCount != 0);
200 if (rowCount != 0)
202 dAASSERT(L != NULL);
203 dAASSERT(B != NULL);
205 solveL1Transposed<1>(L, B, rowCount, rowSkip);
210 /*extern ODE_API */
211 void dEstimateCooperativelySolveL1TransposedResourceRequirements(dResourceRequirementsID requirements,
212 unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
214 dAASSERT(requirements != NULL);
216 dxResourceRequirementDescriptor *requirementsDescriptor = (dxResourceRequirementDescriptor *)requirements;
217 ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements(requirementsDescriptor, maximalAllowedThreadCount, maximalRowCount);
220 /*extern ODE_API */
221 void dCooperativelySolveL1Transposed(dResourceContainerID resources, unsigned allowedThreadCount,
222 const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
224 dAASSERT(resources != NULL);
226 dxRequiredResourceContainer *resourceContainer = (dxRequiredResourceContainer *)resources;
227 ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);