1 /*************************************************************************
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
21 *************************************************************************/
24 * Vector scaling related code of ThreadedEquationSolverLDLT
25 * Copyright (c) 2017-2024 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
29 #include <ode/common.h>
30 #include <ode/matrix.h>
31 #include <ode/matrix_coop.h>
33 #include "threaded_solver_ldlt.h"
34 #include "threading_base.h"
35 #include "resource_control.h"
38 #include "fastvecscale_impl.h"
42 void ThreadedEquationSolverLDLT::estimateCooperativeScalingVectorResourceRequirements(
43 dxResourceRequirementDescriptor
*summaryRequirementsDescriptor
,
44 unsigned allowedThreadCount
, unsigned elementCount
)
46 dxThreadingBase
*threading
= summaryRequirementsDescriptor
->getrelatedThreading();
47 unsigned limitedThreadCount
= restrictScalingVectorAllowedThreadCount(threading
, allowedThreadCount
, elementCount
);
49 if (limitedThreadCount
> 1)
51 doEstimateCooperativeScalingVectorResourceRequirementsValidated(summaryRequirementsDescriptor
, allowedThreadCount
, elementCount
);
56 void ThreadedEquationSolverLDLT::cooperativelyScaleVector(dxRequiredResourceContainer
*resourceContainer
, unsigned allowedThreadCount
,
57 dReal
*vectorData
, const dReal
*scaleData
, unsigned elementCount
)
59 dAASSERT(elementCount
!= 0);
61 dxThreadingBase
*threading
= resourceContainer
->getThreadingInstance();
62 unsigned limitedThreadCount
= restrictScalingVectorAllowedThreadCount(threading
, allowedThreadCount
, elementCount
);
64 if (limitedThreadCount
<= 1)
66 scaleLargeVector
<SV_A_STRIDE
, SV_D_STRIDE
>(vectorData
, scaleData
, elementCount
);
70 doCooperativelyScaleVectorValidated(resourceContainer
, limitedThreadCount
, vectorData
, scaleData
, elementCount
);
75 unsigned ThreadedEquationSolverLDLT::restrictScalingVectorAllowedThreadCount(
76 dxThreadingBase
*threading
, unsigned allowedThreadCount
, unsigned elementCount
)
78 unsigned limitedThreadCount
= 1;
80 #if dCOOPERATIVE_ENABLED
81 const unsigned int blockStep
= SV_BLOCK_SIZE
; // Required by the implementation
82 unsigned scalingBlockCount
= deriveScalingVectorBlockCount(elementCount
, blockStep
);
83 dIASSERT(deriveScalingVectorThreadCount(SV_COOPERATIVE_BLOCK_COUNT_MINIMUM
- 1, 2) > 1);
85 if (scalingBlockCount
>= SV_COOPERATIVE_BLOCK_COUNT_MINIMUM
)
87 limitedThreadCount
= threading
->calculateThreadingLimitedThreadCount(allowedThreadCount
, true);
89 #endif // #if dCOOPERATIVE_ENABLED
91 return limitedThreadCount
;
95 void ThreadedEquationSolverLDLT::doEstimateCooperativeScalingVectorResourceRequirementsValidated(
96 dxResourceRequirementDescriptor
*summaryRequirementsDescriptor
,
97 unsigned allowedThreadCount
, unsigned elementCount
)
99 unsigned simultaneousCallCount
= 1 + (allowedThreadCount
- 1);
101 sizeint scalingMemoryRequired
= 0;
102 const unsigned scalingAlignmentRequired
= 0;
104 unsigned featureRequirement
= dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED
;
105 summaryRequirementsDescriptor
->mergeAnotherDescriptorIn(scalingMemoryRequired
, scalingAlignmentRequired
, simultaneousCallCount
, featureRequirement
);
109 void ThreadedEquationSolverLDLT::doCooperativelyScaleVectorValidated(
110 dxRequiredResourceContainer
*resourceContainer
, unsigned allowedThreadCount
,
111 dReal
*vectorData
, const dReal
*scaleData
, unsigned elementCount
)
113 dIASSERT(allowedThreadCount
> 1);
115 const unsigned int blockStep
= SV_BLOCK_SIZE
; // Required by the implementation
116 unsigned scalingBlockCount
= deriveScalingVectorBlockCount(elementCount
, blockStep
);
117 dIASSERT(scalingBlockCount
> 0U);
119 unsigned threadCountToUse
= deriveScalingVectorThreadCount(scalingBlockCount
- 1, allowedThreadCount
);
120 dIASSERT(threadCountToUse
> 1);
122 dCallWaitID completionWait
= resourceContainer
->getStockCallWait();
123 dAASSERT(completionWait
!= NULL
);
125 atomicord32 blockCompletionProgress
;
127 initializeCooperativelyScaleVectorMemoryStructures(blockCompletionProgress
);
129 dCallReleaseeID calculationFinishReleasee
;
130 ScaleVectorWorkerContext workerContext
; // The variable must exist in the outer scope
132 workerContext
.init(vectorData
, scaleData
, elementCount
, blockCompletionProgress
);
134 dxThreadingBase
*threading
= resourceContainer
->getThreadingInstance();
135 threading
->PostThreadedCall(NULL
, &calculationFinishReleasee
, threadCountToUse
- 1, NULL
, completionWait
, &scaleVector_completion_callback
, NULL
, 0, "ScaleVector Completion");
136 threading
->PostThreadedCallsGroup(NULL
, threadCountToUse
- 1, calculationFinishReleasee
, &scaleVector_worker_callback
, &workerContext
, "ScaleVector Work");
138 participateScalingVector
<blockStep
, SV_A_STRIDE
, SV_D_STRIDE
>(vectorData
, scaleData
, elementCount
, blockCompletionProgress
);
140 threading
->WaitThreadedCallExclusively(NULL
, completionWait
, NULL
, "ScaleVector End Wait");
145 int ThreadedEquationSolverLDLT::scaleVector_worker_callback(void *callContext
, dcallindex_t
dUNUSED(callInstanceIndex
), dCallReleaseeID
dUNUSED(callThisReleasee
))
147 ScaleVectorWorkerContext
*ptrContext
= (ScaleVectorWorkerContext
*)callContext
;
149 scaleVector_worker(*ptrContext
);
155 void ThreadedEquationSolverLDLT::scaleVector_worker(ScaleVectorWorkerContext
&ref_context
)
157 const unsigned blockStep
= SV_BLOCK_SIZE
;
159 participateScalingVector
<blockStep
, SV_A_STRIDE
, SV_D_STRIDE
>(ref_context
.m_vectorData
, ref_context
.m_scaleData
, ref_context
.m_elementCount
, *ref_context
.m_ptrBlockCompletionProgress
);
163 int ThreadedEquationSolverLDLT::scaleVector_completion_callback(void *dUNUSED(callContext
), dcallindex_t
dUNUSED(callInstanceIndex
), dCallReleaseeID
dUNUSED(callThisReleasee
))
169 //////////////////////////////////////////////////////////////////////////
170 // Public interface functions
173 void dScaleVector(dReal
*a
, const dReal
*d
, int n
)
175 scaleLargeVector
<1, 1>(a
, d
, n
);
178 /*extern ODE_API_DEPRECATED ODE_API */
179 void dVectorScale(dReal
*a
, const dReal
*d
, int n
)
181 scaleLargeVector
<1, 1>(a
, d
, n
);
186 void dEstimateCooperativelyScaleVectorResourceRequirements(dResourceRequirementsID requirements
,
187 unsigned maximalAllowedThreadCount
, unsigned maximalElementCount
)
189 dAASSERT(requirements
!= NULL
);
191 dxResourceRequirementDescriptor
*requirementsDescriptor
= (dxResourceRequirementDescriptor
*)requirements
;
192 ThreadedEquationSolverLDLT::estimateCooperativeScalingVectorResourceRequirements(requirementsDescriptor
, maximalAllowedThreadCount
, maximalElementCount
);
196 void dCooperativelyScaleVector(dResourceContainerID resources
, unsigned allowedThreadCount
,
197 dReal
*dataVector
, const dReal
*scaleVector
, unsigned elementCount
)
199 dAASSERT(resources
!= NULL
);
201 dxRequiredResourceContainer
*resourceContainer
= (dxRequiredResourceContainer
*)resources
;
202 ThreadedEquationSolverLDLT::cooperativelyScaleVector(resourceContainer
, allowedThreadCount
, dataVector
, scaleVector
, elementCount
);