Add a TriMesh to TriMesh collision demo.
[ode.git] / ode / src / fastvecscale.cpp
blob630e98e5790b36b480d9106c157b993e17b6b463
1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
/*
 * Vector scaling related code of ThreadedEquationSolverLDLT
 * Copyright (c) 2017-2024 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
 */
29 #include <ode/common.h>
30 #include <ode/matrix.h>
31 #include <ode/matrix_coop.h>
32 #include "config.h"
33 #include "threaded_solver_ldlt.h"
34 #include "threading_base.h"
35 #include "resource_control.h"
36 #include "error.h"
38 #include "fastvecscale_impl.h"
41 /*static */
42 void ThreadedEquationSolverLDLT::estimateCooperativeScalingVectorResourceRequirements(
43 dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
44 unsigned allowedThreadCount, unsigned elementCount)
46 dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
47 unsigned limitedThreadCount = restrictScalingVectorAllowedThreadCount(threading, allowedThreadCount, elementCount);
49 if (limitedThreadCount > 1)
51 doEstimateCooperativeScalingVectorResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, elementCount);
55 /*static */
56 void ThreadedEquationSolverLDLT::cooperativelyScaleVector(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
57 dReal *vectorData, const dReal *scaleData, unsigned elementCount)
59 dAASSERT(elementCount != 0);
61 dxThreadingBase *threading = resourceContainer->getThreadingInstance();
62 unsigned limitedThreadCount = restrictScalingVectorAllowedThreadCount(threading, allowedThreadCount, elementCount);
64 if (limitedThreadCount <= 1)
66 scaleLargeVector<SV_A_STRIDE, SV_D_STRIDE>(vectorData, scaleData, elementCount);
68 else
70 doCooperativelyScaleVectorValidated(resourceContainer, limitedThreadCount, vectorData, scaleData, elementCount);
74 /*static */
75 unsigned ThreadedEquationSolverLDLT::restrictScalingVectorAllowedThreadCount(
76 dxThreadingBase *threading, unsigned allowedThreadCount, unsigned elementCount)
78 unsigned limitedThreadCount = 1;
80 #if dCOOPERATIVE_ENABLED
81 const unsigned int blockStep = SV_BLOCK_SIZE; // Required by the implementation
82 unsigned scalingBlockCount = deriveScalingVectorBlockCount(elementCount, blockStep);
83 dIASSERT(deriveScalingVectorThreadCount(SV_COOPERATIVE_BLOCK_COUNT_MINIMUM - 1, 2) > 1);
85 if (scalingBlockCount >= SV_COOPERATIVE_BLOCK_COUNT_MINIMUM)
87 limitedThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
89 #endif // #if dCOOPERATIVE_ENABLED
91 return limitedThreadCount;
94 /*static */
95 void ThreadedEquationSolverLDLT::doEstimateCooperativeScalingVectorResourceRequirementsValidated(
96 dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
97 unsigned allowedThreadCount, unsigned elementCount)
99 unsigned simultaneousCallCount = 1 + (allowedThreadCount - 1);
101 sizeint scalingMemoryRequired = 0;
102 const unsigned scalingAlignmentRequired = 0;
104 unsigned featureRequirement = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
105 summaryRequirementsDescriptor->mergeAnotherDescriptorIn(scalingMemoryRequired, scalingAlignmentRequired, simultaneousCallCount, featureRequirement);
108 /*static */
109 void ThreadedEquationSolverLDLT::doCooperativelyScaleVectorValidated(
110 dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
111 dReal *vectorData, const dReal *scaleData, unsigned elementCount)
113 dIASSERT(allowedThreadCount > 1);
115 const unsigned int blockStep = SV_BLOCK_SIZE; // Required by the implementation
116 unsigned scalingBlockCount = deriveScalingVectorBlockCount(elementCount, blockStep);
117 dIASSERT(scalingBlockCount > 0U);
119 unsigned threadCountToUse = deriveScalingVectorThreadCount(scalingBlockCount - 1, allowedThreadCount);
120 dIASSERT(threadCountToUse > 1);
122 dCallWaitID completionWait = resourceContainer->getStockCallWait();
123 dAASSERT(completionWait != NULL);
125 atomicord32 blockCompletionProgress;
127 initializeCooperativelyScaleVectorMemoryStructures(blockCompletionProgress);
129 dCallReleaseeID calculationFinishReleasee;
130 ScaleVectorWorkerContext workerContext; // The variable must exist in the outer scope
132 workerContext.init(vectorData, scaleData, elementCount, blockCompletionProgress);
134 dxThreadingBase *threading = resourceContainer->getThreadingInstance();
135 threading->PostThreadedCall(NULL, &calculationFinishReleasee, threadCountToUse - 1, NULL, completionWait, &scaleVector_completion_callback, NULL, 0, "ScaleVector Completion");
136 threading->PostThreadedCallsGroup(NULL, threadCountToUse - 1, calculationFinishReleasee, &scaleVector_worker_callback, &workerContext, "ScaleVector Work");
138 participateScalingVector<blockStep, SV_A_STRIDE, SV_D_STRIDE>(vectorData, scaleData, elementCount, blockCompletionProgress);
140 threading->WaitThreadedCallExclusively(NULL, completionWait, NULL, "ScaleVector End Wait");
144 /*static */
145 int ThreadedEquationSolverLDLT::scaleVector_worker_callback(void *callContext, dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
147 ScaleVectorWorkerContext *ptrContext = (ScaleVectorWorkerContext *)callContext;
149 scaleVector_worker(*ptrContext);
151 return 1;
154 /*static */
155 void ThreadedEquationSolverLDLT::scaleVector_worker(ScaleVectorWorkerContext &ref_context)
157 const unsigned blockStep = SV_BLOCK_SIZE;
159 participateScalingVector<blockStep, SV_A_STRIDE, SV_D_STRIDE>(ref_context.m_vectorData, ref_context.m_scaleData, ref_context.m_elementCount, *ref_context.m_ptrBlockCompletionProgress);
162 /*static */
163 int ThreadedEquationSolverLDLT::scaleVector_completion_callback(void *dUNUSED(callContext), dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
165 return 1;
169 //////////////////////////////////////////////////////////////////////////
170 // Public interface functions
172 /*extern ODE_API */
173 void dScaleVector(dReal *a, const dReal *d, int n)
175 scaleLargeVector<1, 1>(a, d, n);
178 /*extern ODE_API_DEPRECATED ODE_API */
179 void dVectorScale(dReal *a, const dReal *d, int n)
181 scaleLargeVector<1, 1>(a, d, n);
185 /*extern ODE_API */
186 void dEstimateCooperativelyScaleVectorResourceRequirements(dResourceRequirementsID requirements,
187 unsigned maximalAllowedThreadCount, unsigned maximalElementCount)
189 dAASSERT(requirements != NULL);
191 dxResourceRequirementDescriptor *requirementsDescriptor = (dxResourceRequirementDescriptor *)requirements;
192 ThreadedEquationSolverLDLT::estimateCooperativeScalingVectorResourceRequirements(requirementsDescriptor, maximalAllowedThreadCount, maximalElementCount);
195 /*extern ODE_API */
196 void dCooperativelyScaleVector(dResourceContainerID resources, unsigned allowedThreadCount,
197 dReal *dataVector, const dReal *scaleVector, unsigned elementCount)
199 dAASSERT(resources != NULL);
201 dxRequiredResourceContainer *resourceContainer = (dxRequiredResourceContainer *)resources;
202 ThreadedEquationSolverLDLT::cooperativelyScaleVector(resourceContainer, allowedThreadCount, dataVector, scaleVector, elementCount);