BUG: UListIO: byteSize overflowing on really big faceLists
[OpenFOAM-2.0.x.git] / src / parallel / decompose / decompositionMethods / simpleGeomDecomp / simpleGeomDecomp.C
bloba480dae8ceb9ebb144d66fcbb8df2ab8f16fcc99
/*---------------------------------------------------------------------------*\
  =========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     |
    \\  /    A nd           | Copyright (C) 2011 OpenFOAM Foundation
     \\/     M anipulation  |
-------------------------------------------------------------------------------
License
    This file is part of OpenFOAM.

    OpenFOAM is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    for more details.

    You should have received a copy of the GNU General Public License
    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.

\*---------------------------------------------------------------------------*/
26 #include "simpleGeomDecomp.H"
27 #include "addToRunTimeSelectionTable.H"
28 #include "SortableList.H"
29 #include "globalIndex.H"
30 #include "SubField.H"
32 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
34 namespace Foam
36     defineTypeNameAndDebug(simpleGeomDecomp, 0);
38     addToRunTimeSelectionTable
39     (
40         decompositionMethod,
41         simpleGeomDecomp,
42         dictionary
43     );
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
48 // assignToProcessorGroup : given nCells cells and nProcGroup processor
49 // groups to share them, how do we share them out? Answer : each group
50 // gets nCells/nProcGroup cells, and the first few get one
51 // extra to make up the numbers. This should produce almost
52 // perfect load balancing
54 void Foam::simpleGeomDecomp::assignToProcessorGroup
56     labelList& processorGroup,
57     const label nProcGroup
58 ) const
60     label jump = processorGroup.size()/nProcGroup;
61     label jumpb = jump + 1;
62     label fstProcessorGroup = processorGroup.size() - jump*nProcGroup;
64     label ind = 0;
65     label j = 0;
67     // assign cells to the first few processor groups (those with
68     // one extra cell each
69     for (j=0; j<fstProcessorGroup; j++)
70     {
71         for (register label k=0; k<jumpb; k++)
72         {
73             processorGroup[ind++] = j;
74         }
75     }
77     // and now to the `normal' processor groups
78     for (; j<nProcGroup; j++)
79     {
80         for (register label k=0; k<jump; k++)
81         {
82             processorGroup[ind++] = j;
83         }
84     }
88 void Foam::simpleGeomDecomp::assignToProcessorGroup
90     labelList& processorGroup,
91     const label nProcGroup,
92     const labelList& indices,
93     const scalarField& weights,
94     const scalar summedWeights
95 ) const
97     // This routine gets the sorted points.
98     // Easiest to explain with an example.
99     // E.g. 400 points, sum of weights : 513.
100     // Now with number of divisions in this direction (nProcGroup) : 4
101     // gives the split at 513/4 = 128
102     // So summed weight from 0..128 goes into bin 0,
103     //     ,,              128..256 goes into bin 1
104     //   etc.
105     // Finally any remaining ones go into the last bin (3).
107     const scalar jump = summedWeights/nProcGroup;
108     const label nProcGroupM1 = nProcGroup - 1;
109     scalar sumWeights = 0;
110     label ind = 0;
111     label j = 0;
113     // assign cells to all except last group.
114     for (j=0; j<nProcGroupM1; j++)
115     {
116         const scalar limit = jump*scalar(j + 1);
117         while (sumWeights < limit)
118         {
119             sumWeights += weights[indices[ind]];
120             processorGroup[ind++] = j;
121         }
122     }
123     // Ensure last included.
124     while (ind < processorGroup.size())
125     {
126        processorGroup[ind++] = nProcGroupM1;
127     }
131 Foam::labelList Foam::simpleGeomDecomp::decomposeOneProc
133     const pointField& points
134 ) const
136     // construct a list for the final result
137     labelList finalDecomp(points.size());
139     labelList processorGroups(points.size());
141     labelList pointIndices(points.size());
142     forAll(pointIndices, i)
143     {
144         pointIndices[i] = i;
145     }
147     const pointField rotatedPoints(rotDelta_ & points);
149     // and one to take the processor group id's. For each direction.
150     // we assign the processors to groups of processors labelled
151     // 0..nX to give a banded structure on the mesh. Then we
152     // construct the actual processor number by treating this as
153     // the units part of the processor number.
154     sort
155     (
156         pointIndices,
157         UList<scalar>::less(rotatedPoints.component(vector::X))
158     );
160     assignToProcessorGroup(processorGroups, n_.x());
162     forAll(points, i)
163     {
164         finalDecomp[pointIndices[i]] = processorGroups[i];
165     }
168     // now do the same thing in the Y direction. These processor group
169     // numbers add multiples of nX to the proc. number (columns)
170     sort
171     (
172         pointIndices,
173         UList<scalar>::less(rotatedPoints.component(vector::Y))
174     );
176     assignToProcessorGroup(processorGroups, n_.y());
178     forAll(points, i)
179     {
180         finalDecomp[pointIndices[i]] += n_.x()*processorGroups[i];
181     }
184     // finally in the Z direction. Now we add multiples of nX*nY to give
185     // layers
186     sort
187     (
188         pointIndices,
189         UList<scalar>::less(rotatedPoints.component(vector::Z))
190     );
192     assignToProcessorGroup(processorGroups, n_.z());
194     forAll(points, i)
195     {
196         finalDecomp[pointIndices[i]] += n_.x()*n_.y()*processorGroups[i];
197     }
199     return finalDecomp;
203 Foam::labelList Foam::simpleGeomDecomp::decomposeOneProc
205     const pointField& points,
206     const scalarField& weights
207 ) const
209     // construct a list for the final result
210     labelList finalDecomp(points.size());
212     labelList processorGroups(points.size());
214     labelList pointIndices(points.size());
215     forAll(pointIndices, i)
216     {
217         pointIndices[i] = i;
218     }
220     const pointField rotatedPoints(rotDelta_ & points);
222     // and one to take the processor group id's. For each direction.
223     // we assign the processors to groups of processors labelled
224     // 0..nX to give a banded structure on the mesh. Then we
225     // construct the actual processor number by treating this as
226     // the units part of the processor number.
227     sort
228     (
229         pointIndices,
230         UList<scalar>::less(rotatedPoints.component(vector::X))
231     );
233     const scalar summedWeights = sum(weights);
234     assignToProcessorGroup
235     (
236         processorGroups,
237         n_.x(),
238         pointIndices,
239         weights,
240         summedWeights
241     );
243     forAll(points, i)
244     {
245         finalDecomp[pointIndices[i]] = processorGroups[i];
246     }
249     // now do the same thing in the Y direction. These processor group
250     // numbers add multiples of nX to the proc. number (columns)
251     sort
252     (
253         pointIndices,
254         UList<scalar>::less(rotatedPoints.component(vector::Y))
255     );
257     assignToProcessorGroup
258     (
259         processorGroups,
260         n_.y(),
261         pointIndices,
262         weights,
263         summedWeights
264     );
266     forAll(points, i)
267     {
268         finalDecomp[pointIndices[i]] += n_.x()*processorGroups[i];
269     }
272     // finally in the Z direction. Now we add multiples of nX*nY to give
273     // layers
274     sort
275     (
276         pointIndices,
277         UList<scalar>::less(rotatedPoints.component(vector::Z))
278     );
280     assignToProcessorGroup
281     (
282         processorGroups,
283         n_.z(),
284         pointIndices,
285         weights,
286         summedWeights
287     );
289     forAll(points, i)
290     {
291         finalDecomp[pointIndices[i]] += n_.x()*n_.y()*processorGroups[i];
292     }
294     return finalDecomp;
// * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
300 Foam::simpleGeomDecomp::simpleGeomDecomp(const dictionary& decompositionDict)
302     geomDecomp(decompositionDict, typeName)
// * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
308 Foam::labelList Foam::simpleGeomDecomp::decompose
310     const pointField& points
313     if (!Pstream::parRun())
314     {
315         return decomposeOneProc(points);
316     }
317     else
318     {
319         globalIndex globalNumbers(points.size());
321         // Collect all points on master
322         if (Pstream::master())
323         {
324             pointField allPoints(globalNumbers.size());
326             label nTotalPoints = 0;
327             // Master first
328             SubField<point>(allPoints, points.size()).assign(points);
329             nTotalPoints += points.size();
331             // Add slaves
332             for (int slave=1; slave<Pstream::nProcs(); slave++)
333             {
334                 IPstream fromSlave(Pstream::scheduled, slave);
335                 pointField nbrPoints(fromSlave);
336                 SubField<point>
337                 (
338                     allPoints,
339                     nbrPoints.size(),
340                     nTotalPoints
341                 ).assign(nbrPoints);
342                 nTotalPoints += nbrPoints.size();
343             }
345             // Decompose
346             labelList finalDecomp(decomposeOneProc(allPoints));
348             // Send back
349             for (int slave=1; slave<Pstream::nProcs(); slave++)
350             {
351                 OPstream toSlave(Pstream::scheduled, slave);
352                 toSlave << SubField<label>
353                 (
354                     finalDecomp,
355                     globalNumbers.localSize(slave),
356                     globalNumbers.offset(slave)
357                 );
358             }
359             // Get my own part
360             finalDecomp.setSize(points.size());
362             return finalDecomp;
363         }
364         else
365         {
366             // Send my points
367             {
368                 OPstream toMaster(Pstream::scheduled, Pstream::masterNo());
369                 toMaster<< points;
370             }
372             // Receive back decomposition
373             IPstream fromMaster(Pstream::scheduled, Pstream::masterNo());
374             labelList finalDecomp(fromMaster);
376             return finalDecomp;
377         }
378     }
382 Foam::labelList Foam::simpleGeomDecomp::decompose
384     const pointField& points,
385     const scalarField& weights
388     if (!Pstream::parRun())
389     {
390         return decomposeOneProc(points, weights);
391     }
392     else
393     {
394         globalIndex globalNumbers(points.size());
396         // Collect all points on master
397         if (Pstream::master())
398         {
399             pointField allPoints(globalNumbers.size());
400             scalarField allWeights(allPoints.size());
402             label nTotalPoints = 0;
403             // Master first
404             SubField<point>(allPoints, points.size()).assign(points);
405             SubField<scalar>(allWeights, points.size()).assign(weights);
406             nTotalPoints += points.size();
408             // Add slaves
409             for (int slave=1; slave<Pstream::nProcs(); slave++)
410             {
411                 IPstream fromSlave(Pstream::scheduled, slave);
412                 pointField nbrPoints(fromSlave);
413                 scalarField nbrWeights(fromSlave);
414                 SubField<point>
415                 (
416                     allPoints,
417                     nbrPoints.size(),
418                     nTotalPoints
419                 ).assign(nbrPoints);
420                 SubField<scalar>
421                 (
422                     allWeights,
423                     nbrWeights.size(),
424                     nTotalPoints
425                 ).assign(nbrWeights);
426                 nTotalPoints += nbrPoints.size();
427             }
429             // Decompose
430             labelList finalDecomp(decomposeOneProc(allPoints, allWeights));
432             // Send back
433             for (int slave=1; slave<Pstream::nProcs(); slave++)
434             {
435                 OPstream toSlave(Pstream::scheduled, slave);
436                 toSlave << SubField<label>
437                 (
438                     finalDecomp,
439                     globalNumbers.localSize(slave),
440                     globalNumbers.offset(slave)
441                 );
442             }
443             // Get my own part
444             finalDecomp.setSize(points.size());
446             return finalDecomp;
447         }
448         else
449         {
450             // Send my points
451             {
452                 OPstream toMaster(Pstream::scheduled, Pstream::masterNo());
453                 toMaster<< points << weights;
454             }
456             // Receive back decomposition
457             IPstream fromMaster(Pstream::scheduled, Pstream::masterNo());
458             labelList finalDecomp(fromMaster);
460             return finalDecomp;
461         }
462     }
// ************************************************************************* //