1 /*---------------------------------------------------------------------------*\
3 \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
5 \\ / A nd | Copyright (C) 2011 OpenFOAM Foundation
7 -------------------------------------------------------------------------------
9 This file is part of OpenFOAM.
11 OpenFOAM is free software: you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 You should have received a copy of the GNU General Public License
22 along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
26 By: Francois PELLEGRINI RE: Graph mapping 'strategy' string [ reply ]
27 2008-08-22 10:09 Strategy handling in Scotch is a bit tricky. In order
28 not to be confused, you must have a clear view of how they are built.
31 1- Strategies are made up of "methods" which are combined by means of
34 2- A method is of the form "m{param=value,param=value,...}", where "m"
35 is a single character (this is your first error: "f" is a method name,
36 not a parameter name).
38 3- There exist different sort of strategies : bipartitioning strategies,
39 mapping strategies, ordering strategies, which cannot be mixed. For
40 instance, you cannot build a bipartitioning strategy and feed it to a
41 mapping method (this is your second error).
43 To use the "mapCompute" routine, you must create a mapping strategy, not
44 a bipartitioning one, and so use stratGraphMap() and not
45 stratGraphBipart(). Your mapping strategy should however be based on the
46 "recursive bipartitioning" method ("b"). For instance, a simple (and
47 hence not very efficient) mapping strategy can be :
51 which computes mappings with the recursive bipartitioning method "b",
52 this latter using the Fiduccia-Mattheyses method "f" to compute its
55 If you want an exact partition (see your previous post), try
58 However, these strategies are not the most efficient, as they do not
59 make use of the multi-level framework.
61 To use the multi-level framework, try for instance:
63 "b{sep=m{vert=100,low=h,asc=f}x}"
65 The current default mapping strategy in Scotch can be seen by using the
66 "-vs" option of program gmap. It is, to date:
79 bnd=d{pass=40,dif=1,rem=1}f{move=80,pass=-1,bal=0.005},
80 org=f{move=80,pass=-1,bal=0.005},
83 low=h{pass=10}f{move=80,pass=-1,bal=0.0005},
92 bnd=d{pass=40,dif=1,rem=1}f{move=80,pass=-1,bal=0.005},
93 org=f{move=80,pass=-1,bal=0.005},
96 low=h{pass=10}f{move=80,pass=-1,bal=0.0005},
106 Note: writes out .dgr files for debugging. Run with e.g.
108 mpirun -np 4 dgpart 2 'processor%r.grf'
110 - %r gets replaced by current processor rank
111 - decompose into 2 domains
113 \*---------------------------------------------------------------------------*/
115 #include "ptscotchDecomp.H"
116 #include "addToRunTimeSelectionTable.H"
118 #include "OFstream.H"
119 #include "globalIndex.H"
120 #include "SubField.H"
126 #include "ptscotch.h"
130 // Hack: scotch generates floating point errors so need to switch of error
132 #if defined(linux) || defined(linuxAMD64) || defined(linuxIA64)
136 #if defined(LINUX) && defined(__GNUC__)
148 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
152 defineTypeNameAndDebug(ptscotchDecomp, 0);
154 addToRunTimeSelectionTable(decompositionMethod, ptscotchDecomp, dictionary);
157 // * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * //
159 void Foam::ptscotchDecomp::check(const int retVal, const char* str)
163 FatalErrorIn("ptscotchDecomp::decompose(..)")
164 << "Call to scotch routine " << str << " failed."
170 //- Does prevention of 0 cell domains and calls ptscotch.
171 Foam::label Foam::ptscotchDecomp::decomposeZeroDomains
173 const fileName& meshPath,
174 const List<int>& initadjncy,
175 const List<int>& initxadj,
176 const scalarField& initcWeights,
178 List<int>& finalDecomp
181 globalIndex globalCells(initxadj.size()-1);
183 bool hasZeroDomain = false;
184 for (label procI = 0; procI < Pstream::nProcs(); procI++)
186 if (globalCells.localSize(procI) == 0)
188 hasZeroDomain = true;
208 Info<< "ptscotchDecomp : have graphs with locally 0 cells."
209 << " trickling down." << endl;
212 // Make sure every domain has at least one cell
213 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
214 // (scotch does not like zero sized domains)
215 // Trickle cells from processors that have them up to those that
219 // Number of cells to send to the next processor
220 // (is same as number of cells next processor has to receive)
221 List<int> nSendCells(Pstream::nProcs(), 0);
223 for (label procI = nSendCells.size()-1; procI >=1; procI--)
225 label nLocalCells = globalCells.localSize(procI);
226 if (nLocalCells-nSendCells[procI] < 1)
228 nSendCells[procI-1] = nSendCells[procI]-nLocalCells+1;
232 // First receive (so increasing the sizes of all arrays)
234 Field<int> xadj(initxadj);
235 Field<int> adjncy(initadjncy);
236 scalarField cWeights(initcWeights);
238 if (Pstream::myProcNo() >= 1 && nSendCells[Pstream::myProcNo()-1] > 0)
240 // Receive cells from previous processor
241 IPstream fromPrevProc(Pstream::blocking, Pstream::myProcNo()-1);
243 Field<int> prevXadj(fromPrevProc);
244 Field<int> prevAdjncy(fromPrevProc);
245 scalarField prevCellWeights(fromPrevProc);
247 if (prevXadj.size() != nSendCells[Pstream::myProcNo()-1])
249 FatalErrorIn("ptscotchDecomp::decompose(..)")
250 << "Expected from processor " << Pstream::myProcNo()-1
251 << " connectivity for " << nSendCells[Pstream::myProcNo()-1]
252 << " nCells but only received " << prevXadj.size()
253 << abort(FatalError);
257 prepend(prevAdjncy, adjncy);
258 // Adapt offsets and prepend xadj
259 xadj += prevAdjncy.size();
260 prepend(prevXadj, xadj);
262 prepend(prevCellWeights, cWeights);
266 // Send to my next processor
268 if (nSendCells[Pstream::myProcNo()] > 0)
270 // Send cells to next processor
271 OPstream toNextProc(Pstream::blocking, Pstream::myProcNo()+1);
273 int nCells = nSendCells[Pstream::myProcNo()];
274 int startCell = xadj.size()-1 - nCells;
275 int startFace = xadj[startCell];
276 int nFaces = adjncy.size()-startFace;
278 // Send for all cell data: last nCells elements
279 // Send for all face data: last nFaces elements
281 << Field<int>::subField(xadj, nCells, startCell)-startFace
282 << Field<int>::subField(adjncy, nFaces, startFace)
286 ? static_cast<const scalarField&>
288 scalarField::subField(cWeights, nCells, startCell)
293 // Remove data that has been sent
296 cWeights.setSize(cWeights.size()-nCells);
298 adjncy.setSize(adjncy.size()-nFaces);
299 xadj.setSize(xadj.size() - nCells);
303 // Do decomposition as normal. Sets finalDecomp.
304 label result = decompose(meshPath, adjncy, xadj, cWeights, finalDecomp);
309 Info<< "ptscotchDecomp : have graphs with locally 0 cells."
310 << " trickling up." << endl;
314 // If we sent cells across make sure we undo it
315 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
317 // Receive back from next processor if I sent something
318 if (nSendCells[Pstream::myProcNo()] > 0)
320 IPstream fromNextProc(Pstream::blocking, Pstream::myProcNo()+1);
322 List<int> nextFinalDecomp(fromNextProc);
324 if (nextFinalDecomp.size() != nSendCells[Pstream::myProcNo()])
326 FatalErrorIn("parMetisDecomp::decompose(..)")
327 << "Expected from processor " << Pstream::myProcNo()+1
328 << " decomposition for " << nSendCells[Pstream::myProcNo()]
329 << " nCells but only received " << nextFinalDecomp.size()
330 << abort(FatalError);
333 append(nextFinalDecomp, finalDecomp);
336 // Send back to previous processor.
337 if (Pstream::myProcNo() >= 1 && nSendCells[Pstream::myProcNo()-1] > 0)
339 OPstream toPrevProc(Pstream::blocking, Pstream::myProcNo()-1);
341 int nToPrevious = nSendCells[Pstream::myProcNo()-1];
348 finalDecomp.size()-nToPrevious
351 // Remove locally what has been sent
352 finalDecomp.setSize(finalDecomp.size()-nToPrevious);
358 // Call scotch with options from dictionary.
359 Foam::label Foam::ptscotchDecomp::decompose
361 const fileName& meshPath,
362 const List<int>& adjncy,
363 const List<int>& xadj,
364 const scalarField& cWeights,
366 List<int>& finalDecomp
371 Pout<< "ptscotchDecomp : entering with xadj:" << xadj.size() << endl;
375 if (decompositionDict_.found("ptscotchCoeffs"))
377 const dictionary& scotchCoeffs =
378 decompositionDict_.subDict("ptscotchCoeffs");
380 if (scotchCoeffs.lookupOrDefault("writeGraph", false))
384 meshPath + "_" + Foam::name(Pstream::myProcNo()) + ".dgr"
387 Pout<< "Dumping Scotch graph file to " << str.name() << endl
388 << "Use this in combination with dgpart." << endl;
390 globalIndex globalCells(xadj.size()-1);
392 // Distributed graph file (.grf)
394 str << version << nl;
395 // Number of files (procglbnbr)
396 str << Pstream::nProcs();
397 // My file number (procloc)
398 str << ' ' << Pstream::myProcNo() << nl;
400 // Total number of vertices (vertglbnbr)
401 str << globalCells.size();
402 // Total number of connections (edgeglbnbr)
403 str << ' ' << returnReduce(xadj[xadj.size()-1], sumOp<label>())
405 // Local number of vertices (vertlocnbr)
406 str << xadj.size()-1;
407 // Local number of connections (edgelocnbr)
408 str << ' ' << xadj[xadj.size()-1] << nl;
409 // Numbering starts from 0
411 // 100*hasVertlabels+10*hasEdgeWeights+1*hasVertWeighs
412 str << baseval << ' ' << "000" << nl;
413 for (label cellI = 0; cellI < xadj.size()-1; cellI++)
415 label start = xadj[cellI];
416 label end = xadj[cellI+1];
419 for (label i = start; i < end; i++)
421 str << ' ' << adjncy[i];
432 SCOTCH_Strat stradat;
433 check(SCOTCH_stratInit(&stradat), "SCOTCH_stratInit");
435 if (decompositionDict_.found("scotchCoeffs"))
437 const dictionary& scotchCoeffs =
438 decompositionDict_.subDict("scotchCoeffs");
442 if (scotchCoeffs.readIfPresent("strategy", strategy))
446 Info<< "ptscotchDecomp : Using strategy " << strategy << endl;
448 SCOTCH_stratDgraphMap(&stradat, strategy.c_str());
449 //fprintf(stdout, "S\tStrat=");
450 //SCOTCH_stratSave(&stradat, stdout);
451 //fprintf(stdout, "\n");
462 // Check for externally provided cellweights and if so initialise weights
463 scalar minWeights = gMin(cWeights);
464 if (cWeights.size() > 0)
470 "ptscotchDecomp::decompose(..)"
471 ) << "Illegal minimum weight " << minWeights
475 if (cWeights.size() != xadj.size()-1)
479 "ptscotchDecomp::decompose(..)"
480 ) << "Number of cell weights " << cWeights.size()
481 << " does not equal number of cells " << xadj.size()-1
485 // Convert to integers.
486 velotab.setSize(cWeights.size());
489 velotab[i] = int(cWeights[i]/minWeights);
497 Pout<< "SCOTCH_dgraphInit" << endl;
499 SCOTCH_Dgraph grafdat;
500 check(SCOTCH_dgraphInit(&grafdat, MPI_COMM_WORLD), "SCOTCH_dgraphInit");
505 Pout<< "SCOTCH_dgraphBuild with:" << nl
506 << "xadj.size()-1 : " << xadj.size()-1 << nl
507 << "xadj : " << long(xadj.begin()) << nl
508 << "velotab : " << long(velotab.begin()) << nl
509 << "adjncy.size() : " << adjncy.size() << nl
510 << "adjncy : " << long(adjncy.begin()) << nl
519 0, // baseval, c-style numbering
520 xadj.size()-1, // vertlocnbr, nCells
521 xadj.size()-1, // vertlocmax
522 const_cast<SCOTCH_Num*>(xadj.begin()),
523 // vertloctab, start index per cell into
525 const_cast<SCOTCH_Num*>(&xadj[1]),// vendloctab, end index ,,
527 const_cast<SCOTCH_Num*>(velotab.begin()),// veloloctab, vtx weights
530 adjncy.size(), // edgelocnbr, number of arcs
531 adjncy.size(), // edgelocsiz
532 const_cast<SCOTCH_Num*>(adjncy.begin()), // edgeloctab
534 NULL // edlotab, edge weights
542 Pout<< "SCOTCH_dgraphCheck" << endl;
544 check(SCOTCH_dgraphCheck(&grafdat), "SCOTCH_dgraphCheck");
549 // (fully connected network topology since using switch)
553 Pout<< "SCOTCH_archInit" << endl;
556 check(SCOTCH_archInit(&archdat), "SCOTCH_archInit");
558 List<label> processorWeights;
559 if (decompositionDict_.found("scotchCoeffs"))
561 const dictionary& scotchCoeffs =
562 decompositionDict_.subDict("scotchCoeffs");
564 scotchCoeffs.readIfPresent("processorWeights", processorWeights);
566 if (processorWeights.size())
570 Info<< "ptscotchDecomp : Using procesor weights "
576 SCOTCH_archCmpltw(&archdat, nProcessors_, processorWeights.begin()),
584 Pout<< "SCOTCH_archCmplt" << endl;
588 SCOTCH_archCmplt(&archdat, nProcessors_),
594 //SCOTCH_Mapping mapdat;
595 //SCOTCH_dgraphMapInit(&grafdat, &mapdat, &archdat, NULL);
596 //SCOTCH_dgraphMapCompute(&grafdat, &mapdat, &stradat); /*Perform mapping*/
597 //SCOTCHdgraphMapExit(&grafdat, &mapdat);
600 // Hack:switch off fpu error trapping
602 int oldExcepts = fedisableexcept
612 Pout<< "SCOTCH_dgraphMap" << endl;
614 finalDecomp.setSize(xadj.size()-1);
622 &stradat, // const SCOTCH_Strat *
623 finalDecomp.begin() // parttab
629 feenableexcept(oldExcepts);
634 //finalDecomp.setSize(xadj.size()-1);
640 // nProcessors_, // partnbr
641 // &stradat, // const SCOTCH_Strat *
642 // finalDecomp.begin() // parttab
644 // "SCOTCH_graphPart"
649 Pout<< "SCOTCH_dgraphExit" << endl;
651 // Release storage for graph
652 SCOTCH_dgraphExit(&grafdat);
653 // Release storage for strategy
654 SCOTCH_stratExit(&stradat);
655 // Release storage for network topology
656 SCOTCH_archExit(&archdat);
662 // * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
664 Foam::ptscotchDecomp::ptscotchDecomp(const dictionary& decompositionDict)
666 decompositionMethod(decompositionDict)
670 // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
672 Foam::labelList Foam::ptscotchDecomp::decompose
674 const polyMesh& mesh,
675 const pointField& points,
676 const scalarField& pointWeights
679 if (points.size() != mesh.nCells())
683 "ptscotchDecomp::decompose(const pointField&, const scalarField&)"
685 << "Can use this decomposition method only for the whole mesh"
687 << "and supply one coordinate (cellCentre) for every cell." << endl
688 << "The number of coordinates " << points.size() << endl
689 << "The number of cells in the mesh " << mesh.nCells()
693 // // For running sequential ...
694 // if (Pstream::nProcs() <= 1)
696 // return scotchDecomp(decompositionDict_, mesh_)
697 // .decompose(points, pointWeights);
700 // Make Metis CSR (Compressed Storage Format) storage
701 // adjncy : contains neighbours (= edges in graph)
702 // xadj(celli) : start of information in adjncy for celli
705 CompactListList<label> cellCells;
706 calcCellCells(mesh, identity(mesh.nCells()), mesh.nCells(), cellCells);
708 // Decompose using default weights
709 List<int> finalDecomp;
712 mesh.time().path()/mesh.name(),
719 // Copy back to labelList
720 labelList decomp(finalDecomp.size());
723 decomp[i] = finalDecomp[i];
729 Foam::labelList Foam::ptscotchDecomp::decompose
731 const polyMesh& mesh,
732 const labelList& agglom,
733 const pointField& agglomPoints,
734 const scalarField& pointWeights
737 if (agglom.size() != mesh.nCells())
741 "ptscotchDecomp::decompose(const labelList&, const pointField&)"
742 ) << "Size of cell-to-coarse map " << agglom.size()
743 << " differs from number of cells in mesh " << mesh.nCells()
747 // // For running sequential ...
748 // if (Pstream::nProcs() <= 1)
750 // return scotchDecomp(decompositionDict_, mesh)
751 // .decompose(agglom, agglomPoints, pointWeights);
754 // Make Metis CSR (Compressed Storage Format) storage
755 // adjncy : contains neighbours (= edges in graph)
756 // xadj(celli) : start of information in adjncy for celli
757 CompactListList<label> cellCells;
758 calcCellCells(mesh, agglom, agglomPoints.size(), cellCells);
760 // Decompose using weights
761 List<int> finalDecomp;
764 mesh.time().path()/mesh.name(),
771 // Rework back into decomposition for original mesh
772 labelList fineDistribution(agglom.size());
774 forAll(fineDistribution, i)
776 fineDistribution[i] = finalDecomp[agglom[i]];
779 return fineDistribution;
783 Foam::labelList Foam::ptscotchDecomp::decompose
785 const labelListList& globalCellCells,
786 const pointField& cellCentres,
787 const scalarField& cWeights
790 if (cellCentres.size() != globalCellCells.size())
794 "ptscotchDecomp::decompose(const pointField&, const labelListList&)"
795 ) << "Inconsistent number of cells (" << globalCellCells.size()
796 << ") and number of cell centres (" << cellCentres.size()
797 << ")." << exit(FatalError);
800 // // For running sequential ...
801 // if (Pstream::nProcs() <= 1)
803 // return scotchDecomp(decompositionDict_, mesh)
804 // .decompose(globalCellCells, cellCentres, cWeights);
808 // Make Metis CSR (Compressed Storage Format) storage
809 // adjncy : contains neighbours (= edges in graph)
810 // xadj(celli) : start of information in adjncy for celli
812 CompactListList<label> cellCells(globalCellCells);
814 // Decompose using weights
815 List<int> finalDecomp;
825 // Copy back to labelList
826 labelList decomp(finalDecomp.size());
829 decomp[i] = finalDecomp[i];
835 // ************************************************************************* //