1 /*---------------------------------------------------------------------------*\
3 \\ / F ield | cfMesh: A library for mesh generation
5 \\ / A nd | Author: Franjo Juretic (franjo.juretic@c-fields.com)
6 \\/ M anipulation | Copyright (C) Creative Fields, Ltd.
7 -------------------------------------------------------------------------------
9 This file is part of cfMesh.
11 cfMesh is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by the
13 Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
16 cfMesh is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 You should have received a copy of the GNU General Public License
22 along with cfMesh. If not, see <http://www.gnu.org/licenses/>.
24 \*---------------------------------------------------------------------------*/
26 #include "VRWGraphSMPModifier.H"
27 #include "labelPair.H"
33 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
38 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
40 VRWGraphSMPModifier::VRWGraphSMPModifier(VRWGraph& graph)
45 VRWGraphSMPModifier::~VRWGraphSMPModifier()
48 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
50 void VRWGraphSMPModifier::mergeGraphs(const List<VRWGraph>& graphParts)
52 const label nGraphs = graphParts.size();
53 const label nRows = graphParts[0].size();
56 if( nRows != graphParts[i].size() )
59 "inline void Foam::VRWGraph::mergeGraphs(const List<VRWGraph>&)"
60 ) << "Cannot merge graphs" << abort(FatalError);
63 //- find the number of elements in each row
64 labelLongList nElmtsInRow(nRows);
67 # pragma omp parallel for schedule(static, 1)
69 for(label rowI=0;rowI<nRows;++rowI)
72 for(label i=0;i<nGraphs;++i)
73 sum += graphParts[i].sizeOfRow(rowI);
75 nElmtsInRow[rowI] = sum;
78 //- set the size of graph
79 setSizeAndRowSize(nElmtsInRow);
81 //- Finally, assemble the merged graph
83 # pragma omp parallel for schedule(static, 1)
85 for(label rowI=0;rowI<nRows;++rowI)
89 const VRWGraph& gp = graphParts[i];
90 for(label j=0;j<gp.sizeOfRow(rowI);++j)
91 graph_(rowI, --nElmtsInRow[rowI]) = gp(rowI, j);
96 void VRWGraphSMPModifier::reverseAddressing(const VRWGraph& origGraph)
99 labelLongList nAppearances;
102 label nThreads = 3 * omp_get_num_procs();
103 if( origGraph.size() < 1000 )
106 const label nThreads(1);
109 label minRow(INT_MAX), maxRow(-1);
110 List<List<LongList<labelPair> > > dataForOtherThreads(nThreads);
113 # pragma omp parallel num_threads(nThreads)
117 const label threadI = omp_get_thread_num();
119 const label threadI(0);
122 List<LongList<labelPair> >& dot = dataForOtherThreads[threadI];
123 dot.setSize(nThreads);
125 //- find min and max entry in the graph
126 //- they are used for assigning ranges of values local for each process
127 label localMinRow(INT_MAX), localMaxRow(-1);
129 # pragma omp for schedule(static)
131 forAll(origGraph, rowI)
133 forAllRow(origGraph, rowI, i)
135 const label entryI = origGraph(rowI, i);
136 localMaxRow = Foam::max(localMaxRow, entryI);
137 localMinRow = Foam::min(localMinRow, entryI);
144 # pragma omp critical
147 minRow = Foam::min(minRow, localMinRow);
148 maxRow = Foam::max(maxRow, localMaxRow);
150 nAppearances.setSize(maxRow);
157 //- initialise appearances
159 # pragma omp for schedule(static)
161 for(label i=0;i<maxRow;++i)
168 const label range = (maxRow - minRow) / nThreads + 1;
169 const label localMin = minRow + threadI * range;
170 const label localMax = Foam::min(localMin + range, maxRow);
172 //- find the number of appearances of each element in the original graph
174 # pragma omp for schedule(static)
176 forAll(origGraph, rowI)
178 forAllRow(origGraph, rowI, j)
180 const label entryI = origGraph(rowI, j);
182 const label threadNo = (entryI - minRow) / range;
184 if( threadNo == threadI )
186 ++nAppearances[entryI];
190 dot[threadNo].append(labelPair(entryI, rowI));
199 //- count the appearances which are not local to the processor
200 for(label i=0;i<nThreads;++i)
202 const LongList<labelPair>& data =
203 dataForOtherThreads[i][threadI];
206 ++nAppearances[data[j].first()];
217 setSizeAndRowSize(nAppearances);
223 for(label i=localMin;i<localMax;++i)
228 //- start filling reverse addressing graph
229 //- update data from processors with smaller labels
230 for(label i=0;i<threadI;++i)
232 const LongList<labelPair>& data =
233 dataForOtherThreads[i][threadI];
237 const label entryI = data[j].first();
238 graph_(entryI, nAppearances[entryI]++) = data[j].second();
242 //- update data local to the processor
244 # pragma omp for schedule(static)
246 forAll(origGraph, rowI)
248 forAllRow(origGraph, rowI, j)
250 const label entryI = origGraph(rowI, j);
252 if( (entryI >= localMin) && (entryI < localMax) )
253 graph_(entryI, nAppearances[entryI]++) = rowI;
257 //- update data from the processors with higher labels
258 for(label i=threadI+1;i<nThreads;++i)
260 const LongList<labelPair>& data =
261 dataForOtherThreads[i][threadI];
265 const label entryI = data[j].first();
266 graph_(entryI, nAppearances[entryI]++) = data[j].second();
272 void VRWGraphSMPModifier::optimizeMemoryUsage()
275 label nThreads = 3 * omp_get_num_procs();
276 if( graph_.size() < 1000 )
279 const label nThreads(1);
282 DynList<label> nRows, nEntries;
283 nRows.setSize(nThreads);
284 nEntries.setSize(nThreads);
286 LongList<rowElement> newRows;
287 labelLongList newData;
290 # pragma omp parallel num_threads(nThreads)
294 const label threadI = omp_get_thread_num();
296 const label threadI(0);
299 nEntries[threadI] = 0;
302 # pragma omp for schedule(static)
304 forAll(graph_.rows_, rowI)
306 if( graph_.rows_[rowI].start() == VRWGraph::INVALIDROW )
310 nEntries[threadI] += graph_.rows_[rowI].size();
319 //- find the number of rows
324 newRows.setSize(counter);
326 //- find the number of data entries
329 counter += nEntries[i];
331 newData.setSize(counter);
338 //- find the starting position for each thread
339 label rowStart(0), entryStart(0);
340 for(label i=0;i<threadI;++i)
342 rowStart += nRows[i];
343 entryStart += nEntries[i];
346 //- copy the data into the location
348 # pragma omp for schedule(static)
352 rowElement& el = newRows[rowStart];
353 el.start() += entryStart;
356 el.size() = graph_.sizeOfRow(rowI);
357 forAllRow(graph_, rowI, i)
359 newData[entryStart] = graph_(rowI, i);
365 //- replace the original data with the compressed data
366 graph_.rows_.transfer(newRows);
367 graph_.data_.transfer(newData);
370 void VRWGraphSMPModifier::operator=(const VRWGraph& og)
372 graph_.data_.setSize(og.data_.size());
373 graph_.rows_.setSize(og.rows_.size());
376 # pragma omp parallel
380 # pragma omp for schedule(static, 1)
382 forAll(graph_.data_, i)
383 graph_.data_[i] = og.data_[i];
386 # pragma omp for schedule(static, 1)
388 forAll(graph_.rows_, rowI)
389 graph_.rows_[rowI] = og.rows_[rowI];
393 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
395 } // End namespace Foam
397 // ************************************************************************* //