src/mesh/cfMesh/utilities/containers/VRWGraph/VRWGraphSMPModifier.C

   1 /*---------------------------------------------------------------------------*\
   2   =========                 |
   3   \\      /  F ield         | cfMesh: A library for mesh generation
   4    \\    /   O peration     |
   5     \\  /    A nd           | Author: Franjo Juretic (franjo.juretic@c-fields.com)
   6      \\/     M anipulation  | Copyright (C) Creative Fields, Ltd.
   7 -------------------------------------------------------------------------------
   8 License
   9     This file is part of cfMesh.
  10
  11     cfMesh is free software; you can redistribute it and/or modify it
  12     under the terms of the GNU General Public License as published by the
  13     Free Software Foundation; either version 3 of the License, or (at your
  14     option) any later version.
  15
  16     cfMesh is distributed in the hope that it will be useful, but WITHOUT
  17     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  18     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  19     for more details.
  20
  21     You should have received a copy of the GNU General Public License
  22     along with cfMesh.  If not, see <http://www.gnu.org/licenses/>.
  23
  24 \*---------------------------------------------------------------------------*/
  25
  26 #include "VRWGraphSMPModifier.H"
  27 #include "labelPair.H"
  28
  29 # ifdef USE_OMP
  30 #include <omp.h>
  31 # endif
  32
  33 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
  34
  35 namespace Foam
  36 {
  37
  38 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
  39
  40 VRWGraphSMPModifier::VRWGraphSMPModifier(VRWGraph& graph)
  41 :
  42     graph_(graph)
  43 {}
  44
  45 VRWGraphSMPModifier::~VRWGraphSMPModifier()
  46 {}
  47
  48 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
  49
  50 void VRWGraphSMPModifier::mergeGraphs(const List<VRWGraph>& graphParts)
  51 {
  52     const label nGraphs = graphParts.size();
  53     const label nRows = graphParts[0].size();
  54     forAll(graphParts, i)
  55     {
  56         if( nRows != graphParts[i].size() )
  57             FatalErrorIn
  58             (
  59                 "inline void Foam::VRWGraph::mergeGraphs(const List<VRWGraph>&)"
  60             ) << "Cannot merge graphs" << abort(FatalError);
  61     }
  62
  63     //- find the number of elements in each row
  64     labelLongList nElmtsInRow(nRows);
  65
  66     # ifdef USE_OMP
  67     # pragma omp parallel for schedule(static, 1)
  68     # endif
  69     for(label rowI=0;rowI<nRows;++rowI)
  70     {
  71         label sum(0);
  72         for(label i=0;i<nGraphs;++i)
  73             sum += graphParts[i].sizeOfRow(rowI);
  74
  75         nElmtsInRow[rowI] = sum;
  76     }
  77
  78     //- set the size of graph
  79     setSizeAndRowSize(nElmtsInRow);
  80
  81     //- Finally, assemble the merged graph
  82     # ifdef USE_OMP
  83     # pragma omp parallel for schedule(static, 1)
  84     # endif
  85     for(label rowI=0;rowI<nRows;++rowI)
  86     {
  87         forAll(graphParts, i)
  88         {
  89             const VRWGraph& gp = graphParts[i];
  90             for(label j=0;j<gp.sizeOfRow(rowI);++j)
  91                 graph_(rowI, --nElmtsInRow[rowI]) = gp(rowI, j);
  92         }
  93     }
  94 }
  95
  96 void VRWGraphSMPModifier::reverseAddressing(const VRWGraph& origGraph)
  97 {
  98     graph_.setSize(0);
  99     labelLongList nAppearances;
 100
 101     # ifdef USE_OMP
 102     label nThreads = 3 * omp_get_num_procs();
 103     if( origGraph.size() < 1000 )
 104         nThreads = 1;
 105     # else
 106     const label nThreads(1);
 107     # endif
 108
 109     label minRow(INT_MAX), maxRow(-1);
 110     List<List<LongList<labelPair> > > dataForOtherThreads(nThreads);
 111
 112     # ifdef USE_OMP
 113     # pragma omp parallel num_threads(nThreads)
 114     # endif
 115     {
 116         # ifdef USE_OMP
 117         const label threadI = omp_get_thread_num();
 118         # else
 119         const label threadI(0);
 120         # endif
 121
 122         List<LongList<labelPair> >& dot = dataForOtherThreads[threadI];
 123         dot.setSize(nThreads);
 124
 125         //- find min and max entry in the graph
 126         //- they are used for assigning ranges of values local for each process
 127         label localMinRow(INT_MAX), localMaxRow(-1);
 128         # ifdef USE_OMP
 129         # pragma omp for schedule(static)
 130         # endif
 131         forAll(origGraph, rowI)
 132         {
 133             forAllRow(origGraph, rowI, i)
 134             {
 135                 const label entryI = origGraph(rowI, i);
 136                 localMaxRow = Foam::max(localMaxRow, entryI);
 137                 localMinRow = Foam::min(localMinRow, entryI);
 138             }
 139         }
 140
 141         ++localMaxRow;
 142
 143         # ifdef USE_OMP
 144         # pragma omp critical
 145         # endif
 146         {
 147             minRow = Foam::min(minRow, localMinRow);
 148             maxRow = Foam::max(maxRow, localMaxRow);
 149
 150             nAppearances.setSize(maxRow);
 151         }
 152
 153         # ifdef USE_OMP
 154         # pragma omp barrier
 155         # endif
 156
 157         //- initialise appearances
 158         # ifdef USE_OMP
 159         # pragma omp for schedule(static)
 160         # endif
 161         for(label i=0;i<maxRow;++i)
 162             nAppearances[i] = 0;
 163
 164         # ifdef USE_OMP
 165         # pragma omp barrier
 166         # endif
 167
 168         const label range = (maxRow - minRow) / nThreads + 1;
 169         const label localMin = minRow + threadI * range;
 170         const label localMax = Foam::min(localMin + range, maxRow);
 171
 172         //- find the number of appearances of each element in the original graph
 173         # ifdef USE_OMP
 174         # pragma omp for schedule(static)
 175         # endif
 176         forAll(origGraph, rowI)
 177         {
 178             forAllRow(origGraph, rowI, j)
 179             {
 180                 const label entryI = origGraph(rowI, j);
 181
 182                 const label threadNo = (entryI - minRow) / range;
 183
 184                 if( threadNo == threadI )
 185                 {
 186                     ++nAppearances[entryI];
 187                 }
 188                 else
 189                 {
 190                     dot[threadNo].append(labelPair(entryI, rowI));
 191                 }
 192             }
 193         }
 194
 195         # ifdef USE_OMP
 196         # pragma omp barrier
 197         # endif
 198
 199         //- count the appearances which are not local to the processor
 200         for(label i=0;i<nThreads;++i)
 201         {
 202             const LongList<labelPair>& data =
 203                 dataForOtherThreads[i][threadI];
 204
 205             forAll(data, j)
 206                 ++nAppearances[data[j].first()];
 207         }
 208
 209         # ifdef USE_OMP
 210         # pragma omp barrier
 211         # endif
 212
 213         //- allocate graph
 214         # ifdef USE_OMP
 215         # pragma omp master
 216         # endif
 217         setSizeAndRowSize(nAppearances);
 218
 219         # ifdef USE_OMP
 220         # pragma omp barrier
 221         # endif
 222
 223         for(label i=localMin;i<localMax;++i)
 224         {
 225             nAppearances[i] = 0;
 226         }
 227
 228         //- start filling reverse addressing graph
 229         //- update data from processors with smaller labels
 230         for(label i=0;i<threadI;++i)
 231         {
 232             const LongList<labelPair>& data =
 233                 dataForOtherThreads[i][threadI];
 234
 235             forAll(data, j)
 236             {
 237                 const label entryI = data[j].first();
 238                 graph_(entryI, nAppearances[entryI]++) = data[j].second();
 239             }
 240         }
 241
 242         //- update data local to the processor
 243         # ifdef USE_OMP
 244         # pragma omp for schedule(static)
 245         # endif
 246         forAll(origGraph, rowI)
 247         {
 248             forAllRow(origGraph, rowI, j)
 249             {
 250                 const label entryI = origGraph(rowI, j);
 251
 252                 if( (entryI >= localMin) && (entryI < localMax) )
 253                     graph_(entryI, nAppearances[entryI]++) = rowI;
 254             }
 255         }
 256
 257         //- update data from the processors with higher labels
 258         for(label i=threadI+1;i<nThreads;++i)
 259         {
 260             const LongList<labelPair>& data =
 261                 dataForOtherThreads[i][threadI];
 262
 263             forAll(data, j)
 264             {
 265                 const label entryI = data[j].first();
 266                 graph_(entryI, nAppearances[entryI]++) = data[j].second();
 267             }
 268         }
 269     }
 270 }
 271
 272 void VRWGraphSMPModifier::optimizeMemoryUsage()
 273 {
 274     # ifdef USE_OMP
 275     label nThreads = 3 * omp_get_num_procs();
 276     if( graph_.size() < 1000 )
 277         nThreads = 1;
 278     # else
 279     const label nThreads(1);
 280     # endif
 281
 282     DynList<label> nRows, nEntries;
 283     nRows.setSize(nThreads);
 284     nEntries.setSize(nThreads);
 285
 286     LongList<rowElement> newRows;
 287     labelLongList newData;
 288
 289     # ifdef USE_OMP
 290     # pragma omp parallel num_threads(nThreads)
 291     # endif
 292     {
 293         # ifdef USE_OMP
 294         const label threadI = omp_get_thread_num();
 295         # else
 296         const label threadI(0);
 297         # endif
 298         nRows[threadI] = 0;
 299         nEntries[threadI] = 0;
 300
 301         # ifdef USE_OMP
 302         # pragma omp for schedule(static)
 303         # endif
 304         forAll(graph_.rows_, rowI)
 305         {
 306             if( graph_.rows_[rowI].start() == VRWGraph::INVALIDROW )
 307                 continue;
 308
 309             ++nRows[threadI];
 310             nEntries[threadI] += graph_.rows_[rowI].size();
 311         }
 312
 313         # ifdef USE_OMP
 314         # pragma omp barrier
 315
 316         # pragma omp master
 317         # endif
 318         {
 319             //- find the number of rows
 320             label counter(0);
 321             forAll(nRows, i)
 322                 counter += nRows[i];
 323
 324             newRows.setSize(counter);
 325
 326             //- find the number of data entries
 327             counter = 0;
 328             forAll(nEntries, i)
 329                 counter += nEntries[i];
 330
 331             newData.setSize(counter);
 332         }
 333
 334         # ifdef USE_OMP
 335         # pragma omp barrier
 336         # endif
 337
 338         //- find the starting position for each thread
 339         label rowStart(0), entryStart(0);
 340         for(label i=0;i<threadI;++i)
 341         {
 342             rowStart += nRows[i];
 343             entryStart += nEntries[i];
 344         }
 345
 346         //- copy the data into the location
 347         # ifdef USE_OMP
 348         # pragma omp for schedule(static)
 349         # endif
 350         forAll(graph_, rowI)
 351         {
 352             rowElement& el = newRows[rowStart];
 353             el.start() += entryStart;
 354             ++rowStart;
 355
 356             el.size() = graph_.sizeOfRow(rowI);
 357             forAllRow(graph_, rowI, i)
 358             {
 359                 newData[entryStart] = graph_(rowI, i);
 360                 ++entryStart;
 361             }
 362         }
 363     }
 364
 365     //- replace the original data with the compressed data
 366     graph_.rows_.transfer(newRows);
 367     graph_.data_.transfer(newData);
 368 }
 369
 370 void VRWGraphSMPModifier::operator=(const VRWGraph& og)
 371 {
 372     graph_.data_.setSize(og.data_.size());
 373     graph_.rows_.setSize(og.rows_.size());
 374
 375     # ifdef USE_OMP
 376     # pragma omp parallel
 377     # endif
 378     {
 379         # ifdef USE_OMP
 380         # pragma omp for schedule(static, 1)
 381         # endif
 382         forAll(graph_.data_, i)
 383             graph_.data_[i] = og.data_[i];
 384
 385         # ifdef USE_OMP
 386         # pragma omp for schedule(static, 1)
 387         # endif
 388         forAll(graph_.rows_, rowI)
 389             graph_.rows_[rowI] = og.rows_[rowI];
 390     }
 391 }
 392
 393 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 394
 395 } // End namespace Foam
 396
 397 // ************************************************************************* //