/*---------------------------------------------------------------------------*\
  =========                 |
  \\      /  F ield         | foam-extend: Open Source CFD
   \\    /   O peration     | Version:     3.2
    \\  /    A nd           | Web:         http://www.foam-extend.org
     \\/     M anipulation  | For copyright notice see file Copyright
-------------------------------------------------------------------------------
License
    This file is part of foam-extend.

    foam-extend is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the
    Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.

    foam-extend is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with foam-extend.  If not, see <http://www.gnu.org/licenses/>.

\*---------------------------------------------------------------------------*/

#include "mpi.h"

#include "Pstream.H"
#include "PstreamReduceOps.H"
#include "dictionary.H"
#include "OSspecific.H"

// Select the MPI data type matching the precision foam-extend was built with
// (WM_SP/WM_DP are the standard flags; WM_LDP assumed for the long double build)
#if defined(WM_SP)
#   define MPI_SCALAR MPI_FLOAT
#elif defined(WM_DP)
#   define MPI_SCALAR MPI_DOUBLE
#elif defined(WM_LDP)
#   define MPI_SCALAR MPI_LONG_DOUBLE
#endif


// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //

defineTypeNameAndDebug(Foam::Pstream, 0);

// Names must match the order of the commsTypes enumeration in Pstream.H
template<>
const char* Foam::NamedEnum<Foam::Pstream::commsTypes, 3>::names[] =
    {"blocking", "scheduled", "nonBlocking"};

const Foam::NamedEnum<Foam::Pstream::commsTypes, 3>
    Foam::Pstream::commsTypeNames;


// * * * * * * * * * * * * * Private Member Functions  * * * * * * * * * * * //

void Foam::Pstream::setParRun()
{
    parRun_ = true;

    Pout.prefix() = '[' + name(myProcNo()) + "] ";
    Perr.prefix() = '[' + name(myProcNo()) + "] ";
}


void Foam::Pstream::calcLinearComm(const label nProcs)

    linearCommunication_.setSize(nProcs);

    labelList belowIDs(nProcs - 1);

    linearCommunication_[0] = commsStruct

    // Slaves. Have no below processors, only communicate up to master
    for (label procID = 1; procID < nProcs; procID++)

        linearCommunication_[procID] = commsStruct


// Append my children (and my children's children etc.) to allReceives.
void Foam::Pstream::collectReceives
(
    const label procID,
    const List<dynamicLabelList >& receives,
    dynamicLabelList& allReceives
)
{
    const dynamicLabelList& myChildren = receives[procID];

    forAll(myChildren, childI)
    {
        allReceives.append(myChildren[childI]);
        collectReceives(myChildren[childI], receives, allReceives);
    }
}
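

// Example (illustrative): for the 8-processor tree described below, where
// receives[0] = (1 2 4), receives[2] = (3), receives[4] = (5 6) and
// receives[6] = (7), collectReceives(0, receives, allReceives) walks the
// tree depth-first and yields allReceives = (1 2 3 4 5 6 7).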


// Tree like schedule. For 8 procs:
//
// The sends/receives for all levels are collected per processor (one send per
// processor; multiple receives possible) creating a table:
//
// proc     receives from   sends to
// ----     -------------   --------
void Foam::Pstream::calcTreeComm(label nProcs)
{
    label nLevels = 0;
    while ((1 << nLevels) < nProcs)
    {
        nLevels++;
    }

    List<dynamicLabelList > receives(nProcs);
    labelList sends(nProcs, -1);

    // Info<< "Using " << nLevels << " communication levels" << endl;

    label childOffset = offset/2;

    for (label level = 0; level < nLevels; level++)

        while (receiveID < nProcs)

            // Determine processor that sends and we receive from
            label sendID = receiveID + childOffset;

            receives[receiveID].append(sendID);
            sends[sendID] = receiveID;

    // For all processors find the processors it receives data from
    // (and the processors they receive data from etc.)
    List<dynamicLabelList > allReceives(nProcs);

    for (label procID = 0; procID < nProcs; procID++)
    {
        collectReceives(procID, receives, allReceives[procID]);
    }

    treeCommunication_.setSize(nProcs);

    for (label procID = 0; procID < nProcs; procID++)
    {
        treeCommunication_[procID] = commsStruct
        (
            nProcs,
            procID,
            sends[procID],
            receives[procID].shrink(),
            allReceives[procID].shrink()
        );
    }
}


// Callback from Pstream::init() : initialize linear and tree communication
// schedules now that nProcs is known.
void Foam::Pstream::initCommunicationSchedule()
{
    calcLinearComm(nProcs());
    calcTreeComm(nProcs());
}


// Valid parallel options vary between implementations, but flag common ones.
// If they are not removed by MPI_Init(), the subsequent argument processing
// will notice that they are wrong.
void Foam::Pstream::addValidParOptions(HashTable<string>& validParOptions)
{
    validParOptions.insert("np", "");
    validParOptions.insert("p4pg", "PI file");
    validParOptions.insert("p4wd", "directory");
    validParOptions.insert("p4amslave", "");
    validParOptions.insert("p4yourname", "hostname");
    validParOptions.insert("GAMMANP", "number of instances");
    validParOptions.insert("machinefile", "machine file");
}
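

// Example (illustrative): a typical parallel launch is
//
//     mpirun -np 4 <solver> -parallel
//
// where "-parallel" itself is handled by the normal argument processing
// (argList), not by the options registered above.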


bool Foam::Pstream::init(int& argc, char**& argv)
{
    MPI_Init(&argc, &argv);

    int numprocs;
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myProcNo_);

    if (numprocs <= 1)
    {
        FatalErrorIn("Pstream::init(int& argc, char**& argv)")
            << "bool Pstream::init(int& argc, char**& argv) : "
               "attempt to run parallel on 1 processor"
            << Foam::abort(FatalError);
    }

    procIDs_.setSize(numprocs);

    forAll(procIDs_, procNo)
    {
        procIDs_[procNo] = procNo;
    }

    setParRun();

    string bufferSizeName = getEnv("MPI_BUFFER_SIZE");

    if (bufferSizeName.size())
    {
        int bufferSize = atoi(bufferSizeName.c_str());

        if (bufferSize)
        {
            MPI_Buffer_attach(new char[bufferSize], bufferSize);
        }
    }
    else
    {
        FatalErrorIn("Pstream::init(int& argc, char**& argv)")
            << "Pstream::init(int& argc, char**& argv) : "
            << "environment variable MPI_BUFFER_SIZE not defined"
            << Foam::abort(FatalError);
    }
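
    // Note (assumption): MPI_BUFFER_SIZE is normally exported by the
    // foam-extend environment scripts before a parallel run, e.g.
    //
    //     export MPI_BUFFER_SIZE=20000000
    //
    // so reaching the FatalError above usually means the environment was
    // not sourced.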

    int processorNameLen;
    char processorName[MPI_MAX_PROCESSOR_NAME];

    MPI_Get_processor_name(processorName, &processorNameLen);

    //signal(SIGABRT, stop);

    // Now that nProcs is known construct communication tables.
    initCommunicationSchedule();

    return true;
}
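

// Note: solvers do not call Pstream::init() directly; argList invokes it when
// the -parallel option is present, after which nProcs(), myProcNo() and the
// communication schedules above are valid.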


void Foam::Pstream::exit(int errnum)

    MPI_Buffer_detach(&buff, &size);

    MPI_Abort(MPI_COMM_WORLD, errnum);


void Foam::Pstream::abort()
{
    MPI_Abort(MPI_COMM_WORLD, 1);
}


void Foam::reduce(scalar& Value, const sumOp<scalar>& bop)
{
    if (!Pstream::parRun())
    {
        return;
    }

    if (Pstream::nProcs() <= Pstream::nProcsSimpleSum())
        if (Pstream::master())
            int slave=Pstream::firstSlave();
            slave<=Pstream::lastSlave();

                    Pstream::procID(slave),

                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                ) << "MPI_Recv failed"
                    << Foam::abort(FatalError);

            Value = bop(Value, value);

                Pstream::procID(Pstream::masterNo()),

                "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
            ) << "MPI_Send failed"
                << Foam::abort(FatalError);

        if (Pstream::master())
            int slave=Pstream::firstSlave();
            slave<=Pstream::lastSlave();

                    Pstream::procID(slave),

                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                ) << "MPI_Send failed"
                    << Foam::abort(FatalError);

                Pstream::procID(Pstream::masterNo()),

                "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
            ) << "MPI_Recv failed"
                << Foam::abort(FatalError);

        scalar sum;
        MPI_Allreduce(&Value, &sum, 1, MPI_SCALAR, MPI_SUM, MPI_COMM_WORLD);
        Value = sum;

        int myProcNo = Pstream::myProcNo();
        int nProcs = Pstream::nProcs();

        // receive from children
        int thisLevelOffset = 2;
        int childLevelOffset = thisLevelOffset/2;

            (childLevelOffset < nProcs)
         && (myProcNo % thisLevelOffset) == 0

            childProcId = myProcNo + childLevelOffset;

            if (childProcId < nProcs)

                        Pstream::procID(childProcId),

                        "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                    ) << "MPI_Recv failed"
                        << Foam::abort(FatalError);

                Value = bop(Value, value);

            thisLevelOffset <<= 1;
            childLevelOffset = thisLevelOffset/2;

        // send and receive from parent
        if (!Pstream::master())

            int parentId = myProcNo - (myProcNo % thisLevelOffset);

                    Pstream::procID(parentId),

                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                ) << "MPI_Send failed"
                    << Foam::abort(FatalError);

                    Pstream::procID(parentId),

                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                ) << "MPI_Recv failed"
                    << Foam::abort(FatalError);

        // distribute to my children
        thisLevelOffset >>= 1;
        childLevelOffset = thisLevelOffset/2;

            childProcId = myProcNo + childLevelOffset;

            if (childProcId < nProcs)

                        Pstream::procID(childProcId),

                        "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                    ) << "MPI_Send failed"
                        << Foam::abort(FatalError);

            thisLevelOffset >>= 1;
            childLevelOffset = thisLevelOffset/2;
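

// Typical use of reduce() (illustrative): combine a processor-local value
// into a global sum, after which every processor holds the same result.
//
//     scalar localValue = 1.0*Pstream::myProcNo();
//     reduce(localValue, sumOp<scalar>());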


// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //

// Initialise my process number to 0 (the master)
int Foam::Pstream::myProcNo_(0);


// By default this is not a parallel run
bool Foam::Pstream::parRun_(false);


// List of process IDs
Foam::List<int> Foam::Pstream::procIDs_(1, 0);


// Standard transfer message type
const int Foam::Pstream::msgType_(1);


// Linear communication schedule
Foam::List<Foam::Pstream::commsStruct> Foam::Pstream::linearCommunication_(0);


// Multi level communication schedule
Foam::List<Foam::Pstream::commsStruct> Foam::Pstream::treeCommunication_(0);


// Should compact transfer be used in which floats replace doubles,
// reducing the bandwidth requirement at the expense of some loss
// in accuracy
const Foam::debug::optimisationSwitch
Foam::Pstream::floatTransfer


// Number of processors at which the reduce algorithm changes from linear to
// tree communication
const Foam::debug::optimisationSwitch
Foam::Pstream::nProcsSimpleSum


// Default communications type
const Foam::debug::optimisationSwitch
Foam::Pstream::defaultCommsType

    "blocking, nonBlocking, scheduled"


// ************************************************************************* //