diff --git a/applications/test/parallel-comm2/Make/files b/applications/test/parallel-comm2/Make/files index a6841ec2a15d17c428cb46749fdbd5ec9f93ab7c..10cc0e281090a045ec14d23c4a993439b262d2fc 100644 --- a/applications/test/parallel-comm2/Make/files +++ b/applications/test/parallel-comm2/Make/files @@ -1,3 +1,3 @@ -Test-parallel-comm2.C +Test-parallel-comm2.cxx EXE = $(FOAM_USER_APPBIN)/Test-parallel-comm2 diff --git a/applications/test/parallel-comm2/Test-parallel-comm2.C b/applications/test/parallel-comm2/Test-parallel-comm2.cxx similarity index 88% rename from applications/test/parallel-comm2/Test-parallel-comm2.C rename to applications/test/parallel-comm2/Test-parallel-comm2.cxx index 8ed7684fc2d8e5c090fe228468c66282560d717f..9f9fe379b594fb6a5261504fdc2aa45cf2df16a0 100644 --- a/applications/test/parallel-comm2/Test-parallel-comm2.C +++ b/applications/test/parallel-comm2/Test-parallel-comm2.cxx @@ -69,6 +69,7 @@ int main(int argc, char *argv[]) argList::addBoolOption("info", "information"); argList::addBoolOption("print-tree", "Report tree(s) as graph"); argList::addBoolOption("no-test", "Disable general tests"); + argList::addBoolOption("split", "Test Pstream split-comm"); argList::addBoolOption("host-comm", "Test Pstream host-comm"); argList::addBoolOption("host-broadcast", "Test host-base broadcasts"); @@ -85,8 +86,8 @@ int main(int argc, char *argv[]) if (UPstream::parRun() && optPrintTree) { - Info<< "comms: " - << UPstream::whichCommunication(UPstream::worldComm) << nl; + // Info<< "comms: " + // << UPstream::whichCommunication(UPstream::worldComm) << nl; UPstream::printCommTree(UPstream::commWorld()); } @@ -102,6 +103,34 @@ int main(int argc, char *argv[]) << flatOutput(UPstream::procID(UPstream::commLocalNode())) << nl; } + if (UPstream::parRun() && args.found("split")) + { + Info<< "split: alternative ranks" << nl; + + const auto myRank = UPstream::myProcNo(); + + int colour = + ( + (myRank == 5 || myRank == 6) // Exclude these ones + ? -1 + : (myRank % 2) + ); + + UPstream::communicator comm = + UPstream::communicator::split(UPstream::commWorld(), colour, true); + + Pout<< "split ranks (colour=" << colour << ") " + << flatOutput(UPstream::procID(comm.comm())) << nl; + + comm.reset(); + comm = + UPstream::communicator::split(UPstream::commWorld(), colour, false); + + Pout<< "Split ranks (colour=" << colour << ") " + << flatOutput(UPstream::procID(comm.comm())) << nl; + } + + if (args.found("info")) { Info<< nl; @@ -135,8 +164,8 @@ int main(int argc, char *argv[]) << endl; { - Info<< "host-master: " - << UPstream::whichCommunication(commInterNode) << endl; + // Info<< "host-master: " + // << UPstream::whichCommunication(commInterNode) << endl; UPstream::printCommTree(commInterNode); UPstream::printCommTree(commLocalNode); diff --git a/bin/tools/foamCreateCompletionCache b/bin/tools/foamCreateCompletionCache index 7cc535afd9af04efe3dadd4674f171f376616649..51d67d159fd71f5c383b0716f34744c94662f25f 100755 --- a/bin/tools/foamCreateCompletionCache +++ b/bin/tools/foamCreateCompletionCache @@ -6,7 +6,7 @@ # \\ / A nd | www.openfoam.com # \\/ M anipulation | #------------------------------------------------------------------------------ -# Copyright (C) 2017-2023 OpenCFD Ltd. +# Copyright (C) 2017-2025 OpenCFD Ltd. #------------------------------------------------------------------------------ # License # This file is part of OpenFOAM, distributed under GPL-3.0-or-later. @@ -168,6 +168,7 @@ extractOptions() -e '/^-doc-source/d; /^-help-man/d;' \ -e '/^-hostRoots /d; /^-roots /d;' \ -e '/^-lib /d; /^-no-libs /d;' \ + -e '/^-mpi-.*/d;' \ -e '/^-[a-z]*-switch /d;' \ -e 'y/,/ /; s/=.*$/=/;' \ -e '/^-[^ ]* </{ s/^\(-[^ ]* <\).*$/\1/; p; d }' \ diff --git a/bin/tools/help-filter b/bin/tools/help-filter index df91b04d15de76c32342a215df4430efcef90c3b..fb4c492a20aabe0cb2d043f43bb323e23a2165ce 100755 --- a/bin/tools/help-filter +++ b/bin/tools/help-filter @@ -6,7 +6,7 @@ # \\ / A nd | www.openfoam.com # \\/ M anipulation | #------------------------------------------------------------------------------- -# Copyright (C) 2020-2023 OpenCFD Ltd. +# Copyright (C) 2020-2025 OpenCFD Ltd. #------------------------------------------------------------------------------ # License # This file is part of OpenFOAM, distributed under GPL-3.0-or-later. @@ -28,6 +28,7 @@ sed -ne '1,/^[Oo]ptions:/d' \ -e '/^-doc-source/d; /^-help-man/d;' \ -e '/^-hostRoots /d; /^-roots /d;' \ -e '/^-lib /d; /^-no-libs /d;' \ + -e '/^-mpi-.*/d;' \ -e '/^-[a-z]*-switch /d;' \ -e 'y/,/ /; s/=.*$/=/;' \ -e '/^-[^ ]* </{ s/^\(-[^ ]* <\).*$/\1/; p; d }' \ diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C index 8bd928338291ab8fe55c690b015b1b07f2185d65..99d147720c88bedb6371c37140d42204583aa239 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C @@ -445,7 +445,8 @@ Foam::label Foam::UPstream::dupCommunicator Foam::label Foam::UPstream::splitCommunicator ( const label parentIndex, - const int colour + const int colour, + const bool two_step ) { #ifdef FULLDEBUG @@ -465,7 +466,8 @@ Foam::label Foam::UPstream::splitCommunicator { Perr<< "Split communicator [" << index << "] from [" << parentIndex - << "] using colour=" << colour << endl; + << "] using colour=" << colour + << " (two_step=" << two_step << ")" << endl; } // Initially treat as unknown, @@ -475,7 +477,7 @@ Foam::label Foam::UPstream::splitCommunicator if (UPstream::parRun()) { - splitCommunicatorComponents(parentIndex, index, colour); + splitCommunicatorComponents(parentIndex, index, colour, two_step); } return index; @@ -857,6 +859,8 @@ bool Foam::UPstream::parRun_(false); bool Foam::UPstream::haveThreads_(false); +bool Foam::UPstream::noInitialCommDup_(false); + int Foam::UPstream::msgType_(1); diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H index 76ff4f975e3a106002228d069939ca83f80ec28c..3f7905e3fc4825edb16ba7410d9188f3d306a3ce 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H @@ -397,6 +397,9 @@ private: //- Have support for threads? static bool haveThreads_; + //- Initial MPI_Comm_dup(MPI_COMM_WORLD,...) disabled? (default: false) + static bool noInitialCommDup_; + //- Standard transfer message type static int msgType_; @@ -495,7 +498,9 @@ private: ( const label parentIndex, const label index, - const int colour + const int colour, + //! Use MPI_Allgather+MPI_Comm_create_group vs MPI_Comm_split + const bool two_step = true ); //- Free MPI components of communicator. @@ -877,7 +882,10 @@ public: //! The colouring to select which ranks to include. //! Negative values correspond to 'ignore' - const int colour + const int colour, + + //! Use MPI_Allgather+MPI_Comm_create_group vs MPI_Comm_split + const bool two_step = true ); //- Free a previously allocated communicator. @@ -949,11 +957,14 @@ public: const label parentComm, //! The colouring to select which ranks to include. //! Negative values correspond to 'ignore' - const int colour + const int colour, + //! Use MPI_Allgather+MPI_Comm_create_group vs MPI_Comm_split + const bool two_step = true ) { communicator c; - c.comm_ = UPstream::splitCommunicator(parentComm, colour); + c.comm_ = + UPstream::splitCommunicator(parentComm, colour, two_step); return c; } diff --git a/src/OpenFOAM/global/argList/argList.C b/src/OpenFOAM/global/argList/argList.C index d04c26860aa200608448048a8fb655c5abac7266..02223f1bd79ca3136a8b77bad2e5a21105415c2a 100644 --- a/src/OpenFOAM/global/argList/argList.C +++ b/src/OpenFOAM/global/argList/argList.C @@ -128,7 +128,14 @@ Foam::argList::initValidTables::initValidTables() ( "mpi-threads", "Request use of MPI threads", - true // advanced option + true // advanced option + ); + + argList::addBoolOption + ( + "mpi-no-comm-dup", + "Disable initial MPI_Comm_dup()", + true // advanced option ); argList::addOption @@ -192,6 +199,12 @@ Foam::argList::initValidTables::initValidTables() "name" ); + argList::addBoolOption + ( + "mpi-split-by-appnum", + "Split world communicator based on the APPNUM", + true // advanced option + ); // Some standard option aliases (with or without version warnings) // argList::addOptionCompat @@ -596,6 +609,8 @@ void Foam::argList::noParallel() removeOption("hostRoots"); removeOption("world"); removeOption("mpi-threads"); + removeOption("mpi-no-comm-dup"); + removeOption("mpi-split-by-appnum"); validParOptions.clear(); } diff --git a/src/Pstream/dummy/UPstream.C b/src/Pstream/dummy/UPstream.C index 4f70336800be6bb6fb4291aaf9792ee1cea6ec2d..fa8261e1c5fdd6d7b20e191862ad54799988c7a9 100644 --- a/src/Pstream/dummy/UPstream.C +++ b/src/Pstream/dummy/UPstream.C @@ -100,7 +100,8 @@ void Foam::UPstream::splitCommunicatorComponents ( const label parentIndex, const label index, - int colour + int colour, + const bool two_step ) {} diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C index 1b051b072fb2d20c3583dd8e591edaf3ae14b557..d573b5a4aaa759decd5b30d45e00bcb24d07f013 100644 --- a/src/Pstream/mpi/UPstream.C +++ b/src/Pstream/mpi/UPstream.C @@ -262,33 +262,83 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) } - // Check argument list for local world - label worldIndex = -1; + // Check argument list for any of the following: + // - local world + // -> Extract world name and filter out '-world <name>' from argv list + // - mpi-no-comm-dup option + // -> disable initial comm_dup and filter out the option + // - mpi-split-by-appnum option + // -> disable initial comm_dup, select split-by-appnum + // and filter out the option + + // Default handling of initial MPI_Comm_dup(MPI_COMM_WORLD,...) + UPstream::noInitialCommDup_ = false; + bool split_by_appnum = false; + + // Local world name + word worldName; + for (int argi = 1; argi < argc; ++argi) { - if (strcmp(argv[argi], "-world") == 0) + const char *optName = argv[argi]; + if (optName[0] != '-') + { + continue; + } + ++optName; // Looks like an option, skip leading '-' + + if (strcmp(optName, "world") == 0) { - worldIndex = argi; if (argi+1 >= argc) { FatalErrorInFunction << "Missing world name for option '-world'" << nl << Foam::abort(FatalError); } - break; + worldName = argv[argi+1]; + + // Remove two arguments (-world name) + for (int i = argi+2; i < argc; ++i) + { + argv[i-2] = argv[i]; + } + argc -= 2; + --argi; // re-examine } - } + else if (strcmp(optName, "mpi-no-comm-dup") == 0) + { + UPstream::noInitialCommDup_ = true; - // Extract world name and filter out '-world <name>' from argv list - word worldName; - if (worldIndex != -1) - { - worldName = argv[worldIndex+1]; - for (label i = worldIndex+2; i < argc; i++) + // Remove one argument + for (int i = argi+1; i < argc; ++i) + { + argv[i-1] = argv[i]; + } + --argc; + --argi; // re-examine + } + else if (strcmp(optName, "mpi-split-by-appnum") == 0) { - argv[i-2] = argv[i]; + split_by_appnum = true; + UPstream::noInitialCommDup_ = true; + + // Remove one argument + for (int i = argi+1; i < argc; ++i) + { + argv[i-1] = argv[i]; + } + --argc; + --argi; // re-examine } - argc -= 2; + } + + const bool hasLocalWorld(!worldName.empty()); + + if (hasLocalWorld && split_by_appnum) + { + FatalErrorInFunction + << "Cannot specify both -world and -mpi-split-by-appnum" << nl + << Foam::abort(FatalError); } int numProcs = 0, globalRanki = 0; @@ -314,7 +364,7 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) << " world:" << worldName << endl; } - if (worldIndex == -1 && numProcs <= 1) + if (numProcs <= 1 && !(hasLocalWorld || split_by_appnum)) { FatalErrorInFunction << "attempt to run parallel on 1 processor" @@ -324,7 +374,7 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) // Initialise parallel structure setParRun(numProcs, provided_thread_support == MPI_THREAD_MULTIPLE); - if (worldIndex != -1) + if (hasLocalWorld) { // Using local worlds. // During startup, so commWorld() == commGlobal() @@ -333,7 +383,7 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) // Gather the names of all worlds and determine unique names/indices. // - // Minimize communication and use low-level MPI to relying on any + // Minimize communication and use low-level MPI to avoid relying on any // OpenFOAM structures which not yet have been created { @@ -452,6 +502,90 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) Pout.prefix() = '[' + worldName + '/' + Foam::name(worldRanki) + "] "; Perr.prefix() = Pout.prefix(); } + else if (split_by_appnum) + { + // Splitting by APPNUM. + // + // During startup, so commWorld() == commGlobal() and both are + // guaranteed to be MPI_COMM_WORLD since the logic automatically + // sets UPstream::noInitialCommDup_ = true (ie, no MPI_Comm_dup) + + const auto mpiGlobalComm = + PstreamGlobals::MPICommunicators_[UPstream::commGlobal()]; + + int appNum(0); + + { + void* val; + int flag; + + MPI_Comm_get_attr(mpiGlobalComm, MPI_APPNUM, &val, &flag); + if (flag) + { + appNum = *static_cast<int*>(val); + } + else + { + appNum = 0; + Perr<< "UPstream::init : used -mpi-split-by-appnum" + " with a single application??" << endl; + } + } + + // New world communicator with comm-global as its parent. + // - the updated (const) world comm does not change after this. + + // Using MPI_APPNUM as the colour for splitting with MPI_Comm_split. + // Do **NOT** use Allgather+Comm_create_group two-step process here + // since other applications will not expect that (ie, deadlock) + + UPstream::constWorldComm_ = + UPstream::splitCommunicator(UPstream::commGlobal(), appNum, false); + + UPstream::worldComm = UPstream::constWorldComm_; + UPstream::warnComm = UPstream::constWorldComm_; + + const int worldRanki = UPstream::myProcNo(UPstream::constWorldComm_); + + // MPI_COMM_SELF : the processor number wrt the new world communicator + if (procIDs_[UPstream::commSelf()].size()) + { + procIDs_[UPstream::commSelf()].front() = worldRanki; + } + + // Name the old world communicator as '<openfoam:global>' + // - it is the inter-world communicator + if (MPI_COMM_NULL != mpiGlobalComm) + { + MPI_Comm_set_name(mpiGlobalComm, "<openfoam:global>"); + } + + const auto mpiWorldComm = + PstreamGlobals::MPICommunicators_[UPstream::constWorldComm_]; + + const word commName("app=" + Foam::name(appNum)); + + if (MPI_COMM_NULL != mpiWorldComm) + { + MPI_Comm_set_name(mpiWorldComm, commName.data()); + } + + if (UPstream::debug) + { + // Check + int newRanki, newSize; + MPI_Comm_rank(mpiWorldComm, &newRanki); + MPI_Comm_size(mpiWorldComm, &newSize); + + Perr<< "UPstream::init : app:" << appNum + << " using local communicator:" << constWorldComm_ + << " rank " << newRanki << " of " << newSize << endl; + } + + // Override Pout prefix (move to setParRun?) + Pout.prefix() = '[' + commName + '/' + Foam::name(worldRanki) + "] "; + Perr.prefix() = Pout.prefix(); + } else { // All processors use world 0 @@ -619,10 +753,10 @@ void Foam::UPstream::exit(int errNo) void Foam::UPstream::abort(int errNo) { - // TBD: only abort on our own communicator? - #if 0 MPI_Comm abortComm = MPI_COMM_WORLD; + // TBD: only abort on our own communicator? + #if 0 const label index = UPstream::commGlobal(); if (index > 0 && index < PstreamGlobals::MPICommunicators_.size()) @@ -633,10 +767,9 @@ void Foam::UPstream::abort(int errNo) abortComm = MPI_COMM_WORLD; } } - MPI_Abort(abortComm, errNo); #endif - MPI_Abort(MPI_COMM_WORLD, errNo); + MPI_Abort(abortComm, errNo); } @@ -665,11 +798,16 @@ void Foam::UPstream::allocateCommunicatorComponents } auto& mpiNewComm = PstreamGlobals::MPICommunicators_[index]; - // PstreamGlobals::pendingMPIFree_[index] = false; - // PstreamGlobals::MPICommunicators_[index] = MPI_COMM_WORLD; - - PstreamGlobals::pendingMPIFree_[index] = true; - MPI_Comm_dup(MPI_COMM_WORLD, &mpiNewComm); + if (UPstream::noInitialCommDup_) + { + PstreamGlobals::pendingMPIFree_[index] = false; + PstreamGlobals::MPICommunicators_[index] = MPI_COMM_WORLD; + } + else + { + PstreamGlobals::pendingMPIFree_[index] = true; + MPI_Comm_dup(MPI_COMM_WORLD, &mpiNewComm); + } MPI_Comm_rank(mpiNewComm, &myProcNo_[index]); @@ -807,7 +945,8 @@ void Foam::UPstream::splitCommunicatorComponents ( const label parentIndex, const label index, - int colour + int colour, + const bool two_step ) { PstreamGlobals::initCommunicator(index); @@ -823,7 +962,7 @@ void Foam::UPstream::splitCommunicatorComponents // the relative rank order when splitting). // // Since MPI_Comm_split() already does an MPI_Allgather() internally - // to pick out the colours (and do any sorting), we can simply to + // to pick out the colours (and do any sorting), we can simply // do the same thing: // // Do the Allgather first and pickout identical colours to define the @@ -840,39 +979,106 @@ void Foam::UPstream::splitCommunicatorComponents MPI_Comm_rank(mpiParentComm, &parentRank); MPI_Comm_size(mpiParentComm, &parentSize); - // Initialize, first marking the 'procIDs_' with the colours auto& procIds = procIDs_[index]; - myProcNo_[index] = -1; - procIds.resize_nocopy(parentSize); - procIds[parentRank] = colour; - - MPI_Allgather - ( - MPI_IN_PLACE, 0, MPI_INT, - procIds.data(), 1, MPI_INT, - mpiParentComm - ); - if (colour < 0) + if (two_step) { - procIds.clear(); + // First gather the colours + procIds.resize_nocopy(parentSize); + procIds[parentRank] = colour; + + MPI_Allgather + ( + MPI_IN_PLACE, 0, MPI_INT, + procIds.data(), 1, MPI_INT, + mpiParentComm + ); + + if (colour < 0) + { + // Not involved + procIds.clear(); + } + else + { + // Select ranks based on the matching colour + int nranks = 0; + for (int i = 0; i < parentSize; ++i) + { + if (procIds[i] == colour) + { + procIds[nranks++] = i; + } + } + procIds.resize(nranks); + } + + allocateCommunicatorComponents(parentIndex, index); } else { - auto last = - std::copy_if + auto& mpiNewComm = PstreamGlobals::MPICommunicators_[index]; + + MPI_Comm_split + ( + mpiParentComm, + (colour >= 0 ? colour : MPI_UNDEFINED), + 0, // maintain relative ordering + &mpiNewComm + ); + + if (MPI_COMM_NULL == mpiNewComm) + { + // Not involved + PstreamGlobals::pendingMPIFree_[index] = false; + procIds.clear(); + } + else + { + PstreamGlobals::pendingMPIFree_[index] = true; + MPI_Comm_rank(mpiNewComm, &myProcNo_[index]); + + // Starting from parent + MPI_Group parent_group; + MPI_Comm_group(mpiParentComm, &parent_group); + + MPI_Group new_group; + MPI_Comm_group(mpiNewComm, &new_group); + + // Parent ranks: identity map + List<int> parentIds(parentSize); + std::iota(parentIds.begin(), parentIds.end(), 0); + + // New ranks: + procIds.resize_nocopy(parentSize); + procIds = -1; // Some extra safety... + + MPI_Group_translate_ranks ( - procIds.cbegin(), - procIds.cend(), - procIds.begin(), - [=](int c){ return (c == colour); } + parent_group, parentSize, parentIds.data(), + new_group, procIds.data() ); - procIds.resize(std::distance(procIds.begin(), last)); - } + // Groups not needed after this... + MPI_Group_free(&parent_group); + MPI_Group_free(&new_group); + + // The corresponding ranks. + // - since old ranks are an identity map, can just use position - allocateCommunicatorComponents(parentIndex, index); + int nranks = 0; + for (int i = 0; i < parentSize; ++i) + { + // Exclude MPI_UNDEFINED and MPI_PROC_NULL etc... + if (procIds[i] >= 0 && procIds[i] < parentSize) + { + procIds[nranks++] = i; + } + } + procIds.resize(nranks); + } + } }