diff --git a/applications/test/globalIndex3/Make/files b/applications/test/globalIndex3/Make/files new file mode 100644 index 0000000000000000000000000000000000000000..257dcf67be1ae6fad655464713115e2346f73887 --- /dev/null +++ b/applications/test/globalIndex3/Make/files @@ -0,0 +1,3 @@ +Test-globalIndex3.cxx + +EXE = $(FOAM_USER_APPBIN)/Test-globalIndex3 diff --git a/applications/test/globalIndex3/Make/options b/applications/test/globalIndex3/Make/options new file mode 100644 index 0000000000000000000000000000000000000000..4ef31e8ea7512fe6edbd055a7563ae78dec5bc90 --- /dev/null +++ b/applications/test/globalIndex3/Make/options @@ -0,0 +1,4 @@ +include $(GENERAL_RULES)/mpi-rules + +EXE_INC = $(PFLAGS) $(PINC) +EXE_LIBS = $(PLIBS) diff --git a/applications/test/globalIndex3/Test-globalIndex3.cxx b/applications/test/globalIndex3/Test-globalIndex3.cxx new file mode 100644 index 0000000000000000000000000000000000000000..19a72499d3290652b284becd53b5dc6597523dfc --- /dev/null +++ b/applications/test/globalIndex3/Test-globalIndex3.cxx @@ -0,0 +1,578 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | www.openfoam.com + \\/ M anipulation | +------------------------------------------------------------------------------- + Copyright (C) 2025 OpenCFD Ltd. +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. + +Application + Test-globalIndex3 + +Description + Tests for globalIndex with node-wise splitting + +\*---------------------------------------------------------------------------*/ + +#include "globalIndex.H" +#include "globalMeshData.H" +#include "argList.H" +#include "Time.H" +#include "polyMesh.H" +#include "IndirectList.H" +#include "IOstreams.H" +#include "Random.H" +#include "openfoam_mpi.H" + +// pre-scan for "-split-size NUM" +int option_splitsize(int argc, char *argv[]) +{ + int ivalue = -1; + for (int argi = 1; argi < argc-1; ++argi) + { + if (strcmp(argv[argi], "-split-size") == 0) + { + ++argi; + ivalue = atoi(argv[argi]); + } + } + + return ivalue; +} + +using namespace Foam; + +template<class T> +void printList(Ostream& os, const UList<T>& list) +{ + os << list.size() << " " << flatOutput(list) << nl; +} + +void printGlobalIndex(Ostream& os, const globalIndex& gi) +{ + printList(os, gi.offsets()); +} + + +template<class ProcIDsContainer, class Type> +void globalIndexGather +( + const labelUList& off, // needed on master only + const label comm, + const ProcIDsContainer& procIDs, + const UList<Type>& fld, + UList<Type>& allFld, // must be adequately sized on master + const int tag, + UPstream::commsTypes commsType, + bool useWindow = false +) +{ + // low-level: no parRun guard + const int masterProci = procIDs.size() ? 
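+    // (gather root: the first rank of procIDs, or rank 0 when the list is empty)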
procIDs[0] : 0; + + // Protection for disjoint calls + if (FOAM_UNLIKELY(!UPstream::is_rank(comm))) + { + FatalErrorInFunction + << "Calling with process not on the communicator" + << Foam::abort(FatalError); + } + + // Require contiguous data for non-blocking + if constexpr (!is_contiguous_v<Type>) + { + if (commsType == UPstream::commsTypes::nonBlocking) + { + commsType = UPstream::commsTypes::scheduled; + } + } + + const label startOfRequests = UPstream::nRequests(); + + + // Very hard-coded at the moment + int returnCode = MPI_SUCCESS; + const int nCmpts = pTraits<Type>::nComponents; + + MPI_Win win; + MPI_Datatype dataType = MPI_DOUBLE; + if (useWindow) + { + using cmptType = typename pTraits<Type>::cmptType; + + if (std::is_same<float, cmptType>::value) + { + dataType = MPI_FLOAT; + } + else if (std::is_same<double, cmptType>::value) + { + dataType = MPI_DOUBLE; + } + else + { + // Not supported + useWindow = false; + } + } + + if (useWindow) + { + MPI_Comm mpiComm = + PstreamUtils::Cast::to_mpi(UPstream::Communicator::lookup(comm)); + + char commName[MPI_MAX_OBJECT_NAME]; + int nameLen = 0; + + if + ( + MPI_COMM_NULL != mpiComm + && MPI_SUCCESS == MPI_Comm_get_name(mpiComm, commName, &nameLen) + && (nameLen > 0) + ) + { + Pout<< "window on " << commName << nl; + } + + if (UPstream::myProcNo(comm) == masterProci || fld.empty()) + { + // Collective + returnCode = MPI_Win_create + ( + nullptr, + 0, + 1, // disp_units + MPI_INFO_NULL, + mpiComm, + &win + ); + } + else + { + // Collective + returnCode = MPI_Win_create + ( + const_cast<char *>(fld.cdata_bytes()), + fld.size_bytes(), + sizeof(Type), // disp_units + MPI_INFO_NULL, + mpiComm, + &win + ); + } + + if (MPI_SUCCESS != returnCode || MPI_WIN_NULL == win) + { + FatalErrorInFunction + << "MPI_Win_create() failed" + << Foam::abort(FatalError); + // return nullptr; + } + } + + + if (UPstream::myProcNo(comm) == masterProci) + { + const label total = off.back(); // == totalSize() + + if (allFld.size() < total) + { + FatalErrorInFunction + << "[out] UList size=" << allFld.size() + << " too small to receive " << total << nl + << Foam::abort(FatalError); + } + + + // Assign my local data - respect offset information + // so that we can request 0 entries to be copied. + // Also handle the case where we have a slice of the full + // list. 
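+        //
+        // For example, with off = (0 3 7 12) (ie, three ranks), the
+        // master copies its own 3 values into allFld[0..2] and then
+        // receives allFld[3..6] and allFld[7..11] from the other ranks.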
+        {
+            SubList<Type> dst(allFld, off[1]-off[0], off[0]);
+            SubList<Type> src(fld, off[1]-off[0]);
+
+            if (!dst.empty() && (dst.data() != src.data()))
+            {
+                dst = src;
+            }
+        }
+
+        if (useWindow)
+        {
+            MPI_Win_lock_all(MPI_MODE_NOCHECK, win);
+        }
+
+        for (label i = 1; i < procIDs.size(); ++i)
+        {
+            SubList<Type> slot(allFld, off[i+1]-off[i], off[i]);
+
+            if (slot.empty())
+            {
+                // Nothing to do
+            }
+            else if (useWindow)
+            {
+                returnCode = MPI_Get
+                (
+                    // origin
+                    slot.data(),
+                    slot.size()*(nCmpts),
+                    dataType,
+
+                    // target
+                    procIDs[i],
+                    0,  // displacement
+                    slot.size()*(nCmpts),
+                    dataType,
+                    win
+                );
+
+                if (MPI_SUCCESS != returnCode)
+                {
+                    FatalErrorInFunction
+                        << "MPI_Get failed"
+                        << Foam::abort(FatalError);
+                }
+            }
+            else if constexpr (is_contiguous_v<Type>)
+            {
+                UIPstream::read
+                (
+                    commsType,
+                    procIDs[i],
+                    slot,
+                    tag,
+                    comm
+                );
+            }
+            else
+            {
+                IPstream::recv(slot, procIDs[i], tag, comm);
+            }
+        }
+
+        if (useWindow)
+        {
+            MPI_Win_unlock_all(win);
+        }
+    }
+    else if (!useWindow)
+    {
+        if (fld.empty())
+        {
+            // Nothing to do
+        }
+        else if constexpr (is_contiguous_v<Type>)
+        {
+            UOPstream::write
+            (
+                commsType,
+                masterProci,
+                fld,
+                tag,
+                comm
+            );
+        }
+        else
+        {
+            OPstream::send(fld, commsType, masterProci, tag, comm);
+        }
+    }
+
+    if (useWindow)
+    {
+        // Collective
+        MPI_Win_free(&win);
+    }
+
+    if (commsType == UPstream::commsTypes::nonBlocking)
+    {
+        // Wait for outstanding requests
+        UPstream::waitRequests(startOfRequests);
+    }
+}
+
+
+// Report inter-node/intra-node offsets
+static void reportOffsets(const globalIndex& gi)
+{
+    labelList interNodeOffsets;
+    labelList localNodeOffsets;
+    labelRange nodeRange;
+
+    const label numProc = UPstream::nProcs(UPstream::commConstWorld());
+
+    gi.splitNodeOffsets
+    (
+        interNodeOffsets,
+        localNodeOffsets,
+        UPstream::worldComm
+    );
+
+    const auto interNodeComm = UPstream::commInterNode();
+
+    // Only communicate to the node leaders
+    labelList allOffsets;
+    if (UPstream::is_rank(interNodeComm))
+    {
+        // Send top-level offsets to the node leaders
+        if (UPstream::master(interNodeComm))
+        {
+            allOffsets = gi.offsets();
+        }
+        else  // ie, UPstream::is_subrank(interNodeComm)
+        {
+            allOffsets.resize_nocopy(numProc+1);
+        }
+
+        UPstream::broadcast
+        (
+            allOffsets.data_bytes(),
+            allOffsets.size_bytes(),
+            interNodeComm
+        );
+    }
+
+    // Ranges (node leaders only)
+    if (UPstream::is_rank(interNodeComm))
+    {
+        const auto& procIds = UPstream::procID(interNodeComm);
+        const int ranki = UPstream::myProcNo(interNodeComm);
+
+        // For reporting
+        nodeRange.reset
+        (
+            procIds[ranki],
+            (
+                (ranki+1 < procIds.size() ? procIds[ranki+1] : numProc)
+              - procIds[ranki]
+            )
+        );
+    }
+
+    Pout<< "node-range: " << nodeRange << nl;
+    Pout<< "all-offset: "; printList(Pout, allOffsets);
+    Pout<< "inter-offset: "; printList(Pout, interNodeOffsets);
+    Pout<< "intra-offset: "; printList(Pout, localNodeOffsets);
+}
+
+
+template<class Type>
+void globalIndexGather
+(
+    const globalIndex& gi,
+    const UList<Type>& sendData,
+    List<Type>& allData,
+    const int tag,
+    const UPstream::commsTypes commsType,
+    const label comm = UPstream::worldComm,
+    bool useWindow = false
+)
+{
+    if (!UPstream::parRun())
+    {
+        // Serial: direct copy
+        allData = sendData;
+        return;
+    }
+
+    if (UPstream::master(comm))
+    {
+        allData.resize_nocopy(gi.offsets().back());  // == totalSize()
+    }
+    else
+    {
+        allData.clear();  // zero-size on non-master
+    }
+
+
+    const auto& offsets = gi.offsets();  // needed on master only
+
+    Info<< "Using node-comms: " << UPstream::usingNodeComms(comm) << nl;
+
+    const auto interNodeComm = UPstream::commInterNode();
+    const auto localNodeComm = UPstream::commLocalNode();
+
+    if (UPstream::usingNodeComms(comm))
+    {
+        // Stage 0 : The inter-node/intra-node offsets
+        labelList interNodeOffsets;
+        labelList localNodeOffsets;
+
+        gi.splitNodeOffsets(interNodeOffsets, localNodeOffsets, comm);
+
+        // The first node re-uses the output (allData) when collecting
+        // content. All other nodes require temporary node-local storage.
+
+        List<Type> tmpNodeData;
+        if (UPstream::is_subrank(interNodeComm))
+        {
+            tmpNodeData.resize(localNodeOffsets.back());
+        }
+
+        List<Type>& nodeData =
+        (
+            UPstream::master(interNodeComm) ? allData : tmpNodeData
+        );
+
+
+        // Stage 1 : Gather data within the node
+        {
+            globalIndexGather
+            (
+                localNodeOffsets,  // (master only)
+                localNodeComm,
+                UPstream::allProcs(localNodeComm),
+                sendData,
+                nodeData,
+                tag,
+                commsType,
+                useWindow
+            );
+        }
+
+        // Stage 2 : Gather data between nodes
+        if (UPstream::is_rank(interNodeComm))
+        {
+            globalIndexGather
+            (
+                interNodeOffsets,  // (master only)
+                interNodeComm,
+                UPstream::allProcs(interNodeComm),
+                nodeData,
+                allData,
+                tag,
+                commsType,
+                useWindow
+            );
+        }
+    }
+    else
+    {
+        globalIndexGather
+        (
+            offsets,  // needed on master only
+            comm,
+            UPstream::allProcs(comm),  // All communicator ranks
+            sendData,
+            allData,
+            tag,
+            commsType,
+            useWindow
+        );
+    }
+}
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+// Main program:
+
+int main(int argc, char *argv[])
+{
+    argList::noCheckProcessorDirectories();
+    argList::addVerboseOption("Set UPstream::debug level");
+    argList::addOption("split-size", "NUM", "Split with given number of cores per node");
+    argList::addBoolOption("builtin", "only use builtin globalIndex::gather");
+    argList::addBoolOption("window", "get data via window");
+
+    // Check -verbose before initialisation
+    UPstream::debug = argList::verbose(argc, argv);
+
+    // Check -split-size before initialisation
+    {
+        int splitSize = option_splitsize(argc, argv);
+
+        if (splitSize >= 0)
+        {
+            UPstream::nodeCommsControl_ = splitSize;
+        }
+    }
+
+    #include "setRootCase.H"
+
+    const bool useLocalComms = UPstream::usingNodeComms();
+    bool useWindow = args.found("window");
+    bool useBuiltin = args.found("builtin");
+
+    Info<< nl
+        << "Getting local-comms: " << Switch::name(useLocalComms) << nl
+        << "Getting data with window: " << Switch::name(useWindow) << nl
+        << nl;
+
+    if (useWindow && useBuiltin)
+    {
+        Info<< "Selected '-window' and '-builtin' : ignoring '-builtin'"
+            << nl;
+        useBuiltin = false;
+    }
+
+    Random rng(31 +
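+    // Rank-dependent (but reproducible) seed:
+    // each rank then draws a different pseudo-random localSize below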
2*UPstream::myProcNo()); + + const label localSize = (5*rng.position<label>(1, 15)); + + globalIndex globIndex + ( + globalIndex::gatherOnly{}, + localSize, + UPstream::commWorld() + ); + + Info<< "global-index: "; + printGlobalIndex(Info, globIndex); + reportOffsets(globIndex); + + Field<scalar> allData; + Field<scalar> localFld(localSize, scalar(UPstream::myProcNo())); + + if (useBuiltin) + { + globIndex.gather + ( + localFld, + allData, + UPstream::msgType(), + UPstream::commsTypes::nonBlocking, + UPstream::commWorld() + ); + } + else + { + globalIndexGather + ( + globIndex, + localFld, + allData, + UPstream::msgType(), + UPstream::commsTypes::nonBlocking, + UPstream::commWorld(), + useWindow + ); + } + + Pout<< "local: " << flatOutput(localFld) << nl; + Info<< "field: " << flatOutput(allData) << nl; + + Info<< "\nEnd\n" << endl; + return 0; +} + + +// ************************************************************************* // diff --git a/applications/test/nodeTopology/Make/files b/applications/test/nodeTopology/Make/files new file mode 100644 index 0000000000000000000000000000000000000000..aa402b759deb27407abb981bb9af93ffa9a30a74 --- /dev/null +++ b/applications/test/nodeTopology/Make/files @@ -0,0 +1,3 @@ +Test-nodeTopology.cxx + +EXE = $(FOAM_USER_APPBIN)/Test-nodeTopology diff --git a/applications/test/nodeTopology/Make/options b/applications/test/nodeTopology/Make/options new file mode 100644 index 0000000000000000000000000000000000000000..18e6fe47afacb902cddccf82632772447704fd88 --- /dev/null +++ b/applications/test/nodeTopology/Make/options @@ -0,0 +1,2 @@ +/* EXE_INC = */ +/* EXE_LIBS = */ diff --git a/applications/test/nodeTopology/Test-nodeTopology.cxx b/applications/test/nodeTopology/Test-nodeTopology.cxx new file mode 100644 index 0000000000000000000000000000000000000000..db4a5eeaf4382fb6771438a9bb8b384a18fdebb7 --- /dev/null +++ b/applications/test/nodeTopology/Test-nodeTopology.cxx @@ -0,0 +1,198 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | www.openfoam.com + \\/ M anipulation | +------------------------------------------------------------------------------- + Copyright (C) 2025 OpenCFD Ltd. +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. 
+
+Application
+    Test-nodeTopology
+
+Description
+    Simple reporting of node topology
+
+\*---------------------------------------------------------------------------*/
+
+#include "argList.H"
+#include "IOstreams.H"
+
+using namespace Foam;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+int main(int argc, char *argv[])
+{
+    argList::noBanner();
+    argList::noCheckProcessorDirectories();
+    argList::addOption
+    (
+        "numProcs",
+        "int",
+        "Num of ranks to simulate (default: 16)"
+    );
+    argList::addOption
+    (
+        "cores",
+        "int",
+        "Num of cores to simulate (default: 4)"
+    );
+
+    #include "setRootCase.H"
+
+    label nProcs = UPstream::nProcs(UPstream::worldComm);
+
+    List<int> interNodeProcs_fake;
+
+    if (UPstream::parRun())
+    {
+        if (args.found("numProcs"))
+        {
+            InfoErr<< "ignoring -numProcs option in parallel" << nl;
+        }
+        if (args.found("cores"))
+        {
+            InfoErr<< "ignoring -cores option in parallel" << nl;
+        }
+    }
+    else
+    {
+        // serial
+        nProcs = args.getOrDefault<label>("numProcs", 16);
+        label nCores = args.getOrDefault<label>("cores", 4);
+
+        if (nCores > 1 && nCores < nProcs)
+        {
+            const label numNodes
+                = (nProcs/nCores) + ((nProcs % nCores) ? 1 : 0);
+
+            interNodeProcs_fake.resize(numNodes);
+
+            for (label nodei = 0; nodei < numNodes; ++nodei)
+            {
+                interNodeProcs_fake[nodei] = nodei * nCores;
+            }
+        }
+    }
+
+    const List<int>& interNodeProcs =
+    (
+        UPstream::parRun()
+      ? UPstream::procID(UPstream::commInterNode())
+      : interNodeProcs_fake
+    );
+
+
+    // Generate the graph
+    if (UPstream::master(UPstream::worldComm))
+    {
+        auto& os = Info.stream();
+
+        os << "// node topology graph:" << nl;
+        os.beginBlock("graph");
+
+        // Prefer left-to-right layout for large graphs
+        os << indent << "rankdir=LR" << nl;
+
+        int pos = 0;
+
+        // First level: the inter-node connections
+        const label parent = 0;
+        for (const auto proci : interNodeProcs)
+        {
+            if (parent == proci) continue;
+
+            if (pos)
+            {
+                os << "  ";
+            }
+            else
+            {
+                os << indent;
+            }
+            os << parent << " -- " << proci;
+
+            if (++pos >= 4)  // Max 4 items per line
+            {
+                pos = 0;
+                os << nl;
+            }
+        }
+
+        if (pos)
+        {
+            pos = 0;
+            os << nl;
+        }
+
+        // Next level: connections within the nodes
+        for (label nodei = 0; nodei < interNodeProcs.size(); ++nodei)
+        {
+            pos = 0;
+
+            label firstProc = interNodeProcs[nodei];
+            const label lastProc =
+            (
+                (nodei+1 < interNodeProcs.size())
+              ? interNodeProcs[nodei+1]
+              : nProcs
+            );
+
+            os << indent << "// node " << nodei
+                << " [" << firstProc
+                << ".."
<< lastProc-1 << "]" << nl; + + for (label proci = firstProc; proci < lastProc; ++proci) + { + if (firstProc == proci) continue; + + if (pos) + { + os << " "; + } + else + { + os << indent; + } + os << firstProc << " -- " << proci; + + if (++pos >= 4) // Max 4 items per line + { + pos = 0; + os << nl; + } + } + if (pos) + { + pos = 0; + os << nl; + } + } + + os.endBlock(); + os << "// end graph" << nl; + } + + InfoErr << "\nDone" << nl; + return 0; +} + + +// ************************************************************************* // diff --git a/applications/test/parallel-comm1/Test-parallel-comm1.C b/applications/test/parallel-comm1/Test-parallel-comm1.C index 33a6bd5bdcb54a81a0abd1eb98a01eb22d412026..b9b10f442be5a87042c0935fdc6c3d05b18d8bcc 100644 --- a/applications/test/parallel-comm1/Test-parallel-comm1.C +++ b/applications/test/parallel-comm1/Test-parallel-comm1.C @@ -158,7 +158,7 @@ int main(int argc, char *argv[]) for (label count = 0; count < repeat; ++count) { - label comm = UPstream::allocateCommunicator(UPstream::worldComm, top); + label comm = UPstream::newCommunicator(UPstream::worldComm, top); scalar localValue = 111*UPstream::myProcNo(UPstream::worldComm); diff --git a/applications/test/parallel-comm2/Test-parallel-comm2.C b/applications/test/parallel-comm2/Test-parallel-comm2.C index 38fc45361f90d185bb732a2b0bc0c3c64a4289ca..b5f01bc5040092da6300440cf7d7f8498d2bff3f 100644 --- a/applications/test/parallel-comm2/Test-parallel-comm2.C +++ b/applications/test/parallel-comm2/Test-parallel-comm2.C @@ -68,14 +68,14 @@ int main(int argc, char *argv[]) argList::noCheckProcessorDirectories(); argList::addBoolOption("info", "information"); argList::addBoolOption("print-tree", "Report tree(s) as graph"); - argList::addBoolOption("comm-split", "Test simple comm split"); - argList::addBoolOption("mpi-host-comm", "Test DIY host-comm split"); + argList::addBoolOption("no-test", "Disable general tests"); argList::addBoolOption("host-comm", "Test Pstream host-comm"); argList::addBoolOption("host-broadcast", "Test host-base broadcasts"); #include "setRootCase.H" const bool optPrintTree = args.found("print-tree"); + bool generalTest = !args.found("no-test"); Info<< nl << "parallel:" << UPstream::parRun() @@ -89,6 +89,18 @@ int main(int argc, char *argv[]) UPstream::printCommTree(UPstream::commWorld()); } + if (UPstream::parRun()) + { + Pout<< "world ranks: 0.." 
+ << UPstream::nProcs(UPstream::commWorld())-1 << nl; + + Pout<< "inter-node ranks: " << UPstream::numNodes() << ' ' + << flatOutput(UPstream::procID(UPstream::commInterNode())) << nl; + + Pout<< "local-node ranks: " + << flatOutput(UPstream::procID(UPstream::commLocalNode())) << nl; + } + if (args.found("info")) { Info<< nl; @@ -104,334 +116,29 @@ int main(int argc, char *argv[]) Pout<< endl; } - bool generalTest = true; - - if (UPstream::parRun() && args.found("comm-split")) - { - generalTest = false; - - int world_nprocs = 0; - int world_rank = -1; - MPI_Comm_size(MPI_COMM_WORLD, &world_nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - - int host_nprocs = 0; - int host_rank = -1; - MPI_Comm commIntraHost; - MPI_Comm_split_type - ( - MPI_COMM_WORLD, - MPI_COMM_TYPE_SHARED, // OMPI_COMM_TYPE_NODE - 0, MPI_INFO_NULL, &commIntraHost - ); - - MPI_Comm_size(commIntraHost, &host_nprocs); - MPI_Comm_rank(commIntraHost, &host_rank); - - int leader_nprocs = 0; - int leader_rank = -1; - MPI_Comm commInterHost; - - if (false) - { - // Easy enough to use MPI_Comm_split, but slightly annoying - // that it returns MPI_COMM_NULL for unused ranks... - MPI_Comm commInterHost; - MPI_Comm_split - ( - MPI_COMM_WORLD, - (host_rank == 0) ? 0 : MPI_UNDEFINED, - 0, &commInterHost - ); - - if (commInterHost != MPI_COMM_NULL) - { - MPI_Comm_size(commInterHost, &leader_nprocs); - MPI_Comm_rank(commInterHost, &leader_rank); - } - } - else - { - boolList isHostLeader(world_nprocs, false); - isHostLeader[world_rank] = (host_rank == 0); - - MPI_Allgather - ( - // recv is also send - MPI_IN_PLACE, 1, MPI_C_BOOL, - isHostLeader.data(), 1, MPI_C_BOOL, - MPI_COMM_WORLD - ); - - Pout<< "leaders: " << isHostLeader << endl; - - DynamicList<int> subRanks(isHostLeader.size()); - forAll(isHostLeader, proci) - { - if (isHostLeader[proci]) - { - subRanks.push_back(proci); - } - } - // Starting from parent - MPI_Group parent_group; - MPI_Comm_group(MPI_COMM_WORLD, &parent_group); - - MPI_Group active_group; - MPI_Group_incl - ( - parent_group, - subRanks.size(), - subRanks.cdata(), - &active_group - ); - - // Create new communicator for this group - MPI_Comm_create_group - ( - MPI_COMM_WORLD, - active_group, - UPstream::msgType(), - &commInterHost - ); - - // Groups not needed after this... 
- MPI_Group_free(&parent_group); - MPI_Group_free(&active_group); - - MPI_Comm_size(commInterHost, &leader_nprocs); - MPI_Comm_rank(commInterHost, &leader_rank); - } - - Pout<< nl << "[MPI_Comm_split_type]" << nl - << "Host rank " << host_rank << " / " << host_nprocs - << " on " << hostName() - << " inter-rank: " << leader_rank << " / " << leader_nprocs - << " host leader:" << (leader_rank == 0) - << " sub-rank:" << (leader_rank > 0) - << nl; - - if (commInterHost != MPI_COMM_NULL) - { - MPI_Comm_free(&commInterHost); - } - if (commIntraHost != MPI_COMM_NULL) - { - MPI_Comm_free(&commIntraHost); - } - } - - if (UPstream::parRun() && args.found("mpi-host-comm")) - { - generalTest = false; - - // Host communicator, based on the current world communicator - // Use hostname - // Lowest rank per hostname is the IO rank - - label numprocs = UPstream::nProcs(UPstream::commGlobal()); - - // Option 1: using hostnames - // - pro: trivial coding - // - con: unequal lengths, more allocations and 'hops' - stringList hosts(numprocs); - hosts[Pstream::myProcNo(UPstream::commGlobal())] = hostName(); - Pstream::gatherList(hosts, UPstream::msgType(), UPstream::commGlobal()); - - - // Option 2: using SHA1 of hostnames - // - con: uglier coding (but only needed locally!) - // - pro: fixed digest length enables direct MPI calls - // can avoid Pstream::gatherList() during setup... - - List<SHA1Digest> digests; - if (UPstream::master(UPstream::commGlobal())) - { - digests.resize(numprocs); - } - - { - const SHA1Digest myDigest(SHA1(hostName()).digest()); - - UPstream::mpiGather - ( - myDigest.cdata_bytes(), // Send - digests.data_bytes(), // Recv - SHA1Digest::max_size(), // Num send/recv per rank - UPstream::commGlobal() - ); - } - - - labelList hostIDs(numprocs); - DynamicList<label> subRanks(numprocs); - - Info<< "digests: " << digests << nl; - - // Compact numbering - if (UPstream::master(UPstream::commGlobal())) - { - DynamicList<word> hostNames(numprocs); - - forAll(hosts, proci) - { - const word& host = hosts[proci]; - - hostIDs[proci] = hostNames.find(host); - - if (hostIDs[proci] < 0) - { - // First appearance of host (encode as leader) - hostIDs[proci] = -(hostNames.size() + 1); - hostNames.push_back(host); - } - } - hostIDs = -1; - - - DynamicList<SHA1Digest> uniqDigests(numprocs); - - forAll(digests, proci) - { - const SHA1Digest& dig = digests[proci]; - - hostIDs[proci] = uniqDigests.find(dig); - - if (hostIDs[proci] < 0) - { - // First appearance of host (encode as leader) - hostIDs[proci] = -(uniqDigests.size() + 1); - uniqDigests.push_back(dig); - } - } - } - - - Info<< "hosts = " << hosts << endl; - Info<< "hostIDs = " << hostIDs << endl; - - UPstream::broadcast - ( - hostIDs.data_bytes(), - hostIDs.size_bytes(), - UPstream::commGlobal(), - UPstream::masterNo() - ); - - // Ranks for world to inter-host communicator - // - very straightforward - - #if 0 - subRanks.clear(); - forAll(hostIDs, proci) - { - // Is host leader? 
- if (hostIDs[proci] < 0) - { - subRanks.push_back(proci); - - // Flip back to generic host id - hostIDs[proci] = -(hostIDs[proci] + 1); - } - } - - // From world to hostMaster - const label commInterHost = - UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks); - #endif - - const label myWorldProci = UPstream::myProcNo(UPstream::commGlobal()); - - label myHostId = hostIDs[myWorldProci]; - if (myHostId < 0) myHostId = -(myHostId + 1); // Flip to generic id - - // Ranks for within a host - subRanks.clear(); - forAll(hostIDs, proci) - { - label id = hostIDs[proci]; - if (id < 0) id = -(id + 1); // Flip to generic id - - if (id == myHostId) - { - subRanks.push_back(proci); - } - } - - // The intra-host ranks - const label commIntraHost = - UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks); - - - // Test what if we have intra-host comm and we want host-master - - List<bool> isHostMaster(numprocs, false); - if (UPstream::master(commIntraHost)) - { - isHostMaster[myWorldProci] = true; - } - - UPstream::mpiAllGather - ( - isHostMaster.data_bytes(), - sizeof(bool), - UPstream::commGlobal() - ); - - // Ranks for world to hostMaster - // - very straightforward - subRanks.clear(); - forAll(isHostMaster, proci) - { - if (isHostMaster[proci]) - { - subRanks.push_back(proci); - } - } - - // From world to hostMaster - const label commInterHost = - UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks); - - - Pout<< nl << "[manual split]" << nl - << nl << "Host rank " << UPstream::myProcNo(commIntraHost) - << " / " << UPstream::nProcs(commIntraHost) - << " on " << hostName() - << ", inter-rank: " << UPstream::myProcNo(commInterHost) - << " / " << UPstream::nProcs(commInterHost) - << " host leader:" << UPstream::master(commInterHost) - << " sub-rank:" << UPstream::is_subrank(commInterHost) - << nl; - - UPstream::freeCommunicator(commInterHost); - UPstream::freeCommunicator(commIntraHost); - } - if (UPstream::parRun() && args.found("host-comm")) { generalTest = false; Info<< nl << "[pstream host-comm]" << nl << endl; - const label commInterHost = UPstream::commInterHost(); - const label commIntraHost = UPstream::commIntraHost(); + const label commInterNode = UPstream::commInterNode(); + const label commLocalNode = UPstream::commLocalNode(); - Pout<< "Host rank " << UPstream::myProcNo(commIntraHost) - << " / " << UPstream::nProcs(commIntraHost) + Pout<< "Host rank " << UPstream::myProcNo(commLocalNode) + << " / " << UPstream::nProcs(commLocalNode) << " on " << hostName() - << ", inter-rank: " << UPstream::myProcNo(commInterHost) - << " / " << UPstream::nProcs(commInterHost) - << ", host leader:" << UPstream::master(commInterHost) - << " sub-rank:" << UPstream::is_subrank(commInterHost) + << ", inter-rank: " << UPstream::myProcNo(commInterNode) + << " / " << UPstream::nProcs(commInterNode) + << ", host leader:" << UPstream::master(commInterNode) + << " sub-rank:" << UPstream::is_subrank(commInterNode) << endl; - { Info<< "host-master: " - << UPstream::whichCommunication(commInterHost) << endl; + << UPstream::whichCommunication(commInterNode) << endl; - UPstream::printCommTree(commInterHost); - UPstream::printCommTree(commIntraHost); + UPstream::printCommTree(commInterNode); + UPstream::printCommTree(commLocalNode); } } @@ -440,32 +147,32 @@ int main(int argc, char *argv[]) generalTest = false; Info<< nl << "[pstream host-broadcast]" << nl << endl; - const label commInterHost = UPstream::commInterHost(); - const label commIntraHost = UPstream::commIntraHost(); + const label 
commInterNode = UPstream::commInterNode(); + const label commLocalNode = UPstream::commLocalNode(); Pout<< "world rank: " << UPstream::myProcNo(UPstream::commWorld()) << " host-leader rank: " - << UPstream::myProcNo(UPstream::commInterHost()) + << UPstream::myProcNo(UPstream::commInterNode()) << " intra-host rank: " - << UPstream::myProcNo(UPstream::commIntraHost()) + << UPstream::myProcNo(UPstream::commLocalNode()) << endl; label value1(0), value2(0), value3(0); - label hostIndex = UPstream::myProcNo(commInterHost); + label hostIndex = UPstream::myProcNo(commInterNode); - if (UPstream::master(commInterHost)) + if (UPstream::master(commInterNode)) { value1 = 100; value2 = 200; } - if (UPstream::master(commIntraHost)) + if (UPstream::master(commLocalNode)) { value3 = 300; } - Pstream::broadcast(value1, commInterHost); - Pstream::broadcast(value2, commIntraHost); - Pstream::broadcast(hostIndex, commIntraHost); + Pstream::broadcast(value1, commInterNode); + Pstream::broadcast(value2, commLocalNode); + Pstream::broadcast(hostIndex, commLocalNode); Pout<< "host: " << hostIndex << " broadcast 1: " @@ -474,7 +181,7 @@ int main(int argc, char *argv[]) << value3 << endl; // re-broadcast - Pstream::broadcast(value1, commIntraHost); + Pstream::broadcast(value1, commLocalNode); Pout<< "host: " << hostIndex << " broadcast 2: " << value1 << endl; @@ -483,42 +190,42 @@ int main(int argc, char *argv[]) label reduced1 = value1; label reduced2 = value1; - reduce + Foam::reduce ( reduced1, sumOp<label>(), UPstream::msgType(), - commIntraHost + commLocalNode ); - reduce + Foam::reduce ( reduced2, sumOp<label>(), UPstream::msgType(), - commInterHost + commInterNode ); Pout<< "value1: (host) " << reduced1 << " (leader) " << reduced2 << endl; - // Pout<< "ranks: " << UPstream::nProcs(commInterHost) << endl; + // Pout<< "ranks: " << UPstream::nProcs(commInterNode) << endl; wordList strings; - if (UPstream::is_rank(commInterHost)) + if (UPstream::is_rank(commInterNode)) { - strings.resize(UPstream::nProcs(commInterHost)); - strings[UPstream::myProcNo(commInterHost)] = name(pid()); + strings.resize(UPstream::nProcs(commInterNode)); + strings[UPstream::myProcNo(commInterNode)] = name(pid()); } // Some basic gather/scatter - Pstream::allGatherList(strings, UPstream::msgType(), commInterHost); + Pstream::allGatherList(strings, UPstream::msgType(), commInterNode); Pout<< "pids " << flatOutput(strings) << endl; Foam::reverse(strings); - Pstream::broadcast(strings, commIntraHost); + Pstream::broadcast(strings, commLocalNode); Pout<< "PIDS " << flatOutput(strings) << endl; } diff --git a/applications/test/treeComms/Test-treeComms.C b/applications/test/treeComms/Test-treeComms.C index 832f1459f7ab8557a3791c33f1a31aa5310fa9e5..7408358464c16d7137bf10dea8b42006542f1f1a 100644 --- a/applications/test/treeComms/Test-treeComms.C +++ b/applications/test/treeComms/Test-treeComms.C @@ -51,7 +51,7 @@ void printConnection(Ostream& os, const label proci, const labelUList& below) // The number of receives - as per gatherList (v2112) void printRecvCount_gatherList ( - const UList<UPstream::commsStruct>& comms, + const UPstream::commsStructList& comms, const label comm = UPstream::worldComm ) { @@ -91,7 +91,7 @@ void printRecvCount_gatherList // The number of sends - as per scatterList (v2112) void printSendCount_scatterList ( - const UList<UPstream::commsStruct>& comms, + const UPstream::commsStructList& comms, const label comm = UPstream::worldComm ) { @@ -131,7 +131,7 @@ void printSendCount_scatterList // Transmission widths 
(contiguous data)
 void printWidths
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     const label comm = UPstream::worldComm
 )
 {
diff --git a/etc/controlDict b/etc/controlDict
index 5ca48e8d73e0494c07dcc487faf69832951886ad..700a988be1b17132c4d64bacce40de45104bb2be 100644
--- a/etc/controlDict
+++ b/etc/controlDict
@@ -135,6 +135,18 @@ OptimisationSwitches
     // Default communication type (nonBlocking | scheduled | buffered)
     commsType       nonBlocking;
 
+    // Use host/node topology-aware routines
+    //   0: disabled
+    //   1: split by hostname [default]
+    //   2: split by shared-memory
+    //   >=4: (debug/manual) split with given number per node
+    nodeComms       1;
+
+    // Minimum number of nodes before topology-aware routines are enabled
+    //   <= 2 : always
+    //   >= 3 : only when there are N or more nodes
+    nodeComms.min   0;
+
     // Transfer double as float for processor boundaries. Mostly defunct.
     floatTransfer   0;
 
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
index 04e3009503767a627469eb0ad385daa40cb11942..05dc64eb5135757473d450a61390151621965a8f 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
@@ -310,7 +310,7 @@ public:
         template<class T>
         static void gatherList
         (
-            const UList<commsStruct>& comms,
+            const UPstream::commsStructList& comms,
             //! [in,out]
             UList<T>& values,
             const int tag,
@@ -349,7 +349,7 @@ public:
         template<class T>
         static void scatterList
         (
-            const UList<commsStruct>& comms,
+            const UPstream::commsStructList& comms,
             UList<T>& values,
             const int tag,
             const label comm
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C
index 267d3d1c98cc19bf416cca1b7f4df688af788304..edc269a067819e71fbf064bcbab83b3f7bb52ed5 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C
@@ -45,7 +45,7 @@ Description
 template<class T>
 void Foam::Pstream::gatherList
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     UList<T>& values,
     const int tag,
     const label comm
@@ -190,7 +190,7 @@ void Foam::Pstream::gatherList
 template<class T>
 void Foam::Pstream::scatterList
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     UList<T>& values,
     const int tag,
     const label comm
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
index 9553a4bb73b97fc80f36c850a1bbb55bc897e9ab..4f47d5f98377194a56fa177de4bc9a8f18624094 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2017 OpenFOAM Foundation
-    Copyright (C) 2015-2023 OpenCFD Ltd.
+    Copyright (C) 2015-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -58,82 +58,6 @@ Foam::UPstream::commsTypeNames
 });
 
 
-// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
-
-namespace Foam
-{
-
-// Determine host grouping.
-// Uses SHA1 of hostname instead of MPI_Comm_split or MPI_Comm_split_type -// for two reasons: -// - Comm_split returns an MPI_COMM_NULL on non-participating process -// which does not easily fit into the OpenFOAM framework -// -// - use the SHA1 of hostname allows a single MPI_Gather, determination of -// the inter-host vs intra-host (on the master) followed by a single -// broadcast of integers. -// -// Returns: the unique host indices with the leading hosts encoded -// with negative values -static List<int> getHostGroupIds(const label parentCommunicator) -{ - const label numProcs = UPstream::nProcs(parentCommunicator); - - List<SHA1Digest> digests; - if (UPstream::master(parentCommunicator)) - { - digests.resize(numProcs); - } - - // Could also add lowercase etc, but since hostName() - // will be consistent within the same node, there is no need. - SHA1Digest myDigest(SHA1(hostName()).digest()); - - // The fixed-length digest allows use of MPI_Gather - UPstream::mpiGather - ( - myDigest.cdata_bytes(), // Send - digests.data_bytes(), // Recv - SHA1Digest::size_bytes(), // Num send/recv data per rank - parentCommunicator - ); - - List<int> hostIDs(numProcs); - - // Compact numbering of hosts. - if (UPstream::master(parentCommunicator)) - { - DynamicList<SHA1Digest> uniqDigests; - - forAll(digests, proci) - { - const SHA1Digest& dig = digests[proci]; - - hostIDs[proci] = uniqDigests.find(dig); - - if (hostIDs[proci] < 0) - { - // First appearance of host. Encode as leader - hostIDs[proci] = -(uniqDigests.size() + 1); - uniqDigests.push_back(dig); - } - } - } - - UPstream::broadcast - ( - hostIDs.data_bytes(), - hostIDs.size_bytes(), - parentCommunicator, - UPstream::masterNo() - ); - - return hostIDs; -} - -} // End namespace Foam - - // * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * // void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads) @@ -158,7 +82,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads) freeCommunicator(UPstream::commGlobal()); // 0: COMM_WORLD : commWorld() / commGlobal() - comm = allocateCommunicator(-1, singleProc, false); + comm = newCommunicator(-1, singleProc, false); if (comm != UPstream::commGlobal()) { // Failed sanity check @@ -169,7 +93,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads) } // 1: COMM_SELF - comm = allocateCommunicator(-2, singleProc, false); + comm = newCommunicator(-2, singleProc, false); if (comm != UPstream::commSelf()) { // Failed sanity check @@ -192,7 +116,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads) freeCommunicator(UPstream::commGlobal()); // 0: COMM_WORLD : commWorld() / commGlobal() - comm = allocateCommunicator(-1, labelRange(nProcs), true); + comm = newCommunicator(-1, labelRange(nProcs), true); if (comm != UPstream::commGlobal()) { // Failed sanity check @@ -202,10 +126,12 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads) << Foam::exit(FatalError); } + const int globalRanki = UPstream::myProcNo(UPstream::commGlobal()); + // 1: COMM_SELF // - Processor number wrt world communicator - singleProc.start() = UPstream::myProcNo(UPstream::commGlobal()); - comm = allocateCommunicator(-2, singleProc, true); + singleProc.start() = globalRanki; + comm = newCommunicator(-2, singleProc, true); if (comm != UPstream::commSelf()) { // Failed sanity check @@ -215,7 +141,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads) << Foam::exit(FatalError); } - 
Pout.prefix() = '[' + Foam::name(myProcNo(commGlobal())) + "] "; + Pout.prefix() = '[' + std::to_string(globalRanki) + "] "; Perr.prefix() = Pout.prefix(); } @@ -243,6 +169,7 @@ Foam::label Foam::UPstream::getAvailableCommIndex(const label parentIndex) parentComm_[index] = parentIndex; procIDs_[index].clear(); + // Sizing and filling are demand-driven linearCommunication_[index].clear(); treeCommunication_[index].clear(); } @@ -255,15 +182,16 @@ Foam::label Foam::UPstream::getAvailableCommIndex(const label parentIndex) parentComm_.push_back(parentIndex); procIDs_.emplace_back(); - linearCommunication_.emplace_back(); - treeCommunication_.emplace_back(); + // Sizing and filling are demand-driven + linearCommunication_.emplace_back(index); + treeCommunication_.emplace_back(index); } return index; } -Foam::label Foam::UPstream::allocateCommunicator +Foam::label Foam::UPstream::newCommunicator ( const label parentIndex, const labelRange& subRanks, @@ -274,57 +202,46 @@ Foam::label Foam::UPstream::allocateCommunicator if (debug) { - Perr<< "Allocating communicator " << index << nl - << " parent : " << parentIndex << nl - << " procs : " << subRanks << nl + Perr<< "Allocate communicator [" + << index << "] from [" << parentIndex + << "] ranks : " << subRanks << nl << endl; } // Initially treat as master, // overwritten by allocateCommunicatorComponents myProcNo_[index] = UPstream::masterNo(); + auto& procIds = procIDs_[index]; // The selected sub-ranks. // - transcribe from label to int - // - already in incremental order - auto& procIds = procIDs_[index]; - procIds.resize_nocopy(subRanks.size()); - - label numSubRanks = 0; - for (const label subRanki : subRanks) + // - already in monotonic order + if + ( + (withComponents && UPstream::parRun()) + ? (parentIndex < 0 || subRanks.contains(myProcNo_[parentIndex])) + : !subRanks.empty() + ) { - procIds[numSubRanks] = subRanki; - ++numSubRanks; + procIds.resize_nocopy(subRanks.size()); + std::iota(procIds.begin(), procIds.end(), subRanks.start()); + } + else + { + // Not involved + procIds.clear(); } - // Sizing and filling are demand-driven - linearCommunication_[index].clear(); - treeCommunication_[index].clear(); - - if (withComponents && parRun()) + if (withComponents && UPstream::parRun()) { allocateCommunicatorComponents(parentIndex, index); - - // Could 'remember' locations of uninvolved ranks - /// if (myProcNo_[index] < 0 && parentIndex >= 0) - /// { - /// // As global rank - /// myProcNo_[index] = -(myProcNo_[worldComm]+1); - /// - /// OR: - /// // As parent rank number - /// if (myProcNo_[parentIndex] >= 0) - /// { - /// myProcNo_[index] = -(myProcNo_[parentIndex]+1); - /// } - /// } } return index; } -Foam::label Foam::UPstream::allocateCommunicator +Foam::label Foam::UPstream::newCommunicator ( const label parentIndex, const labelUList& subRanks, @@ -335,236 +252,288 @@ Foam::label Foam::UPstream::allocateCommunicator if (debug) { - Perr<< "Allocating communicator " << index << nl - << " parent : " << parentIndex << nl - << " procs : " << flatOutput(subRanks) << nl + Perr<< "Allocate communicator [" + << index << "] from [" << parentIndex + << "] ranks : " << flatOutput(subRanks) << nl << endl; } // Initially treat as master, // overwritten by allocateCommunicatorComponents myProcNo_[index] = UPstream::masterNo(); - - // The selected sub-ranks. - // - transcribe from label to int. 
Treat negative values as 'ignore' - // - enforce incremental order (so index is rank in next communicator) - auto& procIds = procIDs_[index]; - procIds.resize_nocopy(subRanks.size()); - label numSubRanks = 0; - bool monotonicOrder = true; - for (const label subRanki : subRanks) + // The selected sub-ranks. + // - transcribe from label to int + // - sort into monotonic order (if needed) + if + ( + (withComponents && UPstream::parRun()) + ? (parentIndex < 0 || subRanks.contains(myProcNo_[parentIndex])) + : !subRanks.empty() + ) { - if (subRanki < 0) + procIds.resize_nocopy(subRanks.size()); + + label count = 0; + bool monotonicOrder = true; + for (const auto ranki : subRanks) { - continue; + if (ranki < 0) + { + continue; + } + // Could also flag/ignore out-of-range ranks + // (ranki >= numProcs) + + if (monotonicOrder && count) + { + monotonicOrder = (procIds[count-1] < ranki); + } + + procIds[count] = ranki; + ++count; } - if (monotonicOrder && numSubRanks) + + if (!monotonicOrder) { - monotonicOrder = (procIds[numSubRanks-1] < subRanki); + auto last = procIds.begin() + count; + std::sort(procIds.begin(), last); + last = std::unique(procIds.begin(), last); + count = label(last - procIds.begin()); } - procIds[numSubRanks] = subRanki; - ++numSubRanks; + procIds.resize(count); } - - if (!monotonicOrder) + else { - auto last = procIds.begin() + numSubRanks; - std::sort(procIds.begin(), last); - last = std::unique(procIds.begin(), last); - numSubRanks = label(last - procIds.begin()); + // Not involved + procIds.clear(); } - procIds.resize(numSubRanks); - - // Sizing and filling are demand-driven - linearCommunication_[index].clear(); - treeCommunication_[index].clear(); - - if (withComponents && parRun()) + if (withComponents && UPstream::parRun()) { allocateCommunicatorComponents(parentIndex, index); - - // Could 'remember' locations of uninvolved ranks - /// if (myProcNo_[index] < 0 && parentIndex >= 0) - /// { - /// // As global rank - /// myProcNo_[index] = -(myProcNo_[worldComm]+1); - /// - /// OR: - /// // As parent rank number - /// if (myProcNo_[parentIndex] >= 0) - /// { - /// myProcNo_[index] = -(myProcNo_[parentIndex]+1); - /// } - /// } } return index; } -Foam::label Foam::UPstream::allocateInterHostCommunicator +Foam::label Foam::UPstream::dupCommunicator ( - const label parentCommunicator + const label parentIndex ) { - List<int> hostIDs = getHostGroupIds(parentCommunicator); + #ifdef FULLDEBUG + if (FOAM_UNLIKELY(parentIndex < 0)) + { + // Failed sanity check + FatalErrorInFunction + << "Attempted to duplicate an invalid communicator: " + << parentIndex + << Foam::exit(FatalError); + } + #endif - DynamicList<label> subRanks(hostIDs.size()); + const label index = getAvailableCommIndex(parentIndex); - // From master to host-leader. Ranks between hosts. - forAll(hostIDs, proci) + if (debug) { - // Is host leader? 
- if (hostIDs[proci] < 0) - { - subRanks.push_back(proci); - } + Perr<< "Duplicate communicator [" + << index << "] from [" << parentIndex << "]" << endl; + } + + // Initially treat as unknown, + // overwritten by dupCommunicatorComponents + myProcNo_[index] = -1; + procIDs_[index].clear(); + + if (UPstream::parRun()) + { + dupCommunicatorComponents(parentIndex, index); } - return allocateCommunicator(parentCommunicator, subRanks); + return index; } -Foam::label Foam::UPstream::allocateIntraHostCommunicator +Foam::label Foam::UPstream::splitCommunicator ( - const label parentCommunicator + const label parentIndex, + const int colour ) { - List<int> hostIDs = getHostGroupIds(parentCommunicator); - - DynamicList<label> subRanks(hostIDs.size()); + #ifdef FULLDEBUG + if (FOAM_UNLIKELY(parentIndex < 0)) + { + // Failed sanity check + FatalErrorInFunction + << "Attempted to split an invalid communicator: " + << parentIndex + << Foam::exit(FatalError); + } + #endif - // Intra-host ranks. Ranks within a host - int myHostId = hostIDs[UPstream::myProcNo(parentCommunicator)]; - if (myHostId < 0) myHostId = -(myHostId + 1); // Flip to generic id + const label index = getAvailableCommIndex(parentIndex); - forAll(hostIDs, proci) + if (debug) { - int id = hostIDs[proci]; - if (id < 0) id = -(id + 1); // Flip to generic id + Perr<< "Split communicator [" + << index << "] from [" << parentIndex + << "] using colour=" << colour << endl; + } - if (id == myHostId) - { - subRanks.push_back(proci); - } + // Initially treat as unknown, + // overwritten by splitCommunicatorComponents + myProcNo_[index] = -1; + procIDs_[index].clear(); + + if (UPstream::parRun()) + { + splitCommunicatorComponents(parentIndex, index, colour); } - return allocateCommunicator(parentCommunicator, subRanks); + return index; } -bool Foam::UPstream::allocateHostCommunicatorPairs() +bool Foam::UPstream::setHostCommunicators(const int numPerNode) { - // Use the world communicator (not global communicator) - const label parentCommunicator = worldComm; + // Uses the world communicator (not global communicator) // Skip if non-parallel - if (!parRun()) + if (!UPstream::parRun()) { + numNodes_ = 1; return false; } - if (interHostComm_ >= 0 || intraHostComm_ >= 0) + if (FOAM_UNLIKELY(commInterNode_ >= 0 || commLocalNode_ >= 0)) { // Failed sanity check FatalErrorInFunction - << "Host communicator(s) already created!" << endl - << Foam::exit(FatalError); + << "Node communicator(s) already created!" 
<< endl
+            << Foam::abort(FatalError);
 
         return false;
     }
 
-    interHostComm_ = getAvailableCommIndex(parentCommunicator);
-    intraHostComm_ = getAvailableCommIndex(parentCommunicator);
+    commInterNode_ = getAvailableCommIndex(constWorldComm_);
+    commLocalNode_ = getAvailableCommIndex(constWorldComm_);
+
+    // Overwritten later
+    myProcNo_[commInterNode_] = UPstream::masterNo();
+    myProcNo_[commLocalNode_] = UPstream::masterNo();
 
     // Sorted order, purely cosmetic
-    if (intraHostComm_ < interHostComm_)
+    if (commLocalNode_ < commInterNode_)
     {
-        std::swap(intraHostComm_, interHostComm_);
+        std::swap(commLocalNode_, commInterNode_);
     }
 
-    // Overwritten later
-    myProcNo_[intraHostComm_] = UPstream::masterNo();
-    myProcNo_[interHostComm_] = UPstream::masterNo();
-
     if (debug)
     {
-        Perr<< "Allocating host communicators "
-            << interHostComm_ << ", " << intraHostComm_ << nl
-            << "    parent : " << parentCommunicator << nl
+        Perr<< "Allocating node communicators "
+            << commInterNode_ << ", " << commLocalNode_
+            << " on parent : " << constWorldComm_ << nl
             << endl;
     }
 
-    List<int> hostIDs = getHostGroupIds(parentCommunicator);
+    const int worldRank = UPstream::myProcNo(constWorldComm_);
+    const int worldSize = UPstream::nProcs(constWorldComm_);
 
-    DynamicList<int> subRanks(hostIDs.size());
-
-    // From master to host-leader. Ranks between hosts.
+    if (numPerNode > 1)
     {
-        subRanks.clear();
-        forAll(hostIDs, proci)
+        // Manual splitting based on given number of ranks per node
+        const int myNodeId = (worldRank/numPerNode);
+
+        // Establish the topology
         {
-            // Is host leader?
-            if (hostIDs[proci] < 0)
-            {
-                subRanks.push_back(proci);
+            DynamicList<int> nodeGroup(numPerNode);
+            DynamicList<int> nodeLeader(1+worldSize/numPerNode);
 
-                // Flip to generic host id
-                hostIDs[proci] = -(hostIDs[proci] + 1);
+            for (int proci = 0; proci < worldSize; ++proci)
+            {
+                if (myNodeId == (proci/numPerNode))
+                {
+                    nodeGroup.push_back(proci);
+                }
+
+                if ((proci % numPerNode) == 0)
+                {
+                    // Local rank 0 is a node leader
+                    nodeLeader.push_back(proci);
+                }
             }
-        }
-
-        const label index = interHostComm_;
-
-        // Direct copy (subRanks is also int)
-        procIDs_[index] = subRanks;
 
-        // Implicitly: withComponents = true
-        if (parRun())  // Already checked...
-        {
-            allocateCommunicatorComponents(parentCommunicator, index);
+            procIDs_[commInterNode_] = std::move(nodeLeader);
+            procIDs_[commLocalNode_] = std::move(nodeGroup);
         }
-
-        // Sizing and filling are demand-driven
-        linearCommunication_[index].clear();
-        treeCommunication_[index].clear();
     }
-
-    // Intra-host ranks. Ranks within a host
+    else
     {
-        int myHostId = hostIDs[UPstream::myProcNo(parentCommunicator)];
-        if (myHostId < 0) myHostId = -(myHostId + 1);  // Flip to generic id
+        // Determine inter-host/intra-host grouping based on the SHA1 of the
+        // hostnames. This allows a single initial Allgather to establish
+        // the overall topology. The alternative is to use
+        // MPI_Comm_split_type() on SHARED and then MPI_Comm_split()
+        // on the leader ranks.
+
+        // Could also add lowercase etc, but since hostName()
+        // will be consistent within the same node, there is no need.
+        const SHA1Digest myDigest(SHA1(hostName()).digest());
+
+        List<SHA1Digest> digests(worldSize);
+        digests[worldRank] = myDigest;
+
+        // The fixed-length digest allows use of MPI_Allgather.
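+        // For example, ranks (0 1 2 3) on hostA and (4 5 6 7) on hostB:
+        //   nodeLeader = (0 4)                   -> procIDs_[commInterNode_]
+        //   nodeGroup  = (0 1 2 3) or (4 5 6 7)  -> procIDs_[commLocalNode_]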
+        UPstream::mpiAllGather
+        (
+            digests.data_bytes(),      // Send/Recv (in-place)
+            SHA1Digest::size_bytes(),  // Num send/recv data per rank
+            UPstream::constWorldComm_
+        );
 
-        subRanks.clear();
-        forAll(hostIDs, proci)
+        // Establish the topology
         {
-            int id = hostIDs[proci];
-            if (id < 0) id = -(id + 1);  // Flip to generic id
+            DynamicList<int> nodeGroup(64);
+            DynamicList<int> nodeLeader(64);
+            DynamicList<SHA1Digest> uniqDigests(64);
 
-            if (id == myHostId)
+            for (int proci = 0; proci < worldSize; ++proci)
             {
-                subRanks.push_back(proci);
+                const auto& dig = digests[proci];
+
+                if (myDigest == dig)
+                {
+                    nodeGroup.push_back(proci);
+                }
+
+                if (!uniqDigests.contains(dig))
+                {
+                    // First appearance of host
+                    uniqDigests.push_back(dig);
+                    nodeLeader.push_back(proci);
+                }
             }
+
+            procIDs_[commInterNode_] = std::move(nodeLeader);
+            procIDs_[commLocalNode_] = std::move(nodeGroup);
         }
+    }
 
-        const label index = intraHostComm_;
 
-        // Direct copy (subRanks is also int)
-        procIDs_[index] = subRanks;
+    // Capture the size (number of nodes) before doing anything further
+    numNodes_ = procIDs_[commInterNode_].size();
 
-        // Implicitly: withComponents = true
-        if (parRun())  // Already checked...
-        {
-            allocateCommunicatorComponents(parentCommunicator, index);
-        }
+    // ~~~~~~~~~
+    // IMPORTANT
+    // ~~~~~~~~~
+    // Always retain knowledge of the inter-node leaders,
+    // even if this process is not on that communicator.
+    // This will help when constructing topology-aware communication.
 
-        // Sizing and filling are demand-driven
-        linearCommunication_[index].clear();
-        treeCommunication_[index].clear();
-    }
+    // Allocate backend MPI components
+    allocateCommunicatorComponents(constWorldComm_, commInterNode_);
+    allocateCommunicatorComponents(constWorldComm_, commLocalNode_);
 
     return true;
 }
@@ -582,10 +551,6 @@ void Foam::UPstream::freeCommunicator
         return;
     }
 
-    // Update demand-driven communicators
-    if (interHostComm_ == communicator) interHostComm_ = -1;
-    if (intraHostComm_ == communicator) intraHostComm_ = -1;
-
     if (debug)
     {
         Perr<< "Communicators : Freeing communicator " << communicator
@@ -651,26 +616,24 @@ Foam::label Foam::UPstream::procNo
 }
 
 
-const Foam::List<Foam::UPstream::commsStruct>&
+const Foam::UPstream::commsStructList&
 Foam::UPstream::linearCommunication(const label communicator)
 {
     if (linearCommunication_[communicator].empty())
     {
-        linearCommunication_[communicator] =
-            List<commsStruct>(UPstream::nProcs(communicator));
+        linearCommunication_[communicator].init(communicator);
     }
 
     return linearCommunication_[communicator];
 }
 
 
-const Foam::List<Foam::UPstream::commsStruct>&
+const Foam::UPstream::commsStructList&
 Foam::UPstream::treeCommunication(const label communicator)
 {
     if (treeCommunication_[communicator].empty())
    {
-        treeCommunication_[communicator] =
-            List<commsStruct>(UPstream::nProcs(communicator));
+        treeCommunication_[communicator].init(communicator);
     }
 
     return treeCommunication_[communicator];
@@ -683,50 +646,28 @@ void Foam::UPstream::printCommTree(const label communicator)
 
     if (UPstream::master(communicator))
     {
-        commsStruct::printGraph(Info(), comms);
-    }
-}
-
-
-Foam::label Foam::UPstream::commIntraHost()
-{
-    if (!parRun())
-    {
-        return worldComm;  // Don't know anything better to return
-    }
-    if (intraHostComm_ < 0)
-    {
-        allocateHostCommunicatorPairs();
-    }
-    return intraHostComm_;
-}
-
-
-Foam::label Foam::UPstream::commInterHost()
-{
-    if (!parRun())
-    {
-        return worldComm;  // Don't know anything better to return
-    }
-    if (interHostComm_ < 0)
-    {
-        allocateHostCommunicatorPairs();
+        comms.printGraph(Info());
     }
-    return interHostComm_;
 }
 
 
-bool Foam::UPstream::hasHostComms()
+bool Foam::UPstream::usingNodeComms(const label communicator)
 {
-    return (intraHostComm_ >= 0 || interHostComm_ >= 0);
-}
-
+    // Starting point must be "real" world-communicator
+    // ("real" means without any local trickery with worldComm)
+    // Avoid corner cases:
+    // - everything is on one node
+    // - everything is on different nodes
 
-void Foam::UPstream::clearHostComms()
-{
-    // Always with Pstream
-    freeCommunicator(intraHostComm_, true);
-    freeCommunicator(interHostComm_, true);
+    return
+    (
+        parRun_ && (constWorldComm_ == communicator)
+     && (nodeCommsControl_ > 0)
+        // More than one node and above defined threshold
+     && (numNodes_ > 1) && (numNodes_ >= nodeCommsMin_)
+        // Some processes do share nodes
+     && (numNodes_ < procIDs_[constWorldComm_].size())
+    );
 }
 
 
@@ -749,17 +690,19 @@ Foam::DynamicList<Foam::List<int>> Foam::UPstream::procIDs_(16);
 Foam::DynamicList<Foam::label> Foam::UPstream::parentComm_(16);
 Foam::DynamicList<Foam::label> Foam::UPstream::freeComms_;
 
-Foam::DynamicList<Foam::List<Foam::UPstream::commsStruct>>
+Foam::DynamicList<Foam::UPstream::commsStructList>
 Foam::UPstream::linearCommunication_(16);
 
-Foam::DynamicList<Foam::List<Foam::UPstream::commsStruct>>
+Foam::DynamicList<Foam::UPstream::commsStructList>
 Foam::UPstream::treeCommunication_(16);
 
-Foam::label Foam::UPstream::intraHostComm_(-1);
-Foam::label Foam::UPstream::interHostComm_(-1);
+Foam::label Foam::UPstream::constWorldComm_(0);
+Foam::label Foam::UPstream::numNodes_(1);
+Foam::label Foam::UPstream::commInterNode_(-1);
+Foam::label Foam::UPstream::commLocalNode_(-1);
 
-Foam::label Foam::UPstream::worldComm(0);
+Foam::label Foam::UPstream::worldComm(0);  // Initially same as constWorldComm_
 Foam::label Foam::UPstream::warnComm(-1);
 
@@ -767,16 +710,39 @@ Foam::label Foam::UPstream::warnComm(-1);
 // These are overwritten in parallel mode (by UPstream::setParRun())
 const Foam::label nPredefinedComm = []()
 {
-    // 0: COMM_WORLD : commWorld() / commGlobal()
-    (void) Foam::UPstream::allocateCommunicator(-1, Foam::labelRange(1), false);
+    // 0: COMM_WORLD : commGlobal(), constWorldComm_, worldComm
+    (void) Foam::UPstream::newCommunicator(-1, Foam::labelRange(1), false);
 
     // 1: COMM_SELF
-    (void) Foam::UPstream::allocateCommunicator(-2, Foam::labelRange(1), false);
+    (void) Foam::UPstream::newCommunicator(-2, Foam::labelRange(1), false);
 
     return Foam::UPstream::nComms();
 }();
 
 
+int Foam::UPstream::nodeCommsControl_
+(
+    Foam::debug::optimisationSwitch("nodeComms", 1)
+);
+registerOptSwitch
+(
+    "nodeComms",
+    int,
+    Foam::UPstream::nodeCommsControl_
+);
+
+int Foam::UPstream::nodeCommsMin_
+(
+    Foam::debug::optimisationSwitch("nodeComms.min", 0)
+);
+registerOptSwitch
+(
+    "nodeComms.min",
+    int,
+    Foam::UPstream::nodeCommsMin_
+);
+
+
 bool Foam::UPstream::floatTransfer
 (
     Foam::debug::optimisationSwitch("floatTransfer", 0)
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
index e737bc71ff97fadb540178e26224fedb7d935796..b7df5c279ee4da7d96784d24025b54242e3783bb 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
@@ -151,19 +151,10 @@ public:
 
         // Member Functions
 
-            //- Print un-directed graph in graphviz dot format
-            static void printGraph
-            (
-                Ostream& os,
-                const UList<UPstream::commsStruct>& comms,
-                const label proci = 0  // starting node
-            );
-
-
         // Access
 
             //- The number of processors addressed by the structure
-
label nProcs() const; + label nProcs() const noexcept; //- The procID of the processor \em directly above label above() const noexcept { return above_; } @@ -188,8 +179,14 @@ public: //- Reset to default constructed state void reset(); - //- Reset with automatic linear/tree selection - void reset(const label procID, const label numProcs); + //- Reset (automatic linear/tree selection), + //- possibly with communicator-specific adjustments + void reset + ( + const label procID, + const label numProcs, + const label comm = -1 + ); // Member / Friend Operators @@ -200,6 +197,67 @@ public: friend Ostream& operator<<(Ostream&, const commsStruct&); }; + //- Collection of communication structures + class commsStructList + { + // Private Data + + //- The communicator index + label comm_; + + //- The communication tree + List<commsStruct> tree_; + + public: + + // Constructors + + //- Construct empty with invalid communicator + commsStructList() noexcept : comm_(-1) {} + + //- Construct empty with given communicator + commsStructList(label comm) noexcept : comm_(comm) {} + + + // Static Functions + + //- An empty structure. Used for placeholders etc. + static const commsStructList& null(); + + + // Member Functions + + //- True if communicator is non-negative (ie, was assigned) + bool good() const noexcept { return (comm_ >= 0); } + + //- The communicator label + label comm() const noexcept { return comm_; } + + //- Clear the list + void clear() { return tree_.clear(); } + + //- True if the list is empty + bool empty() const noexcept { return tree_.empty(); } + + //- The number of entries + label size() const noexcept { return tree_.size(); } + + //- Reset communicator index and clear demand-driven entries + void init(const label comm); + + //- Get existing or create (demand-driven) entry + const UPstream::commsStruct& get(const label proci) const; + + //- Get existing or create (demand-driven) entry + const UPstream::commsStruct& operator[](const label proci) const + { + return get(proci); + } + + //- Print un-directed graph in graphviz dot format + void printGraph(Ostream& os, label proci = 0) const; + }; + private: @@ -220,18 +278,28 @@ private: //- Standard transfer message type static int msgType_; + //- Index to the world-communicator as defined at startup + //- (after any multi-world definitions). + //- Is unaffected by any later changes to worldComm. + static label constWorldComm_; + + //- The number of shared/host nodes in the (const) world communicator. 
+ static label numNodes_; + + //- Index to the inter-node communicator (between nodes), + //- defined based on constWorldComm_ + static label commInterNode_; + + //- Index to the intra-host communicator (within a node), + //- defined based on constWorldComm_ + static label commLocalNode_; + //- Names of all worlds static wordList allWorlds_; //- Per processor the world index (into allWorlds_) static labelList worldIDs_; - //- Intra-host communicator - static label intraHostComm_; - - //- Inter-host communicator (between host leaders) - static label interHostComm_; - // Communicator specific data @@ -248,10 +316,10 @@ private: static DynamicList<label> freeComms_; //- Linear communication schedule - static DynamicList<List<commsStruct>> linearCommunication_; + static DynamicList<commsStructList> linearCommunication_; //- Multi level communication schedule - static DynamicList<List<commsStruct>> treeCommunication_; + static DynamicList<commsStructList> treeCommunication_; // Private Member Functions @@ -259,24 +327,59 @@ private: //- Set data for parallel running static void setParRun(const label nProcs, const bool haveThreads); - //- Initialise entries for new communicator. Return the index + //- Initialise entries for new communicator. + // + // Resets corresponding entry in myProcNo_, procIDs_, + // linearCommunication_, treeCommunication_ + // \return the communicator index static label getAvailableCommIndex(const label parentIndex); - //- Allocate MPI components of communicator with given index + //- Define inter-host/intra-host communicators (uses commConstWorld). + // Optionally specify a given number per node. + static bool setHostCommunicators(const int numPerNode = 0); + + //- Define inter-host/intra-host communicators based on + //- shared-memory information. Uses comm-world. + static bool setSharedMemoryCommunicators(); + + //- Allocate MPI components of communicator with given index. + // This represents a "top-down" approach, creating a communicator + // based on the procIDs_ groupings. + // + // Modifies myProcNo_, reads and modifies procIDs_ static void allocateCommunicatorComponents ( const label parentIndex, const label index ); + //- Allocate MPI components as duplicate of the parent communicator + // + // Modifies myProcNo_, procIDs_ + static void dupCommunicatorComponents + ( + const label parentIndex, + const label index + ); + + //- Allocate MPI components for the given index by splitting + //- the parent communicator on the given \em colour. + // This represents a "bottom-up" approach, when the individual ranks + // only know which group they should belong to, but don't yet know + // which other ranks will be in their group. + // + // Modifies myProcNo_, procIDs_ + static void splitCommunicatorComponents + ( + const label parentIndex, + const label index, + const int colour + ); + //- Free MPI components of communicator. 
// Does not touch the first two communicators (SELF, WORLD) static void freeCommunicatorComponents(const label index); - //- Allocate inter-host, intra-host communicators - //- with comm-world as parent - static bool allocateHostCommunicatorPairs(); - public: @@ -286,6 +389,18 @@ public: // Static Data + //- Use of host/node topology-aware routines + // 0: disabled + // 1: split by hostname [default] + // 2: split by shared + // >=4: (debug) split with given number per node + static int nodeCommsControl_; + + //- Minimum number of nodes before topology-aware routines are enabled + // <= 2 : always + // >= 3 : when there are more than N nodes + static int nodeCommsMin_; + //- Should compact transfer be used in which floats replace doubles //- reducing the bandwidth requirement at the expense of some loss //- in accuracy @@ -323,12 +438,19 @@ public: //- Debugging: warn for use of any communicator differing from warnComm static label warnComm; - //- Communicator for all ranks, irrespective of any local worlds + //- Communicator for all ranks, irrespective of any local worlds. + // This value \em never changes during a simulation. static constexpr label commGlobal() noexcept { return 0; } //- Communicator within the current rank only + // This value \em never changes during a simulation. static constexpr label commSelf() noexcept { return 1; } + //- Communicator for all ranks (respecting any local worlds). + // This value \em never changes after startup. Unlike the commWorld() + // which can be temporarily overridden. + static label commConstWorld() noexcept { return constWorldComm_; } + //- Communicator for all ranks (respecting any local worlds) static label commWorld() noexcept { return worldComm; } @@ -343,6 +465,7 @@ public: //- Alter communicator debugging setting. //- Warns for use of any communicator differing from specified. + //- Negative values disable. // \returns the previous warn index static label commWarn(const label communicator) noexcept { @@ -360,17 +483,33 @@ public: // Host Communicators - //- Demand-driven: Intra-host communicator (respects any local worlds) - static label commIntraHost(); + //- Communicator between nodes/hosts (respects any local worlds) + static label commInterNode() noexcept + { + return (parRun_ ? commInterNode_ : constWorldComm_); + } - //- Demand-driven: Inter-host communicator (respects any local worlds) - static label commInterHost(); + //- Communicator within the node/host (respects any local worlds) + static label commLocalNode() noexcept + { + return (parRun_ ? commLocalNode_ : constWorldComm_); + } - //- Test for presence of any intra or inter host communicators - static bool hasHostComms(); + //- Both inter-node and local-node communicators have been created + static bool hasNodeCommunicators() noexcept + { + return + ( + (commInterNode_ > constWorldComm_) + && (commLocalNode_ > constWorldComm_) + ); + } - //- Remove any existing intra and inter host communicators - static void clearHostComms(); + //- True if node topology-aware routines have been enabled, + //- it is running in parallel, the starting point is the + //- world-communicator and it is not an odd corner case + //- (ie, all processes on one node, all processes on different nodes) + static bool usingNodeComms(const label communicator = worldComm); // Constructors @@ -384,9 +523,8 @@ public: // Member Functions - //- Allocate new communicator with contiguous sub-ranks - //- on the parent communicator. 
- static label allocateCommunicator + //- Create new communicator with sub-ranks on the parent communicator + static label newCommunicator ( //! The parent communicator const label parent, @@ -398,8 +536,8 @@ public: const bool withComponents = true ); - //- Allocate new communicator with sub-ranks on the parent communicator - static label allocateCommunicator + //- Create new communicator with sub-ranks on the parent communicator + static label newCommunicator ( //! The parent communicator const label parent, @@ -411,27 +549,36 @@ public: const bool withComponents = true ); - //- Free a previously allocated communicator. - // Ignores placeholder (negative) communicators. - static void freeCommunicator + //- Duplicate the parent communicator + // + // Always calls dupCommunicatorComponents() internally + static label dupCommunicator ( - const label communicator, - const bool withComponents = true + //! The parent communicator + const label parent ); - //- Allocate an inter-host communicator - static label allocateInterHostCommunicator + //- Allocate a new communicator by splitting the parent communicator + //- on the given \em colour. + // Always calls splitCommunicatorComponents() internally + static label splitCommunicator ( - const label parentCommunicator = worldComm + //! The parent communicator + const label parent, + + //! The colouring to select which ranks to include. + //! Negative values correspond to 'ignore' + const int colour ); - //- Allocate an intra-host communicator - static label allocateIntraHostCommunicator + //- Free a previously allocated communicator. + // Ignores placeholder (negative) communicators. + static void freeCommunicator ( - const label parentCommunicator = worldComm + const label communicator, + const bool withComponents = true ); - //- Wrapper class for allocating/freeing communicators. Always invokes //- allocateCommunicatorComponents() and freeCommunicatorComponents() class communicator @@ -457,12 +604,11 @@ public: ( //! The parent communicator const label parentComm, - //! The contiguous sub-ranks of parent to use const labelRange& subRanks ) : - comm_(UPstream::allocateCommunicator(parentComm, subRanks)) + comm_(UPstream::newCommunicator(parentComm, subRanks)) {} //- Allocate communicator for sub-ranks on given parent @@ -470,14 +616,38 @@ public: ( //! The parent communicator const label parentComm, - //! The sub-ranks of parent to use (negative values ignored) const labelUList& subRanks ) : - comm_(UPstream::allocateCommunicator(parentComm, subRanks)) + comm_(UPstream::newCommunicator(parentComm, subRanks)) {} + //- Factory Method : + //- Duplicate the given communicator + static communicator duplicate(const label parentComm) + { + communicator c; + c.comm_ = UPstream::dupCommunicator(parentComm); + return c; + } + + //- Factory Method : + //- Split the communicator on the given \em colour. + static communicator split + ( + //! The parent communicator + const label parentComm, + //! The colouring to select which ranks to include. + //! 
Negative values correspond to 'ignore' + const int colour + ) + { + communicator c; + c.comm_ = UPstream::splitCommunicator(parentComm, colour); + return c; + } + //- Free allocated communicator ~communicator() { UPstream::freeCommunicator(comm_); } @@ -498,14 +668,14 @@ public: void reset(label parent, const labelRange& subRanks) { UPstream::freeCommunicator(comm_); - comm_ = UPstream::allocateCommunicator(parent, subRanks); + comm_ = UPstream::newCommunicator(parent, subRanks); } //- Allocate with sub-ranks of parent communicator void reset(label parent, const labelUList& subRanks) { UPstream::freeCommunicator(comm_); - comm_ = UPstream::allocateCommunicator(parent, subRanks); + comm_ = UPstream::newCommunicator(parent, subRanks); } //- Take ownership, free allocated communicator @@ -805,7 +975,7 @@ public: } //- Rank of this process in the communicator (starting from masterNo()). - //- Can be negative if the process is not a rank in the communicator + //- Negative if the process is not a rank in the communicator. static int myProcNo(const label communicator = worldComm) { return myProcNo_[communicator]; @@ -817,11 +987,11 @@ public: return myProcNo_[communicator] == masterNo(); } - //- True if process corresponds to any rank (master or sub-rank) + //- True if process corresponds to \b any rank (master or sub-rank) //- in the given communicator static bool is_rank(const label communicator = worldComm) { - return myProcNo_[communicator] >= 0; + return myProcNo_[communicator] >= masterNo(); } //- True if process corresponds to a sub-rank in the given communicator @@ -842,6 +1012,12 @@ public: ); } + //- The number of shared/host nodes in the (const) world communicator. + static label numNodes() noexcept + { + return numNodes_; + } + //- The parent communicator static label parent(const label communicator) { @@ -899,15 +1075,13 @@ public: } //- Communication schedule for linear all-to-master (proc 0) - static const List<commsStruct>& - linearCommunication + static const commsStructList& linearCommunication ( const label communicator = worldComm ); //- Communication schedule for tree all-to-master (proc 0) - static const List<commsStruct>& - treeCommunication + static const commsStructList& treeCommunication ( const label communicator = worldComm ); @@ -915,7 +1089,7 @@ public: //- Communication schedule for all-to-master (proc 0) as //- linear/tree/none with switching based on UPstream::nProcsSimpleSum //- and the is_parallel() state - static const List<commsStruct>& whichCommunication + static const commsStructList& whichCommunication ( const label communicator = worldComm ) @@ -930,8 +1104,8 @@ public: return ( np <= 1 - ? List<commsStruct>::null() - : np < nProcsSimpleSum + ? commsStructList::null() + : (np <= 2 || np < nProcsSimpleSum) ? linearCommunication(communicator) : treeCommunication(communicator) ); @@ -983,7 +1157,7 @@ public: static void shutdown(int errNo = 0); //- Call MPI_Abort with no other checks or cleanup - static void abort(); + static void abort(int errNo = 1); //- Shutdown (finalize) MPI as required and exit program with errNo. static void exit(int errNo = 1); @@ -1205,27 +1379,43 @@ public: // Housekeeping - //- Wait for all requests to finish. - // \deprecated(2023-01) Probably not what you want. - // Should normally be restricted to a particular starting request. 
- FOAM_DEPRECATED_FOR(2023-01, "waitRequests(int) method") - static void waitRequests() { waitRequests(0); } - - //- Process index of first sub-process - // \deprecated(2020-09) use subProcs() method instead - FOAM_DEPRECATED_FOR(2020-09, "subProcs() method") - static constexpr int firstSlave() noexcept + //- Create new communicator with sub-ranks on the parent communicator + // \deprecated(2025-02) + static label allocateCommunicator + ( + const label parent, + const labelRange& subRanks, + const bool withComponents = true + ) { - return 1; + return newCommunicator(parent, subRanks, withComponents); } - //- Process index of last sub-process - // \deprecated(2020-09) use subProcs() method instead - FOAM_DEPRECATED_FOR(2020-09, "subProcs() or allProcs() method") - static int lastSlave(const label communicator = worldComm) + //- Create new communicator with sub-ranks on the parent communicator + // \deprecated(2025-02) + static label allocateCommunicator + ( + const label parent, + const labelUList& subRanks, + const bool withComponents = true + ) { - return nProcs(communicator) - 1; + return newCommunicator(parent, subRanks, withComponents); } + + //- Communicator between nodes (respects any local worlds) + FOAM_DEPRECATED_FOR(2025-02, "commInterNode()") + static label commInterHost() noexcept { return commInterNode(); } + + //- Communicator within the node (respects any local worlds) + FOAM_DEPRECATED_FOR(2025-02, "commLocalNode()") + static label commIntraHost() noexcept { return commLocalNode(); } + + //- Wait for all requests to finish. + // \deprecated(2023-01) Probably not what you want. + // Should normally be restricted to a particular starting request. + FOAM_DEPRECATED_FOR(2023-01, "waitRequests(int) method") + static void waitRequests() { waitRequests(0); } }; @@ -1447,18 +1637,6 @@ public: Ostream& operator<<(Ostream&, const UPstream::commsStruct&); -// * * * * * * * * * * * * Template Specialisations * * * * * * * * * * * * // - -// Template specialisation for access of commsStruct -template<> -UPstream::commsStruct& -UList<UPstream::commsStruct>::operator[](const label procID); - -template<> -const UPstream::commsStruct& -UList<UPstream::commsStruct>::operator[](const label procID) const; - - // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // } // End namespace Foam diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C b/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C index 7aa432170bb2e2c281b0dfabfd9dfc1dfc95d381..72a41bba26823d08b76b6fe08157c33000aa7804 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2016 OpenFOAM Foundation - Copyright (C) 2021-2023 OpenCFD Ltd. + Copyright (C) 2021-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
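The duplicate()/split() factory methods added above pair naturally with the RAII communicator wrapper. A minimal usage sketch (hypothetical calling code, not part of this patch; assumes the wrapper's usual implicit conversion to label):

    // Duplicate the world communicator; freed by the wrapper's destructor
    UPstream::communicator dupComm =
        UPstream::communicator::duplicate(UPstream::commWorld());

    // Split into even/odd rank groups; a negative colour would
    // exclude the rank from the resulting communicator
    const int colour = (UPstream::myProcNo(UPstream::commWorld()) % 2);

    UPstream::communicator halfComm =
        UPstream::communicator::split(UPstream::commWorld(), colour);

    Pout<< "rank " << UPstream::myProcNo(halfComm)
        << " of " << UPstream::nProcs(halfComm)
        << " (colour " << colour << ")" << nl;

Both communicators are released via UPstream::freeCommunicator() when the wrappers go out of scope, matching the destructor shown above.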
@@ -28,6 +28,178 @@ License #include "UPstream.H" +// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * // + +namespace Foam +{ + +// This outputs as depth-first, but graphviz sorts that for us +static void printGraph_impl +( + Ostream& os, + const UPstream::commsStructList& comms, + const label proci, + label depth, + const label maxDepth = 1024 +) +{ + if (proci >= comms.size()) + { + // Corner case when only a single rank involved + // (eg, for node-local communicator) + return; + } + + const auto& below = comms[proci].below(); + + if (proci == 0) + { + os << nl << "// communication graph:" << nl; + os.beginBlock("graph"); + + // Prefer left-to-right layout for large graphs + os << indent << "rankdir=LR" << nl; + + if (below.empty()) + { + // A graph with a single-node (eg, self-comm) + os << indent << proci << nl; + } + } + + int pos = 0; + + for (const auto nbrProci : below) + { + if (pos) + { + os << " "; + } + else + { + os << indent; + } + os << proci << " -- " << nbrProci; + + if (++pos >= 4) // Max 4 items per line + { + pos = 0; + os << nl; + } + } + + if (pos) + { + os << nl; + } + + // Limit the maximum depth + ++depth; + if (depth >= maxDepth && (proci != 0)) + { + return; + } + + for (const auto nbrProci : below) + { + // if (proci == nbrProci) continue; // Extreme safety! + printGraph_impl(os, comms, nbrProci, depth, maxDepth); + } + + if (proci == 0) + { + os.endBlock(); + + os << "// end graph" << nl; + } +} + +} // End namespace Foam + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +// Create a tree-like schedule. For 8 procs: +// (level 0) +// 0 receives from 1 +// 2 receives from 3 +// 4 receives from 5 +// 6 receives from 7 +// (level 1) +// 0 receives from 2 +// 4 receives from 6 +// (level 2) +// 0 receives from 4 +// +// The sends/receives for all levels are collected per processor +// (one send per processor; multiple receives possible) creating +// a table: +// +// So per processor: +// proc receives from sends to +// ---- ------------- -------- +// 0 1,2,4 - +// 1 - 0 +// 2 3 0 +// 3 - 2 +// 4 5 0 +// 5 - 4 +// 6 7 4 +// 7 - 6 + +namespace Foam +{ + +static label simpleTree +( + const label procID, + const label numProcs, + + DynamicList<label>& below, + DynamicList<label>& allBelow +) +{ + label above(-1); + + for (label mod = 2, step = 1; step < numProcs; step = mod) + { + mod = step * 2; + + if (procID % mod) + { + // The rank above + above = procID - (procID % mod); + break; + } + else + { + for + ( + label j = procID + step; + j < numProcs && j < procID + mod; + j += step + ) + { + below.push_back(j); + } + for + ( + label j = procID + step; + j < numProcs && j < procID + mod; + j++ + ) + { + allBelow.push_back(j); + } + } + } + + return above; +} + +} // End namespace Foam + + // * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * // Foam::UPstream::commsStruct::commsStruct @@ -91,74 +263,26 @@ Foam::UPstream::commsStruct::commsStruct // * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * // -// This outputs as depth-first, but graphviz sorts that for us -void Foam::UPstream::commsStruct::printGraph +void Foam::UPstream::commsStructList::printGraph ( Ostream& os, - const UList<UPstream::commsStruct>& comms, const label proci -) +) const { - // if (proci >= comms.size()) return; // Extreme safety! 
- - const auto& below = comms[proci].below(); - - if (proci == 0) - { - os << nl << "// communication graph:" << nl; - os.beginBlock("graph"); - - if (below.empty()) - { - // A graph with a single-node (eg, self-comm) - os << indent << proci << nl; - } - } - - int pos = 0; - - for (const label nbrProci : below) - { - if (pos) - { - os << " "; - } - else - { - os << indent; - } - os << proci << " -- " << nbrProci; + // Print graph - starting at depth 0 + // Avoid corner case when only a single rank involved + // (eg, for node-local communicator) - if (++pos >= 4) // Max 4 items per line - { - pos = 0; - os << nl; - } - } - - if (pos) + if (proci < size()) { - os << nl; - } - - for (const label nbrProci : below) - { - // if (proci == nbrProci) continue; // Extreme safety! - printGraph(os, comms, nbrProci); - } - - if (proci == 0) - { - os.endBlock(); - - os << "// end graph" << nl; + printGraph_impl(os, *this, proci, 0); } } // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // -Foam::label Foam::UPstream::commsStruct::nProcs() const +Foam::label Foam::UPstream::commsStruct::nProcs() const noexcept { return (1 + allBelow_.size() + allNotBelow_.size()); } @@ -176,121 +300,88 @@ void Foam::UPstream::commsStruct::reset() void Foam::UPstream::commsStruct::reset ( const label procID, - const label numProcs + const label numProcs, + [[maybe_unused]] const label comm ) { reset(); - label above(-1); - DynamicList<label> below; - DynamicList<label> allBelow; - - if (numProcs < UPstream::nProcsSimpleSum) + if (numProcs <= 2 || numProcs < UPstream::nProcsSimpleSum) { - // Linear schedule + // Linear communication pattern + label above(-1); + labelList below; if (procID == 0) { below = identity(numProcs-1, 1); - allBelow = below; } else { above = 0; } - } - else - { - // Use tree like schedule. 
For 8 procs: - // (level 0) - // 0 receives from 1 - // 2 receives from 3 - // 4 receives from 5 - // 6 receives from 7 - // (level 1) - // 0 receives from 2 - // 4 receives from 6 - // (level 2) - // 0 receives from 4 - // - // The sends/receives for all levels are collected per processor - // (one send per processor; multiple receives possible) creating - // a table: - // - // So per processor: - // proc receives from sends to - // ---- ------------- -------- - // 0 1,2,4 - - // 1 - 0 - // 2 3 0 - // 3 - 2 - // 4 5 0 - // 5 - 4 - // 6 7 4 - // 7 - 6 - - label mod = 0; - - for (label step = 1; step < numProcs; step = mod) - { - mod = step * 2; - if (procID % mod) - { - above = procID - (procID % mod); - break; - } - else - { - for - ( - label j = procID + step; - j < numProcs && j < procID + mod; - j += step - ) - { - below.push_back(j); - } - for - ( - label j = procID + step; - j < numProcs && j < procID + mod; - j++ - ) - { - allBelow.push_back(j); - } - } - } + *this = UPstream::commsStruct(numProcs, procID, above, below, below); + return; } + + // Simple tree communication pattern + DynamicList<label> below; + DynamicList<label> allBelow; + + label above = simpleTree + ( + procID, + numProcs, + below, + allBelow + ); + *this = UPstream::commsStruct(numProcs, procID, above, below, allBelow); } -// * * * * * * * * * * * * * * * Specializations * * * * * * * * * * * * * * // +// * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * // -template<> -Foam::UPstream::commsStruct& -Foam::UList<Foam::UPstream::commsStruct>::operator[](const label procID) +const Foam::UPstream::commsStructList& +Foam::UPstream::commsStructList::null() { - auto& val = this->v_[procID]; // or this->data()[procID] + static std::unique_ptr<commsStructList> singleton; - if (val.nProcs() != size()) + if (!singleton) { - // Create/update - val.reset(procID, size()); + singleton = std::make_unique<commsStructList>(); } - return val; + return *singleton; +} + + +// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // + +void Foam::UPstream::commsStructList::init(const label comm) +{ + comm_ = comm; + tree_.clear(); + tree_.resize(UPstream::nProcs(comm)); } -template<> const Foam::UPstream::commsStruct& -Foam::UList<Foam::UPstream::commsStruct>::operator[](const label procID) const +Foam::UPstream::commsStructList::get(const label proci) const { - return const_cast<UList<UPstream::commsStruct>&>(*this).operator[](procID); + const UPstream::commsStruct& entry = tree_[proci]; + const auto numProcs = tree_.size(); + + if (entry.nProcs() != numProcs) + { + // Create/update + const_cast<UPstream::commsStruct&>(entry) + .reset(proci, numProcs, comm_); + } + + return entry; } diff --git a/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H b/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H index ad95d44d123fb7c15bb5811c6441ee939a11165b..d69afb0cc4e1213d9d222ed9f1c7242e0cd63635 100644 --- a/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H +++ b/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2014 OpenFOAM Foundation - Copyright (C) 2020-2023 OpenCFD Ltd. + Copyright (C) 2020-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
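With the schedule now wrapped in commsStructList, entries are created on demand through get()/operator[] rather than via the former UList specialisations. A small diagnostic sketch (hypothetical code, assuming a parallel run so that whichCommunication() returns a non-empty schedule):

    const auto& comms = UPstream::whichCommunication(UPstream::worldComm);

    // First access creates this rank's entry (demand-driven)
    const UPstream::commsStruct& myComms = comms[UPstream::myProcNo()];

    Pout<< "above: " << myComms.above()
        << " below: " << flatOutput(myComms.below()) << nl;

    if (UPstream::master())
    {
        comms.printGraph(Info());   // graphviz dot format, as implemented above
    }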
@@ -58,8 +58,10 @@ class prefixOSstream { // Private Data + //- Prefix printing is active bool printPrefix_; + //- The prefix to add string prefix_; @@ -84,9 +86,9 @@ public: // Member Functions - // Enquiry + // Decorators - //- Return the stream prefix + //- The stream prefix const string& prefix() const noexcept { return prefix_; } //- Return non-const access to the stream prefix diff --git a/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C b/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C index 0b5cdd1ab4daefdc96fb01ea5fe7bdd6311bbc0e..d90fe722be18ebb4f79342f22c92e2ccfff578de 100644 --- a/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C +++ b/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C @@ -34,7 +34,7 @@ License #include <cctype> #undef DetailInfo -#define DetailInfo if (::Foam::infoDetailLevel > 0) InfoErr +#define DetailInfo if (::Foam::infoDetailLevel > 0) ::Foam::InfoErr // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * // diff --git a/src/OpenFOAM/db/error/messageStream.H b/src/OpenFOAM/db/error/messageStream.H index 9cc61264f0dc6c3002c3377729e9c2cb7ca8bc7e..e22d8d0abec143ec168947b85f822bee5f35fa90 100644 --- a/src/OpenFOAM/db/error/messageStream.H +++ b/src/OpenFOAM/db/error/messageStream.H @@ -401,15 +401,15 @@ extern messageStream SeriousError; //- Write to Foam::Info if the Foam::infoDetailLevel is +ve non-zero (default) #define DetailInfo \ - if (::Foam::infoDetailLevel > 0) Info + if (::Foam::infoDetailLevel > 0) ::Foam::Info //- Report write to Foam::Info if the local log switch is true #define Log \ - if (log) Info + if (log) ::Foam::Info //- Report write to Foam::Info if the class log switch is true #define Log_ \ - if (this->log) Info + if (this->log) ::Foam::Info //- Report an IO information message using Foam::Info @@ -427,7 +427,7 @@ extern messageStream SeriousError; //- Report an information message using Foam::Info // if the local debug switch is true #define DebugInfo \ - if (debug) Info + if (debug) ::Foam::Info //- Report an information message using Foam::Info // for FUNCTION_NAME in file __FILE__ at line __LINE__ @@ -438,7 +438,7 @@ extern messageStream SeriousError; //- Report an information message using Foam::Pout // if the local debug switch is true #define DebugPout \ - if (debug) Pout + if (debug) ::Foam::Pout //- Report an information message using Foam::Pout // for FUNCTION_NAME in file __FILE__ at line __LINE__ diff --git a/src/OpenFOAM/global/argList/argList.C b/src/OpenFOAM/global/argList/argList.C index 1e316317e543882f755eb12fe1044f604f808211..8abb9a5457f4a07a0bb75a2073b0f79dc06b91c2 100644 --- a/src/OpenFOAM/global/argList/argList.C +++ b/src/OpenFOAM/global/argList/argList.C @@ -2093,8 +2093,24 @@ void Foam::argList::parse Info<< "Roots : " << roots << nl; } } + Info<< "Pstream initialized with:" << nl - << " floatTransfer : " + << " node communication : "; + if (UPstream::nodeCommsControl_ > 0) + { + Info<< Switch::name(UPstream::usingNodeComms()) + << " [min=" << UPstream::nodeCommsMin_ + << ", type=" << UPstream::nodeCommsControl_ + << "]"; + } + else + { + Info<< "disabled"; + } + Info<< " (" << UPstream::nProcs() << " ranks, " + << UPstream::numNodes() << " nodes)" << nl; + + Info<< " floatTransfer : " << Switch::name(UPstream::floatTransfer) << nl << " maxCommsSize : " << UPstream::maxCommsSize << nl diff --git a/src/OpenFOAM/global/argList/parRun.H b/src/OpenFOAM/global/argList/parRun.H index 
0b7d8e3412f4eab3fea6d0cdcb124226573f0f60..1646cbdacee1e9eb4671ad97919a0584721bc33f 100644 --- a/src/OpenFOAM/global/argList/parRun.H +++ b/src/OpenFOAM/global/argList/parRun.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2018 OpenFOAM Foundation - Copyright (C) 2018-2021 OpenCFD Ltd. + Copyright (C) 2018-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -77,7 +77,7 @@ public: // Constructors //- Default construct - ParRunControl() noexcept + constexpr ParRunControl() noexcept : dryRun_(0), verbose_(0), @@ -92,7 +92,9 @@ public: { if (parallel_) { - Info<< "Finalising parallel run" << endl; + // Report shutdown (stdout or stderr) + (Foam::infoDetailLevel > 0 ? Info.stream() : InfoErr.stream()) + << "Finalising parallel run" << endl; } UPstream::shutdown(); } @@ -104,10 +106,10 @@ public: int dryRun() const noexcept { return dryRun_; } //- Increase the dry-run level - void incrDryRun(int level = 1) noexcept { dryRun_ += level; } + void incrDryRun(int i=1) noexcept { dryRun_ += i; } //- Change dry-run level, returns old value - int dryRun(const int level) noexcept + int dryRun(int level) noexcept { int old(dryRun_); dryRun_ = level; @@ -118,10 +120,10 @@ public: int verbose() const noexcept { return verbose_; } //- Increase the verbosity level - void incrVerbose(int level = 1) noexcept { verbose_ += level; } + void incrVerbose(int i=1) noexcept { verbose_ += i; } //- Change verbosity level, returns old value - int verbose(const int level) noexcept + int verbose(int level) noexcept { int old(verbose_); verbose_ = level; @@ -140,7 +142,7 @@ public: //- Set as parallel run on/off, return the previous value. // Use with \b extreme caution if runPar() has already been // called. - bool parRun(const bool on) noexcept + bool parRun(bool on) noexcept { bool old(parallel_); parallel_ = on; @@ -176,7 +178,10 @@ public: { if (!UPstream::init(argc, argv, needsThread_)) { - Info<< "Failed to start parallel run" << endl; + // Report failure (stdout or stderr) + (Foam::infoDetailLevel > 0 ? Info.stream() : InfoErr.stream()) + << "Failed to start parallel run" << endl; + UPstream::exit(1); } parallel_ = true; diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C index 5aa41da120035e07c6a4a852f5dd0e11201209e6..2f341fbabaf7b34aa72deb9d09d1511a0339b896 100644 --- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C +++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2017-2018 OpenFOAM Foundation - Copyright (C) 2019-2023 OpenCFD Ltd. + Copyright (C) 2019-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
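For reference, the nodeComms switches registered in UPstream.C earlier in this patch are tunable like any other OpenFOAM optimisation switch, for example under OptimisationSwitches in etc/controlDict. A sketch with the compiled-in defaults (entry names taken verbatim from the registerOptSwitch() calls; semantics per the header comments):

    OptimisationSwitches
    {
        // Host/node topology-aware communication:
        // 0: disabled, 1: split by hostname [default],
        // 2: split by shared memory, >=4: (debug) given number per node
        nodeComms       1;

        // Minimum number of nodes before topology-aware routines are used
        // (<= 2: always, >= 3: only with more than N nodes)
        nodeComms.min   0;
    }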
@@ -290,14 +290,7 @@ Foam::OFstreamCollator::OFstreamCollator(const off_t maxBufferSize) maxBufferSize_(maxBufferSize), threadRunning_(false), localComm_(UPstream::worldComm), - threadComm_ - ( - UPstream::allocateCommunicator - ( - localComm_, - labelRange(UPstream::nProcs(localComm_)) - ) - ) + threadComm_(UPstream::dupCommunicator(localComm_)) {} @@ -310,14 +303,7 @@ Foam::OFstreamCollator::OFstreamCollator maxBufferSize_(maxBufferSize), threadRunning_(false), localComm_(comm), - threadComm_ - ( - UPstream::allocateCommunicator - ( - localComm_, - labelRange(UPstream::nProcs(localComm_)) - ) - ) + threadComm_(UPstream::dupCommunicator(localComm_)) {} diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C index 71303704958c4428571b01f809bb1857744eb9a2..a9dda1fcb7d61eb6c19f072bdcc17aecdd9520eb 100644 --- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C +++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C @@ -236,7 +236,7 @@ static Tuple2<label, labelList> getCommPattern() if (UPstream::parRun() && commAndIORanks.second().size() > 1) { // Multiple masters: ranks for my IO range - commAndIORanks.first() = UPstream::allocateCommunicator + commAndIORanks.first() = UPstream::newCommunicator ( UPstream::worldComm, fileOperation::subRanks(commAndIORanks.second()) diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C index 8ee087e691944ddd06eacc05607174eacd573bc4..999db2ed9c3cf5b69fd09dc924594537632700be 100644 --- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C +++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C @@ -84,7 +84,7 @@ static Tuple2<label, labelList> getCommPattern() if (UPstream::parRun() && commAndIORanks.second().size() > 1) { // Multiple masters: ranks for my IO range - commAndIORanks.first() = UPstream::allocateCommunicator + commAndIORanks.first() = UPstream::newCommunicator ( UPstream::worldComm, fileOperation::subRanks(commAndIORanks.second()) diff --git a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C index 5b05fe46c83f45192e29e28d23fdb160a69e7750..16d8fc64a0095e2bcc7e4ad889c3993ddfc60078 100644 --- a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C +++ b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C @@ -362,7 +362,7 @@ Foam::fileOperation::New_impl // Warning: MS-MPI currently uses MPI_Comm_create() instead of // MPI_Comm_create_group() so it will block there! 
- commAndIORanks.first() = UPstream::allocateCommunicator + commAndIORanks.first() = UPstream::newCommunicator ( UPstream::worldComm, siblings diff --git a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C index bad09da2b9b4ad71027f785506d16fb5e66b97dd..b325663c7d461cff7da7a4cd011c0907818a8dbc 100644 --- a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C +++ b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C @@ -106,54 +106,44 @@ Foam::labelRange Foam::fileOperation::subRanks(const labelUList& mainIOranks) Foam::labelList Foam::fileOperation::getGlobalHostIORanks() { - const label numProcs = UPstream::nProcs(UPstream::worldComm); + // Very similar to the code in UPstream::setHostCommunicators() + // except we need the leader information on *all* ranks! - // Use hostname - // Lowest rank per hostname is the IO rank - - List<SHA1Digest> digests; - if (UPstream::master(UPstream::worldComm)) - { - digests.resize(numProcs); - } + const label myProci = UPstream::myProcNo(UPstream::worldComm); + const label numProc = UPstream::nProcs(UPstream::worldComm); // Could also add lowercase etc, but since hostName() // will be consistent within the same node, there is no need. - SHA1Digest myDigest(SHA1(hostName()).digest()); + const SHA1Digest myDigest(SHA1(hostName()).digest()); + + List<SHA1Digest> digests(numProc); + digests[myProci] = myDigest; - // The fixed-length digest allows use of MPI_Gather - UPstream::mpiGather + // The fixed-length digest allows use of MPI_Allgather. + UPstream::mpiAllGather ( - myDigest.cdata_bytes(), // Send - digests.data_bytes(), // Recv + digests.data_bytes(), // Send/Recv SHA1Digest::size_bytes(), // Num send/recv per rank UPstream::worldComm ); - labelList ranks; - DynamicList<label> dynRanks; - if (UPstream::master(UPstream::worldComm)) - { - dynRanks.reserve(numProcs); - - dynRanks.push_back(0); // Always include master - label previ = 0; + DynamicList<label> hostLeaders(UPstream::numNodes()); - for (label proci = 1; proci < digests.size(); ++proci) + hostLeaders.push_back(0); // Always include master + for (label previ = 0, proci = 1; proci < digests.size(); ++proci) + { + if (digests[previ] != digests[proci]) { - if (digests[proci] != digests[previ]) - { - dynRanks.push_back(proci); - previ = proci; - } + hostLeaders.push_back(proci); + previ = proci; } - - ranks.transfer(dynRanks); } - Pstream::broadcast(ranks, UPstream::worldComm); - return ranks; + return labelList(std::move(hostLeaders)); + + // Alternative is to recover information from commInterNode() + // and broadcast via commLocalNode() } diff --git a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C index 1d610096901dbc69c6f19935f3a0b4bbe701cf51..95c5360b8f155a1a686cd251476965563a7b69e0 100644 --- a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C +++ b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C @@ -612,7 +612,7 @@ static Tuple2<label, labelList> getCommPattern() if (UPstream::parRun() && commAndIORanks.second().size() > 1) { // Multiple masters: ranks for my IO range - commAndIORanks.first() = UPstream::allocateCommunicator + commAndIORanks.first() = UPstream::newCommunicator ( UPstream::worldComm, 
fileOperation::subRanks(commAndIORanks.second()) diff --git a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C index e1541444bdcefd0b078618905da32c542ada7dda..ba35d1ba9d283281a9a69a5e0b82b005b6e868ee 100644 --- a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C +++ b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C @@ -85,7 +85,7 @@ static Tuple2<label, labelList> getCommPattern() if (UPstream::parRun() && commAndIORanks.second().size() > 1) { // Multiple masters: ranks for my IO range - commAndIORanks.first() = UPstream::allocateCommunicator + commAndIORanks.first() = UPstream::newCommunicator ( UPstream::worldComm, fileOperation::subRanks(commAndIORanks.second()) diff --git a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C index 180783d578cae562504578e00b51cae6a7663e82..ab41512a302aa81325550d89e64a2c8397bcc5ce 100644 --- a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C +++ b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C @@ -204,7 +204,7 @@ static Tuple2<label, labelList> getCommPattern() if (UPstream::parRun() && commAndIORanks.second().size() > 1) { // Multiple masters: ranks for my IO range - commAndIORanks.first() = UPstream::allocateCommunicator + commAndIORanks.first() = UPstream::newCommunicator ( UPstream::worldComm, fileOperation::subRanks(commAndIORanks.second()) diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C index bdbc65d281a0d41a90785b661948a716d26f56bb..f231f5d778bb7b54658075e662c5c3da9f8e37c4 100644 --- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C +++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2019-2023 OpenCFD Ltd. + Copyright (C) 2019-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -559,36 +559,65 @@ void Foam::GAMGAgglomeration::procAgglomerateRestrictAddressing const label levelIndex ) { - // Collect number of cells - labelList nFineCells; - globalIndex::gatherValues + const bool master = ( - comm, - procIDs, - restrictAddressing_[levelIndex].size(), - nFineCells, - - UPstream::msgType(), - UPstream::commsTypes::scheduled + UPstream::myProcNo(comm) == (procIDs.empty() ? 
0 : procIDs[0]) ); - labelList fineOffsets(globalIndex::calcOffsets(nFineCells)); - // Combine and renumber nCoarseCells - labelList nCoarseCells; - globalIndex::gatherValues - ( - comm, - procIDs, - nCells_[levelIndex], - nCoarseCells, + // Determine the fine/coarse sizes (offsets) for gathering + labelList fineOffsets; + labelList coarseOffsets; + + { + List<labelPair> sizes = globalIndex::listGatherValues + ( + comm, + procIDs, + labelPair + ( + // fine + restrictAddressing_[levelIndex].size(), + // coarse + nCells_[levelIndex] + ), + UPstream::msgType(), + UPstream::commsTypes::scheduled + ); + + // Calculate offsets, as per globalIndex::calcOffsets() + // but extracting from the pair + if (master && !sizes.empty()) + { + const label len = sizes.size(); + + fineOffsets.resize(len+1); + coarseOffsets.resize(len+1); + + label fineCount = 0; + label coarseCount = 0; + + for (label i = 0; i < len; ++i) + { + fineOffsets[i] = fineCount; + fineCount += sizes[i].first(); + + coarseOffsets[i] = coarseCount; + coarseCount += sizes[i].second(); + } + + fineOffsets[len] = fineCount; + coarseOffsets[len] = coarseCount; + } + } - UPstream::msgType(), - UPstream::commsTypes::scheduled - ); - labelList coarseOffsets(globalIndex::calcOffsets(nCoarseCells)); // (cell)restrictAddressing labelList procRestrictAddressing; + if (master) + { + // pre-size on master + procRestrictAddressing.resize(fineOffsets.back()); + } globalIndex::gather ( fineOffsets, @@ -596,15 +625,13 @@ void Foam::GAMGAgglomeration::procAgglomerateRestrictAddressing procIDs, restrictAddressing_[levelIndex], procRestrictAddressing, - UPstream::msgType(), - Pstream::commsTypes::nonBlocking //Pstream::commsTypes::scheduled + UPstream::commsTypes::nonBlocking ); - - if (Pstream::myProcNo(comm) == procIDs[0]) + if (master) { - nCells_[levelIndex] = coarseOffsets.last(); // ie, totalSize() + nCells_[levelIndex] = coarseOffsets.back(); // ie, totalSize() // Renumber consecutively for (label proci = 1; proci < procIDs.size(); ++proci) diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C index 8d13291110e0b409a7669d9e410b4b8ad078501b..f236bde8a52872d6656e0c6a42e30759e57221bf 100644 --- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C +++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2023 OpenCFD Ltd. + Copyright (C) 2023-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
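The rewrite above gathers a single labelPair per rank and unpacks it into the fine/coarse offset tables, replacing two separate collective calls with one. Used on its own, the renamed helper behaves roughly as follows (a sketch; 'myData' is a hypothetical local field):

    // Gather one value per rank onto the master (== procIDs[0])
    const label comm = UPstream::worldComm;
    const auto& procIDs = UPstream::procID(comm);

    labelList sizes = globalIndex::listGatherValues
    (
        comm,
        procIDs,
        myData.size()
    );

    // 'sizes' is non-empty on the master only
    if (UPstream::master(comm))
    {
        const labelList offsets(globalIndex::calcOffsets(sizes));
        // ... use 'offsets' for a subsequent globalIndex::gather()
    }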
@@ -78,17 +78,17 @@ void Foam::GAMGAgglomeration::restrictField const label coarseComm = UPstream::parent(procCommunicator_[coarseLevelIndex]); - const List<label>& procIDs = agglomProcIDs(coarseLevelIndex); - const labelList& offsets = cellOffsets(coarseLevelIndex); + const auto& procIDs = agglomProcIDs(coarseLevelIndex); + const auto& offsets = cellOffsets(coarseLevelIndex); - globalIndex::gather + globalIndex::gatherInplace ( offsets, coarseComm, procIDs, cf, UPstream::msgType(), - Pstream::commsTypes::nonBlocking //Pstream::commsTypes::scheduled + UPstream::commsTypes::nonBlocking ); } } @@ -145,8 +145,8 @@ void Foam::GAMGAgglomeration::prolongField const label coarseComm = UPstream::parent(procCommunicator_[coarseLevelIndex]); - const List<label>& procIDs = agglomProcIDs(coarseLevelIndex); - const labelList& offsets = cellOffsets(coarseLevelIndex); + const auto& procIDs = agglomProcIDs(coarseLevelIndex); + const auto& offsets = cellOffsets(coarseLevelIndex); const label localSize = nCells_[levelIndex]; @@ -159,7 +159,7 @@ void Foam::GAMGAgglomeration::prolongField cf, allCf, UPstream::msgType(), - Pstream::commsTypes::nonBlocking //Pstream::commsTypes::scheduled + UPstream::commsTypes::nonBlocking ); forAll(fineToCoarse, i) @@ -195,8 +195,8 @@ const Foam::Field<Type>& Foam::GAMGAgglomeration::prolongField const label coarseComm = UPstream::parent(procCommunicator_[coarseLevelIndex]); - const List<label>& procIDs = agglomProcIDs(coarseLevelIndex); - const labelList& offsets = cellOffsets(coarseLevelIndex); + const auto& procIDs = agglomProcIDs(coarseLevelIndex); + const auto& offsets = cellOffsets(coarseLevelIndex); const label localSize = nCells_[levelIndex]; allCf.resize_nocopy(localSize); @@ -209,7 +209,7 @@ const Foam::Field<Type>& Foam::GAMGAgglomeration::prolongField cf, allCf, UPstream::msgType(), - Pstream::commsTypes::nonBlocking //Pstream::commsTypes::scheduled + UPstream::commsTypes::nonBlocking ); forAll(fineToCoarse, i) diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C index 33139bb24cf30461f9d6725d248f8c6150759ea0..da30b3bebaa43995eb9ed6ed5e9d4906777fcea9 100644 --- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C +++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C @@ -119,7 +119,7 @@ bool Foam::eagerGAMGProcAgglomeration::agglomerate() // Communicator for the processor-agglomerated matrix comms_.push_back ( - UPstream::allocateCommunicator + UPstream::newCommunicator ( levelComm, masterProcs diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C index b8906aaca0d53386fc92af43fc104b99a280964f..e33b2539a11c5df6de986954c219693ef3b293d5 100644 --- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C +++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C @@ -167,7 +167,7 @@ bool Foam::manualGAMGProcAgglomeration::agglomerate() // Communicator for the 
processor-agglomerated matrix comms_.push_back ( - UPstream::allocateCommunicator + UPstream::newCommunicator ( levelMesh.comm(), coarseToMaster diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C index 4eab56651592d4d343d05154aae3e3adea9fe97a..d5024bb9e9b88182887cce6f9b8eb9619abe761c 100644 --- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C +++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C @@ -194,7 +194,7 @@ bool Foam::masterCoarsestGAMGProcAgglomeration::agglomerate() // Communicator for the processor-agglomerated matrix comms_.push_back ( - UPstream::allocateCommunicator + UPstream::newCommunicator ( levelComm, masterProcs diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C index 52baecc75b3aae118ca611c442273c03c6d4b3a4..6a35bd8077e58f240c3ab7e29eec3ddcaaafe41e 100644 --- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C +++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C @@ -286,7 +286,7 @@ bool Foam::procFacesGAMGProcAgglomeration::agglomerate() // Communicator for the processor-agglomerated matrix comms_.push_back ( - UPstream::allocateCommunicator + UPstream::newCommunicator ( levelComm, masterProcs diff --git a/src/OpenFOAM/parallel/globalIndex/globalIndex.C b/src/OpenFOAM/parallel/globalIndex/globalIndex.C index 218869d33b388873bcbd9397f9a14ad55df4b9ce..95f64b161c6ca1f94a990b133e31d6909f1dffce 100644 --- a/src/OpenFOAM/parallel/globalIndex/globalIndex.C +++ b/src/OpenFOAM/parallel/globalIndex/globalIndex.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2016 OpenFOAM Foundation - Copyright (C) 2018-2023 OpenCFD Ltd. + Copyright (C) 2018-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -263,6 +263,117 @@ Foam::globalIndex::calcRanges } +bool Foam::globalIndex::splitNodeOffsets +( + labelList& interNodeOffsets, + labelList& localNodeOffsets, + const label communicator, + const bool absoluteLocalNodeOffsets +) const +{ + // Require const-world as the starting point + if (!UPstream::parRun() || communicator != UPstream::commConstWorld()) + { + interNodeOffsets.clear(); + localNodeOffsets.clear(); + return false; + } + + const auto interNodeComm = UPstream::commInterNode(); + + // Only generate information on the node leaders + if (!UPstream::is_rank(interNodeComm)) + { + interNodeOffsets.clear(); + localNodeOffsets.clear(); + return true; // Not involved, but return true to match others... 
+ } + + const label numProc = UPstream::nProcs(UPstream::commConstWorld()); + const auto& procIds = UPstream::procID(interNodeComm); + const int ranki = UPstream::myProcNo(interNodeComm); + + if (FOAM_UNLIKELY(procIds.empty())) + { + // Should not happen... + interNodeOffsets.clear(); + localNodeOffsets.clear(); + return true; // Return true to match others... + } + + // The inter-node offsets from the node-specific segment of the + // overall offsets, but avoiding MPI_Scatterv (slow, doesn't + // handle overlaps) and using MPI_Bcast() instead. + + // Send top-level offsets to the node leaders. + // Could also be a mutable operation and use offsets_ directly. + // + // - number of overall offsets is always (nProc+1) [worldComm] + labelList allOffsets; + if (UPstream::master(interNodeComm)) + { + allOffsets = offsets_; + } + else // ie, UPstream::is_subrank(interNodeComm) + { + allOffsets.resize_nocopy(numProc+1); + } + + UPstream::broadcast + ( + allOffsets.data_bytes(), + allOffsets.size_bytes(), + interNodeComm + ); + + + if (FOAM_UNLIKELY(allOffsets.empty())) + { + // Should not happen... + interNodeOffsets.clear(); + localNodeOffsets.clear(); + return true; // Return true to match others... + } + + // The local node span + const label firstProc = procIds[ranki]; + const label lastProc = + ( + (ranki+1 < procIds.size()) + ? procIds[ranki+1] + : numProc + ); + + // Offsets (within a node) + localNodeOffsets = allOffsets.slice + ( + firstProc, + (lastProc - firstProc) + 1 // +1 since offsets + ); + + if (!absoluteLocalNodeOffsets && !localNodeOffsets.empty()) + { + const auto start0 = localNodeOffsets.front(); + for (auto& val : localNodeOffsets) + { + val -= start0; + } + } + + // Offsets (between nodes) + interNodeOffsets.resize_nocopy(procIds.size()+1); + { + forAll(procIds, i) + { + interNodeOffsets[i] = allOffsets[procIds[i]]; + } + interNodeOffsets.back() = allOffsets.back(); + } + + return true; +} + + // * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * // Foam::globalIndex::globalIndex(Istream& is) diff --git a/src/OpenFOAM/parallel/globalIndex/globalIndex.H b/src/OpenFOAM/parallel/globalIndex/globalIndex.H index 598c82e5521ce4eb01fa6804919b8d23c49d5968..19f6ef1a9d2a6a92790806bf521e8b983e19c4de 100644 --- a/src/OpenFOAM/parallel/globalIndex/globalIndex.H +++ b/src/OpenFOAM/parallel/globalIndex/globalIndex.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2018-2024 OpenCFD Ltd. + Copyright (C) 2018-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -87,9 +87,8 @@ class globalIndex // template<class Type> // inline static UPstream::commsTypes getCommsType // ( - // const UPstream::commsTypes preferred - // = UPstream::commsTypes::nonBlocking - // ); + // UPstream::commsTypes commsType + // ) noexcept; //- Report overflow at specified (non-negative) index static void reportOverflowAndExit @@ -582,17 +581,35 @@ public: const bool checkOverflow = false ); + //- Split the top-level offsets into inter-node and local-node + //- components suitable to a two-stage hierarchy. + bool splitNodeOffsets + ( + //! [out] Offsets between nodes (only non-empty on node leaders) + labelList& interNodeOffsets, + //! [out] Offsets within a node (only non-empty on node leaders) + labelList& localNodeOffsets, + //! The communicator. 
Must resolve to const world-comm + const label communicator = UPstream::worldComm, + //! Retain absolute values for the localNode offsets + const bool absoluteLocalNodeOffsets = false + ) const; + + + // Misc low-level gather routines + //- Collect single values in processor order on master (== procIDs[0]). // Handles contiguous/non-contiguous data. + // non-zero output field (master only) template<class ProcIDsContainer, class Type> - static void gatherValues + [[nodiscard]] + static List<Type> listGatherValues ( const label comm, //!< communicator const ProcIDsContainer& procIDs, const Type& localValue, - List<Type>& allValues, //! output field (master only) const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking ); //- Collect data in processor order on master (== procIDs[0]). @@ -603,10 +620,11 @@ public: const labelUList& offsets, //!< offsets (master only) const label comm, //!< communicator const ProcIDsContainer& procIDs, - const UList<Type>& fld, - List<Type>& allFld, //! output field (master only) + const UList<Type>& fld, //!< [in] all ranks + //! [out] result (master only). Must be adequately sized! + UList<Type>& allFld, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking ); //- Collect indirect data in processor order on master @@ -617,37 +635,24 @@ public: const labelUList& offsets, //!< offsets (master only) const label comm, //!< communicator const ProcIDsContainer& procIDs, - const IndirectListBase<Type, Addr>& fld, - List<Type>& allFld, //! output field (master only) + const IndirectListBase<Type, Addr>& fld, //!< [in] all ranks + //! [out] result (master only). Must be adequately sized! + UList<Type>& allFld, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking ); - - // Misc low-level gather routines - //- Inplace collect in processor order on master (== procIDs[0]). - // Note: adjust naming? template<class ProcIDsContainer, class Type> - static void gather + static void gatherInplace ( const labelUList& offsets, //!< offsets (master only) const label comm, //!< communicator const ProcIDsContainer& procIDs, - List<Type>& fld, //!< in/out field + List<Type>& fld, //!< [in,out] const int tag = UPstream::msgType(), - const UPstream::commsTypes ct = UPstream::commsTypes::nonBlocking - ) - { - List<Type> allData; - gather(offsets, comm, procIDs, fld, allData, tag, ct); - - const int masterProci = procIDs.size() ? procIDs[0] : 0; - if (UPstream::myProcNo(comm) == masterProci) - { - fld.transfer(allData); - } - } + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking + ); //- Collect data in processor order on master (== procIDs[0]). // \note the globalIndex offsets needed on master only. @@ -656,29 +661,26 @@ public: ( const label comm, //!< communicator const ProcIDsContainer& procIDs, - const UList<Type>& fld, //!< input field - List<Type>& allFld, //! output field (master only) + const UList<Type>& fld, //!< [in] input field + //! [out] resized to have results on master, empty elsewhere. 
+ List<Type>& allFld, const int tag = UPstream::msgType(), - const UPstream::commsTypes ct = UPstream::commsTypes::nonBlocking - ) const - { - gather(offsets_, comm, procIDs, fld, allFld, tag, ct); - } + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking + ) const; //- Inplace collect in processor order on master (== procIDs[0]). // \note the globalIndex offsets needed on master only. - // Note: adjust naming? template<class ProcIDsContainer, class Type> - void gather + void gatherInplace ( const label comm, //!< communicator const ProcIDsContainer& procIDs, - List<Type>& fld, //!< in/out field + List<Type>& fld, //!< [in,out] const int tag = UPstream::msgType(), - const UPstream::commsTypes ct = UPstream::commsTypes::nonBlocking + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking ) const { - gather(offsets_, comm, procIDs, fld, tag, ct); + gatherInplace(offsets_, comm, procIDs, fld, tag, commsType); } @@ -690,10 +692,12 @@ public: template<class Type> void gather ( + //! [in] input on all ranks const UList<Type>& sendData, + //! [out] resized to have results on master, empty elsewhere. List<Type>& allData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ) const; @@ -703,10 +707,12 @@ public: template<class Type, class Addr> void gather ( + //! [in] input on all ranks const IndirectListBase<Type, Addr>& sendData, + //! [out] resized to have results on master, empty elsewhere. List<Type>& allData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ) const; @@ -716,11 +722,12 @@ public: // // \return output (master), zero-sized on non-master template<class Type, class OutputContainer = List<Type>> + [[nodiscard]] OutputContainer gather ( - const UList<Type>& sendData, + const UList<Type>& sendData, //!< [in] all ranks const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ) const; @@ -729,11 +736,12 @@ public: // // \return output (master), zero-sized on non-master template<class Type, class Addr, class OutputContainer = List<Type>> + [[nodiscard]] OutputContainer gather ( - const IndirectListBase<Type, Addr>& sendData, + const IndirectListBase<Type, Addr>& sendData, //!< [in] all ranks const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ) const; @@ -744,10 +752,9 @@ public: template<class Type> void gatherInplace ( - //! 
[in,out] - List<Type>& fld, + List<Type>& fld, //!< [in,out] const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ) const; @@ -765,7 +772,7 @@ public: const label comm = UPstream::worldComm, //!< communicator // For fallback routines: - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const int tag = UPstream::msgType() ) const; @@ -777,13 +784,14 @@ public: // // \return output (master), zero-sized on non-master template<class Type, class OutputContainer = List<Type>> + [[nodiscard]] OutputContainer mpiGather ( const UList<Type>& sendData, const label comm = UPstream::worldComm, //!< communicator // For fallback routines: - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const int tag = UPstream::msgType() ) const; @@ -803,7 +811,7 @@ public: const label comm = UPstream::worldComm, //!< communicator // For fallback routines: - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const int tag = UPstream::msgType() ) const; @@ -823,7 +831,7 @@ public: const label comm = UPstream::worldComm, //!< communicator // For fallback routines: - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const int tag = UPstream::msgType() ); @@ -833,13 +841,14 @@ public: // // \return output (master), zero-sized on non-master template<class Type, class OutputContainer = List<Type>> + [[nodiscard]] static OutputContainer mpiGatherOp ( const UList<Type>& sendData, const label comm = UPstream::worldComm, //!< communicator // For fallback routines: - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const int tag = UPstream::msgType() ); @@ -857,7 +866,7 @@ public: const label comm = UPstream::worldComm, //!< communicator // For fallback routines: - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const int tag = UPstream::msgType() ); @@ -871,7 +880,7 @@ public: //! [out] output on master, zero-sized on non-master List<Type>& allData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ); @@ -885,7 +894,7 @@ public: //! 
[out] output on master, zero-sized on non-master List<Type>& allData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ); @@ -895,11 +904,12 @@ public: // // \return output (master), zero-sized on non-master template<class Type, class OutputContainer = List<Type>> + [[nodiscard]] static OutputContainer gatherOp ( const UList<Type>& sendData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ); @@ -909,11 +919,12 @@ public: // // \return output (master), zero-sized on non-master template<class Type, class Addr, class OutputContainer = List<Type>> + [[nodiscard]] static OutputContainer gatherOp ( const IndirectListBase<Type, Addr>& sendData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ); @@ -928,7 +939,7 @@ public: //! [in,out] List<Type>& fld, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ); @@ -947,7 +958,7 @@ public: const UList<Type>& allFld, UList<Type>& fld, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking ); //- Distribute data in processor order. @@ -962,11 +973,10 @@ public: const UList<Type>& allFld, UList<Type>& fld, const int tag = UPstream::msgType(), - const UPstream::commsTypes ct = - UPstream::commsTypes::nonBlocking + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking ) const { - scatter(offsets_, comm, procIDs, allFld, fld, tag, ct); + scatter(offsets_, comm, procIDs, allFld, fld, tag, commsType); } //- Distribute data in processor order. @@ -979,7 +989,7 @@ public: const UList<Type>& allData, UList<Type>& localData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ) const; @@ -988,11 +998,12 @@ public: // Communication with default/specified communicator, message tag. // \note the globalIndex offsets needed on master only. template<class Type, class OutputContainer = List<Type>> + [[nodiscard]] OutputContainer scatter ( const UList<Type>& allData, const int tag = UPstream::msgType(), - const UPstream::commsTypes = UPstream::commsTypes::nonBlocking, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, const label comm = UPstream::worldComm //!< communicator ) const; diff --git a/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C b/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C index 75bf7d4a93baa6c345401e591737b7123e5bd723..6c3c46397d8ad6a26f7a945612996d6604a10483 100644 --- a/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C +++ b/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C @@ -32,22 +32,22 @@ License // Cannot use non-blocking for non-contiguous data. 
// template<class Type> -// inline Foam::UPstream::commsTypes getCommsType +// inline Foam::UPstream::commsTypes Foam::globalIndex::getCommsType // ( -// const UPstream::commsTypes preferred -// ) +// UPstream::commsTypes commsType +// ) noexcept // { -// return -// ( -// ( -// !is_contiguous_v<Type> -// && UPstream::commsTypes::nonBlocking == preferred -// ) -// ? UPstream::commsTypes::scheduled -// : preferred -// ); +// if constexpr (!is_contiguous_v<Type>) +// { +// return UPstream::commsTypes::scheduled; +// } +// else +// { +// return commsType; +// } // } +// Helpers template<class Addr> Foam::labelList @@ -121,33 +121,38 @@ Foam::globalIndex::calcListOffsets } +// Low-level + template<class ProcIDsContainer, class Type> -void Foam::globalIndex::gatherValues +Foam::List<Type> Foam::globalIndex::listGatherValues ( const label comm, const ProcIDsContainer& procIDs, const Type& localValue, - List<Type>& allValues, const int tag, - const UPstream::commsTypes preferredCommsType + UPstream::commsTypes commsType ) { - // low-level: no parRun guard + // low-level: no parRun guard? + const int masterProci = (procIDs.empty() ? 0 : procIDs[0]); - // Cannot use non-blocking for non-contiguous data. - const UPstream::commsTypes commsType = - ( - ( - !is_contiguous_v<Type> - && UPstream::commsTypes::nonBlocking == preferredCommsType - ) - ? UPstream::commsTypes::scheduled - : preferredCommsType - ); + // if (!UPstream::is_parallel(comm)) + // { + // List<Type> allValues(1); + // allValues[0] = localValue; + // return allValues; + // } - const label startOfRequests = UPstream::nRequests(); + List<Type> allValues; - const int masterProci = procIDs.size() ? procIDs[0] : 0; + // Cannot use non-blocking for non-contiguous data + if constexpr (!is_contiguous_v<Type>) + { + commsType = UPstream::commsTypes::scheduled; + } + + + const label startOfRequests = UPstream::nRequests(); if (UPstream::myProcNo(comm) == masterProci) { @@ -176,8 +181,6 @@ void Foam::globalIndex::gatherValues } else { - allValues.clear(); // safety: zero-size on non-master - if constexpr (is_contiguous_v<Type>) { UOPstream::write @@ -196,11 +199,10 @@ void Foam::globalIndex::gatherValues } } - if (commsType == UPstream::commsTypes::nonBlocking) - { - // Wait for outstanding requests - UPstream::waitRequests(startOfRequests); - } + // Process sync + UPstream::waitRequests(startOfRequests); + + return allValues; } @@ -211,45 +213,37 @@ void Foam::globalIndex::gather const label comm, const ProcIDsContainer& procIDs, const UList<Type>& fld, - List<Type>& allFld, + UList<Type>& allFld, // must be adequately sized on master const int tag, - const UPstream::commsTypes preferredCommsType + UPstream::commsTypes commsType ) { // low-level: no parRun guard + const int masterProci = (procIDs.empty() ? 0 : procIDs[0]); - // Cannot use non-blocking for non-contiguous data. - const UPstream::commsTypes commsType = - ( - ( - !is_contiguous_v<Type> - && UPstream::commsTypes::nonBlocking == preferredCommsType - ) - ? UPstream::commsTypes::scheduled - : preferredCommsType - ); + // Cannot use non-blocking for non-contiguous data + if constexpr (!is_contiguous_v<Type>) + { + commsType = UPstream::commsTypes::scheduled; + } const label startOfRequests = UPstream::nRequests(); - const int masterProci = procIDs.size() ? procIDs[0] : 0; - if (UPstream::myProcNo(comm) == masterProci) { - allFld.resize_nocopy(off.back()); // == totalSize() - - // Assign my local data - respect offset information - // so that we can request 0 entries to be copied. 
- // Also handle the case where we have a slice of the full - // list. - - SubList<Type>(allFld, off[1]-off[0], off[0]) = - SubList<Type>(fld, off[1]-off[0]); + if (FOAM_UNLIKELY(allFld.size() < off.back())) // ie, totalSize() + { + FatalErrorInFunction + << "[out] UList size=" << allFld.size() + << " too small to receive " << off.back() << nl + << Foam::abort(FatalError); + } for (label i = 1; i < procIDs.size(); ++i) { - SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]); + SubList<Type> slot(allFld, off[i+1]-off[i], off[i]); - if (procSlot.empty()) + if (slot.empty()) { // Nothing to do } @@ -259,14 +253,28 @@ void Foam::globalIndex::gather ( commsType, procIDs[i], - procSlot, + slot, tag, comm ); } else { - IPstream::recv(procSlot, procIDs[i], tag, comm); + IPstream::recv(slot, procIDs[i], tag, comm); + } + } + + // Assign my local data - respect offset information + // so that we can request 0 entries to be copied. + // Also handle the case where we have a slice of the full + // list. + { + SubList<Type> dst(allFld, off[1]-off[0], off[0]); + SubList<Type> src(fld, off[1]-off[0]); + + if (!dst.empty() && (dst.data() != src.data())) + { + dst = src; } } } @@ -293,11 +301,8 @@ void Foam::globalIndex::gather } } - if (commsType == UPstream::commsTypes::nonBlocking) - { - // Wait for outstanding requests - UPstream::waitRequests(startOfRequests); - } + // Process sync + UPstream::waitRequests(startOfRequests); } @@ -308,71 +313,74 @@ void Foam::globalIndex::gather const label comm, const ProcIDsContainer& procIDs, const IndirectListBase<Type, Addr>& fld, - List<Type>& allFld, + UList<Type>& allFld, // must be adequately sized on master const int tag, - const UPstream::commsTypes preferredCommsType + UPstream::commsTypes commsType ) { // low-level: no parRun guard + const int masterProci = (procIDs.empty() ? 0 : procIDs[0]); if constexpr (is_contiguous_v<Type>) { - // Flatten list (locally) so that we can benefit from using direct - // read/write of contiguous data + if (commsType == UPstream::commsTypes::nonBlocking) + { + // Contiguous data and requested nonBlocking. + // + // Flatten list (locally) so that we can benefit from using + // direct read/write of contiguous data - gather - ( - off, - comm, - procIDs, - List<Type>(fld), - allFld, - tag, - preferredCommsType - ); - return; - } + List<Type> flattened(fld); - // Cannot use non-blocking for non-contiguous data. - const UPstream::commsTypes commsType = - ( - ( - !is_contiguous_v<Type> - && UPstream::commsTypes::nonBlocking == preferredCommsType - ) - ? UPstream::commsTypes::scheduled - : preferredCommsType - ); + gather + ( + off, + comm, + procIDs, + flattened, + allFld, + tag, + commsType + ); + return; + } + } - const label startOfRequests = UPstream::nRequests(); - const int masterProci = procIDs.size() ? 
procIDs[0] : 0;
+
+    // Non-contiguous data is never transferred non-blocking
 
     if (UPstream::myProcNo(comm) == masterProci)
     {
-        allFld.resize_nocopy(off.back());  // == totalSize()
-
-        // Assign my local data - respect offset information
-        // so that we can request 0 entries to be copied
-
-        SubList<Type> localSlot(allFld, off[1]-off[0], off[0]);
-        if (!localSlot.empty())
+        if (FOAM_UNLIKELY(allFld.size() < off.back()))  // ie, totalSize()
         {
-            localSlot = fld;
+            FatalErrorInFunction
+                << "[out] UList size=" << allFld.size()
+                << " too small to receive " << off.back() << nl
+                << Foam::abort(FatalError);
         }
 
-        // Already verified commsType != nonBlocking
-
         for (label i = 1; i < procIDs.size(); ++i)
         {
-            SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
+            SubList<Type> slot(allFld, off[i+1]-off[i], off[i]);
 
-            if (procSlot.empty())
+            if (slot.empty())
             {
                 // Nothing to do
             }
             else
             {
-                IPstream::recv(procSlot, procIDs[i], tag, comm);
+                IPstream::recv(slot, procIDs[i], tag, comm);
+            }
+        }
+
+        // Assign my local data - respect offset information
+        // so that we can request 0 entries to be copied
+        {
+            SubList<Type> dst(allFld, off[1]-off[0], off[0]);
+
+            if (!dst.empty() && (dst.size() == fld.size()))
+            {
+                dst.deepCopy(fld);
             }
         }
     }
@@ -384,18 +392,84 @@ void Foam::globalIndex::gather
     }
     else
     {
-        OPstream::send(fld, commsType, masterProci, tag, comm);
+        OPstream::send(fld, masterProci, tag, comm);
     }
+}
 
-
-    if (commsType == UPstream::commsTypes::nonBlocking)
+
+template<class ProcIDsContainer, class Type>
+void Foam::globalIndex::gatherInplace
+(
+    const labelUList& off,  // needed on master only
+    const label comm,
+    const ProcIDsContainer& procIDs,
+    List<Type>& fld,
+    const int tag,
+    UPstream::commsTypes commsType
+)
+{
+    if (!UPstream::is_parallel(comm))
+    {
+        // Serial: (no-op)
+        return;
+    }
+
+    const bool master =
+    (
+        UPstream::myProcNo(comm) == (procIDs.empty() ? 0 : procIDs[0])
+    );
+
+    List<Type> allData;
+    if (master)
     {
-        // Wait for outstanding requests
-        UPstream::waitRequests(startOfRequests);
+        allData.resize_nocopy(off.back());  // == totalSize()
+    }
+
+    globalIndex::gather(off, comm, procIDs, fld, allData, tag, commsType);
+
+    if (master)
+    {
+        fld = std::move(allData);
+    }
+    else
+    {
+        fld.clear();  // zero-size on non-master
     }
 }
 
 
+template<class ProcIDsContainer, class Type>
+void Foam::globalIndex::gather
+(
+    const label comm,
+    const ProcIDsContainer& procIDs,
+    const UList<Type>& fld,
+    List<Type>& allData,
+    const int tag,
+    UPstream::commsTypes commsType
+) const
+{
+    if (!UPstream::is_parallel(comm))
+    {
+        // Serial: (no-op)
+        return;
+    }
+
+    if (UPstream::myProcNo(comm) == (procIDs.empty() ?
0 : procIDs[0])) + { + // presize => totalSize() + allData.resize_nocopy(offsets_.back()); + } + else + { + allData.clear(); // zero-size on non-master + } + + globalIndex::gather(offsets_, comm, procIDs, fld, allData, tag, commsType); +} + + // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // template<class Type> @@ -404,7 +478,7 @@ void Foam::globalIndex::gather const UList<Type>& sendData, List<Type>& allData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) const { @@ -415,6 +489,16 @@ void Foam::globalIndex::gather return; } + if (UPstream::master(comm)) + { + allData.resize_nocopy(offsets_.back()); // == totalSize() + } + else + { + allData.clear(); // zero-size on non-master + } + + if (!UPstream::usingNodeComms(comm)) { globalIndex::gather ( @@ -426,9 +510,61 @@ void Foam::globalIndex::gather tag, commsType ); - if (!UPstream::master(comm)) + } + else + { + // Using node-based hierarchy + + // Using comm-world and have node communication active + const auto interNodeComm = UPstream::commInterNode(); + const auto localNodeComm = UPstream::commLocalNode(); + + // Stage 0 : The inter-node/intra-node offsets + labelList interNodeOffsets; + labelList localNodeOffsets; + this->splitNodeOffsets(interNodeOffsets, localNodeOffsets, comm); + + // The first node re-uses the output (allData) when collecting + // content. All other nodes require temporary node-local storage. + + List<Type> tmpNodeData; + if (UPstream::is_subrank(interNodeComm)) + { + tmpNodeData.resize(localNodeOffsets.back()); + } + + List<Type>& nodeData = + ( + UPstream::master(interNodeComm) ? allData : tmpNodeData + ); + + // Stage 1 : Gather data within the node + { + globalIndex::gather + ( + localNodeOffsets, // (master only) + localNodeComm, + UPstream::allProcs(localNodeComm), + sendData, + nodeData, // node-local dest (or the allData parameter) + tag, + commsType + ); + } + + // Stage 2 : Gather data between nodes + if (UPstream::is_rank(interNodeComm)) { - allData.clear(); // safety: zero-size on non-master + globalIndex::gather + ( + interNodeOffsets, // (master only) + interNodeComm, + UPstream::allProcs(interNodeComm), + nodeData, + allData, + tag, + commsType + ); } } } @@ -440,7 +576,7 @@ void Foam::globalIndex::gather const IndirectListBase<Type, Addr>& sendData, List<Type>& allData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) const { @@ -450,6 +586,37 @@ void Foam::globalIndex::gather allData = sendData; return; } + else if constexpr (is_contiguous_v<Type>) + { + if (commsType == UPstream::commsTypes::nonBlocking) + { + // Contiguous data and requested nonBlocking. 
+ // + // Flatten list (locally) so that we can benefit from using + // direct read/write of contiguous data + + List<Type> flattened(sendData); + + this->gather + ( + flattened, + allData, + tag, + commsType, + comm + ); + return; + } + } + + if (UPstream::master(comm)) + { + allData.resize_nocopy(offsets_.back()); // == totalSize() + } + else + { + allData.clear(); // zero-size on non-master + } { globalIndex::gather @@ -462,10 +629,6 @@ void Foam::globalIndex::gather tag, commsType ); - if (!UPstream::master(comm)) - { - allData.clear(); // safety: zero-size on non-master - } } } @@ -475,12 +638,12 @@ OutputContainer Foam::globalIndex::gather ( const UList<Type>& sendData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) const { OutputContainer allData; - gather(sendData, allData, tag, commsType, comm); + this->gather(sendData, allData, tag, commsType, comm); return allData; } @@ -490,12 +653,12 @@ OutputContainer Foam::globalIndex::gather ( const IndirectListBase<Type, Addr>& sendData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) const { OutputContainer allData; - gather(sendData, allData, tag, commsType, comm); + this->gather(sendData, allData, tag, commsType, comm); return allData; } @@ -505,18 +668,18 @@ void Foam::globalIndex::gatherInplace ( List<Type>& fld, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) const { if (UPstream::parRun()) { List<Type> allData; - gather(fld, allData, tag, commsType, comm); + this->gather(fld, allData, tag, commsType, comm); if (UPstream::master(comm)) { - fld.transfer(allData); + fld = std::move(allData); } else { @@ -533,8 +696,7 @@ void Foam::globalIndex::mpiGather const UList<Type>& sendData, OutputContainer& allData, const label comm, - - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const int tag ) const { @@ -696,8 +858,7 @@ OutputContainer Foam::globalIndex::mpiGather ( const UList<Type>& sendData, const label comm, - - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const int tag ) const { @@ -712,8 +873,7 @@ void Foam::globalIndex::mpiGatherInplace ( List<Type>& fld, const label comm, - - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const int tag ) const { @@ -724,7 +884,7 @@ void Foam::globalIndex::mpiGatherInplace if (UPstream::master(comm)) { - fld.transfer(allData); + fld = std::move(allData); } else { @@ -741,8 +901,7 @@ void Foam::globalIndex::mpiGatherOp const UList<Type>& sendData, OutputContainer& allData, const label comm, - - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const int tag ) { @@ -765,8 +924,7 @@ OutputContainer Foam::globalIndex::mpiGatherOp ( const UList<Type>& sendData, const label comm, - - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const int tag ) { @@ -781,8 +939,7 @@ void Foam::globalIndex::mpiGatherInplaceOp ( List<Type>& fld, const label comm, - - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const int tag ) { @@ -793,7 +950,7 @@ void Foam::globalIndex::mpiGatherInplaceOp if (UPstream::master(comm)) { - fld.transfer(allData); + fld = std::move(allData); } else { @@ -810,7 +967,7 @@ void Foam::globalIndex::gatherOp const UList<Type>& sendData, List<Type>& allData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) { @@ 
-834,7 +991,7 @@ void Foam::globalIndex::gatherOp const IndirectListBase<Type, Addr>& sendData, List<Type>& allData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) { @@ -857,7 +1014,7 @@ OutputContainer Foam::globalIndex::gatherOp ( const UList<Type>& sendData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) { @@ -872,7 +1029,7 @@ OutputContainer Foam::globalIndex::gatherOp ( const IndirectListBase<Type, Addr>& sendData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) { @@ -887,7 +1044,7 @@ void Foam::globalIndex::gatherInplaceOp ( List<Type>& fld, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) { @@ -910,33 +1067,28 @@ void Foam::globalIndex::scatter const UList<Type>& allFld, UList<Type>& fld, const int tag, - const UPstream::commsTypes preferredCommsType + UPstream::commsTypes commsType ) { // low-level: no parRun guard + const int masterProci = (procIDs.empty() ? 0 : procIDs[0]); - // Cannot use non-blocking for non-contiguous data. - const UPstream::commsTypes commsType = - ( - ( - !is_contiguous_v<Type> - && UPstream::commsTypes::nonBlocking == preferredCommsType - ) - ? UPstream::commsTypes::scheduled - : preferredCommsType - ); + // Cannot use non-blocking for non-contiguous data + if constexpr (!is_contiguous_v<Type>) + { + commsType = UPstream::commsTypes::scheduled; + } - const label startOfRequests = UPstream::nRequests(); - const int masterProci = procIDs.size() ? procIDs[0] : 0; + const label startOfRequests = UPstream::nRequests(); if (UPstream::myProcNo(comm) == masterProci) { for (label i = 1; i < procIDs.size(); ++i) { - const SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]); + const SubList<Type> slot(allFld, off[i+1]-off[i], off[i]); - if (procSlot.empty()) + if (slot.empty()) { // Nothing to do } @@ -946,14 +1098,14 @@ void Foam::globalIndex::scatter ( commsType, procIDs[i], - procSlot, + slot, tag, comm ); } else { - OPstream::send(procSlot, commsType, procIDs[i], tag, comm); + OPstream::send(slot, commsType, procIDs[i], tag, comm); } } @@ -962,8 +1114,15 @@ void Foam::globalIndex::scatter // Also handle the case where we have a slice of the full // list. 
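        // A small worked example (hypothetical values) of how the offsets
        // map ranks to slots of the flat list, so that a zero-length span
        // results in no copy and no communication:
        //
        //     off = (0 3 7 12)   // three ranks
        //     rank 0 -> [0,3), rank 1 -> [3,7), rank 2 -> [7,12)
        //     SubList<Type>(allFld, off[i+1]-off[i], off[i])  // rank i slot
        //
        // The pointer comparison below guards against self-assignment when
        // 'fld' is itself a slice of 'allFld' (shared storage).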
- SubList<Type>(fld, off[1]-off[0]) = - SubList<Type>(allFld, off[1]-off[0], off[0]); + { + SubList<Type> dst(fld, off[1]-off[0]); + SubList<Type> src(allFld, off[1]-off[0], off[0]); + + if (!dst.empty() && (dst.data() != src.data())) + { + dst = src; + } + } } else { @@ -992,11 +1151,8 @@ void Foam::globalIndex::scatter } } - if (commsType == UPstream::commsTypes::nonBlocking) - { - // Wait for outstanding requests - UPstream::waitRequests(startOfRequests); - } + // Process sync + UPstream::waitRequests(startOfRequests); } @@ -1006,7 +1162,7 @@ void Foam::globalIndex::scatter const UList<Type>& allData, UList<Type>& localData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) const { @@ -1037,7 +1193,7 @@ OutputContainer Foam::globalIndex::scatter ( const UList<Type>& allData, const int tag, - const UPstream::commsTypes commsType, + UPstream::commsTypes commsType, const label comm ) const { @@ -1051,7 +1207,8 @@ OutputContainer Foam::globalIndex::scatter UPstream::listScatterValues<label>(this->localSizes(), comm) ); - OutputContainer localData(count); + OutputContainer localData; + localData.resize(count); this->scatter(allData, localData, tag, commsType, comm); return localData; diff --git a/src/Pstream/dummy/UPstream.C b/src/Pstream/dummy/UPstream.C index c935914db63e1da9dd74ba905e662133cac2dcf8..50cf97f6071b64310a52ffa9d3096d6dc810a1f4 100644 --- a/src/Pstream/dummy/UPstream.C +++ b/src/Pstream/dummy/UPstream.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2018 OpenFOAM Foundation - Copyright (C) 2016-2023 OpenCFD Ltd. + Copyright (C) 2016-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -55,6 +55,12 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) } +bool Foam::UPstream::setSharedMemoryCommunicators() +{ + return false; +} + + void Foam::UPstream::shutdown(int errNo) {} @@ -66,7 +72,7 @@ void Foam::UPstream::exit(int errNo) } -void Foam::UPstream::abort() +void Foam::UPstream::abort(int errNo) { // No MPI - just abort std::abort(); @@ -77,13 +83,29 @@ void Foam::UPstream::abort() void Foam::UPstream::allocateCommunicatorComponents ( - const label, - const label + const label parentIndex, + const label index +) +{} + + +void Foam::UPstream::dupCommunicatorComponents +( + const label parentIndex, + const label index +) +{} + +void Foam::UPstream::splitCommunicatorComponents +( + const label parentIndex, + const label index, + int colour ) {} -void Foam::UPstream::freeCommunicatorComponents(const label) +void Foam::UPstream::freeCommunicatorComponents(const label index) {} diff --git a/src/Pstream/mpi/PstreamGlobals.C b/src/Pstream/mpi/PstreamGlobals.C index 0da82d704363812d12a49c840dd31b8b8b69be70..e5383b722e0831fbb0c247de998666ef6e1d6ce0 100644 --- a/src/Pstream/mpi/PstreamGlobals.C +++ b/src/Pstream/mpi/PstreamGlobals.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2013-2015 OpenFOAM Foundation - Copyright (C) 2023 OpenCFD Ltd. + Copyright (C) 2023-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
@@ -35,22 +35,27 @@ Foam::DynamicList<MPI_Comm> Foam::PstreamGlobals::MPICommunicators_; Foam::DynamicList<MPI_Request> Foam::PstreamGlobals::outstandingRequests_; -// * * * * * * * * * * * * * * * Global Functions * * * * * * * * * * * * * // +// * * * * * * * * * * * * * * * Communicators * * * * * * * * * * * * * * * // -void Foam::PstreamGlobals::checkCommunicator -( - const label comm, - const label toProcNo -) +void Foam::PstreamGlobals::initCommunicator(const label index) { - if (comm < 0 || comm >= PstreamGlobals::MPICommunicators_.size()) + if (FOAM_UNLIKELY(index < 0 || index > MPICommunicators_.size())) { FatalErrorInFunction - << "toProcNo:" << toProcNo << " : illegal communicator " - << comm << nl - << "Communicator should be within range [0," - << PstreamGlobals::MPICommunicators_.size() - << ')' << abort(FatalError); + << "PstreamGlobals out of sync with UPstream data. Problem." + << Foam::abort(FatalError); + } + else if (index == MPICommunicators_.size()) + { + // Extend storage with null values + pendingMPIFree_.emplace_back(false); + MPICommunicators_.emplace_back(MPI_COMM_NULL); + } + else + { + // Init with null values + pendingMPIFree_[index] = false; + MPICommunicators_[index] = MPI_COMM_NULL; } } diff --git a/src/Pstream/mpi/PstreamGlobals.H b/src/Pstream/mpi/PstreamGlobals.H index c912c9876f217db227f439ed1bbe0b79a4c20ab2..48753956dbe44e773d32f7b648def8bf2744d034 100644 --- a/src/Pstream/mpi/PstreamGlobals.H +++ b/src/Pstream/mpi/PstreamGlobals.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2013-2015 OpenFOAM Foundation - Copyright (C) 2022-2023 OpenCFD Ltd. + Copyright (C) 2022-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -50,6 +50,8 @@ namespace Foam namespace PstreamGlobals { +// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * // + // Track if MPI_Comm_free is needed for communicator index in MPICommunicators_ extern DynamicList<bool> pendingMPIFree_; @@ -61,11 +63,27 @@ extern DynamicList<MPI_Comm> MPICommunicators_; extern DynamicList<MPI_Request> outstandingRequests_; -// * * * * * * * * * * * * * * * Global Functions * * * * * * * * * * * * * // +// * * * * * * * * * * * * * * * Communicators * * * * * * * * * * * * * * * // + +//- Initialize bookkeeping for MPI communicator index +void initCommunicator(const label index); + +//- Fatal if communicator is outside the allocated range +inline void checkCommunicator(int comm, int rank) +{ + if (FOAM_UNLIKELY(comm < 0 || comm >= MPICommunicators_.size())) + { + FatalErrorInFunction + << "rank:" << rank << " : illegal communicator " + << comm << nl + << "Communicator should be within range [0," + << MPICommunicators_.size() + << ')' << Foam::abort(FatalError); + } +} -//- Fatal if comm is outside the allocated range -void checkCommunicator(const label comm, const label toProcNo); +// * * * * * * * * * * * * * * * * Requests * * * * * * * * * * * * * * * * // //- Reset UPstream::Request to null and/or the index of the outstanding //- request to -1. 
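// A minimal usage sketch (assumed sequence, mirroring the allocation code
// later in this patch) of how initCommunicator() keeps pendingMPIFree_ and
// MPICommunicators_ in lock-step with a UPstream communicator index:
//
//     PstreamGlobals::initCommunicator(index);        // null-init the slot
//     PstreamGlobals::pendingMPIFree_[index] = true;  // we own the handle
//     MPI_Comm_dup
//     (
//         PstreamGlobals::MPICommunicators_[parentIndex],
//         &PstreamGlobals::MPICommunicators_[index]
//     );
//
// Only "reset in place" (index < size) or "append one" (index == size) are
// accepted; any other index means the bookkeeping has diverged and is fatal.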
diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C index 5314d8027aff1e82f0525f6ba06b3d3ea03cd8c2..91391c8442d246a6664ed7394f95132960fa744a 100644 --- a/src/Pstream/mpi/UPstream.C +++ b/src/Pstream/mpi/UPstream.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2016-2024 OpenCFD Ltd. + Copyright (C) 2016-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -26,14 +26,13 @@ License \*---------------------------------------------------------------------------*/ -#include "Pstream.H" -#include "PstreamReduceOps.H" +#include "UPstream.H" #include "PstreamGlobals.H" #include "profilingPstream.H" -#include "int.H" #include "UPstreamWrapping.H" #include "collatedFileOperation.H" +#include <algorithm> #include <cstdlib> #include <cstring> #include <memory> @@ -197,9 +196,8 @@ bool Foam::UPstream::initNull() bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) { - int numprocs = 0, myRank = 0; - int provided_thread_support = 0; int flag = 0; + int provided_thread_support = 0; MPI_Finalized(&flag); if (flag) @@ -231,19 +229,25 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) { Perr<< "UPstream::init : was already initialized\n"; } + + MPI_Query_thread(&provided_thread_support); } else { + // (SINGLE | FUNNELED | SERIALIZED | MULTIPLE) + int required_thread_support = + ( + needsThread + ? MPI_THREAD_MULTIPLE + : MPI_THREAD_SINGLE + ); + MPI_Init_thread ( &argc, &argv, - ( - needsThread - ? MPI_THREAD_MULTIPLE - : MPI_THREAD_SINGLE - ), - &provided_thread_support + required_thread_support, + &provided_thread_support ); ourMpi = true; @@ -251,26 +255,26 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) // Check argument list for local world label worldIndex = -1; - word world; for (int argi = 1; argi < argc; ++argi) { if (strcmp(argv[argi], "-world") == 0) { - worldIndex = argi++; - if (argi >= argc) + worldIndex = argi; + if (argi+1 >= argc) { FatalErrorInFunction - << "Missing world name to argument \"world\"" + << "Missing world name for option '-world'" << nl << Foam::abort(FatalError); } - world = argv[argi]; break; } } - // Filter 'world' option + // Extract world name and filter out '-world <name>' from argv list + word worldName; if (worldIndex != -1) { + worldName = argv[worldIndex+1]; for (label i = worldIndex+2; i < argc; i++) { argv[i-2] = argv[i]; @@ -278,14 +282,15 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) argc -= 2; } - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myRank); + int numProcs = 0, globalRanki = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &globalRanki); + MPI_Comm_size(MPI_COMM_WORLD, &numProcs); if (UPstream::debug) { Perr<< "UPstream::init :" << " thread-support : requested:" << needsThread - << " obtained:" + << " provided:" << ( (provided_thread_support == MPI_THREAD_SINGLE) ? "SINGLE" @@ -295,12 +300,12 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) ? 
"MULTIPLE" : "other" ) - << " procs:" << numprocs - << " rank:" << myRank - << " world:" << world << endl; + << " procs:" << numProcs + << " rank:" << globalRanki + << " world:" << worldName << endl; } - if (worldIndex == -1 && numprocs <= 1) + if (worldIndex == -1 && numProcs <= 1) { FatalErrorInFunction << "attempt to run parallel on 1 processor" @@ -308,46 +313,78 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) } // Initialise parallel structure - setParRun(numprocs, provided_thread_support == MPI_THREAD_MULTIPLE); + setParRun(numProcs, provided_thread_support == MPI_THREAD_MULTIPLE); if (worldIndex != -1) { + // Using local worlds. // During startup, so commWorld() == commGlobal() + const auto mpiGlobalComm = + PstreamGlobals::MPICommunicators_[UPstream::commGlobal()]; - wordList worlds(numprocs); - worlds[UPstream::myProcNo(UPstream::commGlobal())] = world; - Pstream::gatherList - ( - worlds, - UPstream::msgType(), - UPstream::commGlobal() - ); + // Gather the names of all worlds and determine unique names/indices. + // + // Minimize communication and use low-level MPI to relying on any + // OpenFOAM structures which not yet have been created - // Compact - if (UPstream::master(UPstream::commGlobal())) { - DynamicList<word> worldNames(numprocs); - worldIDs_.resize_nocopy(numprocs); + // Include a trailing nul character in the lengths + int stride = int(worldName.size()) + 1; + + // Use identical size on all ranks (avoids MPI_Allgatherv) + MPI_Allreduce + ( + MPI_IN_PLACE, + &stride, + 1, + MPI_INT, + MPI_MAX, + mpiGlobalComm + ); - forAll(worlds, proci) + // Gather as an extended C-string with embedded nul characters + auto buffer_storage = std::make_unique<char[]>(numProcs*stride); + char* allStrings = buffer_storage.get(); + + // Fill in local value, slot starts at (rank*stride) { - const word& world = worlds[proci]; + char* slot = (allStrings + (globalRanki*stride)); + std::fill_n(slot, stride, '\0'); + std::copy_n(worldName.data(), worldName.size(), slot); + } - worldIDs_[proci] = worldNames.find(world); + // Gather everything into the extended C-string + MPI_Allgather + ( + MPI_IN_PLACE, 0, MPI_CHAR, + allStrings, stride, MPI_CHAR, + mpiGlobalComm + ); + + worldIDs_.resize_nocopy(numProcs); + + // Transcribe and compact (unique world names) + DynamicList<word> uniqWorlds(numProcs); + + for (label proci = 0; proci < numProcs; ++proci) + { + // Create from C-string at slot=(rank*stride), + // relying on the embedded nul chars + word world(allStrings + (proci*stride)); + + worldIDs_[proci] = uniqWorlds.find(world); if (worldIDs_[proci] == -1) { - worldIDs_[proci] = worldNames.size(); - worldNames.push_back(world); + worldIDs_[proci] = uniqWorlds.size(); + uniqWorlds.push_back(std::move(world)); } } - allWorlds_.transfer(worldNames); + allWorlds_ = std::move(uniqWorlds); } - Pstream::broadcasts(UPstream::commGlobal(), allWorlds_, worldIDs_); - const label myWorldId = - worldIDs_[UPstream::myProcNo(UPstream::commGlobal())]; + const label myWorldId = worldIDs_[globalRanki]; DynamicList<label> subRanks; forAll(worldIDs_, proci) @@ -358,54 +395,107 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread) } } - // Allocate new communicator with comm-global as its parent - const label subComm = - UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks); + // New local-world communicator with comm-global as its parent. + // - the updated (const) world comm does not change after this. 
+ + UPstream::constWorldComm_ = + UPstream::newCommunicator(UPstream::commGlobal(), subRanks); + UPstream::worldComm = UPstream::constWorldComm_; + UPstream::warnComm = UPstream::constWorldComm_; - // Override worldComm - UPstream::worldComm = subComm; - // For testing: warn use of non-worldComm - UPstream::warnComm = UPstream::worldComm; + const int worldRanki = UPstream::myProcNo(UPstream::constWorldComm_); // MPI_COMM_SELF : the processor number wrt the new world communicator if (procIDs_[UPstream::commSelf()].size()) { - procIDs_[UPstream::commSelf()].front() = - UPstream::myProcNo(subComm); + procIDs_[UPstream::commSelf()].front() = worldRanki; + } + + // Name the old world communicator as '<openfoam:global>' + // - it is the inter-world communicator + if (MPI_COMM_NULL != mpiGlobalComm) + { + MPI_Comm_set_name(mpiGlobalComm, "<openfoam:global>"); + } + + const auto mpiWorldComm = + PstreamGlobals::MPICommunicators_[UPstream::constWorldComm_]; + + if (MPI_COMM_NULL != mpiWorldComm) + { + MPI_Comm_set_name(mpiWorldComm, ("world=" + worldName).data()); } if (UPstream::debug) { // Check - int subNumProcs, subRank; - MPI_Comm_size - ( - PstreamGlobals::MPICommunicators_[subComm], - &subNumProcs - ); - MPI_Comm_rank - ( - PstreamGlobals::MPICommunicators_[subComm], - &subRank - ); + int newRanki, newSize; + MPI_Comm_rank(mpiWorldComm, &newRanki); + MPI_Comm_size(mpiWorldComm, &newSize); - Perr<< "UPstream::init : in world:" << world - << " using local communicator:" << subComm - << " rank " << subRank - << " of " << subNumProcs - << endl; + Perr<< "UPstream::init : in world:" << worldName + << " using local communicator:" << constWorldComm_ + << " rank " << newRanki << " of " << newSize << endl; } // Override Pout prefix (move to setParRun?) - Pout.prefix() = '[' + world + '/' + name(myProcNo(subComm)) + "] "; + Pout.prefix() = '[' + worldName + '/' + Foam::name(worldRanki) + "] "; Perr.prefix() = Pout.prefix(); } else { // All processors use world 0 - worldIDs_.resize_nocopy(numprocs); + worldIDs_.resize_nocopy(numProcs); worldIDs_ = 0; + + const auto mpiWorldComm = + PstreamGlobals::MPICommunicators_[UPstream::constWorldComm_]; + + // Name the world communicator as '<openfoam:world>' + if (MPI_COMM_NULL != mpiWorldComm) + { + MPI_Comm_set_name(mpiWorldComm, "<openfoam:world>"); + } + } + + + // Define inter-node and intra-node communicators + if (UPstream::nodeCommsControl_ >= 4) + { + // Debugging: split with given number per node + setHostCommunicators(UPstream::nodeCommsControl_); + } + #ifndef MSMPI_VER /* Uncertain if this would work with MSMPI */ + else if (UPstream::nodeCommsControl_ == 2) + { + // Defined based on shared-memory hardware information + setSharedMemoryCommunicators(); + } + #endif + else + { + // Defined based on hostname, even if nominally disabled + setHostCommunicators(); + } + + + // Provide some names for these communicators + if (MPI_COMM_NULL != PstreamGlobals::MPICommunicators_[commInterNode_]) + { + MPI_Comm_set_name + ( + PstreamGlobals::MPICommunicators_[commInterNode_], + "<openfoam:inter-node>" + ); + } + if (MPI_COMM_NULL != PstreamGlobals::MPICommunicators_[commLocalNode_]) + { + MPI_Comm_set_name + ( + PstreamGlobals::MPICommunicators_[commLocalNode_], + "<openfoam:local-node>" + ); } attachOurBuffers(); @@ -455,7 +545,7 @@ void Foam::UPstream::shutdown(int errNo) if (errNo != 0) { - MPI_Abort(MPI_COMM_WORLD, errNo); + UPstream::abort(errNo); return; } @@ -515,9 +605,26 @@ void Foam::UPstream::exit(int errNo) } -void Foam::UPstream::abort() +void 
Foam::UPstream::abort(int errNo) { - MPI_Abort(MPI_COMM_WORLD, 1); + // TBD: only abort on our own communicator? + #if 0 + MPI_Comm abortComm = MPI_COMM_WORLD; + + const label index = UPstream::commGlobal(); + + if (index > 0 && index < PstreamGlobals::MPICommunicators_.size()) + { + abortComm = PstreamGlobals::MPICommunicators_[index]; + if (MPI_COMM_NULL == abortComm) + { + abortComm = MPI_COMM_WORLD; + } + } + MPI_Abort(abortComm, errNo); + #endif + + MPI_Abort(MPI_COMM_WORLD, errNo); } @@ -529,19 +636,9 @@ void Foam::UPstream::allocateCommunicatorComponents const label index ) { - if (index == PstreamGlobals::MPICommunicators_.size()) - { - // Extend storage with null values - PstreamGlobals::pendingMPIFree_.emplace_back(false); - PstreamGlobals::MPICommunicators_.emplace_back(MPI_COMM_NULL); - } - else if (index > PstreamGlobals::MPICommunicators_.size()) - { - FatalErrorInFunction - << "PstreamGlobals out of sync with UPstream data. Problem." - << Foam::exit(FatalError); - } + PstreamGlobals::initCommunicator(index); + int returnCode = MPI_SUCCESS; if (parentIndex == -1) { @@ -554,27 +651,19 @@ void Foam::UPstream::allocateCommunicatorComponents << UPstream::commGlobal() << Foam::exit(FatalError); } + auto& mpiNewComm = PstreamGlobals::MPICommunicators_[index]; - PstreamGlobals::pendingMPIFree_[index] = false; - PstreamGlobals::MPICommunicators_[index] = MPI_COMM_WORLD; + // PstreamGlobals::pendingMPIFree_[index] = false; + // PstreamGlobals::MPICommunicators_[index] = MPI_COMM_WORLD; - // TBD: MPI_Comm_dup(MPI_COMM_WORLD, ...); - // with pendingMPIFree_[index] = true - // Note: freeCommunicatorComponents() may need an update + PstreamGlobals::pendingMPIFree_[index] = true; + MPI_Comm_dup(MPI_COMM_WORLD, &mpiNewComm); - MPI_Comm_rank - ( - PstreamGlobals::MPICommunicators_[index], - &myProcNo_[index] - ); + MPI_Comm_rank(mpiNewComm, &myProcNo_[index]); // Set the number of ranks to the actual number - int numProcs; - MPI_Comm_size - ( - PstreamGlobals::MPICommunicators_[index], - &numProcs - ); + int numProcs = 0; + MPI_Comm_size(mpiNewComm, &numProcs); // identity [0-numProcs], as 'int' procIDs_[index].resize_nocopy(numProcs); @@ -589,21 +678,6 @@ void Foam::UPstream::allocateCommunicatorComponents MPI_Comm_rank(MPI_COMM_SELF, &myProcNo_[index]); - // Number of ranks is always 1 (self communicator) - - #ifdef FULLDEBUG - int numProcs; - MPI_Comm_size(MPI_COMM_SELF, &numProcs); - - if (numProcs != 1) - { - // Already finalized - this is an error - FatalErrorInFunction - << "MPI_COMM_SELF had " << numProcs << " != 1 ranks!\n" - << Foam::abort(FatalError); - } - #endif - // For MPI_COMM_SELF : the process IDs within the world communicator. // Uses MPI_COMM_WORLD in case called before UPstream::commGlobal() // was initialized @@ -613,17 +687,20 @@ void Foam::UPstream::allocateCommunicatorComponents } else { - // General sub-communicator + // General sub-communicator. + // Create based on the groupings predefined by procIDs_ + + const auto mpiParentComm = + PstreamGlobals::MPICommunicators_[parentIndex]; + + auto& mpiNewComm = + PstreamGlobals::MPICommunicators_[index]; PstreamGlobals::pendingMPIFree_[index] = true; // Starting from parent MPI_Group parent_group; - MPI_Comm_group - ( - PstreamGlobals::MPICommunicators_[parentIndex], - &parent_group - ); + MPI_Comm_group(mpiParentComm, &parent_group); MPI_Group active_group; MPI_Group_incl @@ -638,18 +715,18 @@ void Foam::UPstream::allocateCommunicatorComponents // ms-mpi (10.0 and others?) 
does not have MPI_Comm_create_group
         MPI_Comm_create
         (
-            PstreamGlobals::MPICommunicators_[parentIndex],
+            mpiParentComm,
             active_group,
-            &PstreamGlobals::MPICommunicators_[index]
+            &mpiNewComm
         );
 
         #else
 
         // Create new communicator for this group
         MPI_Comm_create_group
         (
-            PstreamGlobals::MPICommunicators_[parentIndex],
+            mpiParentComm,
             active_group,
             UPstream::msgType(),
-            &PstreamGlobals::MPICommunicators_[index]
+            &mpiNewComm
         );
 
         #endif
@@ -657,27 +734,34 @@
         MPI_Group_free(&parent_group);
         MPI_Group_free(&active_group);
 
-        if (PstreamGlobals::MPICommunicators_[index] == MPI_COMM_NULL)
+        if (MPI_COMM_NULL == mpiNewComm)
         {
-            // No communicator created
+            // This process is not involved in the new communication pattern
             myProcNo_[index] = -1;
             PstreamGlobals::pendingMPIFree_[index] = false;
+
+            // ~~~~~~~~~
+            // IMPORTANT
+            // ~~~~~~~~~
+            // Always retain knowledge of the inter-node leaders,
+            // even if this process is not on that communicator.
+            // This will help when constructing topology-aware communication.
+
+            if (index != commInterNode_)
+            {
+                procIDs_[index].clear();
+            }
         }
         else
        {
-            if
-            (
-                MPI_Comm_rank
-                (
-                    PstreamGlobals::MPICommunicators_[index],
-                    &myProcNo_[index]
-                )
-            )
+            returnCode = MPI_Comm_rank(mpiNewComm, &myProcNo_[index]);
+
+            if (FOAM_UNLIKELY(MPI_SUCCESS != returnCode))
             {
                 FatalErrorInFunction
                     << "Problem :"
                     << " when allocating communicator at " << index
-                    << " from ranks " << procIDs_[index]
+                    << " from ranks " << flatOutput(procIDs_[index])
                    << " of parent " << parentIndex
                     << " cannot find my own rank"
                     << Foam::exit(FatalError);
@@ -687,6 +771,99 @@
 }
 
 
+void Foam::UPstream::dupCommunicatorComponents
+(
+    const label parentIndex,
+    const label index
+)
+{
+    PstreamGlobals::initCommunicator(index);
+
+    PstreamGlobals::pendingMPIFree_[index] = true;
+    MPI_Comm_dup
+    (
+        PstreamGlobals::MPICommunicators_[parentIndex],
+        &PstreamGlobals::MPICommunicators_[index]
+    );
+
+    myProcNo_[index] = myProcNo_[parentIndex];
+    procIDs_[index] = procIDs_[parentIndex];
+}
+
+
+void Foam::UPstream::splitCommunicatorComponents
+(
+    const label parentIndex,
+    const label index,
+    int colour
+)
+{
+    PstreamGlobals::initCommunicator(index);
+
+    // ------------------------------------------------------------------------
+    // Create sub-communicator according to its colouring
+    // => MPI_Comm_split().
+    // Since other parts of OpenFOAM may still need a view of the siblings:
+    // => MPI_Group_translate_ranks().
+    //
+    // The MPI_Group_translate_ranks() step can be replaced with an
+    // MPI_Allgather() of the involved parent ranks (since we always maintain
+    // the relative rank order when splitting).
+    //
+    // Since MPI_Comm_split() already does an MPI_Allgather() internally
+    // to pick out the colours (and do any sorting), we can simply do
+    // the same thing:
+    //
+    // Do the Allgather first and pick out identical colours to define the
+    // group and create a communicator based on that.
+    //
+    // This is no worse than the Allgather communication overhead of using
+    // MPI_Comm_split() and saves the extra translate_ranks step.
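    //
    // A worked example (hypothetical values) of the colour -> group mapping:
    //
    //     parentSize = 6, gathered colours = (0 1 0 1 -1 0)
    //
    //     colour 0 -> parent ranks (0 2 5)   // relative order preserved
    //     colour 1 -> parent ranks (1 3)
    //     rank 4 (colour < 0) lands on no sub-communicator
    //
    // The copy_if over the gathered colours thus reproduces what
    // MPI_Comm_split() computes internally, while leaving the sibling
    // ranks visible in procIDs_[index].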
+ // ------------------------------------------------------------------------ + + const auto mpiParentComm = PstreamGlobals::MPICommunicators_[parentIndex]; + + int parentRank = 0; + int parentSize = 0; + MPI_Comm_rank(mpiParentComm, &parentRank); + MPI_Comm_size(mpiParentComm, &parentSize); + + // Initialize, first marking the 'procIDs_' with the colours + auto& procIds = procIDs_[index]; + + myProcNo_[index] = -1; + procIds.resize_nocopy(parentSize); + procIds[parentRank] = colour; + + MPI_Allgather + ( + MPI_IN_PLACE, 0, MPI_INT, + procIds.data(), 1, MPI_INT, + mpiParentComm + ); + + if (colour < 0) + { + procIds.clear(); + } + else + { + auto last = + std::copy_if + ( + procIds.cbegin(), + procIds.cend(), + procIds.begin(), + [=](int c){ return (c == colour); } + ); + + procIds.resize(std::distance(procIds.begin(), last)); + } + + allocateCommunicatorComponents(parentIndex, index); +} + + void Foam::UPstream::freeCommunicatorComponents(const label index) { if (UPstream::debug) @@ -717,6 +894,164 @@ void Foam::UPstream::freeCommunicatorComponents(const label index) } +bool Foam::UPstream::setSharedMemoryCommunicators() +{ + // Uses the world communicator (not global communicator) + + // Skip if non-parallel + if (!UPstream::parRun()) + { + numNodes_ = 1; + return false; + } + + if (FOAM_UNLIKELY(commInterNode_ >= 0 || commLocalNode_ >= 0)) + { + // Failed sanity check + FatalErrorInFunction + << "Node communicator(s) already created!" << endl + << Foam::abort(FatalError); + return false; + } + + commInterNode_ = getAvailableCommIndex(constWorldComm_); + commLocalNode_ = getAvailableCommIndex(constWorldComm_); + + PstreamGlobals::initCommunicator(commInterNode_); + PstreamGlobals::initCommunicator(commLocalNode_); + + // Overwritten later + myProcNo_[commInterNode_] = UPstream::masterNo(); + myProcNo_[commLocalNode_] = UPstream::masterNo(); + + // Sorted order, purely cosmetic + if (commLocalNode_ < commInterNode_) + { + std::swap(commLocalNode_, commInterNode_); + } + + if (debug) + { + Perr<< "Allocating node communicators " + << commInterNode_ << ", " << commLocalNode_ << nl + << " parent : " << constWorldComm_ << nl + << endl; + } + + + const auto mpiParentComm = + PstreamGlobals::MPICommunicators_[constWorldComm_]; + + auto& mpiLocalNode = + PstreamGlobals::MPICommunicators_[commLocalNode_]; + + int parentRank = 0; + int parentSize = 0; + MPI_Comm_rank(mpiParentComm, &parentRank); + MPI_Comm_size(mpiParentComm, &parentSize); + + List<int> nodeLeaders(parentSize); + nodeLeaders = -1; + + MPI_Comm_split_type + ( + mpiParentComm, + MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, + &mpiLocalNode + ); + + if (FOAM_UNLIKELY(MPI_COMM_NULL == mpiLocalNode)) + { + // This process is not involved in an intra-host communication? + // - should never happen! 
+ + const label index = commLocalNode_; + PstreamGlobals::pendingMPIFree_[index] = false; + + myProcNo_[index] = -1; + procIDs_[index].clear(); + + FatalErrorInFunction + << "Comm_split_type(shared) failed\n" + << Foam::abort(FatalError); + } + else + { + // This process is involved in intra-host communication + const label index = commLocalNode_; + auto& procIds = procIDs_[index]; + + PstreamGlobals::pendingMPIFree_[index] = true; + + int localRank = 0; + int localSize = 0; + MPI_Comm_rank(mpiLocalNode, &localRank); + MPI_Comm_size(mpiLocalNode, &localSize); + + if (localRank == 0) + { + // This process is a host leader - mark its position + nodeLeaders[parentRank] = parentRank; + } + + procIds.resize_nocopy(localSize); + procIds[localRank] = UPstream::myProcNo(UPstream::constWorldComm_); + // OR: procIds[localRank] = parentRank; + + // Get all of the siblings (within the node) + MPI_Allgather + ( + MPI_IN_PLACE, 0, MPI_INT, + procIds.data(), 1, MPI_INT, + mpiLocalNode + ); + } + + + // Get all of the host-leader information and find who they are. + { + auto& procIds = procIDs_[commInterNode_]; + + MPI_Allgather + ( + MPI_IN_PLACE, 0, MPI_INT, + nodeLeaders.data(), 1, MPI_INT, + mpiParentComm + ); + + // Capture the size (number of nodes) before doing anything further + numNodes_ = std::count_if + ( + nodeLeaders.cbegin(), + nodeLeaders.cend(), + [](int rank){ return (rank >= 0); } + ); + + // ~~~~~~~~~ + // IMPORTANT + // ~~~~~~~~~ + // Always retain knowledge of the inter-node leaders, + // even if this process is not on that communicator. + // This will help when constructing topology-aware communication. + + procIds.resize_nocopy(numNodes_); + + std::copy_if + ( + nodeLeaders.cbegin(), + nodeLeaders.cend(), + procIds.begin(), + [](int rank){ return (rank >= 0); } + ); + } + + // From master to host-leader. Ranks between hosts. 
+ allocateCommunicatorComponents(UPstream::worldComm, commInterNode_); + + return true; +} + + void Foam::UPstream::barrier(const label communicator, UPstream::Request* req) { // No-op for non-parallel or not on communicator diff --git a/src/functionObjects/field/AMIWeights/AMIWeights.C b/src/functionObjects/field/AMIWeights/AMIWeights.C index 004853d8f517f3f0ce5fb2414e0f0210b945133b..7ec095d55dc73c200ca2fe31597826e1c4e97b6c 100644 --- a/src/functionObjects/field/AMIWeights/AMIWeights.C +++ b/src/functionObjects/field/AMIWeights/AMIWeights.C @@ -238,8 +238,7 @@ void Foam::functionObjects::AMIWeights::writeWeightField ); // Collect field - scalarField mergedWeights; - globalFaces().gather(weightSum, mergedWeights); + scalarField mergedWeights = globalFaces().gather(weightSum); const bool isACMI = isA<cyclicACMIPolyPatch>(cpp); @@ -248,7 +247,7 @@ void Foam::functionObjects::AMIWeights::writeWeightField { const cyclicACMIPolyPatch& pp = refCast<const cyclicACMIPolyPatch>(cpp); - globalFaces().gather(pp.mask(), mergedMask); + mergedMask = globalFaces().gather(pp.mask()); } if (Pstream::master()) diff --git a/src/meshTools/multiWorld/multiWorldConnectionsObject.C b/src/meshTools/multiWorld/multiWorldConnectionsObject.C index caa05034311033daf93bf28f3ab78a446d49f24e..d10def446416d4ffe80b5f986926c14be6ba9f41 100644 --- a/src/meshTools/multiWorld/multiWorldConnectionsObject.C +++ b/src/meshTools/multiWorld/multiWorldConnectionsObject.C @@ -162,7 +162,7 @@ Foam::label Foam::multiWorldConnections::createCommunicator(const edge& worlds) } // Allocate new communicator with global world - comm = UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks); + comm = UPstream::newCommunicator(UPstream::commGlobal(), subRanks); if (debug & 2) { diff --git a/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C b/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C index bdb63f152e13184d6d624babf7be5be013612aa8..888e4dbf49c9a74f3b31694e56fba2d7639b86bd 100644 --- a/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C +++ b/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2016 OpenFOAM Foundation - Copyright (C) 2015-2024 OpenCFD Ltd. + Copyright (C) 2015-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
@@ -2441,12 +2441,7 @@ void Foam::distributedTriSurfaceMesh::independentlyDistributedBbs // // Gather all borderTris // //globalIndex globalBorderTris(borderTris.size()); // //pointField globalBorderCentres(allCentres, borderTris); -// //globalBorderTris.gather -// //( -// // UPstream::worldComm, -// // UPstream::allProcs(UPstream::worldComm), -// // globalBorderCentres -// //); +// //globalBorderTris.gatherInplace(globalBorderCentres); // pointField globalBorderCentres(allCentres); // map.distribute(globalBorderCentres); // @@ -2586,12 +2581,7 @@ void Foam::distributedTriSurfaceMesh::independentlyDistributedBbs { allCentres[trii] = s[trii].centre(s.points()); } - globalTris().gather - ( - UPstream::worldComm, - UPstream::allProcs(UPstream::worldComm), - allCentres - ); + globalTris().gatherInplace(allCentres); } // Determine local decomposition @@ -2635,13 +2625,8 @@ void Foam::distributedTriSurfaceMesh::independentlyDistributedBbs } // Scatter back to processors - globalTris().scatter - ( - UPstream::worldComm, - UPstream::allProcs(UPstream::worldComm), - allDistribution, - distribution - ); + globalTris().scatter(allDistribution, distribution); + if (debug) { Pout<< "distributedTriSurfaceMesh::" diff --git a/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C b/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C index fa4473ee4a6486f2e523963d077aa83d9b602051..d92f86fec018e5615ce1189b3ef465869e386324 100644 --- a/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C +++ b/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2015-2023 OpenCFD Ltd. + Copyright (C) 2015-2025 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -274,12 +274,9 @@ scalar surfaceNoise::surfaceAverage if (Pstream::parRun()) { // Collect the surface data so that we can output the surfaces - scalarField allData; - - procFaceAddr.gather + scalarField allData = procFaceAddr.gather ( data, - allData, UPstream::msgType(), commType_, UPstream::worldComm @@ -343,12 +340,9 @@ scalar surfaceNoise::writeSurfaceData if (Pstream::parRun()) { // Collect the surface data so that we can output the surfaces - scalarField allData; - - procFaceAddr.gather + scalarField allData = procFaceAddr.gather ( data, - allData, UPstream::msgType(), commType_, UPstream::worldComm
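// A minimal usage sketch of the value-returning gather adopted above
// (assumed context: 'procFaceAddr' is the globalIndex and 'data' the
// rank-local field, as in the surfaceNoise code):
//
//     scalarField allData = procFaceAddr.gather
//     (
//         data,
//         UPstream::msgType(),
//         commType_,
//         UPstream::worldComm
//     );
//
//     if (Pstream::master())
//     {
//         // allData holds every rank's values in processor order;
//         // it is zero-sized on all other ranks
//     }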