diff --git a/applications/test/parallel-comm2/Make/options b/applications/test/parallel-comm2/Make/options index 18e6fe47afacb902cddccf82632772447704fd88..b3a33caee1ef96d0e2c13a668b2c0a1aa142e41c 100644 --- a/applications/test/parallel-comm2/Make/options +++ b/applications/test/parallel-comm2/Make/options @@ -1,2 +1,4 @@ -/* EXE_INC = */ -/* EXE_LIBS = */ +include $(GENERAL_RULES)/mpi-rules + +EXE_INC = $(PFLAGS) $(PINC) $(c++LESSWARN) +EXE_LIBS = $(PLIBS) diff --git a/applications/test/parallel-comm2/Test-parallel-comm2.C b/applications/test/parallel-comm2/Test-parallel-comm2.C index 934a480c20eb7fd9d541342f22d4ab6baaf8a075..a397c9908eb0d7e0cac16b6b7c64590623c0ec7d 100644 --- a/applications/test/parallel-comm2/Test-parallel-comm2.C +++ b/applications/test/parallel-comm2/Test-parallel-comm2.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -39,6 +39,7 @@ Description #include "Tuple2.H" #include "IOstreams.H" #include "PstreamReduceOps.H" +#include <mpi.h> using namespace Foam; @@ -62,6 +63,8 @@ int main(int argc, char *argv[]) argList::noBanner(); argList::noCheckProcessorDirectories(); argList::addBoolOption("verbose", "Set debug level"); + argList::addBoolOption("comm-split", "Test simple comm split"); + argList::addBoolOption("host-comm", "Test DIY host-comm split"); // Capture manually. We need values before proper startup int nVerbose = 0; @@ -139,6 +142,91 @@ int main(int argc, char *argv[]) Pout<< endl; #endif + if (Pstream::parRun() && args.found("comm-split")) + { + MPI_Comm hostComm; + MPI_Comm_split_type + ( + MPI_COMM_WORLD, + MPI_COMM_TYPE_SHARED, // OMPI_COMM_TYPE_NODE + 0, MPI_INFO_NULL, &hostComm + ); + + int host_nprocs = 0; + int host_rank = 0; + MPI_Comm_size(hostComm, &host_nprocs); + MPI_Comm_rank(hostComm, &host_rank); + + Pout<< nl << "Host comm with " + << host_rank << " / " << host_nprocs + << " (using MPI_Comm_split_type)" << endl; + + MPI_Comm_free(&hostComm); + } + if (Pstream::parRun() && args.found("host-comm")) + { + // Host communicator, based on the current worldComm + // Use hostname + // Lowest rank per hostname is the IO rank + + label numprocs = UPstream::nProcs(UPstream::globalComm); + + stringList hosts(numprocs); + hosts[Pstream::myProcNo(UPstream::globalComm)] = hostName(); + + labelList hostIDs_; + + // Compact + if (Pstream::master(UPstream::globalComm)) + { + DynamicList<word> hostNames(numprocs); + hostIDs_.resize_nocopy(numprocs); + + forAll(hosts, proci) + { + const word& host = hosts[proci]; + + hostIDs_[proci] = hostNames.find(host); + + if (hostIDs_[proci] == -1) + { + hostIDs_[proci] = hostNames.size(); + hostNames.push_back(host); + } + } + } + + Pstream::broadcasts(UPstream::globalComm, hostIDs_); + + const label myHostId = + hostIDs_[Pstream::myProcNo(UPstream::globalComm)]; + + DynamicList<label> subRanks; + forAll(hostIDs_, proci) + { + if (hostIDs_[proci] == myHostId) + { + subRanks.push_back(proci); + } + } + + // Allocate new communicator with globalComm as its parent + const label hostComm = + UPstream::allocateCommunicator + ( + UPstream::globalComm, // parent + subRanks, + true + ); + + Pout<< nl << "Host comm with " + << UPstream::myProcNo(hostComm) + << " / " << UPstream::nProcs(hostComm) + << nl; + + UPstream::freeCommunicator(hostComm, 
true); + } + Info<< "\nEnd\n" << endl; return 0; diff --git a/applications/test/parallel-comm3a/Make/files b/applications/test/parallel-comm3a/Make/files new file mode 100644 index 0000000000000000000000000000000000000000..31e265a30c10c91309b8fcede503165af346237c --- /dev/null +++ b/applications/test/parallel-comm3a/Make/files @@ -0,0 +1,3 @@ +Test-parallel-comm3a.C + +EXE = $(FOAM_USER_APPBIN)/Test-parallel-comm3a diff --git a/applications/test/parallel-comm3a/Make/options b/applications/test/parallel-comm3a/Make/options new file mode 100644 index 0000000000000000000000000000000000000000..b3a33caee1ef96d0e2c13a668b2c0a1aa142e41c --- /dev/null +++ b/applications/test/parallel-comm3a/Make/options @@ -0,0 +1,4 @@ +include $(GENERAL_RULES)/mpi-rules + +EXE_INC = $(PFLAGS) $(PINC) $(c++LESSWARN) +EXE_LIBS = $(PLIBS) diff --git a/applications/test/parallel-comm3a/Test-parallel-comm3a.C b/applications/test/parallel-comm3a/Test-parallel-comm3a.C new file mode 100644 index 0000000000000000000000000000000000000000..98e1577c7342f7b92cfa0a111bd42fe3b6a225d0 --- /dev/null +++ b/applications/test/parallel-comm3a/Test-parallel-comm3a.C @@ -0,0 +1,249 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | www.openfoam.com + \\/ M anipulation | +------------------------------------------------------------------------------- + Copyright (C) 2023 OpenCFD Ltd. +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. 
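A note on the two splitting variants exercised by Test-parallel-comm2 above: -comm-split lets MPI itself group the ranks of each shared-memory node, whereas -host-comm rebuilds the same grouping by hand from gathered hostnames, with the lowest rank per host intended as the IO rank. A minimal standalone sketch of the MPI route, using only standard MPI-3 calls (an illustration, not part of the patch):

    MPI_Comm nodeComm;
    MPI_Comm_split_type
    (
        MPI_COMM_WORLD,
        MPI_COMM_TYPE_SHARED,   // ranks sharing a memory domain (node)
        0,                      // key 0: keep the world-rank ordering
        MPI_INFO_NULL,
        &nodeComm
    );

    int nodeRank = 0;
    MPI_Comm_rank(nodeComm, &nodeRank);
    const bool isNodeMaster = (nodeRank == 0);  // lowest rank on this host

    MPI_Comm_free(&nodeComm);
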
+ +Application + Test-parallel-comm3a + +Description + Basic communicator tests + +\*---------------------------------------------------------------------------*/ + +#include "argList.H" +#include "Time.H" +#include "IPstream.H" +#include "OPstream.H" +#include "Pair.H" +#include "Tuple2.H" +#include "IOstreams.H" +#include "StringStream.H" +#include "Random.H" +#include <mpi.h> + +using namespace Foam; + + +void printRequests(const UList<MPI_Request>& requests) +{ + OStringStream buf; + + buf << "request: " << requests.size() << '('; + + for (const auto& req : requests) + { + if (req == MPI_REQUEST_NULL) + { + buf << " null"; + } + else + { + buf << " " << Foam::name(req); + } + } + + buf << " )"; + Pout << buf.str().c_str() << endl; +} + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +int main(int argc, char *argv[]) +{ + argList::noBanner(); + argList::noCheckProcessorDirectories(); + + #include "setRootCase.H" + + if (!Pstream::parRun()) + { + Info<< "\nWarning: not parallel - skipping further tests\n" << endl; + return 0; + } + + const int tag = (UPstream::msgType() + 314159); + // const label comm = UPstream::worldComm; + + Random rnd(20*UPstream::myProcNo()); + + Map<DynamicList<char>> sendBufs; + Map<DynamicList<char>> recvBufs; + + DynamicList<MPI_Request> sendRequests(10); + DynamicList<MPI_Request> recvRequests(10); + + + if (!Pstream::master()) + { + // Send some random length to master + + const int toProci = UPstream::masterNo(); + + label len = rnd.position<label>(10, 20); + if (UPstream::myProcNo() && (UPstream::myProcNo() % 3) == 0) len = 0; + + // Has data to send + if (len) + { + auto& buf = sendBufs(toProci); + buf.resize(len, 'x'); + + MPI_Issend + ( + buf.cdata_bytes(), + buf.size_bytes(), + MPI_BYTE, + toProci, + tag, + MPI_COMM_WORLD, + &sendRequests.emplace_back() + ); + } + } + + + // Probe and receive + + MPI_Request barrierReq; + + for (bool barrier_active = false, done = false; !done; /*nil*/) + { + int flag = 0; + MPI_Status status; + + MPI_Iprobe + ( + MPI_ANY_SOURCE, + tag, + MPI_COMM_WORLD, + &flag, + &status + ); + + if (flag) + { + // Message found, receive into dest buffer location + const label fromProci = status.MPI_SOURCE; + + int count = 0; + MPI_Get_count(&status, MPI_BYTE, &count); + + auto& buf = recvBufs(fromProci); + buf.resize_nocopy(count); + + MPI_Irecv + ( + buf.data_bytes(), + buf.size_bytes(), + MPI_BYTE, + fromProci, + tag, + MPI_COMM_WORLD, + &recvRequests.emplace_back() + ); + } + + if (barrier_active) + { + // Test barrier for completion + // - all received, or nothing to receive + MPI_Test(&barrierReq, &flag, MPI_STATUS_IGNORE); + + if (flag) + { + done = true; + } + } + else + { + // Check if all sends have arrived + MPI_Testall + ( + sendRequests.size(), sendRequests.data(), + &flag, MPI_STATUSES_IGNORE + ); + + if (flag) + { + MPI_Ibarrier(MPI_COMM_WORLD, &barrierReq); + barrier_active = true; + } + } + } + + if (recvRequests.empty()) + { + Pout << "No receive requests" << endl; + } + else + { + printRequests(recvRequests); + } + + + // Either MPI_Waitall, or MPI_Waitany... + + label loop = 0; + for (bool dispatched = recvRequests.empty(); !dispatched; /*nil*/) + { + int index = 0; + MPI_Waitany + ( + recvRequests.size(), + recvRequests.data(), + &index, + MPI_STATUS_IGNORE + ); + + if (index == MPI_UNDEFINED) + { + //Pout<< "Testany is " << (flag ? 
"done" : "waiting") << endl; + Pout<< "Waitany (loop:" << loop << ") : done" << endl; + dispatched = true; + } + else + { + Pout<< "Waitany (loop:" + << loop << ") " + << index << " of " << recvRequests.size() << endl; + + printRequests(recvRequests); + } + + ++loop; + } + + // Not needed: all tested... + // MPI_Waitall(recvRequests.size(), recvRequests.data(), MPI_STATUSES_IGNORE); + + MPI_Barrier(MPI_COMM_WORLD); + + Info<< "\nEnd\n" << endl; + + return 0; +} + + +// ************************************************************************* // diff --git a/applications/test/parallel-comm3b/Make/files b/applications/test/parallel-comm3b/Make/files new file mode 100644 index 0000000000000000000000000000000000000000..22f4d813db93b3de0f65572c37193ffc2049cc94 --- /dev/null +++ b/applications/test/parallel-comm3b/Make/files @@ -0,0 +1,3 @@ +Test-parallel-comm3b.C + +EXE = $(FOAM_USER_APPBIN)/Test-parallel-comm3b diff --git a/applications/test/parallel-comm3b/Make/options b/applications/test/parallel-comm3b/Make/options new file mode 100644 index 0000000000000000000000000000000000000000..e0747ecc63e313bc45416a72ecb1b80de651432c --- /dev/null +++ b/applications/test/parallel-comm3b/Make/options @@ -0,0 +1,2 @@ +/* EXE_INC */ +/* EXE_LIBS = */ diff --git a/applications/test/parallel-comm3b/Test-parallel-comm3b.C b/applications/test/parallel-comm3b/Test-parallel-comm3b.C new file mode 100644 index 0000000000000000000000000000000000000000..0bbcc5da5e1f381da4e6d9fdcd18907a1bde8787 --- /dev/null +++ b/applications/test/parallel-comm3b/Test-parallel-comm3b.C @@ -0,0 +1,228 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | www.openfoam.com + \\/ M anipulation | +------------------------------------------------------------------------------- + Copyright (C) 2023 OpenCFD Ltd. +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. 
+ +Application + Test-parallel-comm3b + +Description + Basic communicator tests + +\*---------------------------------------------------------------------------*/ + +#include "argList.H" +#include "Time.H" +#include "IPstream.H" +#include "OPstream.H" +#include "Pair.H" +#include "Tuple2.H" +#include "IOstreams.H" +#include "StringStream.H" +#include "Random.H" + +using namespace Foam; + + +void printRequests(const UList<UPstream::Request>& requests) +{ + OStringStream buf; + + buf << "request: " << requests.size() << '('; + + for (const auto& req : requests) + { + if (req.good()) + { + buf << " " << Foam::name(req.pointer()); + } + else + { + buf << " null"; + } + } + + buf << " )"; + Pout << buf.str().c_str() << endl; +} + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +int main(int argc, char *argv[]) +{ + argList::noBanner(); + argList::noCheckProcessorDirectories(); + + #include "setRootCase.H" + + if (!Pstream::parRun()) + { + Info<< "\nWarning: not parallel - skipping further tests\n" << endl; + return 0; + } + + const int tag = (UPstream::msgType() + 314159); + const label comm = UPstream::worldComm; + + Random rnd(20*UPstream::myProcNo()); + + Map<DynamicList<char>> sendBufs; + Map<DynamicList<char>> recvBufs; + + DynamicList<UPstream::Request> sendRequests(10); + DynamicList<UPstream::Request> recvRequests(10); + + // Map request indices to procs + Map<label> recvFromProc(20); + + if (!Pstream::master()) + { + // Send some random length to master + + const int toProci = UPstream::masterNo(); + + label len = rnd.position<label>(10, 20); + if (UPstream::myProcNo() && (UPstream::myProcNo() % 3) == 0) len = 0; + + // Has data to send + if (len) + { + auto& buf = sendBufs(toProci); + buf.resize(len, 'x'); + + UOPstream::write + ( + sendRequests.emplace_back(), + UPstream::masterNo(), + sendBufs[toProci], + tag, + comm, + UPstream::sendModes::sync + ); + } + } + + + // Probe and receive + + UPstream::Request barrierReq; + + for (bool barrier_active = false, done = false; !done; /*nil*/) + { + std::pair<int, int> probed = + UPstream::probeMessage + ( + UPstream::commsTypes::nonBlocking, + -1, // ANY_SOURCE + tag, + comm + ); + + if (probed.second > 0) + { + // Message found and had size: receive it + + const label proci = probed.first; + const label count = probed.second; + + recvBufs(proci).resize_nocopy(count); + recvFromProc(recvRequests.size()) = proci; + + // Non-blocking read + UIPstream::read + ( + recvRequests.emplace_back(), + proci, + recvBufs[proci], + tag, + comm + ); + } + + if (barrier_active) + { + // Test barrier for completion + if (UPstream::finishedRequest(barrierReq)) + { + done = true; + } + } + else + { + // Check if all sends have arrived + if (UPstream::finishedRequests(sendRequests)) + { + UPstream::barrier(comm, &barrierReq); + barrier_active = true; + } + } + } + + if (recvRequests.empty()) + { + Pout << "No receive requests" << endl; + } + else + { + printRequests(recvRequests); + } + + + // Either MPI_Waitall, or MPI_Waitany... + + label loop = 0; + for (bool dispatched = recvRequests.empty(); !dispatched; /*nil*/) + { + label index = UPstream::waitAnyRequest(recvRequests); + + if (index < 0) + { + Pout<< "Waitany (loop:" << loop << ") : done" << endl; + dispatched = true; + } + else + { + Pout<< "Waitany (loop:" + << loop << ") " + << index << " of " << recvRequests.size() + << " from proc:" << recvFromProc.lookup(index, -1) + << endl; + + printRequests(recvRequests); + } + ++loop; + } + + // Not needed: all tested... 
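// (Each receive that waitAnyRequest completes above is reset to the
// null request, and a negative return index - the analogue of
// MPI_UNDEFINED - signals that none remain active; that is what ends
// the dispatch loop. The bulk alternative would be a single blocking
// UPstream::waitRequests(recvRequests) call, as used in
// Test-parallel-nbx2 further below. For reference, the wrapper calls
// used in this test correspond roughly to the raw MPI calls of
// Test-parallel-comm3a:
//
//   MPI_Issend(..., &req)           -> UOPstream::write(req, ...,
//                                          UPstream::sendModes::sync)
//   MPI_Iprobe(MPI_ANY_SOURCE, ..)  -> UPstream::probeMessage(...)
//   MPI_Irecv(..., &req)            -> UIPstream::read(req, ...)
//   MPI_Ibarrier + MPI_Test         -> UPstream::barrier(comm, &req)
//                                      + UPstream::finishedRequest(req)
//   MPI_Waitany                     -> UPstream::waitAnyRequest(...)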
+ // UPstream::waitRequest(recvRequests); + + UPstream::barrier(UPstream::worldComm); + + Info<< "\nEnd\n" << endl; + + return 0; +} + + +// ************************************************************************* // diff --git a/applications/test/parallel-nbx2/Make/files b/applications/test/parallel-nbx2/Make/files new file mode 100644 index 0000000000000000000000000000000000000000..262936c21f995b2224a392097e2a2333a6446784 --- /dev/null +++ b/applications/test/parallel-nbx2/Make/files @@ -0,0 +1,3 @@ +Test-parallel-nbx2.C + +EXE = $(FOAM_USER_APPBIN)/Test-parallel-nbx2 diff --git a/applications/test/parallel-nbx2/Make/options b/applications/test/parallel-nbx2/Make/options new file mode 100644 index 0000000000000000000000000000000000000000..18e6fe47afacb902cddccf82632772447704fd88 --- /dev/null +++ b/applications/test/parallel-nbx2/Make/options @@ -0,0 +1,2 @@ +/* EXE_INC = */ +/* EXE_LIBS = */ diff --git a/applications/test/parallel-nbx2/Test-parallel-nbx2.C b/applications/test/parallel-nbx2/Test-parallel-nbx2.C new file mode 100644 index 0000000000000000000000000000000000000000..27bb661e0d2c7fd8aa44b68b292f3c1b5760f308 --- /dev/null +++ b/applications/test/parallel-nbx2/Test-parallel-nbx2.C @@ -0,0 +1,227 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | www.openfoam.com + \\/ M anipulation | +------------------------------------------------------------------------------- + Copyright (C) 2023 OpenCFD Ltd. +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. + +Application + Test-parallel-nbx2 + +Description + Test for send/receive data + +\*---------------------------------------------------------------------------*/ + +#include "List.H" +#include "argList.H" +#include "Time.H" +#include "IPstream.H" +#include "OPstream.H" +#include "IOstreams.H" +#include "Random.H" + +using namespace Foam; + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +int main(int argc, char *argv[]) +{ + argList::noCheckProcessorDirectories(); + argList::addBoolOption("non-blocking", "Test with non-blocking receives"); + + #include "setRootCase.H" + + const bool optNonBlocking = args.found("non-blocking"); + + if (!Pstream::parRun()) + { + Info<< "\nWarning: not parallel - skipping further tests\n" << endl; + return 0; + } + + Info<< "\nTesting with non-blocking receives: " << optNonBlocking << nl; + + + const int tag = (UPstream::msgType() + 314159); + const label comm = UPstream::worldComm; + + Random rnd(20*UPstream::myProcNo()); + + // Looks a bit like a DIY PstreamBuffers... 
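// The send/probe/barrier structure below is the NBX (nonblocking
// consensus exchange) pattern used throughout this patch, in outline:
//   1. post a nonblocking *synchronous* send for every non-empty message
//   2. loop: probe for incoming messages, receiving any that appear
//   3. once the local sends have all completed, start a nonblocking barrier
//   4. keep probing until that barrier completes
// The synchronous send mode is what makes this safe: such a send only
// completes once its matching receive has started, so when every rank
// has entered the barrier no message can still be unmatched.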
+ Map<DynamicList<char>> sendBufs; + Map<DynamicList<char>> recvBufs; + + DynamicList<UPstream::Request> sendRequests(10); + DynamicList<UPstream::Request> recvRequests(10); + + if (!Pstream::master()) + { + // Send some random length to master + + const int toProci = UPstream::masterNo(); + + label len = rnd.position<label>(10, 20); + if (UPstream::myProcNo() && (UPstream::myProcNo() % 3) == 0) len = 0; + + scalarField fld(len, scalar(UPstream::myProcNo())); + + // Format for sending + if (!fld.empty()) + { + auto& buf = sendBufs(toProci); + UOPstream os(buf); + os << fld; + } + + // Start nonblocking synchronous send to process dest + + if (sendBufs.found(toProci) && !sendBufs[toProci].empty()) + { + Pout<< "send: [" << sendBufs[toProci].size() << " bytes] " + << flatOutput(fld) << endl; + + // Has data to send + UOPstream::write + ( + sendRequests.emplace_back(), + UPstream::masterNo(), + sendBufs[toProci], + tag, + comm, + UPstream::sendModes::sync + ); + } + } + + + // Probe and receive + + UPstream::Request barrierReq; + + for (bool barrier_active = false, done = false; !done; /*nil*/) + { + std::pair<int, int> probed = + UPstream::probeMessage + ( + UPstream::commsTypes::nonBlocking, + -1, // ANY_SOURCE + tag, + comm + ); + + if (probed.second > 0) + { + // Message found and had size: receive it + + const label proci = probed.first; + const label count = probed.second; + + if (optNonBlocking) + { + recvBufs(proci).resize_nocopy(count); + + // Non-blocking read + UIPstream::read + ( + recvRequests.emplace_back(), + proci, + recvBufs[proci], + tag, + comm + ); + // Pout<< "Done: " + // << UPstream::finishedRequests(recvRequests) << endl; + } + else + { + IPstream is + ( + UPstream::commsTypes::scheduled, + probed.first, + probed.second, + tag, + comm + ); + + scalarField fld(is); + + Info<< "from [" << probed.first + << "] : " << flatOutput(fld) << endl; + } + } + + if (barrier_active) + { + // Test barrier for completion + if (UPstream::finishedRequest(barrierReq)) + { + done = true; + } + } + else + { + // Check if all sends have arrived + if (UPstream::finishedRequests(sendRequests)) + { + UPstream::barrier(comm, &barrierReq); + barrier_active = true; + } + } + } + + Pout<< "pending receives: " << recvRequests.size() << endl; + + // Wait for receives to complete + UPstream::waitRequests(recvRequests); + + // It could be we need this type of synchronization point + // if the receives are non-blocking + if (optNonBlocking) + { + UPstream::barrier(comm); + } + + if (!recvBufs.empty()) + { + Pout<< "Receives from: " << flatOutput(recvBufs.sortedToc()) << endl; + + forAllConstIters(recvBufs, iter) + { + Pout<< "proc:" << iter.key() << " len:" << iter.val().size() << nl; + + if (!iter.val().empty()) + { + UIPstream is(iter.val()); + scalarField fld(is); + + Pout<< "recv:" << iter.key() + << " : " << flatOutput(fld) << nl; + } + } + } + + Info<< "\nEnd\n" << endl; + return 0; +} + + +// ************************************************************************* // diff --git a/applications/test/processorTopology/Make/options b/applications/test/processorTopology/Make/options index 18e6fe47afacb902cddccf82632772447704fd88..b3a33caee1ef96d0e2c13a668b2c0a1aa142e41c 100644 --- a/applications/test/processorTopology/Make/options +++ b/applications/test/processorTopology/Make/options @@ -1,2 +1,4 @@ -/* EXE_INC = */ -/* EXE_LIBS = */ +include $(GENERAL_RULES)/mpi-rules + +EXE_INC = $(PFLAGS) $(PINC) $(c++LESSWARN) +EXE_LIBS = $(PLIBS) diff --git 
a/applications/test/processorTopology/Test-processorTopology.C b/applications/test/processorTopology/Test-processorTopology.C index c4e87c88683a05c5cb4b3dbc0457c0d5d80cd8cd..0150221d86f9544dd3e0c28c900ce129a1298461 100644 --- a/applications/test/processorTopology/Test-processorTopology.C +++ b/applications/test/processorTopology/Test-processorTopology.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -33,6 +33,7 @@ Description #include "polyMesh.H" #include "globalMeshData.H" #include "OFstream.H" +#include <mpi.h> using namespace Foam; @@ -42,11 +43,25 @@ using namespace Foam; int main(int argc, char *argv[]) { argList::noFunctionObjects(); + argList::addBoolOption("verbose", "Set debug level"); + argList::addBoolOption("comm-graph", "Test simple graph communicator"); argList::addNote ( "Create graph of OpenFOAM mesh connections" ); + // Capture manually. We need values before proper startup + int nVerbose = 0; + for (int argi = 1; argi < argc; ++argi) + { + if (strcmp(argv[argi], "-verbose") == 0) + { + ++nVerbose; + } + } + + UPstream::debug = nVerbose; + #include "setRootCase.H" if (!Pstream::parRun()) @@ -61,7 +76,7 @@ int main(int argc, char *argv[]) // Adjacency table const labelListList& connectivity = - mesh.globalData().topology().procNeighbours(); + mesh.globalData().topology().procAdjacency(); if (Pstream::master()) { @@ -105,6 +120,127 @@ int main(int argc, char *argv[]) << "Use neato, circo or fdp graphviz tools" << nl; } + if (Pstream::parRun() && args.found("comm-graph")) + { + Info<< nl; + + // Local neighbours + const labelList& neighbours = + mesh.globalData().topology().procNeighbours(); + + Pout<< "Neighbours: " << flatOutput(neighbours) << endl; + + // As integer values + List<int> connected(neighbours.size()); + List<int> weights(neighbours.size()); + forAll(neighbours, i) + { + connected[i] = neighbours[i]; + weights[i] = 1; + } + + MPI_Comm topoComm; + + int mpiErrorCode = + MPI_Dist_graph_create_adjacent + ( + MPI_COMM_WORLD, + // Connections into this rank + connected.size(), connected.cdata(), MPI_UNWEIGHTED, + // Connections out of this rank + connected.size(), connected.cdata(), MPI_UNWEIGHTED, + MPI_INFO_NULL, + 0, // no reordering (apparently broken anyhow) + &topoComm + ); + + if (mpiErrorCode) + { + FatalError + << "Failed to create topo communicator. 
Error:" + << mpiErrorCode << exit(FatalError); + } + + int topo_rank = 0; + int topo_nprocs = 0; + int topo_inCount = 0; + int topo_outCount = 0; + int topo_isWeighted = 0; + MPI_Comm_rank(topoComm, &topo_rank); + MPI_Comm_size(topoComm, &topo_nprocs); + + { + int topo_type = 0; + MPI_Topo_test(topoComm, &topo_type); + + if (MPI_CART == topo_type) + { + Info<< "MPI topology : Cartesian" << endl; + } + else if (MPI_GRAPH == topo_type) + { + Info<< "MPI topology : Graph" << endl; + } + else if (MPI_DIST_GRAPH == topo_type) + { + Info<< "MPI topology : Distributed graph" << endl; + } + else + { + Info<< "MPI topology : None" << endl; + } + } + + MPI_Dist_graph_neighbors_count + ( + topoComm, + &topo_inCount, + &topo_outCount, + &topo_isWeighted + ); + + Pout<< "Topo comm with " + << topo_rank << " / " << topo_nprocs + << " from " << connected.size() << flatOutput(connected) + << " numNbr:" << topo_inCount + << nl; + + + List<int> myPatchIds(neighbours.size()); + forAll(myPatchIds, i) + { + // Patches to neighbours + myPatchIds[i] = + mesh.globalData().topology().procPatchLookup(neighbours[i]); + } + + List<int> nbrPatchIds(neighbours.size(), Zero); + + mpiErrorCode = MPI_Neighbor_alltoall + ( + myPatchIds.data(), + 1, // one element per neighbour + MPI_INT, + nbrPatchIds.data(), + 1, // one element per neighbour + MPI_INT, + topoComm + ); + + if (mpiErrorCode) + { + FatalError + << "MPI Error: " << mpiErrorCode << exit(FatalError); + } + + Pout<< "proc neighbours:" << flatOutput(neighbours) + << " my patches:" << flatOutput(myPatchIds) + << " their patches:" << flatOutput(nbrPatchIds) + << endl; + + MPI_Comm_free(&topoComm); + } + Info<< nl << "End\n" << endl; return 0; diff --git a/etc/controlDict b/etc/controlDict index cd6746fc9ea121c782cf0f878aa197f9f613a3a5..ea88edf7d7f71c2bfab081f11faf6d069553757b 100644 --- a/etc/controlDict +++ b/etc/controlDict @@ -127,7 +127,10 @@ OptimisationSwitches // Default communication type (nonBlocking | scheduled | blocking); commsType nonBlocking; floatTransfer 0; + // Number of processors to change to tree communication nProcsSimpleSum 0; + // Min numProc to use non-blocking exchange algorithm (Hoefler: NBX) + nonBlockingExchange 0; // MPI buffer size (bytes) // Can override with the MPI_BUFFER_SIZE env variable. diff --git a/src/OpenFOAM/Make/files b/src/OpenFOAM/Make/files index 1cef3ee916f476b293f47015191066d4e78e5197..b0404996bc3d8eb770c1a9fc1019458342355433 100644 --- a/src/OpenFOAM/Make/files +++ b/src/OpenFOAM/Make/files @@ -26,6 +26,8 @@ $(fileOps)/collatedFileOperation/hostCollatedFileOperation.C $(fileOps)/collatedFileOperation/threadedCollatedOFstream.C $(fileOps)/collatedFileOperation/OFstreamCollator.C +parallel/processorTopology/processorTopology.C + bools = primitives/bools $(bools)/bool/bool.C $(bools)/Switch/Switch.C diff --git a/src/OpenFOAM/db/IOstreams/Fstreams/masterOFstream.C b/src/OpenFOAM/db/IOstreams/Fstreams/masterOFstream.C index 91a4f68534945af51551755cd9ccfab19e98d6b7..58f1caddf370510024274db76fe8f0ae74c9bfd1 100644 --- a/src/OpenFOAM/db/IOstreams/Fstreams/masterOFstream.C +++ b/src/OpenFOAM/db/IOstreams/Fstreams/masterOFstream.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2017 OpenFOAM Foundation - Copyright (C) 2020-2022 OpenCFD Ltd. + Copyright (C) 2020-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
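The masterOFstream hunks that follow drop both the boolList gather of per-rank validity and the explicit recvSizes list: a rank now simply sends nothing when its buffer is not valid, so on the master a non-zero receive count doubles as the old validity flag, and the scratch buffer is sized once via the new PstreamBuffers::maxRecvCount(). The receive side condenses to the following (a restatement of the hunk below, not additional API):

    List<char> buf(pBufs.maxRecvCount());

    for (const int proci : Pstream::subProcs())
    {
        const std::streamsize count(pBufs.recvDataCount(proci));

        if (count)  // non-zero count implies the rank had valid data
        {
            UIPstream is(proci, pBufs);

            is.read(buf.data(), count);
            checkWrite(filePaths[proci], buf.cdata(), count);
        }
    }
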
@@ -31,7 +31,6 @@ License #include "OSspecific.H" #include "PstreamBuffers.H" #include "masterUncollatedFileOperation.H" -#include <algorithm> // * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * // @@ -111,49 +110,45 @@ void Foam::masterOFstream::commit() return; } - boolList procValid(UPstream::listGatherValues<bool>(valid_)); - // Different files PstreamBuffers pBufs(Pstream::commsTypes::nonBlocking); - // Send my buffer to master + // Send my (valid) buffer to master if (!Pstream::master()) { - UOPstream os(Pstream::masterNo(), pBufs); - string s(this->str()); - this->reset(); + if (valid_) + { + string s(this->str()); - os.write(s.data(), s.length()); + UOPstream os(Pstream::masterNo(), pBufs); + os.write(s.data(), s.length()); + } + this->reset(); } - labelList recvSizes; - pBufs.finishedGathers(recvSizes); + pBufs.finishedGathers(); if (Pstream::master()) { - // Write master data - if (procValid[Pstream::masterNo()]) + // Write (valid) master data + if (valid_) { checkWrite(filePaths[Pstream::masterNo()], this->str()); } this->reset(); - // Find the max receive size - recvSizes[Pstream::masterNo()] = 0; - List<char> buf - ( - *std::max_element(recvSizes.cbegin(), recvSizes.cend()) - ); + // Allocate large enough to read without resizing + List<char> buf(pBufs.maxRecvCount()); for (const int proci : Pstream::subProcs()) { - UIPstream is(proci, pBufs); - - const std::streamsize count(recvSizes[proci]); - is.read(buf.data(), count); + const std::streamsize count(pBufs.recvDataCount(proci)); - if (procValid[proci]) + if (count) { + UIPstream is(proci, pBufs); + + is.read(buf.data(), count); checkWrite(filePaths[proci], buf.cdata(), count); } } diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/IPBstreams.C b/src/OpenFOAM/db/IOstreams/Pstreams/IPBstreams.C index 5540777f096601559d351ccfa96ab3e9f06ac1e9..a43f73116e4bebd5f8f47a74dd9f10edf12f85d7 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/IPBstreams.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/IPBstreams.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
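The IPstream/IPBstream hunks that follow move the receive-position bookkeeping into the common base class: each derived stream previously held its own transferBufPosition_ index into Pstream::transferBuf_ and passed it to the base by reference. Schematically (condensed from the hunks below):

    // before: position owned by the derived stream, passed by reference
    //     UIPstreamBase(..., Pstream::transferBuf_, transferBufPosition_, ...),
    //     transferBufPosition_(0)
    //
    // after: position owned by the base class itself
    //     UIPstreamBase(..., Pstream::transferBuf_,
    //                   UIPstreamBase::storedRecvBufPos_, ...)
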
@@ -75,13 +75,12 @@ Foam::IPBstream::IPBstream commsType, fromProcNo, Pstream::transferBuf_, - transferBufPosition_, + UIPstreamBase::storedRecvBufPos_, // Internal only tag, comm, false, // Do not clear Pstream::transferBuf_ if at end fmt - ), - transferBufPosition_(0) + ) {} diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/IPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/IPstream.H index 43152570b1be1f5b2d4bc3cac270827b48a02d24..5dda9b6e6eb7cd3793aaf0632b18668460c53c25 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/IPstream.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/IPstream.H @@ -56,11 +56,6 @@ class IPstream public Pstream, public UIPstream { - // Private Data - - //- Receive index into Pstream::transferBuf_ - label transferBufPosition_; - public: // Constructors @@ -90,11 +85,6 @@ class IPBstream public Pstream, public UIPBstream { - // Private Data - - //- Receive index into Pstream::transferBuf_ - label transferBufPosition_; - public: // Constructors diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/IPstreams.C b/src/OpenFOAM/db/IOstreams/Pstreams/IPstreams.C index caabf931d7ff798b3df1ba2096b2ff0bb8679dd4..d82a270238cd7804aa61ff2ca535a4f1fc59d49e 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/IPstreams.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/IPstreams.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -91,6 +91,16 @@ Foam::UIPstream::UIPstream(const int fromProcNo, PstreamBuffers& buffers) } +Foam::UIPstream::UIPstream +( + const DynamicList<char>& recvBuf, + IOstreamOption::streamFormat fmt +) +: + UIPstreamBase(recvBuf, fmt) +{} + + Foam::IPstream::IPstream ( const UPstream::commsTypes commsType, @@ -107,13 +117,12 @@ Foam::IPstream::IPstream commsType, fromProcNo, Pstream::transferBuf_, - transferBufPosition_, + UIPstreamBase::storedRecvBufPos_, // Internal only tag, comm, false, // Do not clear Pstream::transferBuf_ if at end fmt - ), - transferBufPosition_(0) + ) {} diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/OPstreams.C b/src/OpenFOAM/db/IOstreams/Pstreams/OPstreams.C index 9f1d70f42fea0575d10453edabe0e246f4513f4c..fa913a868c693e28d74f34ed5da1a69d71babcde 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/OPstreams.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/OPstreams.C @@ -52,6 +52,16 @@ Foam::UOPstream::UOPstream(const int toProcNo, PstreamBuffers& buffers) {} +Foam::UOPstream::UOPstream +( + DynamicList<char>& sendBuf, + IOstreamOption::streamFormat fmt +) +: + UOPstreamBase(sendBuf, fmt) +{} + + Foam::OPstream::OPstream ( const UPstream::commsTypes commsType, diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H index d4fd86c480d028a82945a82fa49ff556648f0d5f..4a293d80bf7cf4883a787428f5d3be38b424ea6a 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2016 OpenFOAM Foundation - Copyright (C) 2016-2022 OpenCFD Ltd. + Copyright (C) 2016-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
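The Pstream.H hunks below declare the new consensus-based exchange API. A hypothetical usage sketch of the sparse (Map) overload, with names taken from the declarations that follow and an arbitrary illustrative tag value:

    Map<DynamicList<char>> sendBufs;    // keyed by destination rank
    Map<DynamicList<char>> recvBufs;    // gains one entry per actual sender

    // ... fill sendBufs only for the ranks this process talks to ...

    Pstream::exchangeConsensus<DynamicList<char>, char>
    (
        sendBufs,
        recvBufs,
        (UPstream::msgType() + 99),  // a tag not in concurrent use
        UPstream::worldComm
    );
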
@@ -36,6 +36,7 @@ SourceFiles PstreamGather.C PstreamCombineGather.C PstreamGatherList.C + PstreamExchangeConsensus.C PstreamExchange.C \*---------------------------------------------------------------------------*/ @@ -64,8 +65,11 @@ class Pstream { // Private Static Functions - //- Exchange contiguous data. Sends sendBufs, receives into recvBufs. + //- Exchange \em contiguous data. + //- Sends sendBufs, receives into recvBufs. // Data provided and received as container. + // + // No internal guards or resizing. template<class Container, class T> static void exchangeContainer ( @@ -77,8 +81,11 @@ class Pstream const bool wait //!< Wait for requests to complete ); - //- Exchange contiguous data. Sends sendBufs, receives into recvBufs. + //- Exchange \em contiguous data. + //- Sends sendBufs, receives into recvBufs. // Data provided and received as pointers. + // + // No internal guards or resizing. template<class T> static void exchangeBuf ( @@ -133,6 +140,7 @@ public: //- Broadcast content (contiguous or non-contiguous) //- to all processes in communicator. + // For \b non-parallel : do nothing. template<class Type> static void broadcast ( @@ -141,6 +149,7 @@ public: ); //- Broadcast multiple items to all processes in communicator. + // For \b non-parallel : do nothing. template<class Type, class... Args> static void broadcasts(const label comm, Type& arg1, Args&&... args); @@ -513,6 +522,8 @@ public: //- Helper: exchange sizes of sendData. //- The sendData is the data per processor (in the communicator). // Returns sizes of sendData on the sending processor. + // \n + // For \b non-parallel : copy sizes from sendData directly. template<class Container> static void exchangeSizes ( @@ -522,10 +533,10 @@ public: ); - //- Helper: exchange contiguous data. + //- Helper: exchange \em contiguous data. //- Sends sendData, receives into recvData. // If wait=true will wait for all transfers to finish. - template<class Container, class T> + template<class Container, class Type> static void exchange ( @@ -536,11 +547,11 @@ public: const bool wait = true //!< Wait for requests to complete ); - //- Exchange contiguous data. + //- Exchange \em contiguous data. //- Sends sendData, receives into recvData. //- Determines sizes to receive. // If wait=true will wait for all transfers to finish. - template<class Container, class T> + template<class Container, class Type> static void exchange ( const UList<Container>& sendData, labelList& recvSizes, List<Container>& recvData, const label comm = UPstream::worldComm, const bool wait = true //!< Wait for requests to complete ); + + + // Non-blocking exchange + + //- Exchange the \b non-zero sizes of sendBufs entries (sparse map) + //- with all ranks in the communicator + //- using non-blocking consensus exchange. + // + // Since the recvData map is always cleared before receipt and sizes + // of zero are never transmitted, a simple check + // of its keys is sufficient to determine connectivity. + // + // For \b non-parallel : copy size of rank (if it exists and non-empty) + // from sendBufs to recvSizes. + // + // \note The message tag is adjusted internally to improve uniqueness + template<class Container> + static void exchangeSizes + ( + const Map<Container>& sendBufs, + Map<label>& recvSizes, + const label tag = UPstream::msgType(), + const label comm = UPstream::worldComm + ); + + //- Exchange \em contiguous data using non-blocking consensus. + //- Sends sendData, receives into recvData. 
+ // + // Each entry of the recvBufs list is cleared before receipt. + // For \b non-parallel : copy own rank from sendBufs to recvBufs. + // + // \note The message tag should be chosen to be a unique value + // since the implementation uses probing with ANY_SOURCE !! + template<class Container, class Type> + static void exchangeConsensus + ( + const UList<Container>& sendBufs, + List<Container>& recvBufs, + const int tag, + const label comm + ); + + //- Exchange \em contiguous data using non-blocking consensus. + //- Sends sendData, receives into recvData. + // + // Each \em entry of the recvBufs map is cleared before receipt, + // but the map itself is not cleared. This allows the map to preserve + // allocated space (eg DynamicList entries) between calls. + // + // For \b non-parallel : copy own rank (if it exists and non-empty) + // from sendBufs to recvBufs. + // + // \note The message tag should be chosen to be a unique value + // since the implementation uses probing with ANY_SOURCE !! + template<class Container, class Type> + static void exchangeConsensus + ( + const Map<Container>& sendBufs, + Map<Container>& recvBufs, + const int tag, + const label comm + ); }; @@ -563,6 +636,7 @@ #include "PstreamGather.C" #include "PstreamCombineGather.C" #include "PstreamGatherList.C" + #include "PstreamExchangeConsensus.C" #include "PstreamExchange.C" #endif diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C index cb8e6642c46d70679411ba64afb90804d40bf96f..ff37820401f1ed0b1346dfe75f6c444ae249bba4 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2021-2022 OpenCFD Ltd. + Copyright (C) 2021-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -33,24 +33,55 @@ License void Foam::PstreamBuffers::finalExchange ( - labelList& recvSizes, - const bool wait + const bool wait, + const bool needSizes, + labelList& recvSizes ) { // Could also check that it is not called twice // but that is used for overlapping send/recv (eg, overset) finishedSendsCalled_ = true; + recvPositions_ = Zero; if (commsType_ == UPstream::commsTypes::nonBlocking) { + if + ( + wait + && UPstream::parRun() + && UPstream::nProcsNonblockingExchange > 1 + && UPstream::nProcsNonblockingExchange <= nProcs() + ) + { + Pstream::exchangeConsensus<DynamicList<char>, char> + ( + sendBuffers_, + recvBuffers_, + (tag_ + 314159), // some unique tag? 
+ comm_ + ); + + // Copy back out + if (needSizes) + { + recvSizes.resize_nocopy(recvBuffers_.size()); + forAll(recvBuffers_, proci) + { + recvSizes[proci] = recvBuffers_[proci].size(); + } + } + + return; + } + // all-to-all - Pstream::exchangeSizes(sendBuf_, recvSizes, comm_); + Pstream::exchangeSizes(sendBuffers_, recvSizes, comm_); Pstream::exchange<DynamicList<char>, char> ( - sendBuf_, + sendBuffers_, recvSizes, - recvBuf_, + recvBuffers_, tag_, comm_, wait @@ -63,13 +94,15 @@ void Foam::PstreamBuffers::finalExchange ( const labelUList& sendProcs, const labelUList& recvProcs, - labelList& recvSizes, - const bool wait + const bool wait, + const bool needSizes, // unused + labelList& recvSizes ) { // Could also check that it is not called twice // but that is used for overlapping send/recv (eg, overset) finishedSendsCalled_ = true; + recvPositions_ = Zero; if (commsType_ == UPstream::commsTypes::nonBlocking) { @@ -77,7 +110,7 @@ void Foam::PstreamBuffers::finalExchange ( sendProcs, recvProcs, - sendBuf_, + sendBuffers_, recvSizes, tag_, comm_ @@ -85,9 +118,9 @@ void Foam::PstreamBuffers::finalExchange Pstream::exchange<DynamicList<char>, char> ( - sendBuf_, + sendBuffers_, recvSizes, - recvBuf_, + recvBuffers_, tag_, comm_, wait @@ -99,26 +132,28 @@ void Foam::PstreamBuffers::finalExchange void Foam::PstreamBuffers::finalExchangeGatherScatter ( const bool isGather, - const bool wait + const bool wait, + const bool needSizes, + labelList& recvSizes ) { // Could also check that it is not called twice // but that is used for overlapping send/recv (eg, overset) finishedSendsCalled_ = true; + recvPositions_ = Zero; if (commsType_ == UPstream::commsTypes::nonBlocking) { - labelList recvSizes; - if (isGather) { // gather mode (all-to-one): master [0] <- everyone - recvSizes = UPstream::listGatherValues(sendBuf_[0].size(), comm_); + recvSizes = + UPstream::listGatherValues(sendBuffers_[0].size(), comm_); if (!UPstream::master(comm_)) { - recvSizes.resize_nocopy(recvBuf_.size()); + recvSizes.resize_nocopy(nProcs_); recvSizes = Zero; } } @@ -126,13 +161,13 @@ void Foam::PstreamBuffers::finalExchangeGatherScatter { // scatter mode (one-to-all): master [0] -> everyone - recvSizes.resize_nocopy(sendBuf_.size()); + recvSizes.resize_nocopy(nProcs_); if (UPstream::master(comm_)) { - forAll(sendBuf_, proci) + forAll(sendBuffers_, proci) { - recvSizes[proci] = sendBuf_[proci].size(); + recvSizes[proci] = sendBuffers_[proci].size(); } } @@ -145,9 +180,9 @@ void Foam::PstreamBuffers::finalExchangeGatherScatter Pstream::exchange<DynamicList<char>, char> ( - sendBuf_, + sendBuffers_, recvSizes, - recvBuf_, + recvBuffers_, tag_, comm_, wait @@ -160,29 +195,9 @@ void Foam::PstreamBuffers::finalExchangeGatherScatter Foam::PstreamBuffers::PstreamBuffers ( - const UPstream::commsTypes commsType, - const int tag, - const label comm, - IOstreamOption::streamFormat fmt -) -: - finishedSendsCalled_(false), - allowClearRecv_(true), - format_(fmt), - commsType_(commsType), - tag_(tag), - comm_(comm), - sendBuf_(UPstream::nProcs(comm_)), - recvBuf_(UPstream::nProcs(comm_)), - recvBufPos_(UPstream::nProcs(comm_), Zero) -{} - - -Foam::PstreamBuffers::PstreamBuffers -( - const label comm, - const UPstream::commsTypes commsType, - const int tag, + UPstream::commsTypes commsType, + int tag, + label communicator, IOstreamOption::streamFormat fmt ) : @@ -191,10 +206,11 @@ Foam::PstreamBuffers::PstreamBuffers format_(fmt), commsType_(commsType), tag_(tag), - comm_(comm), - sendBuf_(UPstream::nProcs(comm_)), - 
recvBuf_(UPstream::nProcs(comm_)), - recvBufPos_(UPstream::nProcs(comm_), Zero) + comm_(communicator), + nProcs_(UPstream::nProcs(comm_)), + sendBuffers_(nProcs_), + recvBuffers_(nProcs_), + recvPositions_(nProcs_, Zero) {} @@ -203,33 +219,61 @@ Foam::PstreamBuffers::PstreamBuffers Foam::PstreamBuffers::~PstreamBuffers() { // Check that all data has been consumed. - forAll(recvBufPos_, proci) + forAll(recvPositions_, proci) { - if (recvBufPos_[proci] < recvBuf_[proci].size()) + const label pos = recvPositions_[proci]; + const label len = recvBuffers_[proci].size(); + + if (pos < len) { FatalErrorInFunction << "Message from processor " << proci - << " Only consumed " << recvBufPos_[proci] << " of " - << recvBuf_[proci].size() << " bytes" << nl + << " Only consumed " << pos << " of " << len << " bytes" << nl << Foam::abort(FatalError); } } } +// * * * * * * * * * * * * Protected Member Functions * * * * * * * * * * * // + +Foam::DynamicList<char>& Foam::PstreamBuffers::accessSendBuffer +( + const label proci +) +{ + return sendBuffers_[proci]; +} + + +Foam::DynamicList<char>& Foam::PstreamBuffers::accessRecvBuffer +( + const label proci +) +{ + return recvBuffers_[proci]; +} + + +Foam::label& Foam::PstreamBuffers::accessRecvPosition(const label proci) +{ + return recvPositions_[proci]; +} + + // * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * * // void Foam::PstreamBuffers::clear() { - for (DynamicList<char>& buf : sendBuf_) + for (DynamicList<char>& buf : sendBuffers_) { buf.clear(); } - for (DynamicList<char>& buf : recvBuf_) + for (DynamicList<char>& buf : recvBuffers_) { buf.clear(); } - recvBufPos_ = 0; + recvPositions_ = Zero; finishedSendsCalled_ = false; } @@ -237,24 +281,24 @@ void Foam::PstreamBuffers::clear() void Foam::PstreamBuffers::clearRecv(const label proci) { - recvBuf_[proci].clear(); - recvBufPos_[proci] = 0; + recvBuffers_[proci].clear(); + recvPositions_[proci] = 0; } void Foam::PstreamBuffers::clearStorage() { - // Could also clear out entire sendBuf_, recvBuf_ and reallocate. + // Could also clear out entire sendBuffers_, recvBuffers_ and reallocate. 
// Not sure if it makes much difference - for (DynamicList<char>& buf : sendBuf_) + for (DynamicList<char>& buf : sendBuffers_) { buf.clearStorage(); } - for (DynamicList<char>& buf : recvBuf_) + for (DynamicList<char>& buf : recvBuffers_) { buf.clearStorage(); } - recvBufPos_ = 0; + recvPositions_ = Zero; finishedSendsCalled_ = false; } @@ -262,7 +306,7 @@ void Foam::PstreamBuffers::clearStorage() bool Foam::PstreamBuffers::hasSendData() const { - for (const DynamicList<char>& buf : sendBuf_) + for (const DynamicList<char>& buf : sendBuffers_) { if (!buf.empty()) { @@ -277,9 +321,9 @@ bool Foam::PstreamBuffers::hasRecvData() const { if (finishedSendsCalled_) { - forAll(recvBufPos_, proci) + forAll(recvPositions_, proci) { - if (recvBuf_[proci].size() > recvBufPos_[proci]) + if (recvPositions_[proci] < recvBuffers_[proci].size()) { return true; } @@ -299,7 +343,7 @@ bool Foam::PstreamBuffers::hasRecvData() const Foam::label Foam::PstreamBuffers::sendDataCount(const label proci) const { - return sendBuf_[proci].size(); + return sendBuffers_[proci].size(); } @@ -307,7 +351,7 @@ Foam::label Foam::PstreamBuffers::recvDataCount(const label proci) const { if (finishedSendsCalled_) { - const label len(recvBuf_[proci].size() > recvBufPos_[proci]); + const label len(recvBuffers_[proci].size() - recvPositions_[proci]); if (len > 0) { @@ -328,13 +372,13 @@ Foam::label Foam::PstreamBuffers::recvDataCount(const label proci) const Foam::labelList Foam::PstreamBuffers::recvDataCounts() const { - labelList counts(recvBuf_.size(), Zero); + labelList counts(recvPositions_.size(), Zero); if (finishedSendsCalled_) { - forAll(recvBufPos_, proci) + forAll(recvPositions_, proci) { - const label len(recvBuf_[proci].size() - recvBufPos_[proci]); + const label len(recvBuffers_[proci].size() - recvPositions_[proci]); if (len > 0) { @@ -354,19 +398,60 @@ Foam::labelList Foam::PstreamBuffers::recvDataCounts() const } +Foam::label Foam::PstreamBuffers::maxNonLocalRecvCount(const label proci) const +{ + label maxLen = 0; + + if (finishedSendsCalled_) + { + forAll(recvPositions_, i) + { + if (i != proci) + { + const label len(recvBuffers_[i].size() - recvPositions_[i]); + maxLen = max(maxLen, len); + } + } + } + #ifdef FULLDEBUG + else + { + FatalErrorInFunction + << "Call finishedSends first" << exit(FatalError); + } + #endif + + return maxLen; +} + + +Foam::label Foam::PstreamBuffers::maxRecvCount() const +{ + // Use out-of-range proci to avoid excluding any processor + return maxNonLocalRecvCount(-1); +} + + +Foam::label Foam::PstreamBuffers::maxNonLocalRecvCount() const +{ + return maxNonLocalRecvCount(UPstream::myProcNo(comm_)); +} + + const Foam::UList<char> Foam::PstreamBuffers::peekRecvData(const label proci) const { if (finishedSendsCalled_) { - const label len(recvBuf_[proci].size() - recvBufPos_[proci]); + const label pos = recvPositions_[proci]; + const label len = recvBuffers_[proci].size(); - if (len > 0) + if (pos < len) { return UList<char> ( - const_cast<char*>(&recvBuf_[proci][recvBufPos_[proci]]), - len + const_cast<char*>(recvBuffers_[proci].cdata()) + pos, + (len - pos) ); } } @@ -393,7 +478,7 @@ bool Foam::PstreamBuffers::allowClearRecv(bool on) noexcept void Foam::PstreamBuffers::finishedSends(const bool wait) { labelList recvSizes; - finalExchange(recvSizes, wait); + finalExchange(wait, false, recvSizes); } @@ -403,7 +488,7 @@ void Foam::PstreamBuffers::finishedSends const bool wait ) { - finalExchange(recvSizes, wait); + finalExchange(wait, true, recvSizes); if (commsType_ != 
UPstream::commsTypes::nonBlocking) { @@ -427,7 +512,7 @@ void Foam::PstreamBuffers::finishedSends ) { labelList recvSizes; - finalExchange(sendProcs, recvProcs, recvSizes, wait); + finalExchange(sendProcs, recvProcs, wait, false, recvSizes); } @@ -439,7 +524,7 @@ void Foam::PstreamBuffers::finishedSends const bool wait ) { - finalExchange(sendProcs, recvProcs, recvSizes, wait); + finalExchange(sendProcs, recvProcs, wait, true, recvSizes); if (commsType_ != UPstream::commsTypes::nonBlocking) { @@ -472,17 +557,17 @@ bool Foam::PstreamBuffers::finishedSends // Update send connections // - reasonable to assume there are no self-sends on UPstream::myProcNo - forAll(sendBuf_, proci) + forAll(sendBuffers_, proci) { // ie, sendDataCount(proci) != 0 - if (sendConnections.set(proci, !sendBuf_[proci].empty())) + if (sendConnections.set(proci, !sendBuffers_[proci].empty())) { // The state changed changed = true; } } - UPstream::reduceOr(changed); + UPstream::reduceOr(changed, comm_); if (changed) { @@ -490,25 +575,25 @@ bool Foam::PstreamBuffers::finishedSends // The send ranks sendProcs.clear(); - forAll(sendBuf_, proci) + forAll(sendBuffers_, proci) { // ie, sendDataCount(proci) != 0 - if (!sendBuf_[proci].empty()) + if (!sendBuffers_[proci].empty()) { - sendProcs.append(proci); + sendProcs.push_back(proci); } } - finishedSends(wait); // All-to-all + labelList recvSizes; + finishedSends(recvSizes, wait); // All-to-all // The recv ranks recvProcs.clear(); - forAll(recvBuf_, proci) + forAll(recvSizes, proci) { - // ie, recvDataCount(proci) - if (!recvBuf_[proci].empty()) + if (recvSizes[proci] > 0) { - recvProcs.append(proci); + recvProcs.push_back(proci); } } } @@ -525,13 +610,15 @@ bool Foam::PstreamBuffers::finishedSends void Foam::PstreamBuffers::finishedGathers(const bool wait) { - finalExchangeGatherScatter(true, wait); + labelList recvSizes; + finalExchangeGatherScatter(true, wait, false, recvSizes); } void Foam::PstreamBuffers::finishedScatters(const bool wait) { - finalExchangeGatherScatter(false, wait); + labelList recvSizes; + finalExchangeGatherScatter(false, wait, false, recvSizes); } @@ -541,7 +628,7 @@ void Foam::PstreamBuffers::finishedGathers const bool wait ) { - finalExchangeGatherScatter(true, wait); + finalExchangeGatherScatter(true, wait, true, recvSizes); if (commsType_ != UPstream::commsTypes::nonBlocking) { @@ -554,11 +641,6 @@ void Foam::PstreamBuffers::finishedGathers // Note: maybe possible only if using different tag from write started // by ~UOPstream. Needs some work. } - - // For nonBlocking mode, simply recover received sizes - // from the buffers themselves. - - recvSizes = recvDataCounts(); } @@ -568,7 +650,7 @@ void Foam::PstreamBuffers::finishedScatters const bool wait ) { - finalExchangeGatherScatter(false, wait); + finalExchangeGatherScatter(false, wait, true, recvSizes); if (commsType_ != UPstream::commsTypes::nonBlocking) { @@ -581,11 +663,6 @@ void Foam::PstreamBuffers::finishedScatters // Note: maybe possible only if using different tag from write started // by ~UOPstream. Needs some work. } - - // For nonBlocking mode, simply recover received sizes - // from the buffers themselves. 
- - recvSizes = recvDataCounts(); } diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H index 44bfea19c64298ef8742f4a744dcb29826edadc5..c3912569c924013a00cf151276444dbbc7409cb5 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2021-2022 OpenCFD Ltd. + Copyright (C) 2021-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -117,11 +117,6 @@ class bitSet; class PstreamBuffers { - // Friendship - friend class UOPstreamBase; // Access to sendBuf_ - friend class UIPstreamBase; // Access to recvBuf_, recvBufPos_ - - // Private Data //- Track if sends are complete @@ -142,21 +137,32 @@ class PstreamBuffers //- Communicator const label comm_; - //- Send buffer. Size is nProcs() - List<DynamicList<char>> sendBuf_; + //- Number of ranks associated with PstreamBuffers (at construction) + const label nProcs_; + + + // Buffer storage + + //- Send buffers. Size is nProcs() + List<DynamicList<char>> sendBuffers_; - //- Receive buffer. Size is nProcs() - List<DynamicList<char>> recvBuf_; + //- Receive buffers. Size is nProcs() + List<DynamicList<char>> recvBuffers_; - //- Current read positions within recvBuf_. Size is nProcs() - labelList recvBufPos_; + //- Current read positions within recvBuffers_. Size is nProcs() + labelList recvPositions_; // Private Member Functions //- Mark all sends as having been done. // This will start receives (nonBlocking comms). - void finalExchange(labelList& recvSizes, const bool wait); + void finalExchange + ( + const bool wait, + const bool needSizes, // If recvSizes needed or scratch + labelList& recvSizes + ); //- Mark sends as done. // Only exchange sizes using the sendProcs/recvProcs subset @@ -165,35 +171,76 @@ class PstreamBuffers ( const labelUList& sendProcs, const labelUList& recvProcs, - labelList& recvSizes, - const bool wait + const bool wait, + const bool needSizes, // If recvSizes needed or scratch + labelList& recvSizes ); //- For all-to-one or one-to-all - void finalExchangeGatherScatter(const bool isGather, const bool wait); + void finalExchangeGatherScatter + ( + const bool isGather, + const bool wait, + const bool needSizes, // If recvSizes needed or scratch + labelList& recvSizes + ); + + + // Friendship Access + + //- Access a send buffer for given proc (in range 0-nProcs) + DynamicList<char>& accessSendBuffer(const label proci); + + //- Access a recv buffer for given proc (in range 0-nProcs). + DynamicList<char>& accessRecvBuffer(const label proci); + + //- Access the recv position within recv buffer for given proc + //- (in range 0-nProcs). 
+ label& accessRecvPosition(const label proci); + + friend class UOPstreamBase; // accessSendBuffer() + friend class UIPstreamBase; // accessRecvBuffer(), accessRecvPosition() public: // Constructors - //- Construct given comms type, message tag, communicator, IO format + //- Construct given communication type (default: nonBlocking), message + //- tag, communicator (default: worldComm), IO format (default: binary) explicit PstreamBuffers ( - const UPstream::commsTypes commsType, - const int tag = UPstream::msgType(), - const label comm = UPstream::worldComm, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, + int tag = UPstream::msgType(), + label communicator = UPstream::worldComm, IOstreamOption::streamFormat fmt = IOstreamOption::BINARY ); - //- Construct given communicator, comms type, message tag, IO format + //- Construct given communicator, communication type + //- (default: nonBlocking), message tag, IO format (default: binary) explicit PstreamBuffers ( - const label comm, - const UPstream::commsTypes commsType, - const int tag = UPstream::msgType(), + label communicator, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, + int tag = UPstream::msgType(), IOstreamOption::streamFormat fmt = IOstreamOption::BINARY - ); + ) + : + PstreamBuffers(commsType, tag, communicator, fmt) + {} + + //- Construct given communicator, message tag, communication type + //- (default: nonBlocking), IO format (default: binary) + PstreamBuffers + ( + label communicator, + int tag, + UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking, + IOstreamOption::streamFormat fmt = IOstreamOption::BINARY + ) + : + PstreamBuffers(commsType, tag, communicator, fmt) + {} //- Destructor - checks that all data have been consumed @@ -202,7 +249,7 @@ public: // Member Functions - // Access + // Attributes //- The associated buffer format (ascii | binary) IOstreamOption::streamFormat format() const noexcept { @@ -216,39 +263,39 @@ public: return commsType_; } - //- The transfer message type + //- The transfer message tag int tag() const noexcept { return tag_; } - //- Communicator + //- The communicator index label comm() const noexcept { return comm_; } - - // Sizing - //- Number of ranks associated with PstreamBuffers label nProcs() const noexcept { - return recvBufPos_.size(); + return nProcs_; } + + // Sizing + //- Range of rank indices associated with PstreamBuffers UPstream::rangeType allProcs() const noexcept { // Proc 0 -> nProcs (int value) - return UPstream::rangeType(static_cast<int>(nProcs())); + return UPstream::rangeType(static_cast<int>(nProcs_)); } //- Range of sub-processes indices associated with PstreamBuffers UPstream::rangeType subProcs() const noexcept { // Proc 1 -> nProcs (int value) - return UPstream::rangeType(1, static_cast<int>(nProcs()-1)); + return UPstream::rangeType(1, static_cast<int>(nProcs_-1)); } @@ -285,6 +332,18 @@ public: //- Must call finishedSends() or other finished.. method first! labelList recvDataCounts() const; + //- Maximum receive size from any processor rank. + //- Must call finishedSends() or other finished.. method first! + label maxRecvCount() const; + + //- Maximum receive size, excluding current processor rank + //- Must call finishedSends() or other finished.. method first! + label maxNonLocalRecvCount() const; + + //- Maximum receive size, excluding the specified processor rank + //- Must call finishedSends() or other finished.. method first! 
+ label maxNonLocalRecvCount(const label proci) const; + //- Number of unconsumed receive bytes for the specified processor. //- Must call finishedSends() or other finished.. method first! // The method is only useful in limited situations, such as when @@ -430,7 +489,7 @@ public: //- Mark all sends to master as done. // // Non-blocking mode: populates receive buffers. - // Can use recvDataCounts() method to recover sizes received. + // Can use recvDataCount, maxRecvCount etc to recover sizes received. // // \param wait wait for requests to complete (in nonBlocking mode) // @@ -450,7 +509,7 @@ public: //- Mark all sends to sub-procs as done. // // Non-blocking mode: populates receive buffers. - // Can use recvDataCounts() method to recover sizes received. + // Can use recvDataCount, maxRecvCount etc to recover sizes received. // // \param wait wait for requests to complete (in nonBlocking mode) // diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamCombineGather.C b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamCombineGather.C index 8449a67f2bea8d4536462b7f40322d897645ccf4..8460e33a7338baf9d2d12e29b45f1dc4297e0465 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamCombineGather.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamCombineGather.C @@ -597,7 +597,7 @@ void Foam::Pstream::mapCombineGather { auto masterIter = values.find(recvIter.key()); - if (masterIter != values.end()) // == found() + if (masterIter.good()) { // Combine with existing cop(masterIter.val(), recvIter.val()); diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamExchange.C b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamExchange.C index 26c592c6f55e1453c0aae88fd237cbb6f3079637..5e4aa2130ff8652eb51285de8f3da61145501497 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamExchange.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamExchange.C @@ -35,7 +35,7 @@ Description // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // -template<class Container, class T> +template<class Container, class Type> void Foam::Pstream::exchangeContainer ( const UList<Container>& sendBufs, @@ -60,7 +60,7 @@ void Foam::Pstream::exchangeContainer UPstream::commsTypes::nonBlocking, proci, recvBufs[proci].data_bytes(), - recvSizes[proci]*sizeof(T), + recvSizes[proci]*sizeof(Type), tag, comm ); @@ -108,7 +108,7 @@ void Foam::Pstream::exchangeContainer } -template<class T> +template<class Type> void Foam::Pstream::exchangeBuf ( const labelUList& sendSizes, @@ -134,7 +134,7 @@ void Foam::Pstream::exchangeBuf UPstream::commsTypes::nonBlocking, proci, recvBufs[proci], - recvSizes[proci]*sizeof(T), + recvSizes[proci]*sizeof(Type), tag, comm ); @@ -156,7 +156,7 @@ void Foam::Pstream::exchangeBuf UPstream::commsTypes::nonBlocking, proci, sendBufs[proci], - sendSizes[proci]*sizeof(T), + sendSizes[proci]*sizeof(Type), tag, comm ) @@ -165,7 +165,7 @@ void Foam::Pstream::exchangeBuf FatalErrorInFunction << "Cannot send outgoing message. " << "to:" << proci << " nBytes:" - << label(sendSizes[proci]*sizeof(T)) + << label(sendSizes[proci]*sizeof(Type)) << Foam::abort(FatalError); } } @@ -182,7 +182,7 @@ void Foam::Pstream::exchangeBuf } -template<class Container, class T> +template<class Container, class Type> void Foam::Pstream::exchange ( const UList<Container>& sendBufs, @@ -193,12 +193,13 @@ void Foam::Pstream::exchange const bool wait ) { - // OR static_assert(is_contiguous<T>::value, "Contiguous data only!") - if (!is_contiguous<T>::value) - { - FatalErrorInFunction - << "Contiguous data only." 
<< sizeof(T) << Foam::abort(FatalError); - } + static_assert(is_contiguous<Type>::value, "Contiguous data only!"); + // if (!is_contiguous<Type>::value) + // { + // FatalErrorInFunction + // << "Contiguous data only: " + // << sizeof(Type) << Foam::abort(FatalError); + // } if (sendBufs.size() != UPstream::nProcs(comm)) { @@ -227,7 +228,7 @@ void Foam::Pstream::exchange if (UPstream::maxCommsSize <= 0) { // Do the exchanging in one go - exchangeContainer<Container, T> + exchangeContainer<Container, Type> ( sendBufs, recvSizes, @@ -257,7 +258,7 @@ void Foam::Pstream::exchange max ( static_cast<label>(1), - static_cast<label>(UPstream::maxCommsSize/sizeof(T)) + static_cast<label>(UPstream::maxCommsSize/sizeof(Type)) ) ); @@ -324,11 +325,11 @@ void Foam::Pstream::exchange ); } - /// Info<< "iter " << iter - /// << ": beg=" << flatOutput(startSend) - /// << " len=" << flatOutput(nSend) << endl; + // Info<< "iter " << iter + // << ": beg=" << flatOutput(startSend) + // << " len=" << flatOutput(nSend) << endl; - exchangeBuf<T> + exchangeBuf<Type> ( nSend, charPtrSend, @@ -414,20 +415,50 @@ void Foam::Pstream::exchangeSizes } -/// FUTURE? -/// -/// template<class Container> -/// void Foam::Pstream::exchangeSizes -/// ( -/// const labelUList& neighProcs, -/// const Container& sendBufs, -/// labelList& recvSizes, -/// const label tag, -/// const label comm -/// ) -/// { -/// exchangeSizes<Container>(neighProcs, neighProcs, sendBufs, tag, comm); -/// } +// FUTURE? +// template<class Container> +// void Foam::Pstream::exchangeSizes +// ( +// const labelUList& neighProcs, +// const Container& sendBufs, +// labelList& recvSizes, +// const label tag, +// const label comm +// ); + + +// Sparse sending +template<class Container> +void Foam::Pstream::exchangeSizes +( + const Map<Container>& sendBufs, + Map<label>& recvSizes, + const label tag, + const label comm +) +{ + Map<label> sendSizes(2*sendBufs.size()); + recvSizes.clear(); // Done in allToAllConsensus too, but be explicit here + + forAllConstIters(sendBufs, iter) + { + const label proci = iter.key(); + const label count = iter.val().size(); + + if (count) + { + sendSizes.emplace(proci, count); + } + } + + UPstream::allToAllConsensus + ( + sendSizes, + recvSizes, + (tag + 314159), // some unique tag? + comm + ); +} template<class Container> @@ -438,26 +469,46 @@ void Foam::Pstream::exchangeSizes const label comm ) { - if (sendBufs.size() != UPstream::nProcs(comm)) + const label numProcs = UPstream::nProcs(comm); + + if (sendBufs.size() != numProcs) { FatalErrorInFunction << "Size of container " << sendBufs.size() - << " does not equal the number of processors " - << UPstream::nProcs(comm) + << " does not equal the number of processors " << numProcs << Foam::abort(FatalError); } - labelList sendSizes(sendBufs.size()); + labelList sendSizes(numProcs); forAll(sendBufs, proci) { sendSizes[proci] = sendBufs[proci].size(); } recvSizes.resize_nocopy(sendSizes.size()); + + if + ( + UPstream::nProcsNonblockingExchange > 1 + && UPstream::nProcsNonblockingExchange <= numProcs + ) + { + // Use algorithm NBX: Nonblocking Consensus Exchange + + UPstream::allToAllConsensus + ( + sendSizes, + recvSizes, + (UPstream::msgType() + 314159), // some unique tag? 
+ comm + ); + return; + } + UPstream::allToAll(sendSizes, recvSizes, comm); } -template<class Container, class T> +template<class Container, class Type> void Foam::Pstream::exchange ( const UList<Container>& sendBufs, @@ -467,10 +518,36 @@ void Foam::Pstream::exchange const bool wait ) { + if + ( + wait + && UPstream::parRun() + && UPstream::nProcsNonblockingExchange > 1 + && UPstream::nProcsNonblockingExchange <= UPstream::nProcs(comm) + ) + { + // Use algorithm NBX: Nonblocking Consensus Exchange + + Pstream::exchangeConsensus<Container, Type> + ( + sendBufs, + recvBufs, + (tag + 314159), // some unique tag? + comm + ); + return; + } + + // Algorithm PEX: Personalized Exchange + // - Step 1: each process writes the data sizes to each peer and + // redistributes the vector (eg, MPI_Alltoall) + // - Step 2: size receive buffers and setup receives for all + // non-zero sendcounts. Post all sends and wait. + labelList recvSizes; exchangeSizes(sendBufs, recvSizes, comm); - exchange<Container, T>(sendBufs, recvSizes, recvBufs, tag, comm, wait); + exchange<Container, Type>(sendBufs, recvSizes, recvBufs, tag, comm, wait); } diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamExchangeConsensus.C b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamExchangeConsensus.C new file mode 100644 index 0000000000000000000000000000000000000000..0cf30fd437934adeaa48f700db329d2e1538e638 --- /dev/null +++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamExchangeConsensus.C @@ -0,0 +1,348 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | www.openfoam.com + \\/ M anipulation | +------------------------------------------------------------------------------- + Copyright (C) 2023 OpenCFD Ltd. +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. + +Note + The algorithm NBX (Nonblocking consensus exchange) is described by + + "Scalable Communication Protocols for Dynamic Sparse Data Exchange", + Hoefler, Siebert, Lumsdaine + May 2010 ACM SIGPLAN Notices 45(5):159-168 + https://doi.org/10.1145/1837853.1693476 + + http://unixer.de/publications/img/hoefler-dsde-protocols.pdf + +\*---------------------------------------------------------------------------*/ + +#include "Pstream.H" +#include "contiguous.H" +#include "PstreamReduceOps.H" + +// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // + +template<class Container, class Type> +void Foam::Pstream::exchangeConsensus +( + const UList<Container>& sendBufs, + List<Container>& recvBufs, + const int tag, + const label comm +) +{ + static_assert(is_contiguous<Type>::value, "Contiguous data only!"); + // if (!is_contiguous<Type>::value) + // { + // FatalErrorInFunction + // << "Contiguous data only."
<< sizeof(Type) + // << Foam::abort(FatalError); + // } + + const label myProci = UPstream::myProcNo(comm); + const label numProc = UPstream::nProcs(comm); + + if (sendBufs.size() != numProc) + { + FatalErrorInFunction + << "Size of list " << sendBufs.size() + << " does not equal the number of processors " << numProc + << Foam::abort(FatalError); + } + + // Initial: resize and clear everything + recvBufs.resize_nocopy(sendBufs.size()); + + for (auto& buf : recvBufs) + { + buf.clear(); + } + + if (!UPstream::parRun() || numProc < 2) + { + // Do myself + recvBufs[myProci] = sendBufs[myProci]; + return; + } + + // This largely follows PstreamDetail::allToAllConsensus + // but more MPI wrapping used here. + + DynamicList<UPstream::Request> requests(sendBufs.size()); + + //// profilingPstream::beginTiming(); + + // If there are synchronisation problems, + // a beginning barrier can help, but should not be necessary + // when unique message tags are being used. + + //// UPstream::barrier(comm); + + + // Start nonblocking synchronous send to process dest + for (label proci = 0; proci < numProc; ++proci) + { + const auto& sendData = sendBufs[proci]; + + if (sendData.empty()) + { + // Do not send/recv empty data + } + else if (proci == myProci) + { + // Do myself + recvBufs[proci] = sendBufs[proci]; + } + else + { + // Has data to send + + UOPstream::write + ( + requests.emplace_back(), + proci, + sendData.cdata_bytes(), + sendData.size_bytes(), + tag, + comm, + UPstream::sendModes::sync + ); + } + } + + + // Probe and receive + + UPstream::Request barrierReq; + + for (bool barrier_active = false, done = false; !done; /*nil*/) + { + std::pair<int, int> probed = + UPstream::probeMessage + ( + UPstream::commsTypes::nonBlocking, + -1, // ANY_SOURCE + tag, + comm + ); + + if (probed.second > 0) + { + // Message found and had size. + // - receive into dest buffer location + + const label proci = probed.first; + const label nRecv = (probed.second / sizeof(Type)); + + auto& recvData = recvBufs[proci]; + recvData.resize_nocopy(nRecv); + + UIPstream::read + ( + UPstream::commsTypes::scheduled, + proci, + recvData.data_bytes(), + recvData.size_bytes(), + tag, + comm + ); + } + + if (barrier_active) + { + // Test barrier for completion + // - all received, or nothing to receive + if (UPstream::finishedRequest(barrierReq)) + { + done = true; + } + } + else + { + // Check if all sends have arrived + if (UPstream::finishedRequests(requests)) + { + UPstream::barrier(comm, &barrierReq); + barrier_active = true; + } + } + } + + //// profilingPstream::addAllToAllTime(); +} + + +template<class Container, class Type> +void Foam::Pstream::exchangeConsensus +( + const Map<Container>& sendBufs, + Map<Container>& recvBufs, + const int tag, + const label comm +) +{ + static_assert(is_contiguous<Type>::value, "Contiguous data only!"); + // if (!is_contiguous<Type>::value) + // { + // FatalErrorInFunction + // << "Contiguous data only." 
<< sizeof(Type) + // << Foam::abort(FatalError); + // } + + const label myProci = UPstream::myProcNo(comm); + const label numProc = UPstream::nProcs(comm); + + // Initial: clear out receive 'slots' + // Preferable to clear out the map entries instead of the map itself + // since this can potentially preserve allocated space + // (eg DynamicList entries) between calls + + forAllIters(recvBufs, iter) + { + iter.val().clear(); + } + + if (!UPstream::parRun() || numProc < 2) + { + // Do myself + const auto iter = sendBufs.find(myProci); + if (iter.good()) + { + const auto& sendData = iter.val(); + + if (!sendData.empty()) + { + // Do myself: insert_or_assign + recvBufs(iter.key()) = sendData; + } + } + return; + } + + + // Algorithm NBX: Nonblocking consensus with Map (HashTable) containers + + DynamicList<UPstream::Request> requests(sendBufs.size()); + + //// profilingPstream::beginTiming(); + + // If there are synchronisation problems, + // a beginning barrier can help, but should not be necessary + // when unique message tags are being used. + + //// UPstream::barrier(comm); + + + // Start nonblocking synchronous send to process dest + forAllConstIters(sendBufs, iter) + { + const label proci = iter.key(); + const auto& sendData = iter.val(); + + if (sendData.empty()) + { + // Do not send/recv empty data + } + else if (proci == myProci) + { + // Do myself: insert_or_assign + recvBufs(proci) = sendData; + } + else + { + // Has data to send + + UOPstream::write + ( + requests.emplace_back(), + proci, + sendData.cdata_bytes(), + sendData.size_bytes(), + tag, + comm, + UPstream::sendModes::sync + ); + } + } + + + // Probe and receive + + UPstream::Request barrierReq; + + for (bool barrier_active = false, done = false; !done; /*nil*/) + { + std::pair<int, int> probed = + UPstream::probeMessage + ( + UPstream::commsTypes::nonBlocking, + -1, // ANY_SOURCE + tag, + comm + ); + + if (probed.second > 0) + { + // Message found and had size.
+ // - receive into dest buffer location + + const label proci = probed.first; + const label nRecv = (probed.second / sizeof(Type)); + + auto& recvData = recvBufs(proci); + recvData.resize_nocopy(nRecv); + + UIPstream::read + ( + UPstream::commsTypes::scheduled, + proci, + recvData.data_bytes(), + recvData.size_bytes(), + tag, + comm + ); + } + + if (barrier_active) + { + // Test barrier for completion + if (UPstream::finishedRequest(barrierReq)) + { + done = true; + } + } + else + { + // Check if all sends have arrived + if (UPstream::finishedRequests(requests)) + { + UPstream::barrier(comm, &barrierReq); + barrier_active = true; + } + } + } + + //// profilingPstream::addAllToAllTime(); +} + + +// ************************************************************************* // diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UIPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UIPstream.H index bcb989cf3ad8a4736db0ff8941c3a68bc3bdf4ad..68e3f26bdc5639bd8cd36cf4379e0cf461ebffeb 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UIPstream.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UIPstream.H @@ -85,19 +85,30 @@ protected: // Protected Data - int fromProcNo_; + //- Source rank for the data + const int fromProcNo_; - DynamicList<char>& recvBuf_; + //- Message tag for communication + const int tag_; - label& recvBufPos_; + //- The communicator index + const int comm_; - const int tag_; + //- The message size, read on bufferIPCrecv or set directly + int messageSize_; - const label comm_; + //- Receive position in buffer data, used + //- if there is no external location for recvBufPos_ + label storedRecvBufPos_; + //- Clear the receive buffer on termination (in the destructor) const bool clearAtEnd_; - int messageSize_; + //- Reference to the receive buffer data + DynamicList<char>& recvBuf_; + + //- Reference to the receive position in buffer data + label& recvBufPos_; // Protected Constructors @@ -120,10 +131,17 @@ protected: //- Construct given buffers UIPstreamBase(const int fromProcNo, PstreamBuffers& buffers); + //- Construct for an externally obtained buffer. + // The parameter is allowed to be const (since reading will not + // affect it), but must reference a concrete variable. + UIPstreamBase + ( + const DynamicList<char>& receiveBuf, + IOstreamOption::streamFormat fmt + ); public: - //- Destructor. Optionally clears external receive buffer. virtual ~UIPstreamBase(); @@ -238,6 +256,16 @@ public: //- Construct given buffers UIPstream(const int fromProcNo, PstreamBuffers& buffers); + //- Construct for reading from a standalone buffer that has + //- been obtained externally by the caller. + // The parameter is allowed to be const (since reading will not + // affect it), but must reference a concrete variable.
+ explicit UIPstream + ( + const DynamicList<char>& recvBuf, + IOstreamOption::streamFormat fmt = IOstreamOption::BINARY + ); + //- Destructor virtual ~UIPstream() = default; diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UIPstreamBase.C b/src/OpenFOAM/db/IOstreams/Pstreams/UIPstreamBase.C index 21adb26831826abfdccb8228bea1ce88078b2c9b..97685b3022570845ace46cdc8ffc990f927822ea 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UIPstreamBase.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UIPstreamBase.C @@ -159,12 +159,13 @@ Foam::UIPstreamBase::UIPstreamBase UPstream(commsType), Istream(fmt), fromProcNo_(fromProcNo), - recvBuf_(receiveBuf), - recvBufPos_(receiveBufPosition), tag_(tag), comm_(comm), + messageSize_(0), + storedRecvBufPos_(0), clearAtEnd_(clearAtEnd), - messageSize_(0) + recvBuf_(receiveBuf), + recvBufPos_(receiveBufPosition) { setOpened(); setGood(); @@ -180,12 +181,13 @@ Foam::UIPstreamBase::UIPstreamBase UPstream(buffers.commsType()), Istream(buffers.format()), fromProcNo_(fromProcNo), - recvBuf_(buffers.recvBuf_[fromProcNo]), - recvBufPos_(buffers.recvBufPos_[fromProcNo]), tag_(buffers.tag()), comm_(buffers.comm()), + messageSize_(0), + storedRecvBufPos_(0), clearAtEnd_(buffers.allowClearRecv()), - messageSize_(0) + recvBuf_(buffers.accessRecvBuffer(fromProcNo)), + recvBufPos_(buffers.accessRecvPosition(fromProcNo)) { if ( @@ -205,6 +207,32 @@ Foam::UIPstreamBase::UIPstreamBase } +Foam::UIPstreamBase::UIPstreamBase +( + const DynamicList<char>& receiveBuf, + IOstreamOption::streamFormat fmt +) +: + UPstream(UPstream::commsTypes::nonBlocking), // placeholder + Istream(fmt), + fromProcNo_(UPstream::masterNo()), // placeholder + tag_(UPstream::msgType()), // placeholder + comm_(UPstream::selfComm), // placeholder + messageSize_(receiveBuf.size()), // Message == buffer + storedRecvBufPos_(0), + clearAtEnd_(false), // Do not clear recvBuf if at end!! 
+ recvBuf_ + ( + // The receive buffer is never modified with this code path + const_cast<DynamicList<char>&>(receiveBuf) + ), + recvBufPos_(storedRecvBufPos_) // Internal reference +{ + setOpened(); + setGood(); +} + + // * * * * * * * * * * * * * * * * Destructor * * * * * * * * * * * * * * * // Foam::UIPstreamBase::~UIPstreamBase() @@ -517,8 +545,7 @@ void Foam::UIPstreamBase::print(Ostream& os) const { os << "Reading from processor " << fromProcNo_ << " using communicator " << comm_ - << " and tag " << tag_ - << Foam::endl; + << " and tag " << tag_ << Foam::endl; } diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UOPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UOPstream.H index a3f86d32812ba2ab8f8f4fead172f23a8bf61812..02f6f85037b451ad5640bcfb8d7a0aca25d6aa72 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UOPstream.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UOPstream.H @@ -92,16 +92,21 @@ protected: // Protected Data - int toProcNo_; - - DynamicList<char>& sendBuf_; + //- Destination rank for the data + const int toProcNo_; + //- Message tag for communication const int tag_; - const label comm_; + //- The communicator index + const int comm_; + //- Call bufferIPCsend on termination (in the destructor) const bool sendAtDestruct_; + //- Reference to the send buffer data + DynamicList<char>& sendBuf_; + // Protected Constructors @@ -122,6 +127,12 @@ protected: //- Construct given buffers UOPstreamBase(const int toProcNo, PstreamBuffers& buffers); + //- Construct for externally obtained buffers + UOPstreamBase + ( + DynamicList<char>& sendBuf, + IOstreamOption::streamFormat fmt + ); public: @@ -310,6 +321,14 @@ public: //- Construct given buffers UOPstream(const int toProcNo, PstreamBuffers& buffers); + //- Construct for writing into a standalone buffer. + //- Data transfer is handled externally by the caller. + explicit UOPstream + ( + DynamicList<char>& sendBuf, + IOstreamOption::streamFormat fmt = IOstreamOption::BINARY + ); + //- Destructor, usually sends buffer on destruct. virtual ~UOPstream(); diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UOPstreamBase.C b/src/OpenFOAM/db/IOstreams/Pstreams/UOPstreamBase.C index 631818b772371a46f159d6355d8c4af5b7f99c7b..b865858bad15c3fcc37722b882deb359183a67ed 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UOPstreamBase.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UOPstreamBase.C @@ -145,10 +145,10 @@ Foam::UOPstreamBase::UOPstreamBase UPstream(commsType), Ostream(fmt), toProcNo_(toProcNo), - sendBuf_(sendBuf), tag_(tag), comm_(comm), - sendAtDestruct_(sendAtDestruct) + sendAtDestruct_(sendAtDestruct), + sendBuf_(sendBuf) { setOpened(); setGood(); @@ -160,16 +160,36 @@ Foam::UOPstreamBase::UOPstreamBase(const int toProcNo, PstreamBuffers& buffers) UPstream(buffers.commsType()), Ostream(buffers.format()), toProcNo_(toProcNo), - sendBuf_(buffers.sendBuf_[toProcNo]), tag_(buffers.tag()), comm_(buffers.comm()), - sendAtDestruct_(buffers.commsType() != UPstream::commsTypes::nonBlocking) + sendAtDestruct_(buffers.commsType() != UPstream::commsTypes::nonBlocking), + sendBuf_(buffers.accessSendBuffer(toProcNo)) { setOpened(); setGood(); } +Foam::UOPstreamBase::UOPstreamBase +( + DynamicList<char>& sendBuf, + IOstreamOption::streamFormat fmt +) +: + UPstream(UPstream::commsTypes::nonBlocking), // placeholder + Ostream(fmt), + toProcNo_(UPstream::masterNo()), // placeholder + tag_(UPstream::msgType()), // placeholder + comm_(UPstream::selfComm), // placeholder + sendAtDestruct_(false), // Never sendAtDestruct!! 
+ sendBuf_(sendBuf) +{ + sendBuf_.clear(); // Overwrite into buffer + setOpened(); + setGood(); +} + + // * * * * * * * * * * * * * * * * Destructor * * * * * * * * * * * * * * * // Foam::UOPstreamBase::~UOPstreamBase() @@ -394,8 +414,8 @@ void Foam::UOPstreamBase::rewind() void Foam::UOPstreamBase::print(Ostream& os) const { - os << "Writing from processor " << toProcNo_ - << " to processor " << myProcNo() << " in communicator " << comm_ + os << "Writing to processor " << toProcNo_ + << " from processor " << myProcNo() << " in communicator " << comm_ << " and tag " << tag_ << Foam::endl; } diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C index d1be6447e66e7c00a749595fcc35d675099b7c6d..e3cf730d71e05e707046d2bfa74eefca7dd1ff03 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2015-2022 OpenCFD Ltd. + Copyright (C) 2015-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -161,11 +161,11 @@ Foam::label Foam::UPstream::allocateCommunicator // Extend storage index = parentComm_.size(); - myProcNo_.append(-1); - procIDs_.append(List<int>()); - parentComm_.append(-1); - linearCommunication_.append(List<commsStruct>()); - treeCommunication_.append(List<commsStruct>()); + myProcNo_.push_back(-1); + procIDs_.emplace_back(); + parentComm_.push_back(-1); + linearCommunication_.emplace_back(); + treeCommunication_.emplace_back(); } if (debug) @@ -292,7 +292,7 @@ void Foam::UPstream::freeCommunicators(const bool doPstream) int Foam::UPstream::baseProcNo(label comm, int procID) { - while (parent(comm) >= 0 && procID >= 0) + while (UPstream::parent(comm) >= 0 && procID >= 0) { const auto& parentRanks = UPstream::procID(comm); procID = parentRanks[procID]; @@ -305,14 +305,14 @@ int Foam::UPstream::baseProcNo(label comm, int procID) Foam::label Foam::UPstream::procNo(const label comm, const int baseProcID) { - const auto& parentRanks = procID(comm); - label parentComm = parent(comm); + const auto& parentRanks = UPstream::procID(comm); + label parentComm = UPstream::parent(comm); int procID = baseProcID; if (parentComm >= 0) { - procID = procNo(parentComm, baseProcID); + procID = UPstream::procNo(parentComm, baseProcID); } return parentRanks.find(procID); @@ -327,7 +327,7 @@ Foam::label Foam::UPstream::procNo ) { label physProcID = UPstream::baseProcNo(currentComm, currentProcID); - return procNo(comm, physProcID); + return UPstream::procNo(comm, physProcID); } @@ -510,6 +510,30 @@ registerOptSwitch Foam::UPstream::nProcsSimpleSum ); +int Foam::UPstream::nProcsNonblockingExchange +( + Foam::debug::optimisationSwitch("nonBlockingExchange", 0) +); +registerOptSwitch +( + "nonBlockingExchange", + int, + Foam::UPstream::nProcsNonblockingExchange +); + + +int Foam::UPstream::nPollProcInterfaces +( + Foam::debug::optimisationSwitch("nPollProcInterfaces", 0) +); +registerOptSwitch +( + "nPollProcInterfaces", + int, + Foam::UPstream::nPollProcInterfaces +); + + Foam::UPstream::commsTypes Foam::UPstream::defaultCommsType ( commsTypeNames.get @@ -553,18 +577,6 @@ namespace Foam } //! 
\endcond -int Foam::UPstream::nPollProcInterfaces -( - Foam::debug::optimisationSwitch("nPollProcInterfaces", 0) -); -registerOptSwitch -( - "nPollProcInterfaces", - int, - Foam::UPstream::nPollProcInterfaces -); - - int Foam::UPstream::maxCommsSize ( Foam::debug::optimisationSwitch("maxCommsSize", 0) diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H index ed16d3a770869386b9b49b866dcda3905a8d0fc8..678747ec268f36aee28cda7e1a4c5f00e741326d 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H @@ -55,6 +55,8 @@ namespace Foam //- Implementation details for UPstream/Pstream/MPI etc. namespace PstreamDetail {} +// Forward Declarations +template<class T> class Map; /*---------------------------------------------------------------------------*\ Class UPstream Declaration @@ -279,12 +281,16 @@ public: //- Number of processors to change from linear to tree communication static int nProcsSimpleSum; - //- Default commsType - static commsTypes defaultCommsType; + //- Number of processors to change to nonBlocking consensual + //- exchange (NBX). Ignored for zero or negative values. + static int nProcsNonblockingExchange; //- Number of polling cycles in processor updates static int nPollProcInterfaces; + //- Default commsType + static commsTypes defaultCommsType; + //- Optional maximum message size (bytes) static int maxCommsSize; @@ -463,6 +469,23 @@ public: UPstream::Request* req = nullptr ); + //- Probe for an incoming message. + // + // \param commsType Blocking or not + // \param fromProcNo The source rank (negative == ANY_SOURCE) + // \param tag The source message tag + // \param communicator The communicator index + // + // \returns source rank and message size (bytes) + // and (-1, 0) on failure + static std::pair<int,int> probeMessage + ( + const UPstream::commsTypes commsType, + const int fromProcNo, + const int tag = UPstream::msgType(), + const label communicator = worldComm + ); + + // Non-blocking comms @@ -483,6 +506,11 @@ public: // A no-op if parRun() == false or the list is empty static void waitRequests(UList<UPstream::Request>& requests); + //- Wait until any request has finished and return its index. + // Returns -1 if parRun() == false, or the list is empty, + // or if all the requests have already been handled + static label waitAnyRequest(UList<UPstream::Request>& requests); + //- Wait until request i has finished. // A no-op if parRun() == false, // there are no pending requests, @@ -504,6 +532,10 @@ public: // or for a null-request static bool finishedRequest(UPstream::Request& req); + //- Non-blocking comms: have all requests finished? + // A no-op and returns true if parRun() == false or list is empty + static bool finishedRequests(UList<UPstream::Request>& requests); + static int allocateTag(const char* const msg = nullptr); static void freeTag(const int tag, const char* const msg = nullptr); @@ -684,6 +716,8 @@ public: //- Exchange integer data with all processors (in the communicator). // \c sendData[proci] is the value to send to proci. // After return recvData contains the data from the other processors. + // \n + // For \b non-parallel : does a simple copy of sendData to recvData static void allToAll ( const UList<int32_t>& sendData, @@ -694,6 +728,8 @@ //- Exchange integer data with all processors (in the communicator). // \c sendData[proci] is the value to send to proci. // After return recvData contains the data from the other processors.
+ // \n + // For \b non-parallel : does a simple copy of sendData to recvData static void allToAll ( const UList<int64_t>& sendData, @@ -701,6 +737,88 @@ public: + //- Exchange \b non-zero integer data with all ranks in the communicator + //- using non-blocking consensus exchange. + // The \c sendData[proci] is the (non-zero) value to send to proci. + // After return recvData contains the non-zero values sent from the + // other processors. The recvData list is always assigned zero before + // receipt and values of zero are never transmitted. + // \n + // For \b non-parallel : does a simple copy of sendData to recvData + // + // \note The message tag should be chosen to be a unique value + // since the implementation uses probing with ANY_SOURCE !! + static void allToAllConsensus + ( + const UList<int32_t>& sendData, + UList<int32_t>& recvData, + const int tag, + const label communicator = worldComm + ); + + //- Exchange \b non-zero integer data with all ranks in the communicator + //- using non-blocking consensus exchange. + // The \c sendData[proci] is the (non-zero) value to send to proci. + // After return recvData contains the non-zero values sent from the + // other processors. The recvData list is always assigned zero before + // receipt and values of zero are never transmitted. + // \n + // For \b non-parallel : does a simple copy of sendData to recvData + // + // \note The message tag should be chosen to be a unique value + // since the implementation uses probing with ANY_SOURCE !! + static void allToAllConsensus + ( + const UList<int64_t>& sendData, + UList<int64_t>& recvData, + const int tag, + const label communicator = worldComm + ); + + //- Exchange \b non-zero integer data with all ranks in the communicator + //- using non-blocking consensus exchange. + // The \c sendData[proci] is the (non-zero) value to send to proci. + // After return recvData contains the non-zero values sent from the + // other processors. Since the recvData map is always cleared before + // receipt and values of zero are never transmitted, a simple check + // of its keys is sufficient to determine connectivity. + // \n + // For \b non-parallel : copy own rank (if it exists and non-zero) + // from sendData to recvData. + // + // \note The message tag should be chosen to be a unique value + // since the implementation uses probing with ANY_SOURCE !! + static void allToAllConsensus + ( + const Map<int32_t>& sendData, + Map<int32_t>& recvData, + const int tag, + const label communicator = worldComm + ); + + //- Exchange \b non-zero integer data with all ranks in the communicator + //- using non-blocking consensus exchange. + // The \c sendData[proci] is the (non-zero) value to send to proci. + // After return recvData contains the non-zero values sent from the + // other processors. Since the recvData map is always cleared before + // receipt and values of zero are never transmitted, a simple check + // of its keys is sufficient to determine connectivity. + // \n + // For \b non-parallel : copy own rank (if it exists and non-zero) + // from sendData to recvData. + // + // \note The message tag should be chosen to be a unique value + // since the implementation uses probing with ANY_SOURCE !!
+ static void allToAllConsensus + ( + const Map<int64_t>& sendData, + Map<int64_t>& recvData, + const int tag, + const label communicator = worldComm + ); + // Low-level gather/scatter routines @@ -770,7 +888,8 @@ public: //- Gather individual values into list locations. // On master list length == nProcs, otherwise zero length. - // If called in non-parallel mode, + // \n + // For \b non-parallel : // the returned list length is 1 with localValue. template<class T> static List<T> listGatherValues @@ -781,7 +900,8 @@ public: //- Scatter individual values from list locations. // On master input list length == nProcs, ignored on other procs. - // If called in non-parallel mode, + // \n + // For \b non-parallel : // returns the first list element (or zero). template<class T> static T listScatterValues @@ -795,6 +915,7 @@ public: //- Broadcast buffer contents to all processes in communicator. //- The sizes must match on all processes. + // For \b non-parallel : do nothing. // \return True on success static bool broadcast ( @@ -808,6 +929,7 @@ public: // Logical reductions //- Logical (and) reduction (cf. MPI AllReduce) + // For \b non-parallel : do nothing static void reduceAnd ( bool& value, @@ -815,6 +937,7 @@ public: ); //- Logical (or) reduction (cf. MPI AllReduce) + // For \b non-parallel : do nothing static void reduceOr ( bool& value, @@ -852,7 +975,6 @@ public: //- An opaque wrapper for MPI_Request with a vendor-independent //- representation independent of any \c <mpi.h> header -// // The MPI standard states that MPI_Request is always an opaque object. // Generally it is either an integer (eg, mpich) or a pointer (eg, openmpi). class UPstream::Request diff --git a/src/OpenFOAM/global/argList/argList.C b/src/OpenFOAM/global/argList/argList.C index c5a52d07ee6771c021fc4e38bb39407b3aefe470..a0f2576ed1e4b8f2d657e8468b99b25d248169b3 100644 --- a/src/OpenFOAM/global/argList/argList.C +++ b/src/OpenFOAM/global/argList/argList.C @@ -1715,8 +1715,11 @@ void Foam::argList::parse } } Info<< "Pstream initialized with:" << nl - << " floatTransfer : " << Pstream::floatTransfer << nl + << " floatTransfer : " + << Switch::name(Pstream::floatTransfer) << nl << " nProcsSimpleSum : " << Pstream::nProcsSimpleSum << nl + << " nonBlockingExchange: " + << Pstream::nProcsNonblockingExchange << nl << " commsType : " << Pstream::commsTypeNames[Pstream::defaultCommsType] << nl << " polling iterations : " << Pstream::nPollProcInterfaces diff --git a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C index a6030560f07afd08fd8b0d625124fd1854b4e198..7f3563edb5ac9c2bc0f7998786a603280d27290d 100644 --- a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C +++ b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2017-2018 OpenFOAM Foundation - Copyright (C) 2019-2022 OpenCFD Ltd. + Copyright (C) 2019-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
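For orientation, the core of the NBX protocol that the allToAllConsensus and exchangeConsensus methods above wrap can be sketched against plain MPI-3. This is an illustrative sketch only: the function name nbxSketch and the bare int payload are assumptions for the example, not part of these sources, which instead route through the UPstream wrappers (probeMessage, finishedRequests, barrier).

#include <mpi.h>
#include <utility>
#include <vector>

// Plain MPI-3 sketch of NBX: nonblocking synchronous sends, probe-driven
// receives, and a nonblocking barrier as the termination test.
std::vector<std::vector<int>> nbxSketch
(
    const std::vector<std::vector<int>>& sendBufs,  // one buffer per rank
    int tag,
    MPI_Comm comm
)
{
    int myRank = 0, nRanks = 0;
    MPI_Comm_rank(comm, &myRank);
    MPI_Comm_size(comm, &nRanks);

    std::vector<std::vector<int>> recvBufs(nRanks);
    std::vector<MPI_Request> sendReqs;

    // 1. Nonblocking synchronous sends for non-empty payloads only
    for (int proci = 0; proci < nRanks; ++proci)
    {
        if (proci == myRank)
        {
            recvBufs[proci] = sendBufs[proci];  // local copy
        }
        else if (!sendBufs[proci].empty())
        {
            sendReqs.push_back(MPI_REQUEST_NULL);
            MPI_Issend
            (
                sendBufs[proci].data(), int(sendBufs[proci].size()),
                MPI_INT, proci, tag, comm, &sendReqs.back()
            );
        }
    }

    // 2. Probe/receive until a nonblocking barrier completes
    MPI_Request barrier = MPI_REQUEST_NULL;
    bool barrierActive = false;

    while (true)
    {
        int flag = 0;
        MPI_Status status;
        MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status);

        if (flag)
        {
            int count = 0;
            MPI_Get_count(&status, MPI_INT, &count);

            std::vector<int> buf(count);
            MPI_Recv
            (
                buf.data(), count, MPI_INT, status.MPI_SOURCE,
                tag, comm, MPI_STATUS_IGNORE
            );
            recvBufs[status.MPI_SOURCE] = std::move(buf);
        }

        if (barrierActive)
        {
            int done = 0;
            MPI_Test(&barrier, &done, MPI_STATUS_IGNORE);
            if (done) break;  // consensus: no messages remain in flight
        }
        else
        {
            // Issend completion means the receiver has matched the message
            int allSent = 0;
            MPI_Testall
            (
                int(sendReqs.size()), sendReqs.data(),
                &allSent, MPI_STATUSES_IGNORE
            );
            if (allSent)
            {
                MPI_Ibarrier(comm, &barrier);
                barrierActive = true;
            }
        }
    }

    return recvBufs;
}

The synchronous-mode sends are the essential design choice: an MPI_Issend only completes once the receiver has matched it, so a rank may enter the nonblocking barrier knowing its own messages are delivered, and barrier completion on all ranks implies nothing remains in flight.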
@@ -641,8 +641,7 @@ Foam::fileOperations::masterUncollatedFileOperation::read } } - labelList recvSizes; - pBufs.finishedSends(recvSizes); + pBufs.finishedSends(); // isPtr will be valid on master and will be the unbuffered // IFstream. Else the information is in the PstreamBuffers (and @@ -653,12 +652,11 @@ Foam::fileOperations::masterUncollatedFileOperation::read if (procValid[Pstream::myProcNo(comm)]) { // This processor needs to return something + List<char> buf(pBufs.recvDataCount(Pstream::masterNo())); - UIPstream is(Pstream::masterNo(), pBufs); - - List<char> buf(recvSizes[Pstream::masterNo()]); if (!buf.empty()) { + UIPstream is(Pstream::masterNo(), pBufs); is.read(buf.data(), buf.size()); } @@ -2353,8 +2351,7 @@ Foam::fileOperations::masterUncollatedFileOperation::NewIFstream } - labelList recvSizes; - pBufs.finishedSends(recvSizes); + pBufs.finishedSends(); if (Pstream::master(Pstream::worldComm)) { @@ -2370,10 +2367,13 @@ Foam::fileOperations::masterUncollatedFileOperation::NewIFstream << " from processor " << Pstream::masterNo() << endl; } - UIPstream is(Pstream::masterNo(), pBufs); + List<char> buf(pBufs.recvDataCount(Pstream::masterNo())); - List<char> buf(recvSizes[Pstream::masterNo()]); - is.read(buf.data(), buf.size()); + if (!buf.empty()) + { + UIPstream is(Pstream::masterNo(), pBufs); + is.read(buf.data(), buf.size()); + } if (debug) { diff --git a/src/OpenFOAM/meshes/lduMesh/lduPrimitiveMeshTemplates.C b/src/OpenFOAM/meshes/lduMesh/lduPrimitiveMeshTemplates.C index 6b774f304faab42a231a0a450711e9b52860f6b4..e636eef5479f81fb318ccd9c33d218563499f0eb 100644 --- a/src/OpenFOAM/meshes/lduMesh/lduPrimitiveMeshTemplates.C +++ b/src/OpenFOAM/meshes/lduMesh/lduPrimitiveMeshTemplates.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2013 OpenFOAM Foundation - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -38,39 +38,52 @@ Foam::lduSchedule Foam::lduPrimitiveMesh::nonBlockingSchedule { lduSchedule schedule(2*interfaces.size()); - // 1. All non-processor patches - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - label patchEvali = 0; + label numProcPatches = 0; + + // + // 1. Schedule non-processor patches + // forAll(interfaces, patchi) { - if (interfaces.set(patchi) && !isA<ProcPatch>(interfaces[patchi])) + if (interfaces.set(patchi)) { - schedule[patchEvali++].setInitEvaluate(patchi); - schedule[patchEvali++].setEvaluate(patchi); + if (isA<ProcPatch>(interfaces[patchi])) + { + ++numProcPatches; + } + else + { + schedule[patchEvali++].setInitEvaluate(patchi); + schedule[patchEvali++].setEvaluate(patchi); + } } } - // 2. All processor patches - // ~~~~~~~~~~~~~~~~~~~~~~~~ - forAll(interfaces, patchi) - { - if (interfaces.set(patchi) && isA<ProcPatch>(interfaces[patchi])) - { - schedule[patchEvali++].setInitEvaluate(patchi); - } - } + // + // 2. 
Schedule processor patches + // - forAll(interfaces, patchi) + if (numProcPatches) { - if (interfaces.set(patchi) && isA<ProcPatch>(interfaces[patchi])) + forAll(interfaces, patchi) { - schedule[patchEvali++].setEvaluate(patchi); + if (interfaces.set(patchi) && isA<ProcPatch>(interfaces[patchi])) + { + schedule[patchEvali].setInitEvaluate(patchi); + schedule[patchEvali + numProcPatches].setEvaluate(patchi); + ++patchEvali; + } } } + // Caution: + // The schedule is only valid for a subset of its range + // (where interfaces are defined) but must retain the full list length + // for later (external) bookkeeping + return schedule; } diff --git a/src/OpenFOAM/parallel/commSchedule/commSchedule.C b/src/OpenFOAM/parallel/commSchedule/commSchedule.C index 32302b39fb75ed897139536ab8c10e7560177ad6..ae5817627cd0cfd2cf70bb76e7be32bb3be5e0d7 100644 --- a/src/OpenFOAM/parallel/commSchedule/commSchedule.C +++ b/src/OpenFOAM/parallel/commSchedule/commSchedule.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2016 OpenFOAM Foundation - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -27,7 +27,6 @@ License \*---------------------------------------------------------------------------*/ #include "commSchedule.H" -#include "ListOps.H" #include "IOstreams.H" #include "IOmanip.H" #include "StringStream.H" @@ -49,15 +48,15 @@ namespace Foam // Count the number of outstanding communications for a single processor static label outstandingComms ( - const labelList& commToSchedule, - DynamicList<label>& procComms + const labelUList& commToSchedule, + const DynamicList<label>& procComms ) { label nOutstanding = 0; - for (const label commi : procComms) + for (const label commPairi : procComms) { - if (commToSchedule[commi] == -1) + if (commToSchedule[commPairi] == -1) { ++nOutstanding; } @@ -82,20 +81,20 @@ Foam::commSchedule::commSchedule // Determine comms per processor. List<DynamicList<label>> procToComms(nProcs); - forAll(comms, commI) + forAll(comms, commPairi) { - label proc0 = comms[commI][0]; - label proc1 = comms[commI][1]; + const label proc0 = comms[commPairi].first(); + const label proc1 = comms[commPairi].second(); if (proc0 < 0 || proc0 >= nProcs || proc1 < 0 || proc1 >= nProcs) { FatalErrorInFunction << "Illegal processor(s): " - << comms[commI] << abort(FatalError); + << comms[commPairi] << abort(FatalError); } - procToComms[proc0].append(commI); - procToComms[proc1].append(commI); + procToComms[proc0].push_back(commPairi); + procToComms[proc1].push_back(commPairi); } // Note: no need to shrink procToComms. Are small. 
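As a concrete illustration of the greedy pairing used by commSchedule, the following standalone toy (names hypothetical, and deliberately simplified to first-fit selection rather than the most-outstanding-comms-first heuristic applied below) packs the six pairwise exchanges of four ranks into three conflict-free steps:

#include <iostream>
#include <utility>
#include <vector>

int main()
{
    const int nProcs = 4;

    // Pairwise exchanges of a 4-rank all-to-all (owner < neighbour once)
    std::vector<std::pair<int,int>> comms =
        {{0,1}, {0,2}, {0,3}, {1,2}, {1,3}, {2,3}};

    std::vector<int> commToStep(comms.size(), -1);
    int nScheduled = 0;

    for (int step = 0; nScheduled < int(comms.size()); ++step)
    {
        std::vector<bool> busy(nProcs, false);

        for (std::size_t i = 0; i < comms.size(); ++i)
        {
            const auto& [p, q] = comms[i];

            // Schedule if unscheduled and both ranks are free this step
            if (commToStep[i] == -1 && !busy[p] && !busy[q])
            {
                commToStep[i] = step;
                busy[p] = busy[q] = true;
                ++nScheduled;
            }
        }
    }

    for (std::size_t i = 0; i < comms.size(); ++i)
    {
        std::cout << comms[i].first << " <-> " << comms[i].second
                  << " : step " << commToStep[i] << '\n';
    }
    return 0;
}

Step 0 pairs {0,1} and {2,3}, step 1 pairs {0,2} and {1,3}, step 2 pairs {0,3} and {1,2}: every rank is busy in every step, which is the point of scheduling rather than serialising the exchanges.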
@@ -108,7 +107,7 @@ Foam::commSchedule::commSchedule const labelPair& twoProcs = comms[i]; Pout<< i << ": " - << twoProcs[0] << " with " << twoProcs[1] << endl; + << twoProcs.first() << " <-> " << twoProcs.second() << endl; } Pout<< endl; @@ -158,44 +157,46 @@ Foam::commSchedule::commSchedule while (true) { - label maxCommI = -1; + label maxComm = -1; label maxNeed = labelMin; - forAll(comms, commI) + forAll(comms, commPairi) { - label proc0 = comms[commI][0]; - label proc1 = comms[commI][1]; + const label proc0 = comms[commPairi].first(); + const label proc1 = comms[commPairi].second(); if ( - commToSchedule[commI] == -1 // unscheduled - && !busy[proc0] - && !busy[proc1] + commToSchedule[commPairi] == -1 // unscheduled + && !busy[proc0] + && !busy[proc1] ) { label need = + ( outstandingComms(commToSchedule, procToComms[proc0]) - + outstandingComms(commToSchedule, procToComms[proc1]); + + outstandingComms(commToSchedule, procToComms[proc1]) + ); - if (need > maxNeed) + if (maxNeed < need) { maxNeed = need; - maxCommI = commI; + maxComm = commPairi; } } } - if (maxCommI == -1) + if (maxComm == -1) { // Found no unscheduled procs. break; } - // Schedule commI in this iteration - commToSchedule[maxCommI] = nScheduled++; - busy[comms[maxCommI][0]] = true; - busy[comms[maxCommI][1]] = true; + // Schedule commPairi in this iteration + commToSchedule[maxComm] = nScheduled++; + busy[comms[maxComm].first()] = true; + busy[comms[maxComm].second()] = true; } if (debug && UPstream::master()) @@ -206,16 +207,16 @@ Foam::commSchedule::commSchedule { labelList procToComm(nProcs, -1); - forAll(commToSchedule, commI) + forAll(commToSchedule, commPairi) { - label sched = commToSchedule[commI]; + const label sched = commToSchedule[commPairi]; if (sched >= oldNScheduled && sched < nScheduled) { - label proc0 = comms[commI][0]; - procToComm[proc0] = commI; - label proc1 = comms[commI][1]; - procToComm[proc1] = commI; + const label proc0 = comms[commPairi].first(); + const label proc1 = comms[commPairi].second(); + procToComm[proc0] = commPairi; + procToComm[proc1] = commPairi; } } @@ -255,31 +256,32 @@ Foam::commSchedule::commSchedule labelList nProcScheduled(nProcs, Zero); // Count - forAll(schedule_, i) + for (const label commPairi : schedule_) { - label commI = schedule_[i]; - const labelPair& twoProcs = comms[commI]; + const labelPair& twoProcs = comms[commPairi]; - nProcScheduled[twoProcs[0]]++; - nProcScheduled[twoProcs[1]]++; + nProcScheduled[twoProcs.first()]++; + nProcScheduled[twoProcs.second()]++; } + // Allocate forAll(procSchedule_, proci) { - procSchedule_[proci].setSize(nProcScheduled[proci]); + procSchedule_[proci].resize_nocopy(nProcScheduled[proci]); } + nProcScheduled = 0; + // Fill - forAll(schedule_, i) + for (const label commPairi : schedule_) { - label commI = schedule_[i]; - const labelPair& twoProcs = comms[commI]; + const labelPair& twoProcs = comms[commPairi]; - label proc0 = twoProcs[0]; - procSchedule_[proc0][nProcScheduled[proc0]++] = commI; + const label proc0 = twoProcs.first(); + const label proc1 = twoProcs.second(); - label proc1 = twoProcs[1]; - procSchedule_[proc1][nProcScheduled[proc1]++] = commI; + procSchedule_[proc0][nProcScheduled[proc0]++] = commPairi; + procSchedule_[proc1][nProcScheduled[proc1]++] = commPairi; } if (debug && UPstream::master()) @@ -292,13 +294,13 @@ Foam::commSchedule::commSchedule Pout<< "Processor " << proci << " talks to processors:" << endl; - forAll(procComms, i) + for (const label commPairi : procComms) { - const labelPair& twoProcs = 
comms[procComms[i]]; - - label nbr = (twoProcs[1] == proci ? twoProcs[0] : twoProcs[1]); + const labelPair& twoProcs = comms[commPairi]; - Pout<< " " << nbr << endl; + Pout<< " " + << (proci == twoProcs[1] ? twoProcs[0] : twoProcs[1]) + << endl; } } Pout<< endl; diff --git a/src/OpenFOAM/parallel/processorTopology/processorTopology.C b/src/OpenFOAM/parallel/processorTopology/processorTopology.C new file mode 100644 index 0000000000000000000000000000000000000000..4689d7b38ef5316ab0ddd3c238f1642fd79fd15d --- /dev/null +++ b/src/OpenFOAM/parallel/processorTopology/processorTopology.C @@ -0,0 +1,120 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | www.openfoam.com + \\/ M anipulation | +------------------------------------------------------------------------------- + Copyright (C) 2023 OpenCFD Ltd. +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. + +\*---------------------------------------------------------------------------*/ + +#include "processorTopology.H" +#include "Pstream.H" + +// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * // + +Foam::processorTopology::processorTopology() +: + procPatchMap_(0), + comm_(UPstream::worldComm) +{} + + +// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // + +const Foam::labelList& Foam::processorTopology::procNeighbours() const +{ + if (procNeighbours_.empty() && !procPatchMap_.empty()) + { + // My neighbouring procs in ascending sorted order + procNeighbours_ = procPatchMap_.sortedToc(); + } + + return procNeighbours_; +} + + +// May be useful in the future... 
+// ------------------------------ +// +// const Foam::labelUList Foam::processorTopology::below() const +// { +// const auto& all = procNeighbours(); +// +// const auto* pivot = std::upper_bound +// ( +// all.begin(), +// all.end(), +// UPstream::myProcNo(comm_) +// ); +// +// if (pivot != all.end()) +// { +// return UList<label> +// ( +// const_cast<label*>(all.begin()), +// (pivot - all.begin()) +// ); +// } +// return UList<label>(); +// } +// +// +// const Foam::labelUList Foam::processorTopology::above() const +// { +// const auto& all = procNeighbours(); +// +// const auto* pivot = std::upper_bound +// ( +// all.begin(), +// all.end(), +// UPstream::myProcNo(comm_) +// ); +// if (pivot != all.end()) +// { +// return UList<label> +// ( +// const_cast<label*>(pivot), +// (all.end() - pivot) +// ); +// } +// return UList<label>(); +// } + + +const Foam::labelListList& Foam::processorTopology::procAdjacency() const +{ + if (UPstream::parRun() && procAdjacencyTable_.empty()) + { + procAdjacencyTable_.resize(UPstream::nProcs(comm_)); + + // My neighbouring procs in ascending sorted order + procAdjacencyTable_[UPstream::myProcNo(comm_)] + = procPatchMap_.sortedToc(); + + // Synchronize on all processors + Pstream::allGatherList(procAdjacencyTable_, UPstream::msgType(), comm_); + } + + return procAdjacencyTable_; +} + + +// ************************************************************************* // diff --git a/src/OpenFOAM/parallel/processorTopology/processorTopology.H b/src/OpenFOAM/parallel/processorTopology/processorTopology.H index 3cce0ec109565ee4af9b4fc397121db05772862d..8a15d6ebed17989c7406662dd1dc1c905bc71919 100644 --- a/src/OpenFOAM/parallel/processorTopology/processorTopology.H +++ b/src/OpenFOAM/parallel/processorTopology/processorTopology.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2016 OpenFOAM Foundation - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -51,6 +51,8 @@ SourceFiles #include "labelList.H" #include "lduSchedule.H" +#include "Map.H" +#include "UPstream.H" // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // @@ -65,48 +67,41 @@ class processorTopology { // Private Data - //- The processor to processor connection (adjacency) table - labelListList procNeighbours_; + //- Map from neighbour proc to patch index + //- (processor-local information!) + Map<label> procPatchMap_; + + //- The neighbour processor connections (ascending order) associated + //- with the local rank. + // Possibly demand-driven data. + mutable labelList procNeighbours_; + + //- The complete processor to processor connection adjacency table. + //- Globally synchronized information. + // Likely demand-driven data. + mutable labelListList procAdjacencyTable_; //- Order in which the patches should be initialised/evaluated //- corresponding to the schedule lduSchedule patchSchedule_; - //- Local map from neighbour proc to patchi. Different per processor! - // -1 or patchi for connection to procID - labelList procPatchMap_; + //- The communicator used during creation of the topology + label comm_; - // Private Member Functions - //- From neighbour processor to index in our local boundary.
- // Local information (so not same over all processors) - ///const labelList& procPatchMap() const noexcept - ///{ - /// return procPatchMap_; - ///} - - //- Which \em local boundary is attached to specified processor - // Local information (so not same over all processors) - // \return -1 if not currently connected to specified processor. - label procToLocalPatch(const label proci) const - { - return - ( - proci >= 0 && proci < procPatchMap_.size() - ? procPatchMap_[proci] - : static_cast<label>(-1) - ); - } + // Private Member Functions + // Could expose as public... + // //- Map of neighbour processor to \em local boundary patch index. + // const Map<label>& procPatchMap() const noexcept + // { + // return procPatchMap_; + // } public: // Generated Methods - //- Default construct (empty) - processorTopology() = default; //- Copy construct processorTopology(const processorTopology&) = default; @@ -120,6 +115,12 @@ public: processorTopology& operator=(processorTopology&&) = default; + // Constructors + + //- Default construct (empty) + processorTopology(); + + // Static Functions //- Factory method to create topology, schedule and proc/patch maps. @@ -135,17 +136,24 @@ public: // Member Functions - //- The number of processors used by the topology - label nProcs() const noexcept - { - return procNeighbours_.size(); - } + //- The communicator used during creation of the topology + label comm() const noexcept { return comm_; } - //- The processor to processor connection topology - //- (like an adjacency list). Globally synchronized information - const labelListList& procNeighbours() const noexcept + //- The neighbour processor connections (ascending order) associated + //- with the \em local rank. + const labelList& procNeighbours() const; + + //- The complete processor to processor connection adjacency table. + //- Globally synchronized information. + // Likely demand-driven data. + const labelListList& procAdjacency() const; + + //- Which \em local boundary is attached to specified neighbour + //- processor. + // \return -1 if not currently connected to specified processor. + label procPatchLookup(const label proci) const { - return procNeighbours_; + return procPatchMap_.lookup(proci, -1); } //- Order in which the patches should be initialised/evaluated @@ -154,14 +162,6 @@ public: { return patchSchedule_; } - - //- Which \em local boundary is attached to specified processor - // Local information (so not same over all processors) - // \return -1 if not currently connected to specified processor. - label procPatchLookup(const label proci) const - { - return procToLocalPatch(proci); - } }; diff --git a/src/OpenFOAM/parallel/processorTopology/processorTopologyNew.H b/src/OpenFOAM/parallel/processorTopology/processorTopologyNew.H index e7f966143a42f73224c0ad495f3c340e935a1339..17384f353227f8ceb1eb5113d85d7f126f061366 100644 --- a/src/OpenFOAM/parallel/processorTopology/processorTopologyNew.H +++ b/src/OpenFOAM/parallel/processorTopology/processorTopologyNew.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017 OpenFOAM Foundation - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM.
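A hedged usage sketch for the reworked processorTopology interface (an OpenFOAM-style fragment, not a standalone program; it assumes an existing boundary patch list 'patches', and the instantiation of the factory for processorPolyPatch is an assumption based on the factory declaration above):

// Build topology and schedule on the world communicator
const processorTopology topo =
    processorTopology::New<processorPolyPatch>(patches, UPstream::worldComm);

// Neighbour ranks of the local rank (ascending order, demand-driven)
const labelList& nbrs = topo.procNeighbours();

for (const label nbrProci : nbrs)
{
    // Local boundary patch attached to this neighbour (-1 if none)
    const label patchi = topo.procPatchLookup(nbrProci);
    Pout<< "neighbour rank " << nbrProci << " via patch " << patchi << nl;
}

// Globally synchronized adjacency; triggers an allGatherList on first use
const labelListList& adjacency = topo.procAdjacency();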
@@ -44,10 +44,8 @@ Warning #define Foam_processorTopologyNew_H #include "processorTopology.H" -#include "bitSet.H" #include "commSchedule.H" #include "DynamicList.H" -#include "Pstream.H" // * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * // @@ -58,89 +56,74 @@ Foam::processorTopology Foam::processorTopology::New const label comm ) { - const label myProci = Pstream::myProcNo(comm); - const label nProcs = Pstream::nProcs(comm); + processorTopology topo; - processorTopology procTopo; + topo.comm_ = comm; // The communicator + auto& schedule = topo.patchSchedule_; - auto& procToProcs = procTopo.procNeighbours_; - auto& procToPatch = procTopo.procPatchMap_; - auto& schedule = procTopo.patchSchedule_; - - procToProcs.resize(nProcs); schedule.resize(2*patches.size()); - if (Pstream::parRun()) - { - // Fill my 'slot' with my neighbours - auto& procSlot = procToProcs[myProci]; + // The evaluation number within the schedule + label patchEvali = 0; - bitSet isNeighbour(procToProcs.size()); + // Number of processor patches + label numProcPatches = 0; - forAll(patches, patchi) - { - const auto* cpp = isA<ProcPatch>(patches[patchi]); - if (cpp) - { - const label nbrProci = cpp->neighbProcNo(); + // + // 1. Schedule all non-processor patches + // - isNeighbour.set(nbrProci); - } + forAll(patches, patchi) + { + if (isA<ProcPatch>(patches[patchi])) + { + ++numProcPatches; } + else + { + schedule[patchEvali++].setInitEvaluate(patchi); + schedule[patchEvali++].setEvaluate(patchi); + } + } - // The neighbouring procs in sorted (ascending) order - procSlot = isNeighbour.sortedToc(); - const label maxNbrProci = procSlot.empty() ? -1 : procSlot.last(); + // Assemble processor patch information + if (UPstream::parRun() && numProcPatches) + { + // Create reverse map (from proc to patch) + // - assumes single connections between processors - // Note could also use Map<label> if desired - procToPatch.resize_nocopy(maxNbrProci + 1); - procToPatch = -1; + auto& patchMap = topo.procPatchMap_; + patchMap.resize(2*numProcPatches); forAll(patches, patchi) { const auto* cpp = isA<ProcPatch>(patches[patchi]); if (cpp) { - const label nbrProci = cpp->neighbProcNo(); - - // Reverse map - procToPatch[nbrProci] = patchi; + patchMap.set(cpp->neighbProcNo(), patchi); } } - - // Synchronize on all processors - Pstream::allGatherList(procToProcs, UPstream::msgType(), comm); } - // Define the schedule - - label patchEvali = 0; - - // 1. All non-processor patches - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - forAll(patches, patchi) - { - if (!isA<ProcPatch>(patches[patchi])) - { - schedule[patchEvali++].setInitEvaluate(patchi); - schedule[patchEvali++].setEvaluate(patchi); - } - } - - - // 2. All processor patches - // ~~~~~~~~~~~~~~~~~~~~~~~~ + // + // 2. Handle processor patches + // if ( - Pstream::parRun() - && Pstream::defaultCommsType == Pstream::commsTypes::scheduled + UPstream::parRun() + && UPstream::defaultCommsType == UPstream::commsTypes::scheduled ) { + const label myProci = UPstream::myProcNo(comm); + const label nProcs = UPstream::nProcs(comm); + + // Synchronized on all processors + const auto& procToProcs = topo.procAdjacency(); + // Determine the schedule for all processor patches. // Insert processor pair once to determine the schedule. // Each processor pair stands for both send and receive. 
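As a small illustration of the "insert each pair once" rule noted above (a hypothetical fragment using the same types): for a four-rank chain decomposition only the owner < neighbour orientation of each coupling is inserted, and the resulting schedule can overlap the non-conflicting exchanges:

// Couplings of a 4-rank chain: 0-1, 1-2, 2-3 (each inserted once)
const label nProcs = 4;
DynamicList<labelPair> comms;

for (label proci = 0; proci < nProcs - 1; ++proci)
{
    comms.push_back(labelPair(proci, proci + 1));  // owner < neighbour
}

// Per-rank schedules: {0,1} and {2,3} can proceed in the same step,
// {1,2} follows in the next one
const labelListList& procSchedules =
    commSchedule(nProcs, comms).procSchedule();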
@@ -158,30 +141,28 @@ Foam::processorTopology Foam::processorTopology::New { if (proci < nbrProci) { - comms.append(labelPair(proci, nbrProci)); + // Owner to neighbour connection + comms.push_back(labelPair(proci, nbrProci)); } } } // Determine a schedule. + labelList mySchedule ( - commSchedule - ( - nProcs, - comms - ).procSchedule()[myProci] + commSchedule(nProcs, comms).procSchedule()[myProci] ); for (const label scheduleIndex : mySchedule) { // Get the other processor - label nbrProci = comms[scheduleIndex][0]; + label nbrProci = comms[scheduleIndex].first(); if (nbrProci == myProci) { - nbrProci = comms[scheduleIndex][1]; + nbrProci = comms[scheduleIndex].second(); } - const label patchi = procToPatch[nbrProci]; + const label patchi = topo.procPatchLookup(nbrProci); if (myProci > nbrProci) { @@ -199,26 +180,21 @@ Foam::processorTopology Foam::processorTopology::New { // Non-blocking schedule for processor patches - // initEvaluate - forAll(patches, patchi) + if (numProcPatches) { - if (isA<ProcPatch>(patches[patchi])) + forAll(patches, patchi) { - schedule[patchEvali++].setInitEvaluate(patchi); - } - } - - // evaluate - forAll(patches, patchi) - { - if (isA<ProcPatch>(patches[patchi])) - { - schedule[patchEvali++].setEvaluate(patchi); + if (isA<ProcPatch>(patches[patchi])) + { + schedule[patchEvali].setInitEvaluate(patchi); + schedule[patchEvali + numProcPatches].setEvaluate(patchi); + ++patchEvali; + } } } } - return procTopo; + return topo; } diff --git a/src/OpenFOAM/primitives/tuples/Pair.H b/src/OpenFOAM/primitives/tuples/Pair.H index 9548289ac77c1b0e5c966c6d93620203b4a9adcf..48a8538205a8ca3ac056803f11bd0187e2a7b864 100644 --- a/src/OpenFOAM/primitives/tuples/Pair.H +++ b/src/OpenFOAM/primitives/tuples/Pair.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2016 OpenFOAM Foundation - Copyright (C) 2017-2022 OpenCFD Ltd. + Copyright (C) 2017-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
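For the non-blocking branch in the hunk above, the single loop fills two slots per processor patch: all initEvaluate entries first, then all evaluate entries, offset by numProcPatches. A sketch of the resulting layout for two processor patches pA and pB that follow nP ordinary patches:

    // schedule[2*nP    ] : initEvaluate(pA)
    // schedule[2*nP + 1] : initEvaluate(pB)
    // schedule[2*nP + 2] : evaluate(pA)
    // schedule[2*nP + 3] : evaluate(pB)

so every communication is initiated before any of them is completed.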
@@ -44,7 +44,8 @@ See also #include "FixedList.H" #include "Istream.H" -#include <utility> // For std::move +#include "Ostream.H" +#include <utility> // For std::move, std::pair // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // @@ -194,6 +195,16 @@ struct is_contiguous_scalar<Pair<T>> : is_contiguous_scalar<T> {}; template<class T> struct Hash<Pair<T>> : Pair<T>::hasher {}; +//- Hashing for std::pair data +template<class T1, class T2> +struct Hash<std::pair<T1, T2>> +{ + unsigned operator()(const std::pair<T1, T2>& obj, unsigned seed=0) const + { + return Hash<T2>()(obj.second, Hash<T1>()(obj.first, seed)); + } +}; + // * * * * * * * * * * * * * * Global Functions * * * * * * * * * * * * * * // @@ -253,6 +264,32 @@ bool operator>=(const Pair<T>& a, const Pair<T>& b) } +// * * * * * * * * * * * * * * * IOstream Operators * * * * * * * * * * * * // + +//- Read std::pair from Istream +template<class T1, class T2> +inline Istream& operator>>(Istream& is, std::pair<T1,T2>& t) +{ + is.readBegin("pair"); + is >> t.first >> t.second; + is.readEnd("pair"); + + is.check(FUNCTION_NAME); + return is; +} + + +//- Write std::pair to Ostream +template<class T1, class T2> +inline Ostream& operator<<(Ostream& os, const std::pair<T1,T2>& t) +{ + os << token::BEGIN_LIST + << t.first << token::SPACE << t.second + << token::END_LIST; + return os; +} + + // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // } // End namespace Foam diff --git a/src/OpenFOAM/primitives/tuples/Tuple2.H b/src/OpenFOAM/primitives/tuples/Tuple2.H index 6c6fe3d1ee784dd2d77d40172f2992baf9a1126d..ab895b39859306b7e649c9deb61842c6cc879ac5 100644 --- a/src/OpenFOAM/primitives/tuples/Tuple2.H +++ b/src/OpenFOAM/primitives/tuples/Tuple2.H @@ -133,17 +133,6 @@ public: // * * * * * * * * * * * * * * * * * Traits * * * * * * * * * * * * * * * * // -//- Hashing for std::pair data -template<class T1, class T2> -struct Hash<std::pair<T1, T2>> -{ - unsigned operator()(const std::pair<T1, T2>& obj, unsigned seed=0) const - { - return Hash<T2>()(obj.second, Hash<T1>()(obj.first, seed)); - } -}; - - //- Hashing for Tuple2 data template<class T1, class T2> struct Hash<Tuple2<T1, T2>> @@ -291,19 +280,6 @@ struct maxFirstEqOp // * * * * * * * * * * * * * * * IOstream Operators * * * * * * * * * * * * // -//- Read std::pair from Istream -template<class T1, class T2> -inline Istream& operator>>(Istream& is, std::pair<T1,T2>& t) -{ - is.readBegin("pair"); - is >> t.first >> t.second; - is.readEnd("pair"); - - is.check(FUNCTION_NAME); - return is; -} - - //- Read Tuple2 from Istream template<class T1, class T2> inline Istream& operator>>(Istream& is, Tuple2<T1,T2>& t) @@ -317,18 +293,7 @@ inline Istream& operator>>(Istream& is, Tuple2<T1,T2>& t) } -//- Write std::pair to Ostream. -template<class T1, class T2> -inline Ostream& operator<<(Ostream& os, const std::pair<T1,T2>& t) -{ - os << token::BEGIN_LIST - << t.first << token::SPACE << t.second - << token::END_LIST; - return os; -} - - -//- Write Tuple2 to Ostream. 
+//- Write Tuple2 to Ostream template<class T1, class T2> inline Ostream& operator<<(Ostream& os, const Tuple2<T1,T2>& t) { diff --git a/src/Pstream/dummy/UPstream.C b/src/Pstream/dummy/UPstream.C index 381d7d3c22af8d5785de1fb7d4529f49b7c17263..6eddc72490b71b726899cff8d918a7370bff9e9d 100644 --- a/src/Pstream/dummy/UPstream.C +++ b/src/Pstream/dummy/UPstream.C @@ -98,4 +98,17 @@ void Foam::UPstream::barrier(const label communicator, UPstream::Request* req) {} +std::pair<int,int> +Foam::UPstream::probeMessage +( + const UPstream::commsTypes commsType, + const int fromProcNo, + const int tag, + const label comm +) +{ + return std::pair<int,int>(-1, 0); +} + + // ************************************************************************* // diff --git a/src/Pstream/dummy/UPstreamAllToAll.C b/src/Pstream/dummy/UPstreamAllToAll.C index 6983ae418d55add42fea43cfb4821020f426a092..134808104f8c428a2bde9e2fdfdad7c02ed154b6 100644 --- a/src/Pstream/dummy/UPstreamAllToAll.C +++ b/src/Pstream/dummy/UPstreamAllToAll.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -26,6 +26,7 @@ License \*---------------------------------------------------------------------------*/ #include "UPstream.H" +#include "Map.H" #include <cinttypes> #include <cstring> // memmove @@ -40,9 +41,41 @@ void Foam::UPstream::allToAll \ UList<Native>& recvData, \ const label comm \ ) \ +{ \ + recvData.deepCopy(sendData); \ +} + + +Pstream_CommonRoutines(int32_t); +Pstream_CommonRoutines(int64_t); + +#undef Pstream_CommonRoutines + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +#undef Pstream_CommonRoutines +#define Pstream_CommonRoutines(Native) \ +void Foam::UPstream::allToAllConsensus \ +( \ + const UList<Native>& sendData, \ + UList<Native>& recvData, \ + const int tag, \ + const label comm \ +) \ { \ recvData.deepCopy(sendData); \ } \ +void Foam::UPstream::allToAllConsensus \ +( \ + const Map<Native>& sendData, \ + Map<Native>& recvData, \ + const int tag, \ + const label comm \ +) \ +{ \ + recvData = sendData; \ +} Pstream_CommonRoutines(int32_t); diff --git a/src/Pstream/dummy/UPstreamRequest.C b/src/Pstream/dummy/UPstreamRequest.C index efd2427c1403477d2c603ad23a651386e53d6354..b158f9e8b7c9f30dbf77616c21964e9822af5bcf 100644 --- a/src/Pstream/dummy/UPstreamRequest.C +++ b/src/Pstream/dummy/UPstreamRequest.C @@ -56,11 +56,20 @@ void Foam::UPstream::resetRequests(const label n) {} void Foam::UPstream::waitRequests(const label pos) {} void Foam::UPstream::waitRequests(UList<UPstream::Request>&) {} +Foam::label Foam::UPstream::waitAnyRequest(UList<UPstream::Request>&) +{ + return -1; +} + void Foam::UPstream::waitRequest(const label i) {} void Foam::UPstream::waitRequest(UPstream::Request&) {} bool Foam::UPstream::finishedRequest(const label i) { return true; } bool Foam::UPstream::finishedRequest(UPstream::Request&) { return true; } +bool Foam::UPstream::finishedRequests(UList<UPstream::Request>&) +{ + return true; +} // ************************************************************************* // diff --git a/src/Pstream/mpi/PstreamGlobals.C b/src/Pstream/mpi/PstreamGlobals.C index 2d6e65aa1043d1fe4f46e615ab4ca0dd7659ab47..de5634b906963b6ca52ec3198c410b8a6377bf64 100644 --- a/src/Pstream/mpi/PstreamGlobals.C 
+++ b/src/Pstream/mpi/PstreamGlobals.C @@ -6,6 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2013-2015 OpenFOAM Foundation + Copyright (C) 2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -29,6 +30,8 @@ License // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * // +Foam::DynamicList<unsigned> Foam::PstreamGlobals::pendingMPIFree_; + Foam::DynamicList<MPI_Comm> Foam::PstreamGlobals::MPICommunicators_; Foam::DynamicList<MPI_Group> Foam::PstreamGlobals::MPIGroups_; diff --git a/src/Pstream/mpi/PstreamGlobals.H b/src/Pstream/mpi/PstreamGlobals.H index 859d0aa64b6aa379205fe474f955a46fad4d9145..a45bd1b858d2e9f7d8ae10c802a0d4174cdbad3a 100644 --- a/src/Pstream/mpi/PstreamGlobals.H +++ b/src/Pstream/mpi/PstreamGlobals.H @@ -49,11 +49,24 @@ namespace Foam namespace PstreamGlobals { +//- Internal enumeration to track the state of MPI_Comm, MPI_Group allocation +// Handled as bitmasks +enum allocationTypes : unsigned +{ + NonePending = 0u, // No MPI free is pending + CommPending = 1u, // MPI_Comm_free() is needed + GroupPending = 2u // MPI_Group_free() is needed +}; + +// Track if MPI_Comm_free and/or MPI_Group_free are pending for +// each communicator index (indexes into MPICommunicators_, MPIGroups_) +extern DynamicList<unsigned> pendingMPIFree_; + // Current communicators, which may be allocated or predefined // (eg, MPI_COMM_SELF, MPI_COMM_WORLD) extern DynamicList<MPI_Comm> MPICommunicators_; -// Groups associated with the currrent communicators. +// Groups used to create communicators extern DynamicList<MPI_Group> MPIGroups_; //- Outstanding non-blocking operations. diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C index cfb0c2ebdc9bdda8634f99fa7ddcb7340c2795b1..005707be6171654cc1fdbaaab418c6e6599c938b 100644 --- a/src/Pstream/mpi/UPstream.C +++ b/src/Pstream/mpi/UPstream.C @@ -37,6 +37,7 @@ License #include <cstring> #include <cstdlib> #include <csignal> +#include <numeric> // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * // @@ -502,15 +503,17 @@ void Foam::UPstream::allocatePstreamCommunicator const label index ) { - if (index == PstreamGlobals::MPIGroups_.size()) + if (index == PstreamGlobals::MPICommunicators_.size()) { - // Extend storage with dummy values - MPI_Comm newComm = MPI_COMM_NULL; - MPI_Group newGroup = MPI_GROUP_NULL; - PstreamGlobals::MPIGroups_.push_back(newGroup); - PstreamGlobals::MPICommunicators_.push_back(newComm); + // Extend storage with null values + + PstreamGlobals:: + pendingMPIFree_.emplace_back(PstreamGlobals::NonePending); + + PstreamGlobals::MPICommunicators_.emplace_back(MPI_COMM_NULL); + PstreamGlobals::MPIGroups_.emplace_back(MPI_GROUP_NULL); } - else if (index > PstreamGlobals::MPIGroups_.size()) + else if (index > PstreamGlobals::MPICommunicators_.size()) { FatalErrorInFunction << "PstreamGlobals out of sync with UPstream data. Problem." 
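The new pendingMPIFree_ flags are combined and tested as plain bitmasks. A short sketch of the intended set/test pattern, mirroring what allocatePstreamCommunicator and freePstreamCommunicator do in the hunks below:

    // Sub-communicator: both the comm and its group need MPI cleanup
    PstreamGlobals::pendingMPIFree_[index] =
        (PstreamGlobals::CommPending | PstreamGlobals::GroupPending);

    // On free: only release what was actually allocated here
    if (PstreamGlobals::pendingMPIFree_[index] & PstreamGlobals::CommPending)
    {
        MPI_Comm_free(&PstreamGlobals::MPICommunicators_[index]);
    }
    if (PstreamGlobals::pendingMPIFree_[index] & PstreamGlobals::GroupPending)
    {
        MPI_Group_free(&PstreamGlobals::MPIGroups_[index]);
    }
    PstreamGlobals::pendingMPIFree_[index] = PstreamGlobals::NonePending;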
@@ -530,27 +533,40 @@ void Foam::UPstream::allocatePstreamCommunicator << Foam::exit(FatalError); } + PstreamGlobals::pendingMPIFree_[index] = PstreamGlobals::NonePending; PstreamGlobals::MPICommunicators_[index] = MPI_COMM_WORLD; - MPI_Comm_group(MPI_COMM_WORLD, &PstreamGlobals::MPIGroups_[index]); - MPI_Comm_rank(MPI_COMM_WORLD, &myProcNo_[index]); + PstreamGlobals::MPIGroups_[index] = MPI_GROUP_NULL; + + // TBD: MPI_Comm_dup(MPI_COMM_WORLD, ...); + // with pendingMPIFree_[index] = CommPending ... + // Note: freePstreamCommunicator may need an update + + MPI_Comm_rank + ( + PstreamGlobals::MPICommunicators_[index], + &myProcNo_[index] + ); // Set the number of ranks to the actual number int numProcs; - MPI_Comm_size(MPI_COMM_WORLD, &numProcs); + MPI_Comm_size + ( + PstreamGlobals::MPICommunicators_[index], + &numProcs + ); - //procIDs_[index] = identity(numProcs); + // identity [0-numProcs], as 'int' procIDs_[index].resize_nocopy(numProcs); - forAll(procIDs_[index], i) - { - procIDs_[index][i] = i; - } + std::iota(procIDs_[index].begin(), procIDs_[index].end(), 0); } else if (parentIndex == -2) { // Self communicator + PstreamGlobals::pendingMPIFree_[index] = PstreamGlobals::NonePending; PstreamGlobals::MPICommunicators_[index] = MPI_COMM_SELF; - MPI_Comm_group(MPI_COMM_SELF, &PstreamGlobals::MPIGroups_[index]); + PstreamGlobals::MPIGroups_[index] = MPI_GROUP_NULL; + MPI_Comm_rank(MPI_COMM_SELF, &myProcNo_[index]); // Number of ranks is always 1 (self communicator) @@ -573,6 +589,11 @@ void Foam::UPstream::allocatePstreamCommunicator } else { + // General sub-communicator + + PstreamGlobals::pendingMPIFree_[index] + = (PstreamGlobals::CommPending | PstreamGlobals::GroupPending); + // Create new group MPI_Group_incl ( @@ -603,7 +624,10 @@ void Foam::UPstream::allocatePstreamCommunicator if (PstreamGlobals::MPICommunicators_[index] == MPI_COMM_NULL) { + // No communicator created, group only myProcNo_[index] = -1; + PstreamGlobals:: + pendingMPIFree_[index] = PstreamGlobals::GroupPending; } else { @@ -629,30 +653,48 @@ void Foam::UPstream::allocatePstreamCommunicator } -void Foam::UPstream::freePstreamCommunicator(const label communicator) +void Foam::UPstream::freePstreamCommunicator(const label index) { // Skip placeholders and pre-defined (not allocated) communicators if (UPstream::debug) { - Pout<< "freePstreamCommunicator: " << communicator + Pout<< "freePstreamCommunicator: " << index << " from " << PstreamGlobals::MPICommunicators_.size() << endl; } // Not touching the first two communicators (SELF, WORLD) - if (communicator > 1) + if (index > 1) { - if (MPI_COMM_NULL != PstreamGlobals::MPICommunicators_[communicator]) + if + ( + (MPI_COMM_NULL != PstreamGlobals::MPICommunicators_[index]) + && + ( + PstreamGlobals::pendingMPIFree_[index] + & PstreamGlobals::CommPending + ) + ) { // Free communicator. Sets communicator to MPI_COMM_NULL - MPI_Comm_free(&PstreamGlobals::MPICommunicators_[communicator]); + MPI_Comm_free(&PstreamGlobals::MPICommunicators_[index]); } - if (MPI_GROUP_NULL != PstreamGlobals::MPIGroups_[communicator]) + if + ( + (MPI_GROUP_NULL != PstreamGlobals::MPIGroups_[index]) + && + ( + PstreamGlobals::pendingMPIFree_[index] + & PstreamGlobals::GroupPending + ) + ) { // Free group. 
Sets group to MPI_GROUP_NULL - MPI_Group_free(&PstreamGlobals::MPIGroups_[communicator]); + MPI_Group_free(&PstreamGlobals::MPIGroups_[index]); } + + PstreamGlobals::pendingMPIFree_[index] = PstreamGlobals::NonePending; } } @@ -705,6 +747,7 @@ void Foam::UPstream::barrier(const label communicator, UPstream::Request* req) { MPI_Request request; + // Non-blocking if ( MPI_Ibarrier @@ -723,6 +766,7 @@ void Foam::UPstream::barrier(const label communicator, UPstream::Request* req) } else { + // Blocking if ( MPI_Barrier @@ -739,4 +783,77 @@ void Foam::UPstream::barrier(const label communicator, UPstream::Request* req) } +std::pair<int,int> +Foam::UPstream::probeMessage +( + const UPstream::commsTypes commsType, + const int fromProcNo, + const int tag, + const label comm +) +{ + std::pair<int,int> result(-1, 0); + + if (!UPstream::parRun()) + { + return result; + } + + const int source = (fromProcNo < 0) ? MPI_ANY_SOURCE : fromProcNo; + // Supporting MPI_ANY_TAG is not particularly useful... + + int flag = 0; + MPI_Status status; + + if (UPstream::commsTypes::blocking == commsType) + { + // Blocking + if + ( + MPI_Probe + ( + source, + tag, + PstreamGlobals::MPICommunicators_[comm], + &status + ) + ) + { + FatalErrorInFunction + << "MPI_Probe returned with error" + << Foam::abort(FatalError); + } + flag = 1; + } + else + { + // Non-blocking + if + ( + MPI_Iprobe + ( + source, + tag, + PstreamGlobals::MPICommunicators_[comm], + &flag, + &status + ) + ) + { + FatalErrorInFunction + << "MPI_Iprobe returned with error" + << Foam::abort(FatalError); + } + } + + if (flag) + { + result.first = status.MPI_SOURCE; + MPI_Get_count(&status, MPI_BYTE, &result.second); + } + + return result; +} + + // ************************************************************************* // diff --git a/src/Pstream/mpi/UPstreamAllToAll.C b/src/Pstream/mpi/UPstreamAllToAll.C index 8bc5b5ebac4ceebd5746362f9bb335f982e8e996..e9c44147815c0c62f745edad8928a2abe725242f 100644 --- a/src/Pstream/mpi/UPstreamAllToAll.C +++ b/src/Pstream/mpi/UPstreamAllToAll.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2022 OpenCFD Ltd. + Copyright (C) 2022-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. 
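A hedged usage sketch for the new UPstream::probeMessage() above: poll without blocking for any pending message, then recover the source rank and byte count. The tag and communicator arguments (msgType(), worldComm) are illustrative defaults, not part of the patch:

    // (-1, 0) is returned when nothing is pending
    std::pair<int,int> probed = UPstream::probeMessage
    (
        UPstream::commsTypes::nonBlocking,  // uses MPI_Iprobe
        -1,                     // fromProcNo < 0: any source rank
        UPstream::msgType(),    // message tag
        UPstream::worldComm
    );

    if (probed.first >= 0)
    {
        const int fromProci = probed.first;   // status.MPI_SOURCE
        const int nBytes = probed.second;     // size counted as MPI_BYTE
        // ... post a matching receive for nBytes from fromProci
    }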
@@ -26,6 +26,7 @@ License \*---------------------------------------------------------------------------*/ #include "Pstream.H" +#include "Map.H" #include "UPstreamWrapping.H" #include <cinttypes> @@ -45,7 +46,46 @@ void Foam::UPstream::allToAll \ ( \ sendData, recvData, TaggedType, comm \ ); \ +} + + +Pstream_CommonRoutines(int32_t, MPI_INT32_T); +Pstream_CommonRoutines(int64_t, MPI_INT64_T); + +#undef Pstream_CommonRoutines + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +#undef Pstream_CommonRoutines +#define Pstream_CommonRoutines(Native, TaggedType) \ +void Foam::UPstream::allToAllConsensus \ +( \ + const UList<Native>& sendData, \ + UList<Native>& recvData, \ + const int tag, \ + const label comm \ +) \ +{ \ + PstreamDetail::allToAllConsensus \ + ( \ + sendData, recvData, TaggedType, tag, comm \ + ); \ } \ + \ +void Foam::UPstream::allToAllConsensus \ +( \ + const Map<Native>& sendData, \ + Map<Native>& recvData, \ + const int tag, \ + const label comm \ +) \ +{ \ + PstreamDetail::allToAllConsensus \ + ( \ + sendData, recvData, TaggedType, tag, comm \ + ); \ +} Pstream_CommonRoutines(int32_t, MPI_INT32_T); diff --git a/src/Pstream/mpi/UPstreamRequest.C b/src/Pstream/mpi/UPstreamRequest.C index d3f6884f96c16f5c601979b06d3d4056ee6774b8..603b638a85ea4d659515b323b108cb9cf9073ea6 100644 --- a/src/Pstream/mpi/UPstreamRequest.C +++ b/src/Pstream/mpi/UPstreamRequest.C @@ -138,21 +138,24 @@ void Foam::UPstream::waitRequests(UList<UPstream::Request>& requests) for (auto& req : requests) { - if (req.good()) + MPI_Request request = PstreamDetail::Request::get(req); + + if (MPI_REQUEST_NULL != request) { - waitRequests[count] = PstreamDetail::Request::get(req); + waitRequests[count] = request; ++count; } } if (!count) { + // Early exit: no non-null requests found return; } profilingPstream::beginTiming(); - // On success: sets request to MPI_REQUEST_NULL + // On success: sets each request to MPI_REQUEST_NULL if (MPI_Waitall(count, waitRequests, MPI_STATUSES_IGNORE)) { FatalErrorInFunction @@ -163,10 +166,58 @@ void Foam::UPstream::waitRequests(UList<UPstream::Request>& requests) profilingPstream::addWaitTime(); // Everything handled, reset all to MPI_REQUEST_NULL + requests = UPstream::Request(MPI_REQUEST_NULL); +} + + +Foam::label Foam::UPstream::waitAnyRequest(UList<UPstream::Request>& requests) +{ + // No-op for non-parallel or no pending requests + if (!UPstream::parRun() || requests.empty()) + { + return -1; + } + + // Looks ugly but is legitimate since UPstream::Request is an intptr_t, + // which is always large enough to hold an MPI_Request (int or pointer) + + label count = 0; + auto* waitRequests = reinterpret_cast<MPI_Request*>(requests.data()); + + // Transcribe UPstream::Request into MPI_Request + // - do not change locations within the list since these are relevant + // for the return index. 
for (auto& req : requests) { - req.reset(); + waitRequests[count] = PstreamDetail::Request::get(req); + ++count; + } + + profilingPstream::beginTiming(); + + // On success: sets request to MPI_REQUEST_NULL + int index = -1; + if (MPI_Waitany(count, waitRequests, &index, MPI_STATUS_IGNORE)) + { + FatalErrorInFunction + << "MPI_Waitany returned with error" + << Foam::abort(FatalError); } + + profilingPstream::addWaitTime(); + + if (index == MPI_UNDEFINED) + { + index = -1; // No outstanding requests + } + + // Transcribe MPI_Request back into UPstream::Request + while (--count >= 0) + { + requests[count] = UPstream::Request(waitRequests[count]); + } + + return index; } @@ -190,14 +241,16 @@ void Foam::UPstream::waitRequests(UList<UPstream::Request>& requests) /// waitRequests[count] = PstreamDetail::Request::get(req1); /// if (MPI_REQUEST_NULL != waitRequests[count]) /// { -/// req1.reset(); +/// // Flag in advance as being handled +/// req1 = UPstream::Request(MPI_REQUEST_NULL); /// ++count; /// } /// /// waitRequests[count] = PstreamDetail::Request::get(req2); /// if (MPI_REQUEST_NULL != waitRequests[count]) /// { -/// req2.reset(); +/// // Flag in advance as being handled +/// req2 = UPstream::Request(MPI_REQUEST_NULL); /// ++count; /// } /// @@ -208,7 +261,7 @@ void Foam::UPstream::waitRequests(UList<UPstream::Request>& requests) /// /// profilingPstream::beginTiming(); /// -/// // On success: sets request to MPI_REQUEST_NULL +/// // On success: sets each request to MPI_REQUEST_NULL /// if (MPI_Waitall(count, waitRequests, MPI_STATUSES_IGNORE)) /// { /// FatalErrorInFunction @@ -297,7 +350,8 @@ void Foam::UPstream::waitRequest(UPstream::Request& req) profilingPstream::addWaitTime(); - req.reset(); // Handled, reset to MPI_REQUEST_NULL + // Handled, reset to MPI_REQUEST_NULL + req = UPstream::Request(MPI_REQUEST_NULL); } @@ -363,8 +417,69 @@ bool Foam::UPstream::finishedRequest(UPstream::Request& req) if (flag) { - // Done - reset to MPI_REQUEST_NULL - req.reset(); + // Success: reset request to MPI_REQUEST_NULL + req = UPstream::Request(MPI_REQUEST_NULL); + } + + return flag != 0; +} + + +bool Foam::UPstream::finishedRequests(UList<UPstream::Request>& requests) +{ + // No-op for non-parallel or no pending requests + if (!UPstream::parRun() || requests.empty()) + { + return true; + } + + // Looks ugly but is legitimate since UPstream::Request is an intptr_t, + // which is always large enough to hold an MPI_Request (int or pointer) + + label count = 0; + auto* waitRequests = reinterpret_cast<MPI_Request*>(requests.data()); + + for (auto& req : requests) + { + MPI_Request request = PstreamDetail::Request::get(req); + + if (MPI_REQUEST_NULL != request) + { + waitRequests[count] = request; + ++count; + } + } + + if (!count) + { + // Early exit: no non-null requests found + return true; + } + + // On success: sets each request to MPI_REQUEST_NULL + // On failure: no request is modified + int flag = 0; + MPI_Testall(count, waitRequests, &flag, MPI_STATUSES_IGNORE); + + if (flag) + { + // Success: reset all requests to MPI_REQUEST_NULL + requests = UPstream::Request(MPI_REQUEST_NULL); + } + else + { + // Not all done. Recover wrapped representation but in reverse order + // since sizeof(MPI_Request) can be smaller than + // sizeof(UPstream::Request::value_type) + // eg, mpich has MPI_Request as 'int' + // + // This is uglier than we'd like, but much better than allocating + // and freeing a scratch buffer each time we query things. 
+ + while (--count >= 0) + { + requests[count] = UPstream::Request(waitRequests[count]); + } } return flag != 0; diff --git a/src/Pstream/mpi/UPstreamWrapping.H b/src/Pstream/mpi/UPstreamWrapping.H index 355bf3bf86d786509e393e4d41c8258a5146fe2b..125da4defd9ea832b89c9225b979b365cb8455ee 100644 --- a/src/Pstream/mpi/UPstreamWrapping.H +++ b/src/Pstream/mpi/UPstreamWrapping.H @@ -136,6 +136,30 @@ void allToAllv ); +// Non-blocking consensual integer (size) exchange +template<class Type> +void allToAllConsensus +( + const UList<Type>& sendData, + UList<Type>& recvData, + MPI_Datatype datatype, + const int tag, // Message tag + const label comm // Communicator +); + + +// Non-blocking consensual integer (size) exchange +template<class Type> +void allToAllConsensus +( + const Map<Type>& sendData, + Map<Type>& recvData, + MPI_Datatype datatype, + const int tag, // Message tag + const label comm // Communicator +); + + // MPI_Gather or MPI_Igather template<class Type> void gather diff --git a/src/Pstream/mpi/UPstreamWrappingTemplates.C b/src/Pstream/mpi/UPstreamWrappingTemplates.C index 109e6b7f4588d2dd91f778f17464caf22a436814..076c03a1c0c9ab7cb3e4d5eea6a15e24939aee42 100644 --- a/src/Pstream/mpi/UPstreamWrappingTemplates.C +++ b/src/Pstream/mpi/UPstreamWrappingTemplates.C @@ -29,6 +29,7 @@ License #include "UPstreamWrapping.H" #include "profilingPstream.H" #include "PstreamGlobals.H" +#include "Map.H" // * * * * * * * * * * * * * * * Global Functions * * * * * * * * * * * * * // @@ -126,6 +127,9 @@ void Foam::PstreamDetail::allReduce { if (!UPstream::parRun()) { + // No requests generated + if (req) req->reset(); + if (requestID) *requestID = -1; return; } @@ -159,6 +163,7 @@ void Foam::PstreamDetail::allReduce bool handled(false); #if defined(MPI_VERSION) && (MPI_VERSION >= 3) + // MPI-3 : eg, openmpi-1.7 (2013) and later if (immediate) { handled = true; @@ -239,7 +244,7 @@ void Foam::PstreamDetail::allToAll { const bool immediate = (req || requestID); - const label np = UPstream::nProcs(comm); + const label numProc = UPstream::nProcs(comm); if (UPstream::warnComm != -1 && comm != UPstream::warnComm) { @@ -251,7 +256,7 @@ void Foam::PstreamDetail::allToAll { Pout<< "** MPI_Alltoall (blocking):"; } - Pout<< " np:" << np + Pout<< " numProc:" << numProc << " sendData:" << sendData.size() << " with comm:" << comm << " warnComm:" << UPstream::warnComm @@ -259,18 +264,22 @@ void Foam::PstreamDetail::allToAll error::printStack(Pout); } - if (sendData.size() != np || recvData.size() != np) + if (sendData.size() != numProc || recvData.size() != numProc) { FatalErrorInFunction - << "Have " << np << " ranks, but size of sendData:" + << "Have " << numProc << " ranks, but size of sendData:" << sendData.size() << " or recvData:" << recvData.size() << " is different!" 
<< Foam::abort(FatalError); } - if (!UPstream::parRun()) + if (!UPstream::parRun() || numProc < 2) { recvData.deepCopy(sendData); + + // No requests generated + if (req) req->reset(); + if (requestID) *requestID = -1; return; } @@ -279,6 +288,7 @@ bool handled(false); #if defined(MPI_VERSION) && (MPI_VERSION >= 3) + // MPI-3 : eg, openmpi-1.7 (2013) and later if (immediate) { handled = true; @@ -421,6 +431,10 @@ void Foam::PstreamDetail::allToAllv (sendData + sendOffsets[0]), recvCounts[0]*sizeof(Type) ); + + // No requests generated + if (req) req->reset(); + if (requestID) *requestID = -1; return; } @@ -429,6 +443,7 @@ bool handled(false); #if defined(MPI_VERSION) && (MPI_VERSION >= 3) + // MPI-3 : eg, openmpi-1.7 (2013) and later if (immediate) { handled = true; @@ -503,6 +518,365 @@ } + +template<class Type> +void Foam::PstreamDetail::allToAllConsensus +( + const UList<Type>& sendData, + UList<Type>& recvData, + MPI_Datatype datatype, + const int tag, + const label comm +) +{ + const label myProci = UPstream::myProcNo(comm); + const label numProc = UPstream::nProcs(comm); + + if (UPstream::warnComm != -1 && comm != UPstream::warnComm) + { + Pout<< "** non-blocking consensus Alltoall (list):"; + Pout<< " numProc:" << numProc + << " sendData:" << sendData.size() + << " with comm:" << comm + << " warnComm:" << UPstream::warnComm + << endl; + error::printStack(Pout); + } + + if (sendData.size() != numProc || recvData.size() != numProc) + { + FatalErrorInFunction + << "Have " << numProc << " ranks, but size of sendData:" + << sendData.size() << " or recvData:" << recvData.size() + << " is different!" + << Foam::abort(FatalError); + } + + // Initial: assign zero everywhere. Values of zero are never transmitted + const Type zeroValue = pTraits<Type>::zero; + recvData = zeroValue; + + if (!UPstream::parRun() || numProc < 2) + { + // deep copy + recvData.deepCopy(sendData); + return; + } + + + // Implementation description + // -------------------------- + // "Scalable Communication Protocols for Dynamic Sparse Data Exchange", + // Hoefler, Siebert, Lumsdaine + // May 2010 ACM SIGPLAN Notices 45(5):159-168 + // https://doi.org/10.1145/1837853.1693476 + // + // - http://unixer.de/publications/img/hoefler-dsde-protocols.pdf + // + // Algorithm NBX: Nonblocking consensus + + // This specific specialization is largely just for integer data + // so we initialise the receiving data with zero and then + // do not send/recv them. + // This is because we are dealing with a flat list of entries to + // send and not a sparse Map etc. + + DynamicList<MPI_Request> requests(sendData.size()); + + profilingPstream::beginTiming(); + + // If there are synchronisation problems, + // a beginning barrier can help, but should not be necessary + // when unique message tags are being used. 
+ + //// MPI_Barrier(PstreamGlobals::MPICommunicators_[comm]); + + // Start nonblocking synchronous send to process dest + for (label proci = 0; proci < numProc; ++proci) + { + if (sendData[proci] == zeroValue) + { + // Do not send/recv empty data + } + else if (proci == myProci) + { + // Do myself + recvData[proci] = sendData[proci]; + } + else + { + // Has data to send + + MPI_Issend + ( + &sendData[proci], + 1, // one element per rank + datatype, + proci, + tag, + PstreamGlobals::MPICommunicators_[comm], + &requests.emplace_back() + ); + } + } + + + // Probe and receive + + MPI_Request barrierReq; + + for (bool barrier_active = false, done = false; !done; /*nil*/) + { + int flag = 0; + MPI_Status status; + + MPI_Iprobe + ( + MPI_ANY_SOURCE, + tag, + PstreamGlobals::MPICommunicators_[comm], + &flag, + &status + ); + + if (flag) + { + // Message found, receive into dest buffer location + const label proci = status.MPI_SOURCE; + + int count = 0; + MPI_Get_count(&status, datatype, &count); + + if (count != 1) + { + FatalErrorInFunction + << "Incorrect message size. Expected 1 but had " + << count << nl + << exit(FatalError); + } + + MPI_Recv + ( + &recvData[proci], + count, // count=1 (see above) + datatype, + proci, + tag, + PstreamGlobals::MPICommunicators_[comm], + MPI_STATUS_IGNORE + ); + } + + if (barrier_active) + { + // Test barrier for completion + // - all received, or nothing to receive + MPI_Test(&barrierReq, &flag, MPI_STATUS_IGNORE); + + if (flag) + { + done = true; + } + } + else + { + // Check if all sends have arrived + MPI_Testall + ( + requests.size(), requests.data(), + &flag, MPI_STATUSES_IGNORE + ); + + if (flag) + { + MPI_Ibarrier + ( + PstreamGlobals::MPICommunicators_[comm], + &barrierReq + ); + barrier_active = true; + } + } + } + + profilingPstream::addAllToAllTime(); +} + + +template<class Type> +void Foam::PstreamDetail::allToAllConsensus +( + const Map<Type>& sendBufs, + Map<Type>& recvBufs, + MPI_Datatype datatype, + const int tag, + const label comm +) +{ + const label myProci = UPstream::myProcNo(comm); + const label numProc = UPstream::nProcs(comm); + + if (UPstream::warnComm != -1 && comm != UPstream::warnComm) + { + Pout<< "** non-blocking consensus Alltoall (map):"; + Pout<< " numProc:" << numProc + << " sendData:" << sendBufs.size() + << " with comm:" << comm + << " warnComm:" << UPstream::warnComm + << endl; + error::printStack(Pout); + } + + // Initial: clear out everything + const Type zeroValue = pTraits<Type>::zero; + recvBufs.clear(); + + if (!UPstream::parRun() || numProc < 2) + { + // Do myself + const auto iter = sendBufs.find(myProci); + if (iter.found() && (iter.val() != zeroValue)) + { + // Do myself: insert_or_assign + recvBufs(iter.key()) = iter.val(); + } + return; + } + + + // Algorithm NBX: Nonblocking consensus + // Implementation like above, but sending map data. + + DynamicList<MPI_Request> requests(sendBufs.size()); + + profilingPstream::beginTiming(); + + // If there are synchronisation problems, + // a beginning barrier can help, but should not be necessary + // when unique message tags are being used. 
+ + //// MPI_Barrier(PstreamGlobals::MPICommunicators_[comm]); + + // Start nonblocking synchronous send to process dest + + // Same as forAllConstIters() + const auto endIter = sendBufs.cend(); + for (auto iter = sendBufs.cbegin(); iter != endIter; ++iter) + { + const label proci = iter.key(); + const auto& sendData = iter.val(); + + if (sendData == zeroValue) + { + // Do not send/recv empty/zero data + } + else if (proci == myProci) + { + // Do myself: insert_or_assign + recvBufs(proci) = sendData; + } + else + { + // Has data to send + + MPI_Issend + ( + &sendData, + 1, // one element per rank + datatype, + proci, + tag, + PstreamGlobals::MPICommunicators_[comm], + &requests.emplace_back() + ); + } + } + + + // Probe and receive + + MPI_Request barrierReq; + + for (bool barrier_active = false, done = false; !done; /*nil*/) + { + int flag = 0; + MPI_Status status; + + MPI_Iprobe + ( + MPI_ANY_SOURCE, + tag, + PstreamGlobals::MPICommunicators_[comm], + &flag, + &status + ); + + if (flag) + { + // Message found, receive into dest buffer location + + const label proci = status.MPI_SOURCE; + int count = 0; + + MPI_Get_count(&status, datatype, &count); + + if (count != 1) + { + FatalErrorInFunction + << "Incorrect message size. Expected 1 but had " + << count << nl + << exit(FatalError); + } + + auto& recvData = recvBufs(proci); + + MPI_Recv + ( + &recvData, + count, // count=1 (see above) + datatype, + proci, + tag, + PstreamGlobals::MPICommunicators_[comm], + MPI_STATUS_IGNORE + ); + } + + if (barrier_active) + { + // Test barrier for completion + // - all received, or nothing to receive + MPI_Test(&barrierReq, &flag, MPI_STATUS_IGNORE); + + if (flag) + { + done = true; + } + } + else + { + // Check if all sends have arrived + MPI_Testall + ( + requests.size(), requests.data(), + &flag, MPI_STATUSES_IGNORE + ); + + if (flag) + { + MPI_Ibarrier + ( + PstreamGlobals::MPICommunicators_[comm], + &barrierReq + ); + barrier_active = true; + } + } + } + + profilingPstream::addAllToAllTime(); +} + + template<class Type> void Foam::PstreamDetail::gather ( @@ -522,6 +896,10 @@ void Foam::PstreamDetail::gather if (!UPstream::parRun()) { std::memmove(recvData, sendData, recvCount*sizeof(Type)); + + // No requests generated + if (req) req->reset(); + if (requestID) *requestID = -1; return; } @@ -552,6 +930,7 @@ void Foam::PstreamDetail::gather bool handled(false); #if defined(MPI_VERSION) && (MPI_VERSION >= 3) + // MPI-3 : eg, openmpi-1.7 (2013) and later if (immediate) { handled = true; @@ -643,6 +1022,10 @@ void Foam::PstreamDetail::scatter if (!UPstream::parRun()) { std::memmove(recvData, sendData, recvCount*sizeof(Type)); + + // No requests generated + if (req) req->reset(); + if (requestID) *requestID = -1; return; } @@ -673,6 +1056,7 @@ void Foam::PstreamDetail::scatter bool handled(false); #if defined(MPI_VERSION) && (MPI_VERSION >= 3) + // MPI-3 : eg, openmpi-1.7 (2013) and later if (immediate) { handled = true; @@ -766,6 +1150,10 @@ void Foam::PstreamDetail::gatherv { // recvCounts[0] may be invalid - use sendCount instead std::memmove(recvData, sendData, sendCount*sizeof(Type)); + + // No requests generated + if (req) req->reset(); + if (requestID) *requestID = -1; return; } @@ -819,6 +1207,7 @@ void Foam::PstreamDetail::gatherv bool handled(false); #if defined(MPI_VERSION) && (MPI_VERSION >= 3) + // MPI-3 : eg, openmpi-1.7 (2013) and later if (immediate) { handled = true; @@ -913,6 +1302,10 @@ void Foam::PstreamDetail::scatterv if (!UPstream::parRun()) { std::memmove(recvData, sendData, 
recvCount*sizeof(Type)); + + // No requests generated + if (req) req->reset(); + if (requestID) *requestID = -1; return; } @@ -960,6 +1353,7 @@ void Foam::PstreamDetail::scatterv bool handled(false); #if defined(MPI_VERSION) && (MPI_VERSION >= 3) + // MPI-3 : eg, openmpi-1.7 (2013) and later if (immediate) { handled = true; diff --git a/src/dynamicMesh/fvMeshDistribute/fvMeshDistribute.C b/src/dynamicMesh/fvMeshDistribute/fvMeshDistribute.C index 5751e519c3a01213a9ddb1caf8619107e634d00c..41462314aad2e1b9e314481b28852eb6798ea2b1 100644 --- a/src/dynamicMesh/fvMeshDistribute/fvMeshDistribute.C +++ b/src/dynamicMesh/fvMeshDistribute/fvMeshDistribute.C @@ -2159,8 +2159,8 @@ Foam::autoPtr<Foam::mapDistributePolyMesh> Foam::fvMeshDistribute::distribute // ~~~~~~~~~~~~~~~~~ labelList nSendCells(countCells(distribution)); - labelList nRevcCells(Pstream::nProcs()); - Pstream::allToAll(nSendCells, nRevcCells); + labelList nRecvCells(Pstream::nProcs()); + UPstream::allToAll(nSendCells, nRecvCells); // Allocate buffers PstreamBuffers pBufs(Pstream::commsTypes::nonBlocking); @@ -2382,22 +2382,17 @@ Foam::autoPtr<Foam::mapDistributePolyMesh> Foam::fvMeshDistribute::distribute UPstream::parRun(oldParRun); // Restore parallel state - - // Start sending&receiving from buffers + if (debug) { - if (debug) - { - Pout<< "Starting sending" << endl; - } + Pout<< "Starting sending" << endl; + } - labelList recvSizes; - pBufs.finishedSends(recvSizes); + pBufs.finishedSends(); - if (debug) - { - Pout<< "Finished sending and receiving : " << flatOutput(recvSizes) - << endl; - } + if (debug) + { + Pout<< "Finished sending and receiving : " + << flatOutput(pBufs.recvDataCounts()) << endl; } @@ -2547,17 +2542,17 @@ Foam::autoPtr<Foam::mapDistributePolyMesh> Foam::fvMeshDistribute::distribute ); PtrList<PtrList<volTensorField::Internal>> dtfs(Pstream::nProcs()); - forAll(nRevcCells, sendProc) + forAll(nRecvCells, sendProc) { // Did processor sendProc send anything to me? - if (sendProc != Pstream::myProcNo() && nRevcCells[sendProc] > 0) + if (sendProc != Pstream::myProcNo() && nRecvCells[sendProc] > 0) { if (debug) { Pout<< nl << "RECEIVING FROM DOMAIN " << sendProc << " cells to receive:" - << nRevcCells[sendProc] + << nRecvCells[sendProc] << nl << endl; } diff --git a/src/lagrangian/basic/Cloud/Cloud.C b/src/lagrangian/basic/Cloud/Cloud.C index 28a8a56338b270079eb6f8a6cfdd62e4ec8e9f80..3cbc131223a9df1699068e205f8ad75f713c1456 100644 --- a/src/lagrangian/basic/Cloud/Cloud.C +++ b/src/lagrangian/basic/Cloud/Cloud.C @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011-2017, 2020 OpenFOAM Foundation - Copyright (C) 2020-2022 OpenCFD Ltd. + Copyright (C) 2020-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -161,8 +161,7 @@ void Foam::Cloud<ParticleType>::move const labelList& procPatchNeighbours = pData.processorPatchNeighbours(); // Which processors this processor is connected to - const labelList& neighbourProcs = - pData.topology().procNeighbours()[Pstream::myProcNo()]; + const labelList& neighbourProcs = pData.topology().procNeighbours(); // Initialise the stepFraction moved for the particles for (ParticleType& p : *this)