diff --git a/applications/test/globalIndex3/Make/files b/applications/test/globalIndex3/Make/files
new file mode 100644
index 0000000000000000000000000000000000000000..257dcf67be1ae6fad655464713115e2346f73887
--- /dev/null
+++ b/applications/test/globalIndex3/Make/files
@@ -0,0 +1,3 @@
+Test-globalIndex3.cxx
+
+EXE = $(FOAM_USER_APPBIN)/Test-globalIndex3
diff --git a/applications/test/globalIndex3/Make/options b/applications/test/globalIndex3/Make/options
new file mode 100644
index 0000000000000000000000000000000000000000..4ef31e8ea7512fe6edbd055a7563ae78dec5bc90
--- /dev/null
+++ b/applications/test/globalIndex3/Make/options
@@ -0,0 +1,4 @@
+include $(GENERAL_RULES)/mpi-rules
+
+EXE_INC = $(PFLAGS) $(PINC)
+EXE_LIBS = $(PLIBS)
diff --git a/applications/test/globalIndex3/Test-globalIndex3.cxx b/applications/test/globalIndex3/Test-globalIndex3.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..19a72499d3290652b284becd53b5dc6597523dfc
--- /dev/null
+++ b/applications/test/globalIndex3/Test-globalIndex3.cxx
@@ -0,0 +1,578 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | www.openfoam.com
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+    Copyright (C) 2025 OpenCFD Ltd.
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
+Application
+    Test-globalIndex3
+
+Description
+    Tests for globalIndex with node-wise splitting
+
+\*---------------------------------------------------------------------------*/
+
+#include "globalIndex.H"
+#include "globalMeshData.H"
+#include "argList.H"
+#include "Time.H"
+#include "polyMesh.H"
+#include "IndirectList.H"
+#include "IOstreams.H"
+#include "Random.H"
+#include "openfoam_mpi.H"
+
+// pre-scan for "-split-size NUM"
+int option_splitsize(int argc, char *argv[])
+{
+    int ivalue = -1;
+    for (int argi = 1; argi < argc-1; ++argi)
+    {
+        if (strcmp(argv[argi], "-split-size") == 0)
+        {
+            ++argi;
+            ivalue = atoi(argv[argi]);
+        }
+    }
+
+    return ivalue;
+}
+
+using namespace Foam;
+
+template<class T>
+void printList(Ostream& os, const UList<T>& list)
+{
+    os << list.size() << " " << flatOutput(list) << nl;
+}
+
+void printGlobalIndex(Ostream& os, const globalIndex& gi)
+{
+    printList(os, gi.offsets());
+}
+
+
+template<class ProcIDsContainer, class Type>
+void globalIndexGather
+(
+    const labelUList& off,  // needed on master only
+    const label comm,
+    const ProcIDsContainer& procIDs,
+    const UList<Type>& fld,
+    UList<Type>& allFld,    // must be adequately sized on master
+    const int tag,
+    UPstream::commsTypes commsType,
+    bool useWindow = false
+)
+{
+    // low-level: no parRun guard
+    const int masterProci = procIDs.size() ? procIDs[0] : 0;
+
+    // Protection for disjoint calls
+    if (FOAM_UNLIKELY(!UPstream::is_rank(comm)))
+    {
+        FatalErrorInFunction
+            << "Calling with process not on the communicator"
+            << Foam::abort(FatalError);
+    }
+
+    // Require contiguous data for non-blocking
+    if constexpr (!is_contiguous_v<Type>)
+    {
+        if (commsType == UPstream::commsTypes::nonBlocking)
+        {
+            commsType = UPstream::commsTypes::scheduled;
+        }
+    }
+
+    const label startOfRequests = UPstream::nRequests();
+
+
+    // Very hard-coded at the moment
+    int returnCode = MPI_SUCCESS;
+    const int nCmpts = pTraits<Type>::nComponents;
+
+    MPI_Win win;
+    MPI_Datatype dataType = MPI_DOUBLE;
+    if (useWindow)
+    {
+        using cmptType = typename pTraits<Type>::cmptType;
+
+        if (std::is_same<float, cmptType>::value)
+        {
+            dataType = MPI_FLOAT;
+        }
+        else if (std::is_same<double, cmptType>::value)
+        {
+            dataType = MPI_DOUBLE;
+        }
+        else
+        {
+            // Not supported
+            useWindow = false;
+        }
+    }
+
+    if (useWindow)
+    {
+        MPI_Comm mpiComm =
+            PstreamUtils::Cast::to_mpi(UPstream::Communicator::lookup(comm));
+
+        char commName[MPI_MAX_OBJECT_NAME];
+        int nameLen = 0;
+
+        if
+        (
+            MPI_COMM_NULL != mpiComm
+         && MPI_SUCCESS == MPI_Comm_get_name(mpiComm, commName, &nameLen)
+         && (nameLen > 0)
+        )
+        {
+            Pout<< "window on " << commName << nl;
+        }
+
+        if (UPstream::myProcNo(comm) == masterProci || fld.empty())
+        {
+            // Collective
+            returnCode = MPI_Win_create
+            (
+                nullptr,
+                0,
+                1,  // disp_units
+                MPI_INFO_NULL,
+                mpiComm,
+               &win
+            );
+        }
+        else
+        {
+            // Collective
+            returnCode = MPI_Win_create
+            (
+                const_cast<char *>(fld.cdata_bytes()),
+                fld.size_bytes(),
+                sizeof(Type),  // disp_units
+                MPI_INFO_NULL,
+                mpiComm,
+               &win
+            );
+        }
+
+        if (MPI_SUCCESS != returnCode || MPI_WIN_NULL == win)
+        {
+            FatalErrorInFunction
+                << "MPI_Win_create() failed"
+                << Foam::abort(FatalError);
+            // unreachable after Foam::abort()
+        }
+    }
+
+
+    if (UPstream::myProcNo(comm) == masterProci)
+    {
+        const label total = off.back();  // == totalSize()
+
+        if (allFld.size() < total)
+        {
+            FatalErrorInFunction
+                << "[out] UList size=" << allFld.size()
+                << " too small to receive " << total << nl
+                << Foam::abort(FatalError);
+        }
+
+
+        // Assign my local data - respect offset information
+        // so that we can request 0 entries to be copied.
+        // Also handle the case where we have a slice of the full
+        // list.
+        {
+            SubList<Type> dst(allFld, off[1]-off[0], off[0]);
+            SubList<Type> src(fld, off[1]-off[0]);
+
+            if (!dst.empty() && (dst.data() != src.data()))
+            {
+                dst = src;
+            }
+        }
+
+        if (useWindow)
+        {
+            MPI_Win_lock_all(MPI_MODE_NOCHECK, win);
+        }
+
+        for (label i = 1; i < procIDs.size(); ++i)
+        {
+            SubList<Type> slot(allFld, off[i+1]-off[i], off[i]);
+
+            if (slot.empty())
+            {
+                // Nothing to do
+            }
+            else if (useWindow)
+            {
+                returnCode = MPI_Get
+                (
+                    // origin
+                    slot.data(),
+                    slot.size()*(nCmpts),
+                    dataType,
+
+                    // target
+                    procIDs[i],
+                    0,  // displacement
+                    slot.size()*(nCmpts),
+                    dataType,
+                    win
+                );
+
+                if (MPI_SUCCESS != returnCode)
+                {
+                    FatalErrorInFunction
+                        << "MPI_Get failed"
+                        << Foam::abort(FatalError);
+                    // unreachable after Foam::abort()
+                }
+            }
+            else if constexpr (is_contiguous_v<Type>)
+            {
+                UIPstream::read
+                (
+                    commsType,
+                    procIDs[i],
+                    slot,
+                    tag,
+                    comm
+                );
+            }
+            else
+            {
+                IPstream::recv(slot, procIDs[i], tag, comm);
+            }
+        }
+
+        if (useWindow)
+        {
+            MPI_Win_unlock_all(win);
+        }
+    }
+    else if (!useWindow)
+    {
+        if (fld.empty())
+        {
+            // Nothing to do
+        }
+        else if constexpr (is_contiguous_v<Type>)
+        {
+            UOPstream::write
+            (
+                commsType,
+                masterProci,
+                fld,
+                tag,
+                comm
+            );
+        }
+        else
+        {
+            OPstream::send(fld, commsType, masterProci, tag, comm);
+        }
+    }
+
+    if (useWindow)
+    {
+        // Collective
+        MPI_Win_free(&win);
+    }
+
+    if (commsType == UPstream::commsTypes::nonBlocking)
+    {
+        // Wait for outstanding requests
+        UPstream::waitRequests(startOfRequests);
+    }
+}
+
+
+// Report inter-node/intra-node offsets
+static void reportOffsets(const globalIndex& gi)
+{
+    labelList interNodeOffsets;
+    labelList localNodeOffsets;
+    labelRange nodeRange;
+
+    const label numProc = UPstream::nProcs(UPstream::commConstWorld());
+
+    gi.splitNodeOffsets
+    (
+        interNodeOffsets,
+        localNodeOffsets,
+        UPstream::worldComm
+    );
+
+    const auto interNodeComm = UPstream::commInterNode();
+
+    // Only communicate to the node leaders
+    labelList allOffsets;
+    if (UPstream::is_rank(interNodeComm))
+    {
+        // Send top-level offsets to the node leaders
+        if (UPstream::master(interNodeComm))
+        {
+            allOffsets = gi.offsets();
+        }
+        else  // ie, UPstream::is_subrank(interNodeComm)
+        {
+            allOffsets.resize_nocopy(numProc+1);
+        }
+
+        UPstream::broadcast
+        (
+            allOffsets.data_bytes(),
+            allOffsets.size_bytes(),
+            interNodeComm
+        );
+    }
+
+    // Ranges (node leaders only)
+    if (UPstream::is_rank(interNodeComm))
+    {
+        const auto& procIds = UPstream::procID(interNodeComm);
+        const int ranki = UPstream::myProcNo(interNodeComm);
+
+        // For reporting
+        nodeRange.reset
+        (
+            procIds[ranki],
+            (
+                (ranki+1 < procIds.size() ? procIds[ranki+1] : numProc)
+              - procIds[ranki]
+            )
+        );
+    }
+
+    Pout<< "node-range: " << nodeRange << nl;
+    Pout<< "all-offset: "; printList(Pout, allOffsets);
+    Pout<< "inter-offset: "; printList(Pout, interNodeOffsets);
+    Pout<< "intra-offset: "; printList(Pout, localNodeOffsets);
+}
+
+
+template<class Type>
+void globalIndexGather
+(
+    const globalIndex& gi,
+    const UList<Type>& sendData,
+    List<Type>& allData,
+    const int tag,
+    const UPstream::commsTypes commsType,
+    const label comm = UPstream::worldComm,
+    bool useWindow = false
+)
+{
+    if (!UPstream::parRun())
+    {
+        // Serial: direct copy
+        allData = sendData;
+        return;
+    }
+
+    if (UPstream::master(comm))
+    {
+        allData.resize_nocopy(gi.offsets().back());  // == totalSize()
+    }
+    else
+    {
+        allData.clear();  // zero-size on non-master
+    }
+
+
+    const auto& offsets = gi.offsets();  // needed on master only
+
+    Info<< "Using node-comms: " << UPstream::usingNodeComms(comm) << nl;
+
+    const auto interNodeComm = UPstream::commInterNode();
+    const auto localNodeComm = UPstream::commLocalNode();
+
+    if (UPstream::usingNodeComms(comm))
+    {
+        // Stage 0 : The inter-node/intra-node offsets
+        labelList interNodeOffsets;
+        labelList localNodeOffsets;
+
+        gi.splitNodeOffsets(interNodeOffsets, localNodeOffsets, comm);
+
+        // The first node re-uses the output (allData) when collecting
+        // content. All other nodes require temporary node-local storage.
+
+        List<Type> tmpNodeData;
+        if (UPstream::is_subrank(interNodeComm))
+        {
+            tmpNodeData.resize(localNodeOffsets.back());
+        }
+
+        List<Type>& nodeData =
+        (
+            UPstream::master(interNodeComm) ? allData : tmpNodeData
+        );
+
+
+        // Stage 1 : Gather data within the node
+        {
+            globalIndexGather
+            (
+                localNodeOffsets,  // (master only)
+                localNodeComm,
+                UPstream::allProcs(localNodeComm),
+                sendData,
+                nodeData,
+                tag,
+                commsType,
+                useWindow
+            );
+        }
+
+        // Stage 2 : Gather data between nodes
+        if (UPstream::is_rank(interNodeComm))
+        {
+            globalIndexGather
+            (
+                interNodeOffsets,  // (master only)
+                interNodeComm,
+                UPstream::allProcs(interNodeComm),
+                nodeData,
+                allData,
+                tag,
+                commsType,
+                useWindow
+            );
+        }
+    }
+    else
+    {
+        globalIndexGather
+        (
+            offsets,  // needed on master only
+            comm,
+            UPstream::allProcs(comm),   // All communicator ranks
+            sendData,
+            allData,
+            tag,
+            commsType,
+            useWindow
+        );
+    }
+}
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+//  Main program:
+
+int main(int argc, char *argv[])
+{
+    argList::noCheckProcessorDirectories();
+    argList::addVerboseOption("Set UPstream::debug level");
+    argList::addOption("split-size", "NUM", "split with ncores/node");
+    argList::addBoolOption("builtin", "only use builtin globalIndex::gather");
+    argList::addBoolOption("window", "get data via window");
+
+    // Check -verbose before initialisation
+    UPstream::debug = argList::verbose(argc, argv);
+
+    // Check -split-size before initialisation
+    {
+        int splitSize = option_splitsize(argc, argv);
+
+        if (splitSize >= 0)
+        {
+            UPstream::nodeCommsControl_ = splitSize;
+        }
+    }
+
+    #include "setRootCase.H"
+
+    const bool useLocalComms = UPstream::usingNodeComms();
+    bool useWindow = args.found("window");
+    bool useBuiltin = args.found("builtin");
+
+    Info<< nl
+        << "Getting local-comms: " << Switch::name(useLocalComms) << nl
+        << "Getting data with window: " << Switch::name(useWindow) << nl
+        << nl;
+
+    if (useWindow && useBuiltin)
+    {
+        Info<< "Selected '-window' and '-builtin' : ignoring '-builtin'"
+            << nl;
+        useBuiltin = false;
+    }
+
+    Random rng(31 + 2*UPstream::myProcNo());
+
+    const label localSize = (5*rng.position<label>(1, 15));
+
+    globalIndex globIndex
+    (
+        globalIndex::gatherOnly{},
+        localSize,
+        UPstream::commWorld()
+    );
+
+    Info<< "global-index: ";
+    printGlobalIndex(Info, globIndex);
+    reportOffsets(globIndex);
+
+    Field<scalar> allData;
+    Field<scalar> localFld(localSize, scalar(UPstream::myProcNo()));
+
+    if (useBuiltin)
+    {
+        globIndex.gather
+        (
+            localFld,
+            allData,
+            UPstream::msgType(),
+            UPstream::commsTypes::nonBlocking,
+            UPstream::commWorld()
+        );
+    }
+    else
+    {
+        globalIndexGather
+        (
+            globIndex,
+            localFld,
+            allData,
+            UPstream::msgType(),
+            UPstream::commsTypes::nonBlocking,
+            UPstream::commWorld(),
+            useWindow
+        );
+    }
+
+    Pout<< "local: " << flatOutput(localFld) << nl;
+    Info<< "field: " << flatOutput(allData) << nl;
+
+    Info<< "\nEnd\n" << endl;
+    return 0;
+}
+
+
+// ************************************************************************* //
diff --git a/applications/test/nodeTopology/Make/files b/applications/test/nodeTopology/Make/files
new file mode 100644
index 0000000000000000000000000000000000000000..aa402b759deb27407abb981bb9af93ffa9a30a74
--- /dev/null
+++ b/applications/test/nodeTopology/Make/files
@@ -0,0 +1,3 @@
+Test-nodeTopology.cxx
+
+EXE = $(FOAM_USER_APPBIN)/Test-nodeTopology
diff --git a/applications/test/nodeTopology/Make/options b/applications/test/nodeTopology/Make/options
new file mode 100644
index 0000000000000000000000000000000000000000..18e6fe47afacb902cddccf82632772447704fd88
--- /dev/null
+++ b/applications/test/nodeTopology/Make/options
@@ -0,0 +1,2 @@
+/* EXE_INC = */
+/* EXE_LIBS = */
diff --git a/applications/test/nodeTopology/Test-nodeTopology.cxx b/applications/test/nodeTopology/Test-nodeTopology.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..db4a5eeaf4382fb6771438a9bb8b384a18fdebb7
--- /dev/null
+++ b/applications/test/nodeTopology/Test-nodeTopology.cxx
@@ -0,0 +1,198 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | www.openfoam.com
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+    Copyright (C) 2025 OpenCFD Ltd.
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software: you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
+
+Application
+    Test-nodeTopology
+
+Description
+    Simple reporting of node topology
+
+\*---------------------------------------------------------------------------*/
+
+#include "argList.H"
+#include "IOstreams.H"
+
+using namespace Foam;
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+int main(int argc, char *argv[])
+{
+    argList::noBanner();
+    argList::noCheckProcessorDirectories();
+    argList::addOption
+    (
+        "numProcs",
+        "int",
+        "Num of ranks to simulate (default: 16)"
+    );
+    argList::addOption
+    (
+        "cores",
+        "int",
+        "Num of cores to simulate (default: 4)"
+    );
+
+    #include "setRootCase.H"
+
+    label nProcs = UPstream::nProcs(UPstream::worldComm);
+
+    List<int> interNodeProcs_fake;
+
+    if (UPstream::parRun())
+    {
+        if (args.found("numProcs"))
+        {
+            InfoErr<< "ignoring -numProcs option in parallel" << nl;
+        }
+        if (args.found("cores"))
+        {
+            InfoErr<< "ignoring -cores option in parallel" << nl;
+        }
+    }
+    else
+    {
+        // serial
+        nProcs = args.getOrDefault<label>("numProcs", 16);
+        label nCores = args.getOrDefault<label>("cores", 4);
+
+        if (nCores > 1 && nCores < nProcs)
+        {
+            const label numNodes
+                = (nProcs/nCores) + ((nProcs % nCores) ? 1 : 0);
+
+            interNodeProcs_fake.resize(numNodes);
+
+            for (label nodei = 0; nodei < numNodes; ++nodei)
+            {
+                interNodeProcs_fake[nodei] = nodei * nCores;
+            }
+        }
+    }
+
+    const List<int>& interNodeProcs =
+    (
+        UPstream::parRun()
+      ? UPstream::procID(UPstream::commInterNode())
+      : interNodeProcs_fake
+    );
+
+
+    // Generate the graph
+    if (UPstream::master(UPstream::worldComm))
+    {
+        auto& os = Info.stream();
+
+        os << "// node topology graph:" << nl;
+        os.beginBlock("graph");
+
+        // Prefer left-to-right layout for large graphs
+        os << indent << "rankdir=LR" << nl;
+
+        int pos = 0;
+
+        // First level are the inter-node connections
+        const label parent = 0;
+        for (const auto proci : interNodeProcs)
+        {
+            if (parent == proci) continue;
+
+            if (pos)
+            {
+                os << "  ";
+            }
+            else
+            {
+                os << indent;
+            }
+            os << parent << " -- " << proci;
+
+            if (++pos >= 4)  // Max 4 items per line
+            {
+                pos = 0;
+                os << nl;
+            }
+        }
+
+        if (pos)
+        {
+            pos = 0;
+            os << nl;
+        }
+
+        // Next level are within the nodes
+        for (label nodei = 0; nodei < interNodeProcs.size(); ++nodei)
+        {
+            pos = 0;
+
+            label firstProc = interNodeProcs[nodei];
+            const label lastProc =
+            (
+                (nodei+1 < interNodeProcs.size())
+              ? interNodeProcs[nodei+1]
+              : nProcs
+            );
+
+            os << indent << "// inter-node " << nodei
+                << " [" << firstProc
+                << ".." << lastProc-1 << "]" << nl;
+
+            for (label proci = firstProc; proci < lastProc; ++proci)
+            {
+                if (firstProc == proci) continue;
+
+                if (pos)
+                {
+                    os << "  ";
+                }
+                else
+                {
+                    os << indent;
+                }
+                os << firstProc << " -- " << proci;
+
+                if (++pos >= 4)  // Max 4 items per line
+                {
+                    pos = 0;
+                    os << nl;
+                }
+            }
+            if (pos)
+            {
+                pos = 0;
+                os << nl;
+            }
+        }
+
+        os.endBlock();
+        os << "// end graph" << nl;
+    }
+
+    InfoErr << "\nDone" << nl;
+    return 0;
+}
+
+
+// ************************************************************************* //
diff --git a/applications/test/parallel-comm1/Test-parallel-comm1.C b/applications/test/parallel-comm1/Test-parallel-comm1.C
index 33a6bd5bdcb54a81a0abd1eb98a01eb22d412026..b9b10f442be5a87042c0935fdc6c3d05b18d8bcc 100644
--- a/applications/test/parallel-comm1/Test-parallel-comm1.C
+++ b/applications/test/parallel-comm1/Test-parallel-comm1.C
@@ -158,7 +158,7 @@ int main(int argc, char *argv[])
 
     for (label count = 0; count < repeat; ++count)
     {
-        label comm = UPstream::allocateCommunicator(UPstream::worldComm, top);
+        label comm = UPstream::newCommunicator(UPstream::worldComm, top);
 
         scalar localValue = 111*UPstream::myProcNo(UPstream::worldComm);
 
diff --git a/applications/test/parallel-comm2/Test-parallel-comm2.C b/applications/test/parallel-comm2/Test-parallel-comm2.C
index 38fc45361f90d185bb732a2b0bc0c3c64a4289ca..b5f01bc5040092da6300440cf7d7f8498d2bff3f 100644
--- a/applications/test/parallel-comm2/Test-parallel-comm2.C
+++ b/applications/test/parallel-comm2/Test-parallel-comm2.C
@@ -68,14 +68,14 @@ int main(int argc, char *argv[])
     argList::noCheckProcessorDirectories();
     argList::addBoolOption("info", "information");
     argList::addBoolOption("print-tree", "Report tree(s) as graph");
-    argList::addBoolOption("comm-split", "Test simple comm split");
-    argList::addBoolOption("mpi-host-comm", "Test DIY host-comm split");
+    argList::addBoolOption("no-test", "Disable general tests");
     argList::addBoolOption("host-comm", "Test Pstream host-comm");
     argList::addBoolOption("host-broadcast", "Test host-base broadcasts");
 
     #include "setRootCase.H"
 
     const bool optPrintTree = args.found("print-tree");
+    bool generalTest = !args.found("no-test");
 
     Info<< nl
         << "parallel:" << UPstream::parRun()
@@ -89,6 +89,18 @@ int main(int argc, char *argv[])
         UPstream::printCommTree(UPstream::commWorld());
     }
 
+    if (UPstream::parRun())
+    {
+        Pout<< "world ranks: 0.."
+            << UPstream::nProcs(UPstream::commWorld())-1 << nl;
+
+        Pout<< "inter-node ranks: " << UPstream::numNodes() << ' '
+            << flatOutput(UPstream::procID(UPstream::commInterNode())) << nl;
+
+        Pout<< "local-node ranks: "
+            << flatOutput(UPstream::procID(UPstream::commLocalNode())) << nl;
+    }
+
     if (args.found("info"))
     {
         Info<< nl;
@@ -104,334 +116,29 @@ int main(int argc, char *argv[])
         Pout<< endl;
     }
 
-    bool generalTest = true;
-
-    if (UPstream::parRun() && args.found("comm-split"))
-    {
-        generalTest = false;
-
-        int world_nprocs = 0;
-        int world_rank = -1;
-        MPI_Comm_size(MPI_COMM_WORLD, &world_nprocs);
-        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
-
-        int host_nprocs = 0;
-        int host_rank = -1;
-        MPI_Comm commIntraHost;
-        MPI_Comm_split_type
-        (
-            MPI_COMM_WORLD,
-            MPI_COMM_TYPE_SHARED,  // OMPI_COMM_TYPE_NODE
-            0, MPI_INFO_NULL, &commIntraHost
-        );
-
-        MPI_Comm_size(commIntraHost, &host_nprocs);
-        MPI_Comm_rank(commIntraHost, &host_rank);
-
-        int leader_nprocs = 0;
-        int leader_rank = -1;
-        MPI_Comm commInterHost;
-
-        if (false)
-        {
-            // Easy enough to use MPI_Comm_split, but slightly annoying
-            // that it returns MPI_COMM_NULL for unused ranks...
-            MPI_Comm commInterHost;
-            MPI_Comm_split
-            (
-                MPI_COMM_WORLD,
-                (host_rank == 0) ? 0 : MPI_UNDEFINED,
-                0, &commInterHost
-            );
-
-            if (commInterHost != MPI_COMM_NULL)
-            {
-                MPI_Comm_size(commInterHost, &leader_nprocs);
-                MPI_Comm_rank(commInterHost, &leader_rank);
-            }
-        }
-        else
-        {
-            boolList isHostLeader(world_nprocs, false);
-            isHostLeader[world_rank] = (host_rank == 0);
-
-            MPI_Allgather
-            (
-                // recv is also send
-                MPI_IN_PLACE, 1, MPI_C_BOOL,
-                isHostLeader.data(), 1, MPI_C_BOOL,
-                MPI_COMM_WORLD
-            );
-
-            Pout<< "leaders: " << isHostLeader << endl;
-
-            DynamicList<int> subRanks(isHostLeader.size());
-            forAll(isHostLeader, proci)
-            {
-                if (isHostLeader[proci])
-                {
-                    subRanks.push_back(proci);
-                }
-            }
-            // Starting from parent
-            MPI_Group parent_group;
-            MPI_Comm_group(MPI_COMM_WORLD, &parent_group);
-
-            MPI_Group active_group;
-            MPI_Group_incl
-            (
-                parent_group,
-                subRanks.size(),
-                subRanks.cdata(),
-                &active_group
-            );
-
-            // Create new communicator for this group
-            MPI_Comm_create_group
-            (
-                MPI_COMM_WORLD,
-                active_group,
-                UPstream::msgType(),
-                &commInterHost
-            );
-
-            // Groups not needed after this...
-            MPI_Group_free(&parent_group);
-            MPI_Group_free(&active_group);
-
-            MPI_Comm_size(commInterHost, &leader_nprocs);
-            MPI_Comm_rank(commInterHost, &leader_rank);
-        }
-
-        Pout<< nl << "[MPI_Comm_split_type]" << nl
-            << "Host rank " << host_rank << " / " << host_nprocs
-            << " on " << hostName()
-            << " inter-rank: " << leader_rank << " / " << leader_nprocs
-            << " host leader:" << (leader_rank == 0)
-            << " sub-rank:" << (leader_rank > 0)
-            << nl;
-
-        if (commInterHost != MPI_COMM_NULL)
-        {
-            MPI_Comm_free(&commInterHost);
-        }
-        if (commIntraHost != MPI_COMM_NULL)
-        {
-            MPI_Comm_free(&commIntraHost);
-        }
-    }
-
-    if (UPstream::parRun() && args.found("mpi-host-comm"))
-    {
-        generalTest = false;
-
-        // Host communicator, based on the current world communicator
-        // Use hostname
-        // Lowest rank per hostname is the IO rank
-
-        label numprocs = UPstream::nProcs(UPstream::commGlobal());
-
-        // Option 1: using hostnames
-        // - pro: trivial coding
-        // - con: unequal lengths, more allocations and 'hops'
-        stringList hosts(numprocs);
-        hosts[Pstream::myProcNo(UPstream::commGlobal())] = hostName();
-        Pstream::gatherList(hosts, UPstream::msgType(), UPstream::commGlobal());
-
-
-        // Option 2: using SHA1 of hostnames
-        // - con: uglier coding (but only needed locally!)
-        // - pro: fixed digest length enables direct MPI calls
-        //        can avoid Pstream::gatherList() during setup...
-
-        List<SHA1Digest> digests;
-        if (UPstream::master(UPstream::commGlobal()))
-        {
-            digests.resize(numprocs);
-        }
-
-        {
-            const SHA1Digest myDigest(SHA1(hostName()).digest());
-
-            UPstream::mpiGather
-            (
-                myDigest.cdata_bytes(),     // Send
-                digests.data_bytes(),       // Recv
-                SHA1Digest::max_size(),     // Num send/recv per rank
-                UPstream::commGlobal()
-            );
-        }
-
-
-        labelList hostIDs(numprocs);
-        DynamicList<label> subRanks(numprocs);
-
-        Info<< "digests: " << digests << nl;
-
-        // Compact numbering
-        if (UPstream::master(UPstream::commGlobal()))
-        {
-            DynamicList<word> hostNames(numprocs);
-
-            forAll(hosts, proci)
-            {
-                const word& host = hosts[proci];
-
-                hostIDs[proci] = hostNames.find(host);
-
-                if (hostIDs[proci] < 0)
-                {
-                    // First appearance of host (encode as leader)
-                    hostIDs[proci] = -(hostNames.size() + 1);
-                    hostNames.push_back(host);
-                 }
-            }
-            hostIDs = -1;
-
-
-            DynamicList<SHA1Digest> uniqDigests(numprocs);
-
-            forAll(digests, proci)
-            {
-                const SHA1Digest& dig = digests[proci];
-
-                hostIDs[proci] = uniqDigests.find(dig);
-
-                if (hostIDs[proci] < 0)
-                {
-                    // First appearance of host (encode as leader)
-                    hostIDs[proci] = -(uniqDigests.size() + 1);
-                    uniqDigests.push_back(dig);
-                }
-            }
-        }
-
-
-        Info<< "hosts =  " << hosts << endl;
-        Info<< "hostIDs =  " << hostIDs << endl;
-
-        UPstream::broadcast
-        (
-            hostIDs.data_bytes(),
-            hostIDs.size_bytes(),
-            UPstream::commGlobal(),
-            UPstream::masterNo()
-        );
-
-        // Ranks for world to inter-host communicator
-        // - very straightforward
-
-        #if 0
-        subRanks.clear();
-        forAll(hostIDs, proci)
-        {
-            // Is host leader?
-            if (hostIDs[proci] < 0)
-            {
-                subRanks.push_back(proci);
-
-                // Flip back to generic host id
-                hostIDs[proci] = -(hostIDs[proci] + 1);
-            }
-        }
-
-        // From world to hostMaster
-        const label commInterHost =
-            UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks);
-        #endif
-
-        const label myWorldProci = UPstream::myProcNo(UPstream::commGlobal());
-
-        label myHostId = hostIDs[myWorldProci];
-        if (myHostId < 0) myHostId = -(myHostId + 1);  // Flip to generic id
-
-        // Ranks for within a host
-        subRanks.clear();
-        forAll(hostIDs, proci)
-        {
-            label id = hostIDs[proci];
-            if (id < 0) id = -(id + 1);  // Flip to generic id
-
-            if (id == myHostId)
-            {
-                subRanks.push_back(proci);
-            }
-        }
-
-        // The intra-host ranks
-        const label commIntraHost =
-            UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks);
-
-
-        // Test what if we have intra-host comm and we want host-master
-
-        List<bool> isHostMaster(numprocs, false);
-        if (UPstream::master(commIntraHost))
-        {
-            isHostMaster[myWorldProci] = true;
-        }
-
-        UPstream::mpiAllGather
-        (
-            isHostMaster.data_bytes(),
-            sizeof(bool),
-            UPstream::commGlobal()
-        );
-
-        // Ranks for world to hostMaster
-        // - very straightforward
-        subRanks.clear();
-        forAll(isHostMaster, proci)
-        {
-            if (isHostMaster[proci])
-            {
-                subRanks.push_back(proci);
-            }
-        }
-
-        // From world to hostMaster
-        const label commInterHost =
-            UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks);
-
-
-        Pout<< nl << "[manual split]" << nl
-            << nl << "Host rank " << UPstream::myProcNo(commIntraHost)
-            << " / " << UPstream::nProcs(commIntraHost)
-            << " on " << hostName()
-            << ", inter-rank: " << UPstream::myProcNo(commInterHost)
-            << " / " << UPstream::nProcs(commInterHost)
-            << " host leader:" << UPstream::master(commInterHost)
-            << " sub-rank:" << UPstream::is_subrank(commInterHost)
-            << nl;
-
-        UPstream::freeCommunicator(commInterHost);
-        UPstream::freeCommunicator(commIntraHost);
-    }
-
     if (UPstream::parRun() && args.found("host-comm"))
     {
         generalTest = false;
         Info<< nl << "[pstream host-comm]" << nl << endl;
 
-        const label commInterHost = UPstream::commInterHost();
-        const label commIntraHost = UPstream::commIntraHost();
+        const label commInterNode = UPstream::commInterNode();
+        const label commLocalNode = UPstream::commLocalNode();
 
-        Pout<< "Host rank " << UPstream::myProcNo(commIntraHost)
-            << " / " << UPstream::nProcs(commIntraHost)
+        Pout<< "Host rank " << UPstream::myProcNo(commLocalNode)
+            << " / " << UPstream::nProcs(commLocalNode)
             << " on " << hostName()
-            << ", inter-rank: " << UPstream::myProcNo(commInterHost)
-            << " / " << UPstream::nProcs(commInterHost)
-            << ", host leader:" << UPstream::master(commInterHost)
-            << " sub-rank:" << UPstream::is_subrank(commInterHost)
+            << ", inter-rank: " << UPstream::myProcNo(commInterNode)
+            << " / " << UPstream::nProcs(commInterNode)
+            << ", host leader:" << UPstream::master(commInterNode)
+            << " sub-rank:" << UPstream::is_subrank(commInterNode)
             << endl;
 
-
         {
             Info<< "host-master: "
-                << UPstream::whichCommunication(commInterHost) << endl;
+                << UPstream::whichCommunication(commInterNode) << endl;
 
-            UPstream::printCommTree(commInterHost);
-            UPstream::printCommTree(commIntraHost);
+            UPstream::printCommTree(commInterNode);
+            UPstream::printCommTree(commLocalNode);
         }
     }
 
@@ -440,32 +147,32 @@ int main(int argc, char *argv[])
         generalTest = false;
         Info<< nl << "[pstream host-broadcast]" << nl << endl;
 
-        const label commInterHost = UPstream::commInterHost();
-        const label commIntraHost = UPstream::commIntraHost();
+        const label commInterNode = UPstream::commInterNode();
+        const label commLocalNode = UPstream::commLocalNode();
 
         Pout<< "world rank: " << UPstream::myProcNo(UPstream::commWorld())
             << " host-leader rank: "
-            << UPstream::myProcNo(UPstream::commInterHost())
+            << UPstream::myProcNo(UPstream::commInterNode())
             << " intra-host rank: "
-            << UPstream::myProcNo(UPstream::commIntraHost())
+            << UPstream::myProcNo(UPstream::commLocalNode())
             << endl;
 
         label value1(0), value2(0), value3(0);
-        label hostIndex = UPstream::myProcNo(commInterHost);
+        label hostIndex = UPstream::myProcNo(commInterNode);
 
-        if (UPstream::master(commInterHost))
+        if (UPstream::master(commInterNode))
         {
             value1 = 100;
             value2 = 200;
         }
-        if (UPstream::master(commIntraHost))
+        if (UPstream::master(commLocalNode))
         {
             value3 = 300;
         }
 
-        Pstream::broadcast(value1, commInterHost);
-        Pstream::broadcast(value2, commIntraHost);
-        Pstream::broadcast(hostIndex, commIntraHost);
+        Pstream::broadcast(value1, commInterNode);
+        Pstream::broadcast(value2, commLocalNode);
+        Pstream::broadcast(hostIndex, commLocalNode);
 
         Pout<< "host: " << hostIndex
             << " broadcast 1: "
@@ -474,7 +181,7 @@ int main(int argc, char *argv[])
             << value3 << endl;
 
         // re-broadcast
-        Pstream::broadcast(value1, commIntraHost);
+        Pstream::broadcast(value1, commLocalNode);
         Pout<< "host: " << hostIndex
             << " broadcast 2: "
             << value1 << endl;
@@ -483,42 +190,42 @@ int main(int argc, char *argv[])
         label reduced1 = value1;
         label reduced2 = value1;
 
-        reduce
+        Foam::reduce
         (
             reduced1,
             sumOp<label>(),
             UPstream::msgType(),
-            commIntraHost
+            commLocalNode
         );
 
-        reduce
+        Foam::reduce
         (
             reduced2,
             sumOp<label>(),
             UPstream::msgType(),
-            commInterHost
+            commInterNode
         );
 
         Pout<< "value1: (host) " << reduced1
             << " (leader) " << reduced2 << endl;
 
-        // Pout<< "ranks: " << UPstream::nProcs(commInterHost) << endl;
+        // Pout<< "ranks: " << UPstream::nProcs(commInterNode) << endl;
 
         wordList strings;
-        if (UPstream::is_rank(commInterHost))
+        if (UPstream::is_rank(commInterNode))
         {
-            strings.resize(UPstream::nProcs(commInterHost));
-            strings[UPstream::myProcNo(commInterHost)] = name(pid());
+            strings.resize(UPstream::nProcs(commInterNode));
+            strings[UPstream::myProcNo(commInterNode)] = name(pid());
         }
 
         // Some basic gather/scatter
-        Pstream::allGatherList(strings, UPstream::msgType(), commInterHost);
+        Pstream::allGatherList(strings, UPstream::msgType(), commInterNode);
 
         Pout<< "pids " << flatOutput(strings) << endl;
 
         Foam::reverse(strings);
 
-        Pstream::broadcast(strings, commIntraHost);
+        Pstream::broadcast(strings, commLocalNode);
         Pout<< "PIDS " << flatOutput(strings) << endl;
     }
 
diff --git a/applications/test/treeComms/Test-treeComms.C b/applications/test/treeComms/Test-treeComms.C
index 832f1459f7ab8557a3791c33f1a31aa5310fa9e5..7408358464c16d7137bf10dea8b42006542f1f1a 100644
--- a/applications/test/treeComms/Test-treeComms.C
+++ b/applications/test/treeComms/Test-treeComms.C
@@ -51,7 +51,7 @@ void printConnection(Ostream& os, const label proci, const labelUList& below)
 // The number of receives - as per gatherList (v2112)
 void printRecvCount_gatherList
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     const label comm = UPstream::worldComm
 )
 {
@@ -91,7 +91,7 @@ void printRecvCount_gatherList
 // The number of sends - as per scatterList (v2112)
 void printSendCount_scatterList
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     const label comm = UPstream::worldComm
 )
 {
@@ -131,7 +131,7 @@ void printSendCount_scatterList
 // Transmission widths (contiguous data)
 void printWidths
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     const label comm = UPstream::worldComm
 )
 {
diff --git a/etc/controlDict b/etc/controlDict
index 5ca48e8d73e0494c07dcc487faf69832951886ad..700a988be1b17132c4d64bacce40de45104bb2be 100644
--- a/etc/controlDict
+++ b/etc/controlDict
@@ -135,6 +135,18 @@ OptimisationSwitches
     // Default communication type (nonBlocking | scheduled | buffered)
     commsType       nonBlocking;
 
+    // Use host/node topology-aware routines
+    //  0: disabled
+    //  1: split by hostname [default]
+    //  2: split by shared memory (MPI shared-memory domains)
+    //  >=4: (debug/manual) split with given number per node
+    nodeComms       1;
+
+    // Minimum number of nodes before topology-aware routines are enabled
+    //  <= 2 : always
+    //  >= 3 : when there are more than N nodes
+    nodeComms.min   0;
+
     // Transfer double as float for processor boundaries. Mostly defunct.
     floatTransfer   0;
 
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
index 04e3009503767a627469eb0ad385daa40cb11942..05dc64eb5135757473d450a61390151621965a8f 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
@@ -310,7 +310,7 @@ public:
         template<class T>
         static void gatherList
         (
-            const UList<commsStruct>& comms,
+            const UPstream::commsStructList& comms,
             //! [in,out]
             UList<T>& values,
             const int tag,
@@ -349,7 +349,7 @@ public:
         template<class T>
         static void scatterList
         (
-            const UList<commsStruct>& comms,
+            const UPstream::commsStructList& comms,
             UList<T>& values,
             const int tag,
             const label comm
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C
index 267d3d1c98cc19bf416cca1b7f4df688af788304..edc269a067819e71fbf064bcbab83b3f7bb52ed5 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamGatherList.C
@@ -45,7 +45,7 @@ Description
 template<class T>
 void Foam::Pstream::gatherList
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     UList<T>& values,
     const int tag,
     const label comm
@@ -190,7 +190,7 @@ void Foam::Pstream::gatherList
 template<class T>
 void Foam::Pstream::scatterList
 (
-    const UList<UPstream::commsStruct>& comms,
+    const UPstream::commsStructList& comms,
     UList<T>& values,
     const int tag,
     const label comm
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
index 9553a4bb73b97fc80f36c850a1bbb55bc897e9ab..4f47d5f98377194a56fa177de4bc9a8f18624094 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2017 OpenFOAM Foundation
-    Copyright (C) 2015-2023 OpenCFD Ltd.
+    Copyright (C) 2015-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -58,82 +58,6 @@ Foam::UPstream::commsTypeNames
 });
 
 
-// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
-
-namespace Foam
-{
-
-// Determine host grouping.
-// Uses SHA1 of hostname instead of MPI_Comm_split or MPI_Comm_split_type
-// for two reasons:
-// - Comm_split returns an MPI_COMM_NULL on non-participating process
-//   which does not easily fit into the OpenFOAM framework
-//
-// - use the SHA1 of hostname allows a single MPI_Gather, determination of
-//   the inter-host vs intra-host (on the master) followed by a single
-//   broadcast of integers.
-//
-// Returns: the unique host indices with the leading hosts encoded
-// with negative values
-static List<int> getHostGroupIds(const label parentCommunicator)
-{
-    const label numProcs = UPstream::nProcs(parentCommunicator);
-
-    List<SHA1Digest> digests;
-    if (UPstream::master(parentCommunicator))
-    {
-        digests.resize(numProcs);
-    }
-
-    // Could also add lowercase etc, but since hostName()
-    // will be consistent within the same node, there is no need.
-    SHA1Digest myDigest(SHA1(hostName()).digest());
-
-    // The fixed-length digest allows use of MPI_Gather
-    UPstream::mpiGather
-    (
-        myDigest.cdata_bytes(),     // Send
-        digests.data_bytes(),       // Recv
-        SHA1Digest::size_bytes(),   // Num send/recv data per rank
-        parentCommunicator
-    );
-
-    List<int> hostIDs(numProcs);
-
-    // Compact numbering of hosts.
-    if (UPstream::master(parentCommunicator))
-    {
-        DynamicList<SHA1Digest> uniqDigests;
-
-        forAll(digests, proci)
-        {
-            const SHA1Digest& dig = digests[proci];
-
-            hostIDs[proci] = uniqDigests.find(dig);
-
-            if (hostIDs[proci] < 0)
-            {
-                // First appearance of host. Encode as leader
-                hostIDs[proci] = -(uniqDigests.size() + 1);
-                uniqDigests.push_back(dig);
-            }
-        }
-    }
-
-    UPstream::broadcast
-    (
-        hostIDs.data_bytes(),
-        hostIDs.size_bytes(),
-        parentCommunicator,
-        UPstream::masterNo()
-    );
-
-    return hostIDs;
-}
-
-} // End namespace Foam
-
-
 // * * * * * * * * * * * * * Private Member Functions  * * * * * * * * * * * //
 
 void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads)
@@ -158,7 +82,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads)
         freeCommunicator(UPstream::commGlobal());
 
         // 0: COMM_WORLD : commWorld() / commGlobal()
-        comm = allocateCommunicator(-1, singleProc, false);
+        comm = newCommunicator(-1, singleProc, false);
         if (comm != UPstream::commGlobal())
         {
             // Failed sanity check
@@ -169,7 +93,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads)
         }
 
         // 1: COMM_SELF
-        comm = allocateCommunicator(-2, singleProc, false);
+        comm = newCommunicator(-2, singleProc, false);
         if (comm != UPstream::commSelf())
         {
             // Failed sanity check
@@ -192,7 +116,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads)
         freeCommunicator(UPstream::commGlobal());
 
         // 0: COMM_WORLD : commWorld() / commGlobal()
-        comm = allocateCommunicator(-1, labelRange(nProcs), true);
+        comm = newCommunicator(-1, labelRange(nProcs), true);
         if (comm != UPstream::commGlobal())
         {
             // Failed sanity check
@@ -202,10 +126,12 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads)
                 << Foam::exit(FatalError);
         }
 
+        const int globalRanki = UPstream::myProcNo(UPstream::commGlobal());
+
         // 1: COMM_SELF
         // - Processor number wrt world communicator
-        singleProc.start() = UPstream::myProcNo(UPstream::commGlobal());
-        comm = allocateCommunicator(-2, singleProc, true);
+        singleProc.start() = globalRanki;
+        comm = newCommunicator(-2, singleProc, true);
         if (comm != UPstream::commSelf())
         {
             // Failed sanity check
@@ -215,7 +141,7 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads)
                 << Foam::exit(FatalError);
         }
 
-        Pout.prefix() = '[' + Foam::name(myProcNo(commGlobal())) + "] ";
+        Pout.prefix() = '[' + std::to_string(globalRanki) + "] ";
         Perr.prefix() = Pout.prefix();
     }
 
@@ -243,6 +169,7 @@ Foam::label Foam::UPstream::getAvailableCommIndex(const label parentIndex)
         parentComm_[index] = parentIndex;
 
         procIDs_[index].clear();
+        // Sizing and filling are demand-driven
         linearCommunication_[index].clear();
         treeCommunication_[index].clear();
     }
@@ -255,15 +182,16 @@ Foam::label Foam::UPstream::getAvailableCommIndex(const label parentIndex)
         parentComm_.push_back(parentIndex);
 
         procIDs_.emplace_back();
-        linearCommunication_.emplace_back();
-        treeCommunication_.emplace_back();
+        // Sizing and filling are demand-driven
+        linearCommunication_.emplace_back(index);
+        treeCommunication_.emplace_back(index);
     }
 
     return index;
 }
 
 
-Foam::label Foam::UPstream::allocateCommunicator
+Foam::label Foam::UPstream::newCommunicator
 (
     const label parentIndex,
     const labelRange& subRanks,
@@ -274,57 +202,46 @@ Foam::label Foam::UPstream::allocateCommunicator
 
     if (debug)
     {
-        Perr<< "Allocating communicator " << index << nl
-            << "    parent : " << parentIndex << nl
-            << "    procs  : " << subRanks << nl
+        Perr<< "Allocate communicator ["
+            << index << "] from [" << parentIndex
+            << "] ranks : " << subRanks << nl
             << endl;
     }
 
     // Initially treat as master,
     // overwritten by allocateCommunicatorComponents
     myProcNo_[index] = UPstream::masterNo();
+    auto& procIds = procIDs_[index];
 
     // The selected sub-ranks.
     // - transcribe from label to int
-    // - already in incremental order
-    auto& procIds = procIDs_[index];
-    procIds.resize_nocopy(subRanks.size());
-
-    label numSubRanks = 0;
-    for (const label subRanki : subRanks)
+    // - already in monotonic order
+    if
+    (
+        (withComponents && UPstream::parRun())
+      ? (parentIndex < 0 || subRanks.contains(myProcNo_[parentIndex]))
+      : !subRanks.empty()
+    )
     {
-        procIds[numSubRanks] = subRanki;
-        ++numSubRanks;
+        procIds.resize_nocopy(subRanks.size());
+        std::iota(procIds.begin(), procIds.end(), subRanks.start());
+    }
+    else
+    {
+        // Not involved
+        procIds.clear();
     }
 
-    // Sizing and filling are demand-driven
-    linearCommunication_[index].clear();
-    treeCommunication_[index].clear();
-
-    if (withComponents && parRun())
+    if (withComponents && UPstream::parRun())
     {
         allocateCommunicatorComponents(parentIndex, index);
-
-        // Could 'remember' locations of uninvolved ranks
-        /// if (myProcNo_[index] < 0 && parentIndex >= 0)
-        /// {
-        ///     // As global rank
-        ///     myProcNo_[index] = -(myProcNo_[worldComm]+1);
-        ///
-        /// OR:
-        ///     // As parent rank number
-        ///     if (myProcNo_[parentIndex] >= 0)
-        ///     {
-        ///         myProcNo_[index] = -(myProcNo_[parentIndex]+1);
-        ///     }
-        /// }
     }
 
     return index;
 }
 
 
-Foam::label Foam::UPstream::allocateCommunicator
+Foam::label Foam::UPstream::newCommunicator
 (
     const label parentIndex,
     const labelUList& subRanks,
@@ -335,236 +252,288 @@ Foam::label Foam::UPstream::allocateCommunicator
 
     if (debug)
     {
-        Perr<< "Allocating communicator " << index << nl
-            << "    parent : " << parentIndex << nl
-            << "    procs  : " << flatOutput(subRanks) << nl
+        Perr<< "Allocate communicator ["
+            << index << "] from [" << parentIndex
+            << "] ranks : " << flatOutput(subRanks) << nl
             << endl;
     }
 
     // Initially treat as master,
     // overwritten by allocateCommunicatorComponents
     myProcNo_[index] = UPstream::masterNo();
-
-    // The selected sub-ranks.
-    // - transcribe from label to int. Treat negative values as 'ignore'
-    // - enforce incremental order (so index is rank in next communicator)
-
     auto& procIds = procIDs_[index];
-    procIds.resize_nocopy(subRanks.size());
 
-    label numSubRanks = 0;
-    bool monotonicOrder = true;
-    for (const label subRanki : subRanks)
+    // The selected sub-ranks.
+    // - transcribe from label to int
+    // - sort into monotonic order (if needed)
+    if
+    (
+        (withComponents && UPstream::parRun())
+      ? (parentIndex < 0 || subRanks.contains(myProcNo_[parentIndex]))
+      : !subRanks.empty()
+    )
     {
-        if (subRanki < 0)
+        procIds.resize_nocopy(subRanks.size());
+
+        label count = 0;
+        bool monotonicOrder = true;
+        for (const auto ranki : subRanks)
         {
-            continue;
+            if (ranki < 0)
+            {
+                continue;
+            }
+            // Could also flag/ignore out-of-range ranks
+            // (ranki >= numProcs)
+
+            if (monotonicOrder && count)
+            {
+                monotonicOrder = (procIds[count-1] < ranki);
+            }
+
+            procIds[count] = ranki;
+            ++count;
         }
-        if (monotonicOrder && numSubRanks)
+
+        if (!monotonicOrder)
         {
-            monotonicOrder = (procIds[numSubRanks-1] < subRanki);
+            auto last = procIds.begin() + count;
+            std::sort(procIds.begin(), last);
+            last = std::unique(procIds.begin(), last);
+            count = label(last - procIds.begin());
         }
 
-        procIds[numSubRanks] = subRanki;
-        ++numSubRanks;
+        procIds.resize(count);
     }
-
-    if (!monotonicOrder)
+    else
     {
-        auto last = procIds.begin() + numSubRanks;
-        std::sort(procIds.begin(), last);
-        last = std::unique(procIds.begin(), last);
-        numSubRanks = label(last - procIds.begin());
+        // Not involved
+        procIds.clear();
     }
 
-    procIds.resize(numSubRanks);
-
-    // Sizing and filling are demand-driven
-    linearCommunication_[index].clear();
-    treeCommunication_[index].clear();
-
-    if (withComponents && parRun())
+    if (withComponents && UPstream::parRun())
     {
         allocateCommunicatorComponents(parentIndex, index);
-
-        // Could 'remember' locations of uninvolved ranks
-        /// if (myProcNo_[index] < 0 && parentIndex >= 0)
-        /// {
-        ///     // As global rank
-        ///     myProcNo_[index] = -(myProcNo_[worldComm]+1);
-        ///
-        /// OR:
-        ///     // As parent rank number
-        ///     if (myProcNo_[parentIndex] >= 0)
-        ///     {
-        ///         myProcNo_[index] = -(myProcNo_[parentIndex]+1);
-        ///     }
-        /// }
     }
 
     return index;
 }
 
 
-Foam::label Foam::UPstream::allocateInterHostCommunicator
+Foam::label Foam::UPstream::dupCommunicator
 (
-    const label parentCommunicator
+    const label parentIndex
 )
 {
-    List<int> hostIDs = getHostGroupIds(parentCommunicator);
+    #ifdef FULLDEBUG
+    if (FOAM_UNLIKELY(parentIndex < 0))
+    {
+        // Failed sanity check
+        FatalErrorInFunction
+            << "Attempted to duplicate an invalid communicator: "
+            << parentIndex
+            << Foam::exit(FatalError);
+    }
+    #endif
 
-    DynamicList<label> subRanks(hostIDs.size());
+    const label index = getAvailableCommIndex(parentIndex);
 
-    // From master to host-leader. Ranks between hosts.
-    forAll(hostIDs, proci)
+    if (debug)
     {
-        // Is host leader?
-        if (hostIDs[proci] < 0)
-        {
-            subRanks.push_back(proci);
-        }
+        Perr<< "Duplicate communicator ["
+            << index << "] from [" << parentIndex << "]" << endl;
+    }
+
+    // Initially treat as unknown,
+    // overwritten by dupCommunicatorComponents
+    myProcNo_[index] = -1;
+    procIDs_[index].clear();
+
+    if (UPstream::parRun())
+    {
+        dupCommunicatorComponents(parentIndex, index);
     }
 
-    return allocateCommunicator(parentCommunicator, subRanks);
+    return index;
 }
 
 
-Foam::label Foam::UPstream::allocateIntraHostCommunicator
+Foam::label Foam::UPstream::splitCommunicator
 (
-    const label parentCommunicator
+    const label parentIndex,
+    const int colour
 )
 {
-    List<int> hostIDs = getHostGroupIds(parentCommunicator);
-
-    DynamicList<label> subRanks(hostIDs.size());
+    #ifdef FULLDEBUG
+    if (FOAM_UNLIKELY(parentIndex < 0))
+    {
+        // Failed sanity check
+        FatalErrorInFunction
+            << "Attempted to split an invalid communicator: "
+            << parentIndex
+            << Foam::exit(FatalError);
+    }
+    #endif
 
-    // Intra-host ranks. Ranks within a host
-    int myHostId = hostIDs[UPstream::myProcNo(parentCommunicator)];
-    if (myHostId < 0) myHostId = -(myHostId + 1);  // Flip to generic id
+    const label index = getAvailableCommIndex(parentIndex);
 
-    forAll(hostIDs, proci)
+    if (debug)
     {
-        int id = hostIDs[proci];
-        if (id < 0) id = -(id + 1);  // Flip to generic id
+        Perr<< "Split communicator ["
+            << index << "] from [" << parentIndex
+            << "] using colour=" << colour << endl;
+    }
 
-        if (id == myHostId)
-        {
-            subRanks.push_back(proci);
-        }
+    // Initially treat as unknown,
+    // overwritten by splitCommunicatorComponents
+    myProcNo_[index] = -1;
+    procIDs_[index].clear();
+
+    if (UPstream::parRun())
+    {
+        splitCommunicatorComponents(parentIndex, index, colour);
     }
 
-    return allocateCommunicator(parentCommunicator, subRanks);
+    return index;
 }
 
 
-bool Foam::UPstream::allocateHostCommunicatorPairs()
+bool Foam::UPstream::setHostCommunicators(const int numPerNode)
 {
-    // Use the world communicator (not global communicator)
-    const label parentCommunicator = worldComm;
+    // Uses the world communicator (not global communicator)
 
     // Skip if non-parallel
-    if (!parRun())
+    if (!UPstream::parRun())
     {
+        numNodes_ = 1;
         return false;
     }
 
-    if (interHostComm_ >= 0 || intraHostComm_ >= 0)
+    if (FOAM_UNLIKELY(commInterNode_ >= 0 || commLocalNode_ >= 0))
     {
         // Failed sanity check
         FatalErrorInFunction
-            << "Host communicator(s) already created!" << endl
-            << Foam::exit(FatalError);
+            << "Node communicator(s) already created!" << endl
+            << Foam::abort(FatalError);
         return false;
     }
 
-    interHostComm_ = getAvailableCommIndex(parentCommunicator);
-    intraHostComm_ = getAvailableCommIndex(parentCommunicator);
+    commInterNode_ = getAvailableCommIndex(constWorldComm_);
+    commLocalNode_ = getAvailableCommIndex(constWorldComm_);
+
+    // Overwritten later
+    myProcNo_[commInterNode_] = UPstream::masterNo();
+    myProcNo_[commLocalNode_] = UPstream::masterNo();
 
     // Sorted order, purely cosmetic
-    if (intraHostComm_ < interHostComm_)
+    if (commLocalNode_ < commInterNode_)
     {
-        std::swap(intraHostComm_, interHostComm_);
+        std::swap(commLocalNode_, commInterNode_);
     }
 
-    // Overwritten later
-    myProcNo_[intraHostComm_] = UPstream::masterNo();
-    myProcNo_[interHostComm_] = UPstream::masterNo();
-
     if (debug)
     {
-        Perr<< "Allocating host communicators "
-            << interHostComm_ << ", " << intraHostComm_ << nl
-            << "    parent : " << parentCommunicator << nl
+        Perr<< "Allocating node communicators "
+            << commInterNode_ << ", " << commLocalNode_
+            << " on parent : " << constWorldComm_ << nl
             << endl;
     }
 
-    List<int> hostIDs = getHostGroupIds(parentCommunicator);
+    const int worldRank = UPstream::myProcNo(constWorldComm_);
+    const int worldSize = UPstream::nProcs(constWorldComm_);
 
-    DynamicList<int> subRanks(hostIDs.size());
-
-    // From master to host-leader. Ranks between hosts.
+    if (numPerNode > 1)
     {
-        subRanks.clear();
-        forAll(hostIDs, proci)
+        // Manual splitting based on given number of ranks per node
+        const int myNodeId = (worldRank/numPerNode);
+
+        // Establish the topology
         {
-            // Is host leader?
-            if (hostIDs[proci] < 0)
-            {
-                subRanks.push_back(proci);
+            DynamicList<int> nodeGroup(numPerNode);
+            DynamicList<int> nodeLeader(1+worldSize/numPerNode);
 
-                // Flip to generic host id
-                hostIDs[proci] = -(hostIDs[proci] + 1);
+            for (int proci = 0; proci < worldSize; ++proci)
+            {
+                if (myNodeId == (proci/numPerNode))
+                {
+                    nodeGroup.push_back(proci);
+                }
+
+                if ((proci % numPerNode) == 0)
+                {
+                    // Local rank 0 is a node leader
+                    nodeLeader.push_back(proci);
+                }
             }
-        }
-
-        const label index = interHostComm_;
-
-        // Direct copy (subRanks is also int)
-        procIDs_[index] = subRanks;
 
-        // Implicitly: withComponents = true
-        if (parRun())  // Already checked...
-        {
-            allocateCommunicatorComponents(parentCommunicator, index);
+            procIDs_[commInterNode_] = std::move(nodeLeader);
+            procIDs_[commLocalNode_] = std::move(nodeGroup);
         }
-
-        // Sizing and filling are demand-driven
-        linearCommunication_[index].clear();
-        treeCommunication_[index].clear();
     }
-
-    // Intra-host ranks. Ranks within a host
+    else
     {
-        int myHostId = hostIDs[UPstream::myProcNo(parentCommunicator)];
-        if (myHostId < 0) myHostId = -(myHostId + 1);  // Flip to generic id
+        // Determine inter-host/intra-host grouping based on the SHA1 of the
+        // hostnames. This allows a single initial Allgather to establish
+        // the overall topology. The alternative is to use MPI_Comm_split_type()
+        // on SHARED and then MPI_Comm_split() on the leader ranks.
+
+        // Could also add lowercase etc, but since hostName()
+        // will be consistent within the same node, there is no need.
+        const SHA1Digest myDigest(SHA1(hostName()).digest());
+
+        List<SHA1Digest> digests(worldSize);
+        digests[worldRank] = myDigest;
+
+        // The fixed-length digest allows use of MPI_Allgather.
+        UPstream::mpiAllGather
+        (
+            digests.data_bytes(),       // Send/Recv
+            SHA1Digest::size_bytes(),   // Num send/recv data per rank
+            UPstream::constWorldComm_
+        );
 
-        subRanks.clear();
-        forAll(hostIDs, proci)
+        // Establish the topology
         {
-            int id = hostIDs[proci];
-            if (id < 0) id = -(id + 1);  // Flip to generic id
+            DynamicList<int> nodeGroup(64);
+            DynamicList<int> nodeLeader(64);
+            DynamicList<SHA1Digest> uniqDigests(64);
 
-            if (id == myHostId)
+            for (int proci = 0; proci < worldSize; ++proci)
             {
-                subRanks.push_back(proci);
+                const auto& dig = digests[proci];
+
+                if (myDigest == dig)
+                {
+                    nodeGroup.push_back(proci);
+                }
+
+                if (!uniqDigests.contains(dig))
+                {
+                    // First appearance of host
+                    uniqDigests.push_back(dig);
+                    nodeLeader.push_back(proci);
+                }
             }
+
+            procIDs_[commInterNode_] = std::move(nodeLeader);
+            procIDs_[commLocalNode_] = std::move(nodeGroup);
         }
+    }
 
-        const label index = intraHostComm_;
 
-        // Direct copy (subRanks is also int)
-        procIDs_[index] = subRanks;
+    // Capture the size (number of nodes) before doing anything further
+    numNodes_ = procIDs_[commInterNode_].size();
 
-        // Implicitly: withComponents = true
-        if (parRun())  // Already checked...
-        {
-            allocateCommunicatorComponents(parentCommunicator, index);
-        }
+    // ~~~~~~~~~
+    // IMPORTANT
+    // ~~~~~~~~~
+    // Always retain knowledge of the inter-node leaders,
+    // even if this process is not on that communicator.
+    // This will help when constructing topology-aware communication.
 
-        // Sizing and filling are demand-driven
-        linearCommunication_[index].clear();
-        treeCommunication_[index].clear();
-    }
+    // Allocate backend MPI components
+    allocateCommunicatorComponents(constWorldComm_, commInterNode_);
+    allocateCommunicatorComponents(constWorldComm_, commLocalNode_);
 
     return true;
 }
@@ -582,10 +551,6 @@ void Foam::UPstream::freeCommunicator
         return;
     }
 
-    // Update demand-driven communicators
-    if (interHostComm_ == communicator) interHostComm_ = -1;
-    if (intraHostComm_ == communicator) intraHostComm_ = -1;
-
     if (debug)
     {
         Perr<< "Communicators : Freeing communicator " << communicator
@@ -651,26 +616,24 @@ Foam::label Foam::UPstream::procNo
 }
 
 
-const Foam::List<Foam::UPstream::commsStruct>&
+const Foam::UPstream::commsStructList&
 Foam::UPstream::linearCommunication(const label communicator)
 {
     if (linearCommunication_[communicator].empty())
     {
-        linearCommunication_[communicator] =
-            List<commsStruct>(UPstream::nProcs(communicator));
+        linearCommunication_[communicator].init(communicator);
     }
 
     return linearCommunication_[communicator];
 }
 
 
-const Foam::List<Foam::UPstream::commsStruct>&
+const Foam::UPstream::commsStructList&
 Foam::UPstream::treeCommunication(const label communicator)
 {
     if (treeCommunication_[communicator].empty())
     {
-        treeCommunication_[communicator] =
-            List<commsStruct>(UPstream::nProcs(communicator));
+        treeCommunication_[communicator].init(communicator);
     }
 
     return treeCommunication_[communicator];
@@ -683,50 +646,28 @@ void Foam::UPstream::printCommTree(const label communicator)
 
     if (UPstream::master(communicator))
     {
-        commsStruct::printGraph(Info(), comms);
-    }
-}
-
-
-Foam::label Foam::UPstream::commIntraHost()
-{
-    if (!parRun())
-    {
-        return worldComm;  // Don't know anything better to return
-    }
-    if (intraHostComm_ < 0)
-    {
-        allocateHostCommunicatorPairs();
-    }
-    return intraHostComm_;
-}
-
-
-Foam::label Foam::UPstream::commInterHost()
-{
-    if (!parRun())
-    {
-        return worldComm;  // Don't know anything better to return
-    }
-    if (interHostComm_ < 0)
-    {
-        allocateHostCommunicatorPairs();
+        comms.printGraph(Info());
     }
-    return interHostComm_;
 }
 
 
-bool Foam::UPstream::hasHostComms()
+bool Foam::UPstream::usingNodeComms(const label communicator)
 {
-    return (intraHostComm_ >= 0 || interHostComm_ >= 0);
-}
-
+    // Starting point must be "real" world-communicator
+    // ("real" means without any local trickery with worldComm)
+    // Avoid corner cases:
+    // - everything is on one node
+    // - everything is on different nodes
 
-void Foam::UPstream::clearHostComms()
-{
-    // Always with Pstream
-    freeCommunicator(intraHostComm_, true);
-    freeCommunicator(interHostComm_, true);
+    return
+    (
+        parRun_ && (constWorldComm_ == communicator)
+     && (nodeCommsControl_ > 0)
+        // More than one node and above defined threshold
+     && (numNodes_ > 1) && (numNodes_ >= nodeCommsMin_)
+        // Some processes do share nodes
+     && (numNodes_ < procIDs_[constWorldComm_].size())
+    );
 }
 
 
@@ -749,17 +690,19 @@ Foam::DynamicList<Foam::List<int>> Foam::UPstream::procIDs_(16);
 Foam::DynamicList<Foam::label> Foam::UPstream::parentComm_(16);
 Foam::DynamicList<Foam::label> Foam::UPstream::freeComms_;
 
-Foam::DynamicList<Foam::List<Foam::UPstream::commsStruct>>
+Foam::DynamicList<Foam::UPstream::commsStructList>
 Foam::UPstream::linearCommunication_(16);
 
-Foam::DynamicList<Foam::List<Foam::UPstream::commsStruct>>
+Foam::DynamicList<Foam::UPstream::commsStructList>
 Foam::UPstream::treeCommunication_(16);
 
 
-Foam::label Foam::UPstream::intraHostComm_(-1);
-Foam::label Foam::UPstream::interHostComm_(-1);
+Foam::label Foam::UPstream::constWorldComm_(0);
+Foam::label Foam::UPstream::numNodes_(1);
+Foam::label Foam::UPstream::commInterNode_(-1);
+Foam::label Foam::UPstream::commLocalNode_(-1);
 
-Foam::label Foam::UPstream::worldComm(0);
+Foam::label Foam::UPstream::worldComm(0);  // Initially same as constWorldComm_
 Foam::label Foam::UPstream::warnComm(-1);
 
 
@@ -767,16 +710,39 @@ Foam::label Foam::UPstream::warnComm(-1);
 // These are overwritten in parallel mode (by UPstream::setParRun())
 const Foam::label nPredefinedComm = []()
 {
-    // 0: COMM_WORLD : commWorld() / commGlobal()
-    (void) Foam::UPstream::allocateCommunicator(-1, Foam::labelRange(1), false);
+    // 0: COMM_WORLD : commGlobal(), constWorldComm_, worldComm
+    (void) Foam::UPstream::newCommunicator(-1, Foam::labelRange(1), false);
 
     // 1: COMM_SELF
-    (void) Foam::UPstream::allocateCommunicator(-2, Foam::labelRange(1), false);
+    (void) Foam::UPstream::newCommunicator(-2, Foam::labelRange(1), false);
 
     return Foam::UPstream::nComms();
 }();
 
 
+int Foam::UPstream::nodeCommsControl_
+(
+    Foam::debug::optimisationSwitch("nodeComms", 1)
+);
+registerOptSwitch
+(
+    "nodeComms",
+    int,
+    Foam::UPstream::nodeCommsControl_
+);
+
+int Foam::UPstream::nodeCommsMin_
+(
+    Foam::debug::optimisationSwitch("nodeComms.min", 0)
+);
+registerOptSwitch
+(
+    "nodeComms.min",
+    int,
+    Foam::UPstream::nodeCommsMin_
+);
+
+
 bool Foam::UPstream::floatTransfer
 (
     Foam::debug::optimisationSwitch("floatTransfer", 0)
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
index e737bc71ff97fadb540178e26224fedb7d935796..b7df5c279ee4da7d96784d24025b54242e3783bb 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
@@ -151,19 +151,10 @@ public:
 
             // Member Functions
 
-                //- Print un-directed graph in graphviz dot format
-                static void printGraph
-                (
-                    Ostream& os,
-                    const UList<UPstream::commsStruct>& comms,
-                    const label proci = 0  // starting node
-                );
-
-
             // Access
 
                 //- The number of processors addressed by the structure
-                label nProcs() const;
+                label nProcs() const noexcept;
 
                 //- The procID of the processor \em directly above
                 label above() const noexcept { return above_; }
@@ -188,8 +179,14 @@ public:
                 //- Reset to default constructed state
                 void reset();
 
-                //- Reset with automatic linear/tree selection
-                void reset(const label procID, const label numProcs);
+                //- Reset (automatic linear/tree selection),
+                //- possibly with communicator-specific adjustments
+                void reset
+                (
+                    const label procID,
+                    const label numProcs,
+                    const label comm = -1
+                );
 
 
             // Member / Friend Operators
@@ -200,6 +197,67 @@ public:
                 friend Ostream& operator<<(Ostream&, const commsStruct&);
         };
 
+        //- Collection of communication structures
+        class commsStructList
+        {
+            // Private Data
+
+                //- The communicator index
+                label comm_;
+
+                //- The communication tree
+                List<commsStruct> tree_;
+
+        public:
+
+            // Constructors
+
+                //- Construct empty with invalid communicator
+                commsStructList() noexcept : comm_(-1) {}
+
+                //- Construct empty with given communicator
+                commsStructList(label comm) noexcept : comm_(comm) {}
+
+
+            // Static Functions
+
+                //- An empty structure. Used for placeholders etc.
+                static const commsStructList& null();
+
+
+            // Member Functions
+
+                //- True if communicator is non-negative (ie, was assigned)
+                bool good() const noexcept { return (comm_ >= 0); }
+
+                //- The communicator label
+                label comm() const noexcept { return comm_; }
+
+                //- Clear the list
+                void clear() { return tree_.clear(); }
+
+                //- True if the list is empty
+                bool empty() const noexcept { return tree_.empty(); }
+
+                //- The number of entries
+                label size() const noexcept { return tree_.size(); }
+
+                //- Reset communicator index and clear demand-driven entries
+                void init(const label comm);
+
+                //- Get existing or create (demand-driven) entry
+                const UPstream::commsStruct& get(const label proci) const;
+
+                //- Get existing or create (demand-driven) entry
+                const UPstream::commsStruct& operator[](const label proci) const
+                {
+                    return get(proci);
+                }
+
+                //- Print un-directed graph in graphviz dot format
+                void printGraph(Ostream& os, label proci = 0) const;
+        };
+
 
 private:
 
@@ -220,18 +278,28 @@ private:
         //- Standard transfer message type
         static int msgType_;
 
+        //- Index to the world-communicator as defined at startup
+        //- (after any multi-world definitions).
+        //- Is unaffected by any later changes to worldComm.
+        static label constWorldComm_;
+
+        //- The number of shared/host nodes in the (const) world communicator.
+        static label numNodes_;
+
+        //- Index to the inter-node communicator (between nodes),
+        //- defined based on constWorldComm_
+        static label commInterNode_;
+
+        //- Index to the intra-host communicator (within a node),
+        //- defined based on constWorldComm_
+        static label commLocalNode_;
+
         //- Names of all worlds
         static wordList allWorlds_;
 
         //- Per processor the world index (into allWorlds_)
         static labelList worldIDs_;
 
-        //- Intra-host communicator
-        static label intraHostComm_;
-
-        //- Inter-host communicator (between host leaders)
-        static label interHostComm_;
-
 
     // Communicator specific data
 
@@ -248,10 +316,10 @@ private:
         static DynamicList<label> freeComms_;
 
         //- Linear communication schedule
-        static DynamicList<List<commsStruct>> linearCommunication_;
+        static DynamicList<commsStructList> linearCommunication_;
 
         //- Multi level communication schedule
-        static DynamicList<List<commsStruct>> treeCommunication_;
+        static DynamicList<commsStructList> treeCommunication_;
 
 
     // Private Member Functions
@@ -259,24 +327,59 @@ private:
         //- Set data for parallel running
         static void setParRun(const label nProcs, const bool haveThreads);
 
-        //- Initialise entries for new communicator. Return the index
+        //- Initialise entries for new communicator.
+        //
+        //  Resets corresponding entry in myProcNo_, procIDs_,
+        //  linearCommunication_, treeCommunication_
+        //  \return the communicator index
         static label getAvailableCommIndex(const label parentIndex);
 
-        //- Allocate MPI components of communicator with given index
+        //- Define inter-host/intra-host communicators (uses commConstWorld).
+        //  Optionally specify a given number per node.
+        static bool setHostCommunicators(const int numPerNode = 0);
+
+        //- Define inter-host/intra-host communicators based on
+        //- shared-memory information. Uses comm-world.
+        static bool setSharedMemoryCommunicators();
+
+        //- Allocate MPI components of communicator with given index.
+        //  This represents a "top-down" approach, creating a communicator
+        //  based on the procIDs_ groupings.
+        //
+        //  Modifies myProcNo_, reads and modifies procIDs_
         static void allocateCommunicatorComponents
         (
             const label parentIndex,
             const label index
         );
 
+        //- Allocate MPI components as duplicate of the parent communicator
+        //
+        //  Modifies myProcNo_, procIDs_
+        static void dupCommunicatorComponents
+        (
+            const label parentIndex,
+            const label index
+        );
+
+        //- Allocate MPI components for the given index by splitting
+        //- the parent communicator on the given \em colour.
+        //  This represents a "bottom-up" approach, when the individual ranks
+        //  only know which group they should belong to, but don't yet know
+        //  which other ranks will be in their group.
+        //
+        //  Modifies myProcNo_, procIDs_
+        static void splitCommunicatorComponents
+        (
+            const label parentIndex,
+            const label index,
+            const int colour
+        );
+
         //- Free MPI components of communicator.
         //  Does not touch the first two communicators (SELF, WORLD)
         static void freeCommunicatorComponents(const label index);
 
-        //- Allocate inter-host, intra-host communicators
-        //- with comm-world as parent
-        static bool allocateHostCommunicatorPairs();
-
 
 public:
 
@@ -286,6 +389,18 @@ public:
 
     // Static Data
 
+        //- Use of host/node topology-aware routines
+        //  0: disabled
+        //  1: split by hostname [default]
+        //  2: split by shared
+        //  >=4: (debug) split with given number per node
+        static int nodeCommsControl_;
+
+        //- Minimum number of nodes before topology-aware routines are enabled
+        //  <= 2 : always
+        //  >= 3 : when there are more than N nodes
+        static int nodeCommsMin_;
+
         //- Should compact transfer be used in which floats replace doubles
         //- reducing the bandwidth requirement at the expense of some loss
         //- in accuracy
@@ -323,12 +438,19 @@ public:
         //- Debugging: warn for use of any communicator differing from warnComm
         static label warnComm;
 
-        //- Communicator for all ranks, irrespective of any local worlds
+        //- Communicator for all ranks, irrespective of any local worlds.
+        //  This value \em never changes during a simulation.
         static constexpr label commGlobal() noexcept { return 0; }
 
         //- Communicator within the current rank only
+        //  This value \em never changes during a simulation.
         static constexpr label commSelf() noexcept { return 1; }
 
+        //- Communicator for all ranks (respecting any local worlds).
+        //  This value \em never changes after startup. Unlike the commWorld()
+    //  which can be temporarily overridden.
+        static label commConstWorld() noexcept { return constWorldComm_; }
+
         //- Communicator for all ranks (respecting any local worlds)
         static label commWorld() noexcept { return worldComm; }
 
@@ -343,6 +465,7 @@ public:
 
         //- Alter communicator debugging setting.
         //- Warns for use of any communicator differing from specified.
+        //- Negative values disable.
         //  \returns the previous warn index
         static label commWarn(const label communicator) noexcept
         {
@@ -360,17 +483,33 @@ public:
 
     // Host Communicators
 
-        //- Demand-driven: Intra-host communicator (respects any local worlds)
-        static label commIntraHost();
+        //- Communicator between nodes/hosts (respects any local worlds)
+        static label commInterNode() noexcept
+        {
+            return (parRun_ ? commInterNode_ : constWorldComm_);
+        }
 
-        //- Demand-driven: Inter-host communicator (respects any local worlds)
-        static label commInterHost();
+        //- Communicator within the node/host (respects any local worlds)
+        static label commLocalNode() noexcept
+        {
+            return (parRun_ ? commLocalNode_ : constWorldComm_);
+        }
 
-        //- Test for presence of any intra or inter host communicators
-        static bool hasHostComms();
+        //- Both inter-node and local-node communicators have been created
+        static bool hasNodeCommunicators() noexcept
+        {
+            return
+            (
+                (commInterNode_ > constWorldComm_)
+             && (commLocalNode_ > constWorldComm_)
+            );
+        }
 
-        //- Remove any existing intra and inter host communicators
-        static void clearHostComms();
+        //- True if node topology-aware routines have been enabled,
+        //- it is running in parallel, the starting point is the
+        //- world-communicator and it is not an odd corner case
+        //- (ie, all processes on one node, all processes on different nodes)
+        static bool usingNodeComms(const label communicator = worldComm);
 
 
     // Constructors
@@ -384,9 +523,8 @@ public:
 
     // Member Functions
 
-        //- Allocate new communicator with contiguous sub-ranks
-        //- on the parent communicator.
-        static label allocateCommunicator
+        //- Create new communicator with sub-ranks on the parent communicator
+        static label newCommunicator
         (
             //! The parent communicator
             const label parent,
@@ -398,8 +536,8 @@ public:
             const bool withComponents = true
         );
 
-        //- Allocate new communicator with sub-ranks on the parent communicator
-        static label allocateCommunicator
+        //- Create new communicator with sub-ranks on the parent communicator
+        static label newCommunicator
         (
             //! The parent communicator
             const label parent,
@@ -411,27 +549,36 @@ public:
             const bool withComponents = true
         );
 
-        //- Free a previously allocated communicator.
-        //  Ignores placeholder (negative) communicators.
-        static void freeCommunicator
+        //- Duplicate the parent communicator
+        //
+        //  Always calls dupCommunicatorComponents() internally
+        static label dupCommunicator
         (
-            const label communicator,
-            const bool withComponents = true
+            //! The parent communicator
+            const label parent
         );
 
-        //- Allocate an inter-host communicator
-        static label allocateInterHostCommunicator
+        //- Allocate a new communicator by splitting the parent communicator
+        //- on the given \em colour.
+        //  Always calls splitCommunicatorComponents() internally
+        static label splitCommunicator
         (
-            const label parentCommunicator = worldComm
+            //! The parent communicator
+            const label parent,
+
+            //! The colouring to select which ranks to include.
+            //! Negative values correspond to 'ignore'
+            const int colour
         );
 
-        //- Allocate an intra-host communicator
-        static label allocateIntraHostCommunicator
+        //- Free a previously allocated communicator.
+        //  Ignores placeholder (negative) communicators.
+        static void freeCommunicator
         (
-            const label parentCommunicator = worldComm
+            const label communicator,
+            const bool withComponents = true
         );
 
-
         //- Wrapper class for allocating/freeing communicators. Always invokes
         //- allocateCommunicatorComponents() and freeCommunicatorComponents()
         class communicator
@@ -457,12 +604,11 @@ public:
             (
                 //! The parent communicator
                 const label parentComm,
-
                 //! The contiguous sub-ranks of parent to use
                 const labelRange& subRanks
             )
             :
-                comm_(UPstream::allocateCommunicator(parentComm, subRanks))
+                comm_(UPstream::newCommunicator(parentComm, subRanks))
             {}
 
             //- Allocate communicator for sub-ranks on given parent
@@ -470,14 +616,38 @@ public:
             (
                 //! The parent communicator
                 const label parentComm,
-
                 //! The sub-ranks of parent to use (negative values ignored)
                 const labelUList& subRanks
             )
             :
-                comm_(UPstream::allocateCommunicator(parentComm, subRanks))
+                comm_(UPstream::newCommunicator(parentComm, subRanks))
             {}
 
+            //- Factory Method :
+            //- Duplicate the given communicator
+            static communicator duplicate(const label parentComm)
+            {
+                communicator c;
+                c.comm_ = UPstream::dupCommunicator(parentComm);
+                return c;
+            }
+
+            //- Factory Method :
+            //- Split the communicator on the given \em colour.
+            static communicator split
+            (
+                //! The parent communicator
+                const label parentComm,
+                //! The colouring to select which ranks to include.
+                //! Negative values correspond to 'ignore'
+                const int colour
+            )
+            {
+                communicator c;
+                c.comm_ = UPstream::splitCommunicator(parentComm, colour);
+                return c;
+            }
+
             //- Free allocated communicator
             ~communicator() { UPstream::freeCommunicator(comm_); }
 
@@ -498,14 +668,14 @@ public:
             void reset(label parent, const labelRange& subRanks)
             {
                 UPstream::freeCommunicator(comm_);
-                comm_ = UPstream::allocateCommunicator(parent, subRanks);
+                comm_ = UPstream::newCommunicator(parent, subRanks);
             }
 
             //- Allocate with sub-ranks of parent communicator
             void reset(label parent, const labelUList& subRanks)
             {
                 UPstream::freeCommunicator(comm_);
-                comm_ = UPstream::allocateCommunicator(parent, subRanks);
+                comm_ = UPstream::newCommunicator(parent, subRanks);
             }
 
             //- Take ownership, free allocated communicator
@@ -805,7 +975,7 @@ public:
         }
 
         //- Rank of this process in the communicator (starting from masterNo()).
-        //- Can be negative if the process is not a rank in the communicator
+        //- Negative if the process is not a rank in the communicator.
         static int myProcNo(const label communicator = worldComm)
         {
             return myProcNo_[communicator];
@@ -817,11 +987,11 @@ public:
             return myProcNo_[communicator] == masterNo();
         }
 
-        //- True if process corresponds to any rank (master or sub-rank)
+        //- True if process corresponds to \b any rank (master or sub-rank)
         //- in the given communicator
         static bool is_rank(const label communicator = worldComm)
         {
-            return myProcNo_[communicator] >= 0;
+            return myProcNo_[communicator] >= masterNo();
         }
 
         //- True if process corresponds to a sub-rank in the given communicator
@@ -842,6 +1012,12 @@ public:
             );
         }
 
+        //- The number of shared/host nodes in the (const) world communicator.
+        static label numNodes() noexcept
+        {
+            return numNodes_;
+        }
+
         //- The parent communicator
         static label parent(const label communicator)
         {
@@ -899,15 +1075,13 @@ public:
         }
 
         //- Communication schedule for linear all-to-master (proc 0)
-        static const List<commsStruct>&
-        linearCommunication
+        static const commsStructList& linearCommunication
         (
             const label communicator = worldComm
         );
 
         //- Communication schedule for tree all-to-master (proc 0)
-        static const List<commsStruct>&
-        treeCommunication
+        static const commsStructList& treeCommunication
         (
             const label communicator = worldComm
         );
@@ -915,7 +1089,7 @@ public:
         //- Communication schedule for all-to-master (proc 0) as
         //- linear/tree/none with switching based on UPstream::nProcsSimpleSum
         //- and the is_parallel() state
-        static const List<commsStruct>& whichCommunication
+        static const commsStructList& whichCommunication
         (
             const label communicator = worldComm
         )
@@ -930,8 +1104,8 @@ public:
             return
             (
                 np <= 1
-              ? List<commsStruct>::null()
-              : np < nProcsSimpleSum
+              ? commsStructList::null()
+              : (np <= 2 || np < nProcsSimpleSum)
               ? linearCommunication(communicator)
               : treeCommunication(communicator)
             );
@@ -983,7 +1157,7 @@ public:
         static void shutdown(int errNo = 0);
 
         //- Call MPI_Abort with no other checks or cleanup
-        static void abort();
+        static void abort(int errNo = 1);
 
         //- Shutdown (finalize) MPI as required and exit program with errNo.
         static void exit(int errNo = 1);
@@ -1205,27 +1379,43 @@ public:
 
     // Housekeeping
 
-        //- Wait for all requests to finish.
-        //  \deprecated(2023-01) Probably not what you want.
-        //     Should normally be restricted to a particular starting request.
-        FOAM_DEPRECATED_FOR(2023-01, "waitRequests(int) method")
-        static void waitRequests() { waitRequests(0); }
-
-        //- Process index of first sub-process
-        //  \deprecated(2020-09) use subProcs() method instead
-        FOAM_DEPRECATED_FOR(2020-09, "subProcs() method")
-        static constexpr int firstSlave() noexcept
+        //- Create new communicator with sub-ranks on the parent communicator
+        //  \deprecated(2025-02)
+        static label allocateCommunicator
+        (
+            const label parent,
+            const labelRange& subRanks,
+            const bool withComponents = true
+        )
         {
-            return 1;
+            return newCommunicator(parent, subRanks, withComponents);
         }
 
-        //- Process index of last sub-process
-        //  \deprecated(2020-09) use subProcs() method instead
-        FOAM_DEPRECATED_FOR(2020-09, "subProcs() or allProcs() method")
-        static int lastSlave(const label communicator = worldComm)
+        //- Create new communicator with sub-ranks on the parent communicator
+        //  \deprecated(2025-02)
+        static label allocateCommunicator
+        (
+            const label parent,
+            const labelUList& subRanks,
+            const bool withComponents = true
+        )
         {
-            return nProcs(communicator) - 1;
+            return newCommunicator(parent, subRanks, withComponents);
         }
+
+        //- Communicator between nodes (respects any local worlds)
+        FOAM_DEPRECATED_FOR(2025-02, "commInterNode()")
+        static label commInterHost() noexcept { return commInterNode(); }
+
+        //- Communicator within the node (respects any local worlds)
+        FOAM_DEPRECATED_FOR(2025-02, "commLocalNode()")
+        static label commIntraHost() noexcept { return commLocalNode(); }
+
+        //- Wait for all requests to finish.
+        //  \deprecated(2023-01) Probably not what you want.
+        //     Should normally be restricted to a particular starting request.
+        FOAM_DEPRECATED_FOR(2023-01, "waitRequests(int) method")
+        static void waitRequests() { waitRequests(0); }
 };
 
 
@@ -1447,18 +1637,6 @@ public:
 Ostream& operator<<(Ostream&, const UPstream::commsStruct&);
 
 
-// * * * * * * * * * * * * Template Specialisations  * * * * * * * * * * * * //
-
-// Template specialisation for access of commsStruct
-template<>
-UPstream::commsStruct&
-UList<UPstream::commsStruct>::operator[](const label procID);
-
-template<>
-const UPstream::commsStruct&
-UList<UPstream::commsStruct>::operator[](const label procID) const;
-
-
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
 } // End namespace Foam
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C b/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C
index 7aa432170bb2e2c281b0dfabfd9dfc1dfc95d381..72a41bba26823d08b76b6fe08157c33000aa7804 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstreamCommsStruct.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2016 OpenFOAM Foundation
-    Copyright (C) 2021-2023 OpenCFD Ltd.
+    Copyright (C) 2021-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -28,6 +28,178 @@ License
 
 #include "UPstream.H"
 
+// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
+
+namespace Foam
+{
+
+// This outputs as depth-first, but graphviz sorts that for us
+static void printGraph_impl
+(
+    Ostream& os,
+    const UPstream::commsStructList& comms,
+    const label proci,
+    label depth,
+    const label maxDepth = 1024
+)
+{
+    if (proci >= comms.size())
+    {
+        // Corner case when only a single rank involved
+        // (eg, for node-local communicator)
+        return;
+    }
+
+    const auto& below = comms[proci].below();
+
+    if (proci == 0)
+    {
+        os << nl << "// communication graph:" << nl;
+        os.beginBlock("graph");
+
+        // Prefer left-to-right layout for large graphs
+        os << indent << "rankdir=LR" << nl;
+
+        if (below.empty())
+        {
+            // A graph with a single-node (eg, self-comm)
+            os << indent << proci << nl;
+        }
+    }
+
+    int pos = 0;
+
+    for (const auto nbrProci : below)
+    {
+        if (pos)
+        {
+            os << "  ";
+        }
+        else
+        {
+            os << indent;
+        }
+        os << proci << " -- " << nbrProci;
+
+        if (++pos >= 4)  // Max 4 items per line
+        {
+            pos = 0;
+            os << nl;
+        }
+    }
+
+    if (pos)
+    {
+        os << nl;
+    }
+
+    // Limit the maximum depth
+    ++depth;
+    if (depth >= maxDepth && (proci != 0))
+    {
+        return;
+    }
+
+    for (const auto nbrProci : below)
+    {
+        // if (proci == nbrProci) continue;  // Extreme safety!
+        printGraph_impl(os, comms, nbrProci, depth, maxDepth);
+    }
+
+    if (proci == 0)
+    {
+        os.endBlock();
+
+        os << "// end graph" << nl;
+    }
+}
+
+} // End namespace Foam
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+// Create a tree-like schedule. For 8 procs:
+// (level 0)
+//      0 receives from 1
+//      2 receives from 3
+//      4 receives from 5
+//      6 receives from 7
+// (level 1)
+//      0 receives from 2
+//      4 receives from 6
+// (level 2)
+//      0 receives from 4
+//
+// The sends/receives for all levels are collected per processor
+// (one send per processor; multiple receives possible) creating
+// a table:
+//
+// So per processor:
+// proc     receives from   sends to
+// ----     -------------   --------
+//  0       1,2,4           -
+//  1       -               0
+//  2       3               0
+//  3       -               2
+//  4       5               0
+//  5       -               4
+//  6       7               4
+//  7       -               6
+
+namespace Foam
+{
+
+static label simpleTree
+(
+    const label procID,
+    const label numProcs,
+
+    DynamicList<label>& below,
+    DynamicList<label>& allBelow
+)
+{
+    label above(-1);
+
+    for (label mod = 2, step = 1; step < numProcs; step = mod)
+    {
+        mod = step * 2;
+
+        if (procID % mod)
+        {
+            // The rank above
+            above = procID - (procID % mod);
+            break;
+        }
+        else
+        {
+            for
+            (
+                label j = procID + step;
+                j < numProcs && j < procID + mod;
+                j += step
+            )
+            {
+                below.push_back(j);
+            }
+            for
+            (
+                label j = procID + step;
+                j < numProcs && j < procID + mod;
+                j++
+            )
+            {
+                allBelow.push_back(j);
+            }
+        }
+    }
+
+    return above;
+}
+
+} // End namespace Foam
+
+
 // * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
 
 Foam::UPstream::commsStruct::commsStruct
@@ -91,74 +263,26 @@ Foam::UPstream::commsStruct::commsStruct
 
 // * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * //
 
-// This outputs as depth-first, but graphviz sorts that for us
-void Foam::UPstream::commsStruct::printGraph
+void Foam::UPstream::commsStructList::printGraph
 (
     Ostream& os,
-    const UList<UPstream::commsStruct>& comms,
     const label proci
-)
+) const
 {
-    // if (proci >= comms.size()) return;  // Extreme safety!
-
-    const auto& below = comms[proci].below();
-
-    if (proci == 0)
-    {
-        os << nl << "// communication graph:" << nl;
-        os.beginBlock("graph");
-
-        if (below.empty())
-        {
-            // A graph with a single-node (eg, self-comm)
-            os << indent << proci << nl;
-        }
-    }
-
-    int pos = 0;
-
-    for (const label nbrProci : below)
-    {
-        if (pos)
-        {
-            os << "  ";
-        }
-        else
-        {
-            os << indent;
-        }
-        os << proci << " -- " << nbrProci;
+    // Print graph - starting at depth 0
+    // Avoid corner case when only a single rank involved
+    // (eg, for node-local communicator)
 
-        if (++pos >= 4)  // Max 4 items per line
-        {
-            pos = 0;
-            os << nl;
-        }
-    }
-
-    if (pos)
+    if (proci < size())
     {
-        os << nl;
-    }
-
-    for (const label nbrProci : below)
-    {
-        // if (proci == nbrProci) continue;  // Extreme safety!
-        printGraph(os, comms, nbrProci);
-    }
-
-    if (proci == 0)
-    {
-        os.endBlock();
-
-        os << "// end graph" << nl;
+        printGraph_impl(os, *this, proci, 0);
     }
 }
 
 
 // * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
 
-Foam::label Foam::UPstream::commsStruct::nProcs() const
+Foam::label Foam::UPstream::commsStruct::nProcs() const noexcept
 {
     return (1 + allBelow_.size() + allNotBelow_.size());
 }
@@ -176,121 +300,88 @@ void Foam::UPstream::commsStruct::reset()
 void Foam::UPstream::commsStruct::reset
 (
     const label procID,
-    const label numProcs
+    const label numProcs,
+    [[maybe_unused]] const label comm
 )
 {
     reset();
 
-    label above(-1);
-    DynamicList<label> below;
-    DynamicList<label> allBelow;
-
-    if (numProcs < UPstream::nProcsSimpleSum)
+    if (numProcs <= 2 || numProcs < UPstream::nProcsSimpleSum)
     {
-        // Linear schedule
+        // Linear communication pattern
+        label above(-1);
+        labelList below;
 
         if (procID == 0)
         {
             below = identity(numProcs-1, 1);
-            allBelow = below;
         }
         else
         {
             above = 0;
         }
-    }
-    else
-    {
-        // Use tree like schedule. For 8 procs:
-        // (level 0)
-        //      0 receives from 1
-        //      2 receives from 3
-        //      4 receives from 5
-        //      6 receives from 7
-        // (level 1)
-        //      0 receives from 2
-        //      4 receives from 6
-        // (level 2)
-        //      0 receives from 4
-        //
-        // The sends/receives for all levels are collected per processor
-        // (one send per processor; multiple receives possible) creating
-        // a table:
-        //
-        // So per processor:
-        // proc     receives from   sends to
-        // ----     -------------   --------
-        //  0       1,2,4           -
-        //  1       -               0
-        //  2       3               0
-        //  3       -               2
-        //  4       5               0
-        //  5       -               4
-        //  6       7               4
-        //  7       -               6
-
-        label mod = 0;
-
-        for (label step = 1; step < numProcs; step = mod)
-        {
-            mod = step * 2;
 
-            if (procID % mod)
-            {
-                above = procID - (procID % mod);
-                break;
-            }
-            else
-            {
-                for
-                (
-                    label j = procID + step;
-                    j < numProcs && j < procID + mod;
-                    j += step
-                )
-                {
-                    below.push_back(j);
-                }
-                for
-                (
-                    label j = procID + step;
-                    j < numProcs && j < procID + mod;
-                    j++
-                )
-                {
-                    allBelow.push_back(j);
-                }
-            }
-        }
+        *this = UPstream::commsStruct(numProcs, procID, above, below, below);
+        return;
     }
 
+
+    // Simple tree communication pattern
+    DynamicList<label> below;
+    DynamicList<label> allBelow;
+
+    label above = simpleTree
+    (
+        procID,
+        numProcs,
+        below,
+        allBelow
+    );
+
     *this = UPstream::commsStruct(numProcs, procID, above, below, allBelow);
 }
 
 
-// * * * * * * * * * * * * * * * Specializations * * * * * * * * * * * * * * //
+// * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * //
 
-template<>
-Foam::UPstream::commsStruct&
-Foam::UList<Foam::UPstream::commsStruct>::operator[](const label procID)
+const Foam::UPstream::commsStructList&
+Foam::UPstream::commsStructList::null()
 {
-    auto& val = this->v_[procID];   // or this->data()[procID]
+    static std::unique_ptr<commsStructList> singleton;
 
-    if (val.nProcs() != size())
+    if (!singleton)
     {
-        // Create/update
-        val.reset(procID, size());
+        singleton = std::make_unique<commsStructList>();
     }
 
-    return val;
+    return *singleton;
+}
+
+
+// * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
+
+void Foam::UPstream::commsStructList::init(const label comm)
+{
+    comm_ = comm;
+    tree_.clear();
+    tree_.resize(UPstream::nProcs(comm));
 }
 
 
-template<>
 const Foam::UPstream::commsStruct&
-Foam::UList<Foam::UPstream::commsStruct>::operator[](const label procID) const
+Foam::UPstream::commsStructList::get(const label proci) const
 {
-    return const_cast<UList<UPstream::commsStruct>&>(*this).operator[](procID);
+    const UPstream::commsStruct& entry = tree_[proci];
+    const auto numProcs = tree_.size();
+
+    if (entry.nProcs() != numProcs)
+    {
+        // Create/update
+        const_cast<UPstream::commsStruct&>(entry)
+            .reset(proci, numProcs, comm_);
+    }
+
+    return entry;
 }
 
 
diff --git a/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H b/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H
index ad95d44d123fb7c15bb5811c6441ee939a11165b..d69afb0cc4e1213d9d222ed9f1c7242e0cd63635 100644
--- a/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H
+++ b/src/OpenFOAM/db/IOstreams/Sstreams/prefixOSstream.H
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2014 OpenFOAM Foundation
-    Copyright (C) 2020-2023 OpenCFD Ltd.
+    Copyright (C) 2020-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -58,8 +58,10 @@ class prefixOSstream
 {
     // Private Data
 
+        //- Prefix printing is active
         bool printPrefix_;
 
+        //- The prefix to add
         string prefix_;
 
 
@@ -84,9 +86,9 @@ public:
 
     // Member Functions
 
-    // Enquiry
+    // Decorators
 
-        //- Return the stream prefix
+        //- The stream prefix
         const string& prefix() const noexcept { return prefix_; }
 
         //- Return non-const access to the stream prefix
diff --git a/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C b/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C
index 0b5cdd1ab4daefdc96fb01ea5fe7bdd6311bbc0e..d90fe722be18ebb4f79342f22c92e2ccfff578de 100644
--- a/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C
+++ b/src/OpenFOAM/db/dictionary/functionEntries/evalEntry/evalEntry.C
@@ -34,7 +34,7 @@ License
 #include <cctype>
 
 #undef  DetailInfo
-#define DetailInfo  if (::Foam::infoDetailLevel > 0) InfoErr
+#define DetailInfo  if (::Foam::infoDetailLevel > 0) ::Foam::InfoErr
 
 
 // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
diff --git a/src/OpenFOAM/db/error/messageStream.H b/src/OpenFOAM/db/error/messageStream.H
index 9cc61264f0dc6c3002c3377729e9c2cb7ca8bc7e..e22d8d0abec143ec168947b85f822bee5f35fa90 100644
--- a/src/OpenFOAM/db/error/messageStream.H
+++ b/src/OpenFOAM/db/error/messageStream.H
@@ -401,15 +401,15 @@ extern messageStream SeriousError;
 
 //- Write to Foam::Info if the Foam::infoDetailLevel is +ve non-zero (default)
 #define DetailInfo                                                             \
-    if (::Foam::infoDetailLevel > 0) Info
+    if (::Foam::infoDetailLevel > 0) ::Foam::Info
 
 //- Report write to Foam::Info if the local log switch is true
 #define Log                                                                    \
-    if (log) Info
+    if (log) ::Foam::Info
 
 //- Report write to Foam::Info if the class log switch is true
 #define Log_                                                                   \
-    if (this->log) Info
+    if (this->log) ::Foam::Info
 
 
 //- Report an IO information message using Foam::Info
@@ -427,7 +427,7 @@ extern messageStream SeriousError;
 //- Report an information message using Foam::Info
 //  if the local debug switch is true
 #define DebugInfo                                                              \
-    if (debug) Info
+    if (debug) ::Foam::Info
 
 //- Report an information message using Foam::Info
 //  for FUNCTION_NAME in file __FILE__ at line __LINE__
@@ -438,7 +438,7 @@ extern messageStream SeriousError;
 //- Report an information message using Foam::Pout
 //  if the local debug switch is true
 #define DebugPout                                                              \
-    if (debug) Pout
+    if (debug) ::Foam::Pout
 
 //- Report an information message using Foam::Pout
 //  for FUNCTION_NAME in file __FILE__ at line __LINE__
diff --git a/src/OpenFOAM/global/argList/argList.C b/src/OpenFOAM/global/argList/argList.C
index 1e316317e543882f755eb12fe1044f604f808211..8abb9a5457f4a07a0bb75a2073b0f79dc06b91c2 100644
--- a/src/OpenFOAM/global/argList/argList.C
+++ b/src/OpenFOAM/global/argList/argList.C
@@ -2093,8 +2093,24 @@ void Foam::argList::parse
                     Info<< "Roots  : " << roots << nl;
                 }
             }
+
             Info<< "Pstream initialized with:" << nl
-                << "    floatTransfer      : "
+                << "    node communication : ";
+            if (UPstream::nodeCommsControl_ > 0)
+            {
+                Info<< Switch::name(UPstream::usingNodeComms())
+                    << " [min=" << UPstream::nodeCommsMin_
+                    << ", type=" << UPstream::nodeCommsControl_
+                    << "]";
+            }
+            else
+            {
+                Info<< "disabled";
+            }
+            Info<< " (" << UPstream::nProcs() << " ranks, "
+                << UPstream::numNodes() << " nodes)" << nl;
+
+            Info<< "    floatTransfer      : "
                 << Switch::name(UPstream::floatTransfer) << nl
                 << "    maxCommsSize       : "
                 << UPstream::maxCommsSize << nl
diff --git a/src/OpenFOAM/global/argList/parRun.H b/src/OpenFOAM/global/argList/parRun.H
index 0b7d8e3412f4eab3fea6d0cdcb124226573f0f60..1646cbdacee1e9eb4671ad97919a0584721bc33f 100644
--- a/src/OpenFOAM/global/argList/parRun.H
+++ b/src/OpenFOAM/global/argList/parRun.H
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2018 OpenFOAM Foundation
-    Copyright (C) 2018-2021 OpenCFD Ltd.
+    Copyright (C) 2018-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -77,7 +77,7 @@ public:
     // Constructors
 
         //- Default construct
-        ParRunControl() noexcept
+        constexpr ParRunControl() noexcept
         :
             dryRun_(0),
             verbose_(0),
@@ -92,7 +92,9 @@ public:
     {
         if (parallel_)
         {
-            Info<< "Finalising parallel run" << endl;
+            // Report shutdown (stdout or stderr)
+            (Foam::infoDetailLevel > 0 ? Info.stream() : InfoErr.stream())
+                << "Finalising parallel run" << endl;
         }
         UPstream::shutdown();
     }
@@ -104,10 +106,10 @@ public:
         int dryRun() const noexcept { return dryRun_; }
 
         //- Increase the dry-run level
-        void incrDryRun(int level = 1) noexcept { dryRun_ += level; }
+        void incrDryRun(int i=1) noexcept { dryRun_ += i; }
 
         //- Change dry-run level, returns old value
-        int dryRun(const int level) noexcept
+        int dryRun(int level) noexcept
         {
             int old(dryRun_);
             dryRun_ = level;
@@ -118,10 +120,10 @@ public:
         int verbose() const noexcept { return verbose_; }
 
         //- Increase the verbosity level
-        void incrVerbose(int level = 1) noexcept { verbose_ += level; }
+        void incrVerbose(int i=1) noexcept { verbose_ += i; }
 
         //- Change verbosity level, returns old value
-        int verbose(const int level) noexcept
+        int verbose(int level) noexcept
         {
             int old(verbose_);
             verbose_ = level;
@@ -140,7 +142,7 @@ public:
         //- Set as parallel run on/off, return the previous value.
         //  Use with \b extreme caution if runPar() has already been
         //  called.
-        bool parRun(const bool on) noexcept
+        bool parRun(bool on) noexcept
         {
             bool old(parallel_);
             parallel_ = on;
@@ -176,7 +178,10 @@ public:
         {
             if (!UPstream::init(argc, argv, needsThread_))
             {
-                Info<< "Failed to start parallel run" << endl;
+                // Report failure (stdout or stderr)
+                (Foam::infoDetailLevel > 0 ? Info.stream() : InfoErr.stream())
+                    << "Failed to start parallel run" << endl;
+
                 UPstream::exit(1);
             }
             parallel_ = true;
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
index 5aa41da120035e07c6a4a852f5dd0e11201209e6..2f341fbabaf7b34aa72deb9d09d1511a0339b896 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2017-2018 OpenFOAM Foundation
-    Copyright (C) 2019-2023 OpenCFD Ltd.
+    Copyright (C) 2019-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -290,14 +290,7 @@ Foam::OFstreamCollator::OFstreamCollator(const off_t maxBufferSize)
     maxBufferSize_(maxBufferSize),
     threadRunning_(false),
     localComm_(UPstream::worldComm),
-    threadComm_
-    (
-        UPstream::allocateCommunicator
-        (
-            localComm_,
-            labelRange(UPstream::nProcs(localComm_))
-        )
-    )
+    threadComm_(UPstream::dupCommunicator(localComm_))
 {}
 
 
@@ -310,14 +303,7 @@ Foam::OFstreamCollator::OFstreamCollator
     maxBufferSize_(maxBufferSize),
     threadRunning_(false),
     localComm_(comm),
-    threadComm_
-    (
-        UPstream::allocateCommunicator
-        (
-            localComm_,
-            labelRange(UPstream::nProcs(localComm_))
-        )
-    )
+    threadComm_(UPstream::dupCommunicator(localComm_))
 {}
 
 
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C
index 71303704958c4428571b01f809bb1857744eb9a2..a9dda1fcb7d61eb6c19f072bdcc17aecdd9520eb 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C
@@ -236,7 +236,7 @@ static Tuple2<label, labelList> getCommPattern()
     if (UPstream::parRun() && commAndIORanks.second().size() > 1)
     {
         // Multiple masters: ranks for my IO range
-        commAndIORanks.first() = UPstream::allocateCommunicator
+        commAndIORanks.first() = UPstream::newCommunicator
         (
             UPstream::worldComm,
             fileOperation::subRanks(commAndIORanks.second())
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C
index 8ee087e691944ddd06eacc05607174eacd573bc4..999db2ed9c3cf5b69fd09dc924594537632700be 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/hostCollatedFileOperation.C
@@ -84,7 +84,7 @@ static Tuple2<label, labelList> getCommPattern()
     if (UPstream::parRun() && commAndIORanks.second().size() > 1)
     {
         // Multiple masters: ranks for my IO range
-        commAndIORanks.first() = UPstream::allocateCommunicator
+        commAndIORanks.first() = UPstream::newCommunicator
         (
             UPstream::worldComm,
             fileOperation::subRanks(commAndIORanks.second())
diff --git a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C
index 5b05fe46c83f45192e29e28d23fdb160a69e7750..16d8fc64a0095e2bcc7e4ad889c3993ddfc60078 100644
--- a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C
+++ b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationNew.C
@@ -362,7 +362,7 @@ Foam::fileOperation::New_impl
             // Warning: MS-MPI currently uses MPI_Comm_create() instead of
             // MPI_Comm_create_group() so it will block there!
 
-            commAndIORanks.first() = UPstream::allocateCommunicator
+            commAndIORanks.first() = UPstream::newCommunicator
             (
                 UPstream::worldComm,
                 siblings
diff --git a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C
index bad09da2b9b4ad71027f785506d16fb5e66b97dd..b325663c7d461cff7da7a4cd011c0907818a8dbc 100644
--- a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C
+++ b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationRanks.C
@@ -106,54 +106,44 @@ Foam::labelRange Foam::fileOperation::subRanks(const labelUList& mainIOranks)
 
 Foam::labelList Foam::fileOperation::getGlobalHostIORanks()
 {
-    const label numProcs = UPstream::nProcs(UPstream::worldComm);
+    // Very similar to the code in UPstream::setHostCommunicators()
+    // except we need the leader information on *all* ranks!
 
-    // Use hostname
-    // Lowest rank per hostname is the IO rank
-
-    List<SHA1Digest> digests;
-    if (UPstream::master(UPstream::worldComm))
-    {
-        digests.resize(numProcs);
-    }
+    const label myProci = UPstream::myProcNo(UPstream::worldComm);
+    const label numProc = UPstream::nProcs(UPstream::worldComm);
 
     // Could also add lowercase etc, but since hostName()
     // will be consistent within the same node, there is no need.
-    SHA1Digest myDigest(SHA1(hostName()).digest());
+    const SHA1Digest myDigest(SHA1(hostName()).digest());
+
+    List<SHA1Digest> digests(numProc);
+    digests[myProci] = myDigest;
 
-    // The fixed-length digest allows use of MPI_Gather
-    UPstream::mpiGather
+    // The fixed-length digest allows use of MPI_Allgather.
+    UPstream::mpiAllGather
     (
-        myDigest.cdata_bytes(),     // Send
-        digests.data_bytes(),       // Recv
+        digests.data_bytes(),       // Send/Recv
         SHA1Digest::size_bytes(),   // Num send/recv per rank
         UPstream::worldComm
     );
 
-    labelList ranks;
-    DynamicList<label> dynRanks;
 
-    if (UPstream::master(UPstream::worldComm))
-    {
-        dynRanks.reserve(numProcs);
-
-        dynRanks.push_back(0);  // Always include master
-        label previ = 0;
+    DynamicList<label> hostLeaders(UPstream::numNodes());
 
-        for (label proci = 1; proci < digests.size(); ++proci)
+    hostLeaders.push_back(0);  // Always include master
+    for (label previ = 0, proci = 1; proci < digests.size(); ++proci)
+    {
+        if (digests[previ] != digests[proci])
         {
-            if (digests[proci] != digests[previ])
-            {
-                dynRanks.push_back(proci);
-                previ = proci;
-            }
+            hostLeaders.push_back(proci);
+            previ = proci;
         }
-
-        ranks.transfer(dynRanks);
     }
 
-    Pstream::broadcast(ranks, UPstream::worldComm);
-    return ranks;
+    return labelList(std::move(hostLeaders));
+
+    // Alternative is to recover information from commInterNode()
+    // and broadcast via commLocalNode()
 }
 
 
diff --git a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C
index 1d610096901dbc69c6f19935f3a0b4bbe701cf51..95c5360b8f155a1a686cd251476965563a7b69e0 100644
--- a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C
+++ b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C
@@ -612,7 +612,7 @@ static Tuple2<label, labelList> getCommPattern()
     if (UPstream::parRun() && commAndIORanks.second().size() > 1)
     {
         // Multiple masters: ranks for my IO range
-        commAndIORanks.first() = UPstream::allocateCommunicator
+        commAndIORanks.first() = UPstream::newCommunicator
         (
             UPstream::worldComm,
             fileOperation::subRanks(commAndIORanks.second())
diff --git a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C
index e1541444bdcefd0b078618905da32c542ada7dda..ba35d1ba9d283281a9a69a5e0b82b005b6e868ee 100644
--- a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C
+++ b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/hostUncollatedFileOperation.C
@@ -85,7 +85,7 @@ static Tuple2<label, labelList> getCommPattern()
     if (UPstream::parRun() && commAndIORanks.second().size() > 1)
     {
         // Multiple masters: ranks for my IO range
-        commAndIORanks.first() = UPstream::allocateCommunicator
+        commAndIORanks.first() = UPstream::newCommunicator
         (
             UPstream::worldComm,
             fileOperation::subRanks(commAndIORanks.second())
diff --git a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C
index 180783d578cae562504578e00b51cae6a7663e82..ab41512a302aa81325550d89e64a2c8397bcc5ce 100644
--- a/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C
+++ b/src/OpenFOAM/global/fileOperations/uncollatedFileOperation/uncollatedFileOperation.C
@@ -204,7 +204,7 @@ static Tuple2<label, labelList> getCommPattern()
     if (UPstream::parRun() && commAndIORanks.second().size() > 1)
     {
         // Multiple masters: ranks for my IO range
-        commAndIORanks.first() = UPstream::allocateCommunicator
+        commAndIORanks.first() = UPstream::newCommunicator
         (
             UPstream::worldComm,
             fileOperation::subRanks(commAndIORanks.second())
diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C
index bdbc65d281a0d41a90785b661948a716d26f56bb..f231f5d778bb7b54658075e662c5c3da9f8e37c4 100644
--- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C
+++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerateLduAddressing.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2017 OpenFOAM Foundation
-    Copyright (C) 2019-2023 OpenCFD Ltd.
+    Copyright (C) 2019-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -559,36 +559,65 @@ void Foam::GAMGAgglomeration::procAgglomerateRestrictAddressing
     const label levelIndex
 )
 {
-    // Collect number of cells
-    labelList nFineCells;
-    globalIndex::gatherValues
+    const bool master =
     (
-        comm,
-        procIDs,
-        restrictAddressing_[levelIndex].size(),
-        nFineCells,
-
-        UPstream::msgType(),
-        UPstream::commsTypes::scheduled
+        UPstream::myProcNo(comm) == (procIDs.empty() ? 0 : procIDs[0])
     );
-    labelList fineOffsets(globalIndex::calcOffsets(nFineCells));
 
-    // Combine and renumber nCoarseCells
-    labelList nCoarseCells;
-    globalIndex::gatherValues
-    (
-        comm,
-        procIDs,
-        nCells_[levelIndex],
-        nCoarseCells,
+    // Determine the fine/coarse sizes (offsets) for gathering
+    labelList fineOffsets;
+    labelList coarseOffsets;
+
+    {
+        List<labelPair> sizes = globalIndex::listGatherValues
+        (
+            comm,
+            procIDs,
+            labelPair
+            (
+                // fine
+                restrictAddressing_[levelIndex].size(),
+                // coarse
+                nCells_[levelIndex]
+            ),
+            UPstream::msgType(),
+            UPstream::commsTypes::scheduled
+        );
+
+        // Calculate offsets, as per globalIndex::calcOffsets()
+        // but extracting from the pair
+        if (master && !sizes.empty())
+        {
+            const label len = sizes.size();
+
+            fineOffsets.resize(len+1);
+            coarseOffsets.resize(len+1);
+
+            label fineCount = 0;
+            label coarseCount = 0;
+
+            for (label i = 0; i < len; ++i)
+            {
+                fineOffsets[i] = fineCount;
+                fineCount += sizes[i].first();
+
+                coarseOffsets[i] = coarseCount;
+                coarseCount += sizes[i].second();
+            }
+
+            fineOffsets[len] = fineCount;
+            coarseOffsets[len] = coarseCount;
+        }
+    }
 
-        UPstream::msgType(),
-        UPstream::commsTypes::scheduled
-    );
-    labelList coarseOffsets(globalIndex::calcOffsets(nCoarseCells));
 
     // (cell)restrictAddressing
     labelList procRestrictAddressing;
+    if (master)
+    {
+        // pre-size on master
+        procRestrictAddressing.resize(fineOffsets.back());
+    }
     globalIndex::gather
     (
         fineOffsets,
@@ -596,15 +625,13 @@ void Foam::GAMGAgglomeration::procAgglomerateRestrictAddressing
         procIDs,
         restrictAddressing_[levelIndex],
         procRestrictAddressing,
-
         UPstream::msgType(),
-        Pstream::commsTypes::nonBlocking    //Pstream::commsTypes::scheduled
+        UPstream::commsTypes::nonBlocking
     );
 
-
-    if (Pstream::myProcNo(comm) == procIDs[0])
+    if (master)
     {
-        nCells_[levelIndex] = coarseOffsets.last();  // ie, totalSize()
+        nCells_[levelIndex] = coarseOffsets.back();  // ie, totalSize()
 
         // Renumber consecutively
         for (label proci = 1; proci < procIDs.size(); ++proci)
diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C
index 8d13291110e0b409a7669d9e410b4b8ad078501b..f236bde8a52872d6656e0c6a42e30759e57221bf 100644
--- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C
+++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGAgglomerations/GAMGAgglomeration/GAMGAgglomerationTemplates.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2017 OpenFOAM Foundation
-    Copyright (C) 2023 OpenCFD Ltd.
+    Copyright (C) 2023-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -78,17 +78,17 @@ void Foam::GAMGAgglomeration::restrictField
         const label coarseComm =
             UPstream::parent(procCommunicator_[coarseLevelIndex]);
 
-        const List<label>& procIDs = agglomProcIDs(coarseLevelIndex);
-        const labelList& offsets = cellOffsets(coarseLevelIndex);
+        const auto& procIDs = agglomProcIDs(coarseLevelIndex);
+        const auto& offsets = cellOffsets(coarseLevelIndex);
 
-        globalIndex::gather
+        globalIndex::gatherInplace
         (
             offsets,
             coarseComm,
             procIDs,
             cf,
             UPstream::msgType(),
-            Pstream::commsTypes::nonBlocking    //Pstream::commsTypes::scheduled
+            UPstream::commsTypes::nonBlocking
         );
     }
 }
@@ -145,8 +145,8 @@ void Foam::GAMGAgglomeration::prolongField
         const label coarseComm =
             UPstream::parent(procCommunicator_[coarseLevelIndex]);
 
-        const List<label>& procIDs = agglomProcIDs(coarseLevelIndex);
-        const labelList& offsets = cellOffsets(coarseLevelIndex);
+        const auto& procIDs = agglomProcIDs(coarseLevelIndex);
+        const auto& offsets = cellOffsets(coarseLevelIndex);
 
         const label localSize = nCells_[levelIndex];
 
@@ -159,7 +159,7 @@ void Foam::GAMGAgglomeration::prolongField
             cf,
             allCf,
             UPstream::msgType(),
-            Pstream::commsTypes::nonBlocking    //Pstream::commsTypes::scheduled
+            UPstream::commsTypes::nonBlocking
         );
 
         forAll(fineToCoarse, i)
@@ -195,8 +195,8 @@ const Foam::Field<Type>& Foam::GAMGAgglomeration::prolongField
         const label coarseComm =
             UPstream::parent(procCommunicator_[coarseLevelIndex]);
 
-        const List<label>& procIDs = agglomProcIDs(coarseLevelIndex);
-        const labelList& offsets = cellOffsets(coarseLevelIndex);
+        const auto& procIDs = agglomProcIDs(coarseLevelIndex);
+        const auto& offsets = cellOffsets(coarseLevelIndex);
 
         const label localSize = nCells_[levelIndex];
         allCf.resize_nocopy(localSize);
@@ -209,7 +209,7 @@ const Foam::Field<Type>& Foam::GAMGAgglomeration::prolongField
             cf,
             allCf,
             UPstream::msgType(),
-            Pstream::commsTypes::nonBlocking    //Pstream::commsTypes::scheduled
+            UPstream::commsTypes::nonBlocking
         );
 
         forAll(fineToCoarse, i)
diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C
index 33139bb24cf30461f9d6725d248f8c6150759ea0..da30b3bebaa43995eb9ed6ed5e9d4906777fcea9 100644
--- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C
+++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/eagerGAMGProcAgglomeration/eagerGAMGProcAgglomeration.C
@@ -119,7 +119,7 @@ bool Foam::eagerGAMGProcAgglomeration::agglomerate()
                     // Communicator for the processor-agglomerated matrix
                     comms_.push_back
                     (
-                        UPstream::allocateCommunicator
+                        UPstream::newCommunicator
                         (
                             levelComm,
                             masterProcs
diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C
index b8906aaca0d53386fc92af43fc104b99a280964f..e33b2539a11c5df6de986954c219693ef3b293d5 100644
--- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C
+++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/manualGAMGProcAgglomeration/manualGAMGProcAgglomeration.C
@@ -167,7 +167,7 @@ bool Foam::manualGAMGProcAgglomeration::agglomerate()
                     // Communicator for the processor-agglomerated matrix
                     comms_.push_back
                     (
-                        UPstream::allocateCommunicator
+                        UPstream::newCommunicator
                         (
                             levelMesh.comm(),
                             coarseToMaster
diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C
index 4eab56651592d4d343d05154aae3e3adea9fe97a..d5024bb9e9b88182887cce6f9b8eb9619abe761c 100644
--- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C
+++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/masterCoarsestGAMGProcAgglomeration/masterCoarsestGAMGProcAgglomeration.C
@@ -194,7 +194,7 @@ bool Foam::masterCoarsestGAMGProcAgglomeration::agglomerate()
                 // Communicator for the processor-agglomerated matrix
                 comms_.push_back
                 (
-                    UPstream::allocateCommunicator
+                    UPstream::newCommunicator
                     (
                         levelComm,
                         masterProcs
diff --git a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C
index 52baecc75b3aae118ca611c442273c03c6d4b3a4..6a35bd8077e58f240c3ab7e29eec3ddcaaafe41e 100644
--- a/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C
+++ b/src/OpenFOAM/matrices/lduMatrix/solvers/GAMG/GAMGProcAgglomerations/procFacesGAMGProcAgglomeration/procFacesGAMGProcAgglomeration.C
@@ -286,7 +286,7 @@ bool Foam::procFacesGAMGProcAgglomeration::agglomerate()
                     // Communicator for the processor-agglomerated matrix
                     comms_.push_back
                     (
-                        UPstream::allocateCommunicator
+                        UPstream::newCommunicator
                         (
                             levelComm,
                             masterProcs
diff --git a/src/OpenFOAM/parallel/globalIndex/globalIndex.C b/src/OpenFOAM/parallel/globalIndex/globalIndex.C
index 218869d33b388873bcbd9397f9a14ad55df4b9ce..95f64b161c6ca1f94a990b133e31d6909f1dffce 100644
--- a/src/OpenFOAM/parallel/globalIndex/globalIndex.C
+++ b/src/OpenFOAM/parallel/globalIndex/globalIndex.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2016 OpenFOAM Foundation
-    Copyright (C) 2018-2023 OpenCFD Ltd.
+    Copyright (C) 2018-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -263,6 +263,117 @@ Foam::globalIndex::calcRanges
 }
 
 
+bool Foam::globalIndex::splitNodeOffsets
+(
+    labelList& interNodeOffsets,
+    labelList& localNodeOffsets,
+    const label communicator,
+    const bool absoluteLocalNodeOffsets
+) const
+{
+    // Require const-world as the starting point
+    if (!UPstream::parRun() || communicator != UPstream::commConstWorld())
+    {
+        interNodeOffsets.clear();
+        localNodeOffsets.clear();
+        return false;
+    }
+
+    const auto interNodeComm = UPstream::commInterNode();
+
+    // Only generate information on the node leaders
+    if (!UPstream::is_rank(interNodeComm))
+    {
+        interNodeOffsets.clear();
+        localNodeOffsets.clear();
+        return true;  // Not involved, but return true to match others...
+    }
+
+    const label numProc = UPstream::nProcs(UPstream::commConstWorld());
+    const auto& procIds = UPstream::procID(interNodeComm);
+    const int ranki = UPstream::myProcNo(interNodeComm);
+
+    if (FOAM_UNLIKELY(procIds.empty()))
+    {
+        // Should not happen...
+        interNodeOffsets.clear();
+        localNodeOffsets.clear();
+        return true;  // Return true to match others...
+    }
+
+    // The inter-node offsets are taken from the node-specific segment
+    // of the overall offsets, but avoiding MPI_Scatterv (slow, doesn't
+    // handle overlaps) and using MPI_Bcast() instead.
+
+    // Send top-level offsets to the node leaders.
+    // Could also be a mutable operation and use offsets_ directly.
+    //
+    // - number of overall offsets is always (nProc+1) [worldComm]
+    labelList allOffsets;
+    if (UPstream::master(interNodeComm))
+    {
+        allOffsets = offsets_;
+    }
+    else  // ie, UPstream::is_subrank(interNodeComm)
+    {
+        allOffsets.resize_nocopy(numProc+1);
+    }
+
+    UPstream::broadcast
+    (
+        allOffsets.data_bytes(),
+        allOffsets.size_bytes(),
+        interNodeComm
+    );
+
+
+    if (FOAM_UNLIKELY(allOffsets.empty()))
+    {
+        // Should not happen...
+        interNodeOffsets.clear();
+        localNodeOffsets.clear();
+        return true;  // Return true to match others...
+    }
+
+    // The local node span
+    const label firstProc = procIds[ranki];
+    const label lastProc =
+    (
+        (ranki+1 < procIds.size())
+      ? procIds[ranki+1]
+      : numProc
+    );
+
+    // Offsets (within a node)
+    localNodeOffsets = allOffsets.slice
+    (
+        firstProc,
+        (lastProc - firstProc) + 1  // +1 since offsets
+    );
+
+    if (!absoluteLocalNodeOffsets && !localNodeOffsets.empty())
+    {
+        const auto start0 = localNodeOffsets.front();
+        for (auto& val : localNodeOffsets)
+        {
+            val -= start0;
+        }
+    }
+
+    // Offsets (between nodes)
+    interNodeOffsets.resize_nocopy(procIds.size()+1);
+    {
+        forAll(procIds, i)
+        {
+            interNodeOffsets[i] = allOffsets[procIds[i]];
+        }
+        interNodeOffsets.back() = allOffsets.back();
+    }
+
+    return true;
+}
+
+
 // * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
 
 Foam::globalIndex::globalIndex(Istream& is)
diff --git a/src/OpenFOAM/parallel/globalIndex/globalIndex.H b/src/OpenFOAM/parallel/globalIndex/globalIndex.H
index 598c82e5521ce4eb01fa6804919b8d23c49d5968..19f6ef1a9d2a6a92790806bf521e8b983e19c4de 100644
--- a/src/OpenFOAM/parallel/globalIndex/globalIndex.H
+++ b/src/OpenFOAM/parallel/globalIndex/globalIndex.H
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2017 OpenFOAM Foundation
-    Copyright (C) 2018-2024 OpenCFD Ltd.
+    Copyright (C) 2018-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -87,9 +87,8 @@ class globalIndex
         // template<class Type>
         // inline static UPstream::commsTypes getCommsType
         // (
-        //     const UPstream::commsTypes preferred
-        //   = UPstream::commsTypes::nonBlocking
-        // );
+        //     UPstream::commsTypes commsType
+        // ) noexcept;
 
         //- Report overflow at specified (non-negative) index
         static void reportOverflowAndExit
@@ -582,17 +581,35 @@ public:
             const bool checkOverflow = false
         );
 
+        //- Split the top-level offsets into inter-node and local-node
+        //- components suitable for a two-stage hierarchy.
+        bool splitNodeOffsets
+        (
+            //! [out] Offsets between nodes (only non-empty on node leaders)
+            labelList& interNodeOffsets,
+            //! [out] Offsets within a node (only non-empty on node leaders)
+            labelList& localNodeOffsets,
+            //! The communicator. Must resolve to const world-comm
+            const label communicator = UPstream::worldComm,
+            //! Retain absolute values for the localNode offsets
+            const bool absoluteLocalNodeOffsets = false
+        ) const;
+
+
+    // Misc low-level gather routines
+
         //- Collect single values in processor order on master (== procIDs[0]).
         //  Handles contiguous/non-contiguous data.
+        //  The returned field is non-empty on the master only.
         template<class ProcIDsContainer, class Type>
-        static void gatherValues
+        [[nodiscard]]
+        static List<Type> listGatherValues
         (
             const label comm,           //!< communicator
             const ProcIDsContainer& procIDs,
             const Type& localValue,
-            List<Type>& allValues,      //! output field (master only)
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
         );
 
         //- Collect data in processor order on master (== procIDs[0]).
@@ -603,10 +620,11 @@ public:
             const labelUList& offsets,  //!< offsets (master only)
             const label comm,           //!< communicator
             const ProcIDsContainer& procIDs,
-            const UList<Type>& fld,
-            List<Type>& allFld,         //! output field (master only)
+            const UList<Type>& fld,     //!< [in] all ranks
+            //! [out] result (master only). Must be adequately sized!
+            UList<Type>& allFld,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
         );
 
         //- Collect indirect data in processor order on master
@@ -617,37 +635,24 @@ public:
             const labelUList& offsets,  //!< offsets (master only)
             const label comm,           //!< communicator
             const ProcIDsContainer& procIDs,
-            const IndirectListBase<Type, Addr>& fld,
-            List<Type>& allFld,         //! output field (master only)
+            const IndirectListBase<Type, Addr>& fld,  //!< [in] all ranks
+            //! [out] result (master only). Must be adequately sized!
+            UList<Type>& allFld,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
         );
 
-
-    // Misc low-level gather routines
-
         //- Inplace collect in processor order on master (== procIDs[0]).
-        //  Note: adjust naming?
         template<class ProcIDsContainer, class Type>
-        static void gather
+        static void gatherInplace
         (
             const labelUList& offsets,  //!< offsets (master only)
             const label comm,           //!< communicator
             const ProcIDsContainer& procIDs,
-            List<Type>& fld,            //!< in/out field
+            List<Type>& fld,            //!< [in,out]
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes ct = UPstream::commsTypes::nonBlocking
-        )
-        {
-            List<Type> allData;
-            gather(offsets, comm, procIDs, fld, allData, tag, ct);
-
-            const int masterProci = procIDs.size() ? procIDs[0] : 0;
-            if (UPstream::myProcNo(comm) == masterProci)
-            {
-                fld.transfer(allData);
-            }
-        }
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
+        );
 
         //- Collect data in processor order on master (== procIDs[0]).
         //  \note the globalIndex offsets needed on master only.
@@ -656,29 +661,26 @@ public:
         (
             const label comm,           //!< communicator
             const ProcIDsContainer& procIDs,
-            const UList<Type>& fld,     //!< input field
-            List<Type>& allFld,         //! output field (master only)
+            const UList<Type>& fld,     //!< [in] input field
+            //! [out] resized to have results on master, empty elsewhere.
+            List<Type>& allFld,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes ct = UPstream::commsTypes::nonBlocking
-        ) const
-        {
-            gather(offsets_, comm, procIDs, fld, allFld, tag, ct);
-        }
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
+        ) const;
 
         //- Inplace collect in processor order on master (== procIDs[0]).
         //  \note the globalIndex offsets needed on master only.
-        //  Note: adjust naming?
         template<class ProcIDsContainer, class Type>
-        void gather
+        void gatherInplace
         (
             const label comm,           //!< communicator
             const ProcIDsContainer& procIDs,
-            List<Type>& fld,            //!< in/out field
+            List<Type>& fld,            //!< [in,out]
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes ct = UPstream::commsTypes::nonBlocking
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
         ) const
         {
-            gather(offsets_, comm, procIDs, fld, tag, ct);
+            gatherInplace(offsets_, comm, procIDs, fld, tag, commsType);
         }
 
 
@@ -690,10 +692,12 @@ public:
         template<class Type>
         void gather
         (
+            //! [in] input on all ranks
             const UList<Type>& sendData,
+            //! [out] resized to have results on master, empty elsewhere.
             List<Type>& allData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         ) const;
 
@@ -703,10 +707,12 @@ public:
         template<class Type, class Addr>
         void gather
         (
+            //! [in] input on all ranks
             const IndirectListBase<Type, Addr>& sendData,
+            //! [out] resized to have results on master, empty elsewhere.
             List<Type>& allData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         ) const;
 
@@ -716,11 +722,12 @@ public:
         //
         //  \return output (master), zero-sized on non-master
         template<class Type, class OutputContainer = List<Type>>
+        [[nodiscard]]
         OutputContainer gather
         (
-            const UList<Type>& sendData,
+            const UList<Type>& sendData,  //!< [in] all ranks
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         ) const;
 
@@ -729,11 +736,12 @@ public:
         //
         //  \return output (master), zero-sized on non-master
         template<class Type, class Addr, class OutputContainer = List<Type>>
+        [[nodiscard]]
         OutputContainer gather
         (
-            const IndirectListBase<Type, Addr>& sendData,
+            const IndirectListBase<Type, Addr>& sendData, //!< [in] all ranks
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         ) const;
 
@@ -744,10 +752,9 @@ public:
         template<class Type>
         void gatherInplace
         (
-            //! [in,out]
-            List<Type>& fld,
+            List<Type>& fld,  //!< [in,out]
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         ) const;
 
@@ -765,7 +772,7 @@ public:
             const label comm = UPstream::worldComm,  //!< communicator
 
             // For fallback routines:
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const int tag = UPstream::msgType()
         ) const;
 
@@ -777,13 +784,14 @@ public:
         //
         //  \return output (master), zero-sized on non-master
         template<class Type, class OutputContainer = List<Type>>
+        [[nodiscard]]
         OutputContainer mpiGather
         (
             const UList<Type>& sendData,
             const label comm = UPstream::worldComm,  //!< communicator
 
             // For fallback routines:
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const int tag = UPstream::msgType()
         ) const;
 
@@ -803,7 +811,7 @@ public:
             const label comm = UPstream::worldComm,  //!< communicator
 
             // For fallback routines:
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const int tag = UPstream::msgType()
         ) const;
 
@@ -823,7 +831,7 @@ public:
             const label comm = UPstream::worldComm,  //!< communicator
 
             // For fallback routines:
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const int tag = UPstream::msgType()
         );
 
@@ -833,13 +841,14 @@ public:
         //
         //  \return output (master), zero-sized on non-master
         template<class Type, class OutputContainer = List<Type>>
+        [[nodiscard]]
         static OutputContainer mpiGatherOp
         (
             const UList<Type>& sendData,
             const label comm = UPstream::worldComm,  //!< communicator
 
             // For fallback routines:
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const int tag = UPstream::msgType()
         );
 
@@ -857,7 +866,7 @@ public:
             const label comm = UPstream::worldComm,  //!< communicator
 
             // For fallback routines:
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const int tag = UPstream::msgType()
         );
 
@@ -871,7 +880,7 @@ public:
             //! [out] output on master, zero-sized on non-master
             List<Type>& allData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         );
 
@@ -885,7 +894,7 @@ public:
             //! [out] output on master, zero-sized on non-master
             List<Type>& allData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         );
 
@@ -895,11 +904,12 @@ public:
         //
         //  \return output (master), zero-sized on non-master
         template<class Type, class OutputContainer = List<Type>>
+        [[nodiscard]]
         static OutputContainer gatherOp
         (
             const UList<Type>& sendData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         );
 
@@ -909,11 +919,12 @@ public:
         //
         //  \return output (master), zero-sized on non-master
         template<class Type, class Addr, class OutputContainer = List<Type>>
+        [[nodiscard]]
         static OutputContainer gatherOp
         (
             const IndirectListBase<Type, Addr>& sendData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         );
 
@@ -928,7 +939,7 @@ public:
             //! [in,out]
             List<Type>& fld,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         );
 
@@ -947,7 +958,7 @@ public:
             const UList<Type>& allFld,
             UList<Type>& fld,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
         );
 
         //- Distribute data in processor order.
@@ -962,11 +973,10 @@ public:
             const UList<Type>& allFld,
             UList<Type>& fld,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes ct =
-                UPstream::commsTypes::nonBlocking
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking
         ) const
         {
-            scatter(offsets_, comm, procIDs, allFld, fld, tag, ct);
+            scatter(offsets_, comm, procIDs, allFld, fld, tag, commsType);
         }
 
         //- Distribute data in processor order.
@@ -979,7 +989,7 @@ public:
             const UList<Type>& allData,
             UList<Type>& localData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         ) const;
 
@@ -988,11 +998,12 @@ public:
         //  Communication with default/specified communicator, message tag.
         //  \note the globalIndex offsets needed on master only.
         template<class Type, class OutputContainer = List<Type>>
+        [[nodiscard]]
         OutputContainer scatter
         (
             const UList<Type>& allData,
             const int tag = UPstream::msgType(),
-            const UPstream::commsTypes = UPstream::commsTypes::nonBlocking,
+            UPstream::commsTypes commsType = UPstream::commsTypes::nonBlocking,
             const label comm = UPstream::worldComm  //!< communicator
         ) const;
 
diff --git a/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C b/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C
index 75bf7d4a93baa6c345401e591737b7123e5bd723..6c3c46397d8ad6a26f7a945612996d6604a10483 100644
--- a/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C
+++ b/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C
@@ -32,22 +32,22 @@ License
 
 // Cannot use non-blocking for non-contiguous data.
 // template<class Type>
-// inline Foam::UPstream::commsTypes getCommsType
+// inline Foam::UPstream::commsTypes Foam::globalIndex::getCommsType
 // (
-//     const UPstream::commsTypes preferred
-// )
+//     UPstream::commsTypes commsType
+// ) noexcept
 // {
-//     return
-//     (
-//         (
-//             !is_contiguous_v<Type>
-//          && UPstream::commsTypes::nonBlocking == preferred
-//         )
-//       ? UPstream::commsTypes::scheduled
-//       : preferred
-//     );
+//     if constexpr (!is_contiguous_v<Type>)
+//     {
+//         return UPstream::commsTypes::scheduled;
+//     }
+//     else
+//     {
+//         return commsType;
+//     }
 // }
 
+// Helpers
 
 template<class Addr>
 Foam::labelList
@@ -121,33 +121,38 @@ Foam::globalIndex::calcListOffsets
 }
 
 
+// Low-level
+
 template<class ProcIDsContainer, class Type>
-void Foam::globalIndex::gatherValues
+Foam::List<Type> Foam::globalIndex::listGatherValues
 (
     const label comm,
     const ProcIDsContainer& procIDs,
     const Type& localValue,
-    List<Type>& allValues,
     const int tag,
-    const UPstream::commsTypes preferredCommsType
+    UPstream::commsTypes commsType
 )
 {
-    // low-level: no parRun guard
+    // low-level: no parRun guard?
+    const int masterProci = (procIDs.empty() ? 0 : procIDs[0]);
 
-    // Cannot use non-blocking for non-contiguous data.
-    const UPstream::commsTypes commsType =
-    (
-        (
-            !is_contiguous_v<Type>
-         && UPstream::commsTypes::nonBlocking == preferredCommsType
-        )
-      ? UPstream::commsTypes::scheduled
-      : preferredCommsType
-    );
+    // if (!UPstream::is_parallel(comm))
+    // {
+    //     List<Type> allValues(1);
+    //     allValues[0] = localValue;
+    //     return allValues;
+    // }
 
-    const label startOfRequests = UPstream::nRequests();
+    List<Type> allValues;
 
-    const int masterProci = procIDs.size() ? procIDs[0] : 0;
+    // Cannot use non-blocking for non-contiguous data
+    if constexpr (!is_contiguous_v<Type>)
+    {
+        commsType = UPstream::commsTypes::scheduled;
+    }
+
+
+    const label startOfRequests = UPstream::nRequests();
 
     if (UPstream::myProcNo(comm) == masterProci)
     {
@@ -176,8 +181,6 @@ void Foam::globalIndex::gatherValues
     }
     else
     {
-        allValues.clear();  // safety: zero-size on non-master
-
         if constexpr (is_contiguous_v<Type>)
         {
             UOPstream::write
@@ -196,11 +199,10 @@ void Foam::globalIndex::gatherValues
         }
     }
 
-    if (commsType == UPstream::commsTypes::nonBlocking)
-    {
-        // Wait for outstanding requests
-        UPstream::waitRequests(startOfRequests);
-    }
+    // Process sync
+    UPstream::waitRequests(startOfRequests);
+
+    return allValues;
 }
 
 
@@ -211,45 +213,37 @@ void Foam::globalIndex::gather
     const label comm,
     const ProcIDsContainer& procIDs,
     const UList<Type>& fld,
-    List<Type>& allFld,
+    UList<Type>& allFld,    // must be adequately sized on master
     const int tag,
-    const UPstream::commsTypes preferredCommsType
+    UPstream::commsTypes commsType
 )
 {
     // low-level: no parRun guard
+    const int masterProci = (procIDs.empty() ? 0 : procIDs[0]);
 
-    // Cannot use non-blocking for non-contiguous data.
-    const UPstream::commsTypes commsType =
-    (
-        (
-            !is_contiguous_v<Type>
-         && UPstream::commsTypes::nonBlocking == preferredCommsType
-        )
-      ? UPstream::commsTypes::scheduled
-      : preferredCommsType
-    );
+    // Cannot use non-blocking for non-contiguous data
+    if constexpr (!is_contiguous_v<Type>)
+    {
+        commsType = UPstream::commsTypes::scheduled;
+    }
 
     const label startOfRequests = UPstream::nRequests();
 
-    const int masterProci = procIDs.size() ? procIDs[0] : 0;
-
     if (UPstream::myProcNo(comm) == masterProci)
     {
-        allFld.resize_nocopy(off.back());  // == totalSize()
-
-        // Assign my local data - respect offset information
-        // so that we can request 0 entries to be copied.
-        // Also handle the case where we have a slice of the full
-        // list.
-
-        SubList<Type>(allFld, off[1]-off[0], off[0]) =
-           SubList<Type>(fld, off[1]-off[0]);
+        if (FOAM_UNLIKELY(allFld.size() < off.back()))  // ie, totalSize()
+        {
+            FatalErrorInFunction
+                << "[out] UList size=" << allFld.size()
+                << " too small to receive " << off.back() << nl
+                << Foam::abort(FatalError);
+        }
 
         for (label i = 1; i < procIDs.size(); ++i)
         {
-            SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
+            SubList<Type> slot(allFld, off[i+1]-off[i], off[i]);
 
-            if (procSlot.empty())
+            if (slot.empty())
             {
                 // Nothing to do
             }
@@ -259,14 +253,28 @@ void Foam::globalIndex::gather
                 (
                     commsType,
                     procIDs[i],
-                    procSlot,
+                    slot,
                     tag,
                     comm
                 );
             }
             else
             {
-                IPstream::recv(procSlot, procIDs[i], tag, comm);
+                IPstream::recv(slot, procIDs[i], tag, comm);
+            }
+        }
+
+        // Assign my local data - respect offset information
+        // so that we can request 0 entries to be copied.
+        // Also handle the case where we have a slice of the full
+        // list.
+        {
+            SubList<Type> dst(allFld, off[1]-off[0], off[0]);
+            SubList<Type> src(fld, off[1]-off[0]);
+
+            if (!dst.empty() && (dst.data() != src.data()))
+            {
+                dst = src;
             }
         }
     }
@@ -293,11 +301,8 @@ void Foam::globalIndex::gather
         }
     }
 
-    if (commsType == UPstream::commsTypes::nonBlocking)
-    {
-        // Wait for outstanding requests
-        UPstream::waitRequests(startOfRequests);
-    }
+    // Process sync
+    UPstream::waitRequests(startOfRequests);
 }
 
 
@@ -308,71 +313,74 @@ void Foam::globalIndex::gather
     const label comm,
     const ProcIDsContainer& procIDs,
     const IndirectListBase<Type, Addr>& fld,
-    List<Type>& allFld,
+    UList<Type>& allFld,    // must be adequately sized on master
     const int tag,
-    const UPstream::commsTypes preferredCommsType
+    UPstream::commsTypes commsType
 )
 {
     // low-level: no parRun guard
+    const int masterProci = (procIDs.empty() ? 0 : procIDs[0]);
 
     if constexpr (is_contiguous_v<Type>)
     {
-        // Flatten list (locally) so that we can benefit from using direct
-        // read/write of contiguous data
+        if (commsType == UPstream::commsTypes::nonBlocking)
+        {
+            // Contiguous data and requested nonBlocking.
+            //
+            // Flatten list (locally) so that we can benefit from using
+            // direct read/write of contiguous data
 
-        gather
-        (
-            off,
-            comm,
-            procIDs,
-            List<Type>(fld),
-            allFld,
-            tag,
-            preferredCommsType
-        );
-        return;
-    }
+            List<Type> flattened(fld);
 
-    // Cannot use non-blocking for non-contiguous data.
-    const UPstream::commsTypes commsType =
-    (
-        (
-            !is_contiguous_v<Type>
-         && UPstream::commsTypes::nonBlocking == preferredCommsType
-        )
-      ? UPstream::commsTypes::scheduled
-      : preferredCommsType
-    );
+            gather
+            (
+                off,
+                comm,
+                procIDs,
+                flattened,
+                allFld,
+                tag,
+                commsType
+            );
+            return;
+        }
+    }
 
-    const label startOfRequests = UPstream::nRequests();
 
-    const int masterProci = procIDs.size() ? procIDs[0] : 0;
+    // Non-contiguous is always non-blocking
 
     if (UPstream::myProcNo(comm) == masterProci)
     {
-        allFld.resize_nocopy(off.back());  // == totalSize()
-
-        // Assign my local data - respect offset information
-        // so that we can request 0 entries to be copied
-
-        SubList<Type> localSlot(allFld, off[1]-off[0], off[0]);
-        if (!localSlot.empty())
+        if (FOAM_UNLIKELY(allFld.size() < off.back()))  // ie, totalSize()
         {
-            localSlot = fld;
+            FatalErrorInFunction
+                << "[out] UList size=" << allFld.size()
+                << " too small to receive " << off.back() << nl
+                << Foam::abort(FatalError);
         }
 
-        // Already verified commsType != nonBlocking
         for (label i = 1; i < procIDs.size(); ++i)
         {
-            SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
+            SubList<Type> slot(allFld, off[i+1]-off[i], off[i]);
 
-            if (procSlot.empty())
+            if (slot.empty())
             {
                 // Nothing to do
             }
             else
             {
-                IPstream::recv(procSlot, procIDs[i], tag, comm);
+                IPstream::recv(slot, procIDs[i], tag, comm);
+            }
+        }
+
+        // Assign my local data - respect offset information
+        // so that we can request 0 entries to be copied
+        {
+            SubList<Type> dst(allFld, off[1]-off[0], off[0]);
+
+            if (!dst.empty() && (dst.size() == fld.size()))
+            {
+                dst.deepCopy(fld);
             }
         }
     }
@@ -384,18 +392,84 @@ void Foam::globalIndex::gather
         }
         else
         {
-            OPstream::send(fld, commsType, masterProci, tag, comm);
+            OPstream::send(fld, masterProci, tag, comm);
         }
     }
+}
 
-    if (commsType == UPstream::commsTypes::nonBlocking)
+
+template<class ProcIDsContainer, class Type>
+void Foam::globalIndex::gatherInplace
+(
+    const labelUList& off,  // needed on master only
+    const label comm,
+    const ProcIDsContainer& procIDs,
+    List<Type>& fld,
+    const int tag,
+    UPstream::commsTypes commsType
+)
+{
+    if (!UPstream::is_parallel(comm))
+    {
+        // Serial: (no-op)
+        return;
+    }
+
+    const bool master =
+    (
+        UPstream::myProcNo(comm) == (procIDs.empty() ? 0 : procIDs[0])
+    );
+
+    List<Type> allData;
+    if (master)
     {
-        // Wait for outstanding requests
-        UPstream::waitRequests(startOfRequests);
+        allData.resize_nocopy(off.back());  // == totalSize()
+    }
+
+    globalIndex::gather(off, comm, procIDs, fld, allData, tag, commsType);
+
+    if (master)
+    {
+        fld = std::move(allData);
+    }
+    else
+    {
+        fld.clear();  // zero-size on non-master
     }
 }
 
 
+template<class ProcIDsContainer, class Type>
+void Foam::globalIndex::gather
+(
+    const label comm,
+    const ProcIDsContainer& procIDs,
+    const UList<Type>& fld,
+    List<Type>& allData,
+    const int tag,
+    UPstream::commsTypes commsType
+) const
+{
+    if (!UPstream::is_parallel(comm))
+    {
+        // Serial: (no-op)
+        return;
+    }
+
+    if (UPstream::myProcNo(comm) == (procIDs.empty() ? 0 : procIDs[0]))
+    {
+        // presize => totalSize()
+        allData.resize_nocopy(offsets_.back());
+    }
+    else
+    {
+        allData.clear();  // zero-size on non-master
+    }
+
+    globalIndex::gather(offsets_, comm, procIDs, fld, allData, tag, commsType);
+}
+
+
 // * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
 
 template<class Type>
@@ -404,7 +478,7 @@ void Foam::globalIndex::gather
     const UList<Type>& sendData,
     List<Type>& allData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 ) const
 {
@@ -415,6 +489,16 @@ void Foam::globalIndex::gather
         return;
     }
 
+    if (UPstream::master(comm))
+    {
+        allData.resize_nocopy(offsets_.back());  // == totalSize()
+    }
+    else
+    {
+        allData.clear();  // zero-size on non-master
+    }
+
+    if (!UPstream::usingNodeComms(comm))
     {
         globalIndex::gather
         (
@@ -426,9 +510,61 @@ void Foam::globalIndex::gather
             tag,
             commsType
         );
-        if (!UPstream::master(comm))
+    }
+    else
+    {
+        // Using node-based hierarchy
+
+        // Using comm-world and have node communication active
+        const auto interNodeComm = UPstream::commInterNode();
+        const auto localNodeComm = UPstream::commLocalNode();
+
+        // Stage 0 : The inter-node/intra-node offsets
+        labelList interNodeOffsets;
+        labelList localNodeOffsets;
+        this->splitNodeOffsets(interNodeOffsets, localNodeOffsets, comm);
+
+        // The first node re-uses the output (allData) when collecting
+        // content. All other nodes require temporary node-local storage.
+
+        List<Type> tmpNodeData;
+        if (UPstream::is_subrank(interNodeComm))
+        {
+            tmpNodeData.resize(localNodeOffsets.back());
+        }
+
+        List<Type>& nodeData =
+        (
+            UPstream::master(interNodeComm) ? allData : tmpNodeData
+        );
+
+        // Stage 1 : Gather data within the node
+        {
+            globalIndex::gather
+            (
+                localNodeOffsets,  // (master only)
+                localNodeComm,
+                UPstream::allProcs(localNodeComm),
+                sendData,
+                nodeData,  // node-local dest (or the allData parameter)
+                tag,
+                commsType
+            );
+        }
+
+        // Stage 2 : Gather data between nodes
+        if (UPstream::is_rank(interNodeComm))
         {
-            allData.clear();  // safety: zero-size on non-master
+            globalIndex::gather
+            (
+                interNodeOffsets,  // (master only)
+                interNodeComm,
+                UPstream::allProcs(interNodeComm),
+                nodeData,
+                allData,
+                tag,
+                commsType
+            );
         }
     }
 }
@@ -440,7 +576,7 @@ void Foam::globalIndex::gather
     const IndirectListBase<Type, Addr>& sendData,
     List<Type>& allData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 ) const
 {
@@ -450,6 +586,37 @@ void Foam::globalIndex::gather
         allData = sendData;
         return;
     }
+    else if constexpr (is_contiguous_v<Type>)
+    {
+        if (commsType == UPstream::commsTypes::nonBlocking)
+        {
+            // Contiguous data and requested nonBlocking.
+            //
+            // Flatten list (locally) so that we can benefit from using
+            // direct read/write of contiguous data
+
+            List<Type> flattened(sendData);
+
+            this->gather
+            (
+                flattened,
+                allData,
+                tag,
+                commsType,
+                comm
+            );
+            return;
+        }
+    }
+
+    if (UPstream::master(comm))
+    {
+        allData.resize_nocopy(offsets_.back());  // == totalSize()
+    }
+    else
+    {
+        allData.clear();  // zero-size on non-master
+    }
 
     {
         globalIndex::gather
@@ -462,10 +629,6 @@ void Foam::globalIndex::gather
             tag,
             commsType
         );
-        if (!UPstream::master(comm))
-        {
-            allData.clear();  // safety: zero-size on non-master
-        }
     }
 }
 
@@ -475,12 +638,12 @@ OutputContainer Foam::globalIndex::gather
 (
     const UList<Type>& sendData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 ) const
 {
     OutputContainer allData;
-    gather(sendData, allData, tag, commsType, comm);
+    this->gather(sendData, allData, tag, commsType, comm);
     return allData;
 }
 
@@ -490,12 +653,12 @@ OutputContainer Foam::globalIndex::gather
 (
     const IndirectListBase<Type, Addr>& sendData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 ) const
 {
     OutputContainer allData;
-    gather(sendData, allData, tag, commsType, comm);
+    this->gather(sendData, allData, tag, commsType, comm);
     return allData;
 }
 
@@ -505,18 +668,18 @@ void Foam::globalIndex::gatherInplace
 (
     List<Type>& fld,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 ) const
 {
     if (UPstream::parRun())
     {
         List<Type> allData;
-        gather(fld, allData, tag, commsType, comm);
+        this->gather(fld, allData, tag, commsType, comm);
 
         if (UPstream::master(comm))
         {
-            fld.transfer(allData);
+            fld = std::move(allData);
         }
         else
         {
@@ -533,8 +696,7 @@ void Foam::globalIndex::mpiGather
     const UList<Type>& sendData,
     OutputContainer& allData,
     const label comm,
-
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const int tag
 ) const
 {
@@ -696,8 +858,7 @@ OutputContainer Foam::globalIndex::mpiGather
 (
     const UList<Type>& sendData,
     const label comm,
-
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const int tag
 ) const
 {
@@ -712,8 +873,7 @@ void Foam::globalIndex::mpiGatherInplace
 (
     List<Type>& fld,
     const label comm,
-
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const int tag
 ) const
 {
@@ -724,7 +884,7 @@ void Foam::globalIndex::mpiGatherInplace
 
         if (UPstream::master(comm))
         {
-            fld.transfer(allData);
+            fld = std::move(allData);
         }
         else
         {
@@ -741,8 +901,7 @@ void Foam::globalIndex::mpiGatherOp
     const UList<Type>& sendData,
     OutputContainer& allData,
     const label comm,
-
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const int tag
 )
 {
@@ -765,8 +924,7 @@ OutputContainer Foam::globalIndex::mpiGatherOp
 (
     const UList<Type>& sendData,
     const label comm,
-
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const int tag
 )
 {
@@ -781,8 +939,7 @@ void Foam::globalIndex::mpiGatherInplaceOp
 (
     List<Type>& fld,
     const label comm,
-
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const int tag
 )
 {
@@ -793,7 +950,7 @@ void Foam::globalIndex::mpiGatherInplaceOp
 
         if (UPstream::master(comm))
         {
-            fld.transfer(allData);
+            fld = std::move(allData);
         }
         else
         {
@@ -810,7 +967,7 @@ void Foam::globalIndex::gatherOp
     const UList<Type>& sendData,
     List<Type>& allData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 )
 {
@@ -834,7 +991,7 @@ void Foam::globalIndex::gatherOp
     const IndirectListBase<Type, Addr>& sendData,
     List<Type>& allData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 )
 {
@@ -857,7 +1014,7 @@ OutputContainer Foam::globalIndex::gatherOp
 (
     const UList<Type>& sendData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 )
 {
@@ -872,7 +1029,7 @@ OutputContainer Foam::globalIndex::gatherOp
 (
     const IndirectListBase<Type, Addr>& sendData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 )
 {
@@ -887,7 +1044,7 @@ void Foam::globalIndex::gatherInplaceOp
 (
     List<Type>& fld,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 )
 {
@@ -910,33 +1067,28 @@ void Foam::globalIndex::scatter
     const UList<Type>& allFld,
     UList<Type>& fld,
     const int tag,
-    const UPstream::commsTypes preferredCommsType
+    UPstream::commsTypes commsType
 )
 {
     // low-level: no parRun guard
+    const int masterProci = (procIDs.empty() ? 0 : procIDs[0]);
 
-    // Cannot use non-blocking for non-contiguous data.
-    const UPstream::commsTypes commsType =
-    (
-        (
-            !is_contiguous_v<Type>
-         && UPstream::commsTypes::nonBlocking == preferredCommsType
-        )
-      ? UPstream::commsTypes::scheduled
-      : preferredCommsType
-    );
+    // Cannot use non-blocking for non-contiguous data
+    if constexpr (!is_contiguous_v<Type>)
+    {
+        commsType = UPstream::commsTypes::scheduled;
+    }
 
-    const label startOfRequests = UPstream::nRequests();
 
-    const int masterProci = procIDs.size() ? procIDs[0] : 0;
+    const label startOfRequests = UPstream::nRequests();
 
     if (UPstream::myProcNo(comm) == masterProci)
     {
         for (label i = 1; i < procIDs.size(); ++i)
         {
-            const SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
+            const SubList<Type> slot(allFld, off[i+1]-off[i], off[i]);
 
-            if (procSlot.empty())
+            if (slot.empty())
             {
                 // Nothing to do
             }
@@ -946,14 +1098,14 @@ void Foam::globalIndex::scatter
                 (
                     commsType,
                     procIDs[i],
-                    procSlot,
+                    slot,
                     tag,
                     comm
                 );
             }
             else
             {
-                OPstream::send(procSlot, commsType, procIDs[i], tag, comm);
+                OPstream::send(slot, commsType, procIDs[i], tag, comm);
             }
         }
 
@@ -962,8 +1114,15 @@ void Foam::globalIndex::scatter
         // Also handle the case where we have a slice of the full
         // list.
 
-        SubList<Type>(fld, off[1]-off[0]) =
-            SubList<Type>(allFld, off[1]-off[0], off[0]);
+        {
+            SubList<Type> dst(fld, off[1]-off[0]);
+            SubList<Type> src(allFld, off[1]-off[0], off[0]);
+
+            if (!dst.empty() && (dst.data() != src.data()))
+            {
+                dst = src;
+            }
+        }
     }
     else
     {
@@ -992,11 +1151,8 @@ void Foam::globalIndex::scatter
         }
     }
 
-    if (commsType == UPstream::commsTypes::nonBlocking)
-    {
-        // Wait for outstanding requests
-        UPstream::waitRequests(startOfRequests);
-    }
+    // Process sync
+    UPstream::waitRequests(startOfRequests);
 }
 
 
@@ -1006,7 +1162,7 @@ void Foam::globalIndex::scatter
     const UList<Type>& allData,
     UList<Type>& localData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 ) const
 {
@@ -1037,7 +1193,7 @@ OutputContainer Foam::globalIndex::scatter
 (
     const UList<Type>& allData,
     const int tag,
-    const UPstream::commsTypes commsType,
+    UPstream::commsTypes commsType,
     const label comm
 ) const
 {
@@ -1051,7 +1207,8 @@ OutputContainer Foam::globalIndex::scatter
             UPstream::listScatterValues<label>(this->localSizes(), comm)
         );
 
-        OutputContainer localData(count);
+        OutputContainer localData;
+        localData.resize(count);
         this->scatter(allData, localData, tag, commsType, comm);
 
         return localData;
diff --git a/src/Pstream/dummy/UPstream.C b/src/Pstream/dummy/UPstream.C
index c935914db63e1da9dd74ba905e662133cac2dcf8..50cf97f6071b64310a52ffa9d3096d6dc810a1f4 100644
--- a/src/Pstream/dummy/UPstream.C
+++ b/src/Pstream/dummy/UPstream.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2018 OpenFOAM Foundation
-    Copyright (C) 2016-2023 OpenCFD Ltd.
+    Copyright (C) 2016-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -55,6 +55,12 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
 }
 
 
+bool Foam::UPstream::setSharedMemoryCommunicators()
+{
+    return false;
+}
+
+
 void Foam::UPstream::shutdown(int errNo)
 {}
 
@@ -66,7 +72,7 @@ void Foam::UPstream::exit(int errNo)
 }
 
 
-void Foam::UPstream::abort()
+void Foam::UPstream::abort(int errNo)
 {
     // No MPI - just abort
     std::abort();
@@ -77,13 +83,29 @@ void Foam::UPstream::abort()
 
 void Foam::UPstream::allocateCommunicatorComponents
 (
-    const label,
-    const label
+    const label parentIndex,
+    const label index
+)
+{}
+
+
+void Foam::UPstream::dupCommunicatorComponents
+(
+    const label parentIndex,
+    const label index
+)
+{}
+
+void Foam::UPstream::splitCommunicatorComponents
+(
+    const label parentIndex,
+    const label index,
+    int colour
 )
 {}
 
 
-void Foam::UPstream::freeCommunicatorComponents(const label)
+void Foam::UPstream::freeCommunicatorComponents(const label index)
 {}
 
 
diff --git a/src/Pstream/mpi/PstreamGlobals.C b/src/Pstream/mpi/PstreamGlobals.C
index 0da82d704363812d12a49c840dd31b8b8b69be70..e5383b722e0831fbb0c247de998666ef6e1d6ce0 100644
--- a/src/Pstream/mpi/PstreamGlobals.C
+++ b/src/Pstream/mpi/PstreamGlobals.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2013-2015 OpenFOAM Foundation
-    Copyright (C) 2023 OpenCFD Ltd.
+    Copyright (C) 2023-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -35,22 +35,27 @@ Foam::DynamicList<MPI_Comm> Foam::PstreamGlobals::MPICommunicators_;
 Foam::DynamicList<MPI_Request> Foam::PstreamGlobals::outstandingRequests_;
 
 
-// * * * * * * * * * * * * * * * Global Functions  * * * * * * * * * * * * * //
+// * * * * * * * * * * * * * * * Communicators * * * * * * * * * * * * * * * //
 
-void Foam::PstreamGlobals::checkCommunicator
-(
-    const label comm,
-    const label toProcNo
-)
+void Foam::PstreamGlobals::initCommunicator(const label index)
 {
-    if (comm < 0 || comm >= PstreamGlobals::MPICommunicators_.size())
+    if (FOAM_UNLIKELY(index < 0 || index > MPICommunicators_.size()))
     {
         FatalErrorInFunction
-            << "toProcNo:" << toProcNo << " : illegal communicator "
-            << comm << nl
-            << "Communicator should be within range [0,"
-            << PstreamGlobals::MPICommunicators_.size()
-            << ')' << abort(FatalError);
+            << "PstreamGlobals out of sync with UPstream data. Problem."
+            << Foam::abort(FatalError);
+    }
+    else if (index == MPICommunicators_.size())
+    {
+        // Extend storage with null values
+        pendingMPIFree_.emplace_back(false);
+        MPICommunicators_.emplace_back(MPI_COMM_NULL);
+    }
+    else
+    {
+        // Init with null values
+        pendingMPIFree_[index] = false;
+        MPICommunicators_[index] = MPI_COMM_NULL;
     }
 }
 
diff --git a/src/Pstream/mpi/PstreamGlobals.H b/src/Pstream/mpi/PstreamGlobals.H
index c912c9876f217db227f439ed1bbe0b79a4c20ab2..48753956dbe44e773d32f7b648def8bf2744d034 100644
--- a/src/Pstream/mpi/PstreamGlobals.H
+++ b/src/Pstream/mpi/PstreamGlobals.H
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2013-2015 OpenFOAM Foundation
-    Copyright (C) 2022-2023 OpenCFD Ltd.
+    Copyright (C) 2022-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -50,6 +50,8 @@ namespace Foam
 namespace PstreamGlobals
 {
 
+// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
+
 // Track if MPI_Comm_free is needed for communicator index in MPICommunicators_
 extern DynamicList<bool> pendingMPIFree_;
 
@@ -61,11 +63,27 @@ extern DynamicList<MPI_Comm> MPICommunicators_;
 extern DynamicList<MPI_Request> outstandingRequests_;
 
 
-// * * * * * * * * * * * * * * * Global Functions  * * * * * * * * * * * * * //
+// * * * * * * * * * * * * * * * Communicators * * * * * * * * * * * * * * * //
+
+//- Initialize bookkeeping for MPI communicator index
+void initCommunicator(const label index);
+
+//- Fatal if communicator is outside the allocated range
+inline void checkCommunicator(int comm, int rank)
+{
+    if (FOAM_UNLIKELY(comm < 0 || comm >= MPICommunicators_.size()))
+    {
+        FatalErrorInFunction
+            << "rank:" << rank << " : illegal communicator "
+            << comm << nl
+            << "Communicator should be within range [0,"
+            << MPICommunicators_.size()
+            << ')' << Foam::abort(FatalError);
+    }
+}
 
-//- Fatal if comm is outside the allocated range
-void checkCommunicator(const label comm, const label toProcNo);
 
+// * * * * * * * * * * * * * * * * Requests  * * * * * * * * * * * * * * * * //
 
 //- Reset UPstream::Request to null and/or the index of the outstanding
 //- request to -1.
diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C
index 5314d8027aff1e82f0525f6ba06b3d3ea03cd8c2..91391c8442d246a6664ed7394f95132960fa744a 100644
--- a/src/Pstream/mpi/UPstream.C
+++ b/src/Pstream/mpi/UPstream.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2017 OpenFOAM Foundation
-    Copyright (C) 2016-2024 OpenCFD Ltd.
+    Copyright (C) 2016-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -26,14 +26,13 @@ License
 
 \*---------------------------------------------------------------------------*/
 
-#include "Pstream.H"
-#include "PstreamReduceOps.H"
+#include "UPstream.H"
 #include "PstreamGlobals.H"
 #include "profilingPstream.H"
-#include "int.H"
 #include "UPstreamWrapping.H"
 #include "collatedFileOperation.H"
 
+#include <algorithm>
 #include <cstdlib>
 #include <cstring>
 #include <memory>
@@ -197,9 +196,8 @@ bool Foam::UPstream::initNull()
 
 bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
 {
-    int numprocs = 0, myRank = 0;
-    int provided_thread_support = 0;
     int flag = 0;
+    int provided_thread_support = 0;
 
     MPI_Finalized(&flag);
     if (flag)
@@ -231,19 +229,25 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
         {
             Perr<< "UPstream::init : was already initialized\n";
         }
+
+        MPI_Query_thread(&provided_thread_support);
     }
     else
     {
+        // (SINGLE | FUNNELED | SERIALIZED | MULTIPLE)
+        int required_thread_support =
+        (
+            needsThread
+          ? MPI_THREAD_MULTIPLE
+          : MPI_THREAD_SINGLE
+        );
+
         MPI_Init_thread
         (
             &argc,
             &argv,
-            (
-                needsThread
-              ? MPI_THREAD_MULTIPLE
-              : MPI_THREAD_SINGLE
-            ),
-            &provided_thread_support
+            required_thread_support,
+           &provided_thread_support
         );
 
         ourMpi = true;
@@ -251,26 +255,26 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
 
     // Check argument list for local world
     label worldIndex = -1;
-    word world;
     for (int argi = 1; argi < argc; ++argi)
     {
         if (strcmp(argv[argi], "-world") == 0)
         {
-            worldIndex = argi++;
-            if (argi >= argc)
+            worldIndex = argi;
+            if (argi+1 >= argc)
             {
                 FatalErrorInFunction
-                    << "Missing world name to argument \"world\""
+                    << "Missing world name for option '-world'" << nl
                     << Foam::abort(FatalError);
             }
-            world = argv[argi];
             break;
         }
     }
 
-    // Filter 'world' option
+    // Extract world name and filter out '-world <name>' from argv list
+    word worldName;
     if (worldIndex != -1)
     {
+        worldName = argv[worldIndex+1];
         for (label i = worldIndex+2; i < argc; i++)
         {
             argv[i-2] = argv[i];
@@ -278,14 +282,15 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
         argc -= 2;
     }
 
-    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
-    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
+    int numProcs = 0, globalRanki = 0;
+    MPI_Comm_rank(MPI_COMM_WORLD, &globalRanki);
+    MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
 
     if (UPstream::debug)
     {
         Perr<< "UPstream::init :"
             << " thread-support : requested:" << needsThread
-            << " obtained:"
+            << " provided:"
             << (
                    (provided_thread_support == MPI_THREAD_SINGLE)
                  ? "SINGLE"
@@ -295,12 +300,12 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
                  ? "MULTIPLE"
                  : "other"
                )
-            << " procs:" << numprocs
-            << " rank:" << myRank
-            << " world:" << world << endl;
+            << " procs:" << numProcs
+            << " rank:" << globalRanki
+            << " world:" << worldName << endl;
     }
 
-    if (worldIndex == -1 && numprocs <= 1)
+    if (worldIndex == -1 && numProcs <= 1)
     {
         FatalErrorInFunction
             << "attempt to run parallel on 1 processor"
@@ -308,46 +313,78 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
     }
 
     // Initialise parallel structure
-    setParRun(numprocs, provided_thread_support == MPI_THREAD_MULTIPLE);
+    setParRun(numProcs, provided_thread_support == MPI_THREAD_MULTIPLE);
 
     if (worldIndex != -1)
     {
+        // Using local worlds.
         // During startup, so commWorld() == commGlobal()
+        const auto mpiGlobalComm =
+            PstreamGlobals::MPICommunicators_[UPstream::commGlobal()];
 
-        wordList worlds(numprocs);
-        worlds[UPstream::myProcNo(UPstream::commGlobal())] = world;
-        Pstream::gatherList
-        (
-            worlds,
-            UPstream::msgType(),
-            UPstream::commGlobal()
-        );
+        // Gather the names of all worlds and determine unique names/indices.
+        //
+        // Minimize communication and use low-level MPI to relying on any
+        // OpenFOAM structures which not yet have been created
 
-        // Compact
-        if (UPstream::master(UPstream::commGlobal()))
         {
-            DynamicList<word> worldNames(numprocs);
-            worldIDs_.resize_nocopy(numprocs);
+            // Include a trailing nul character in the lengths
+            int stride = int(worldName.size()) + 1;
+
+            // Use identical size on all ranks (avoids MPI_Allgatherv)
+            MPI_Allreduce
+            (
+                MPI_IN_PLACE,
+               &stride,
+                1,
+                MPI_INT,
+                MPI_MAX,
+                mpiGlobalComm
+            );
 
-            forAll(worlds, proci)
+            // Gather as an extended C-string with embedded nul characters
+            auto buffer_storage = std::make_unique<char[]>(numProcs*stride);
+            char* allStrings = buffer_storage.get();
+
+            // Fill in local value, slot starts at (rank*stride)
             {
-                const word& world = worlds[proci];
+                char* slot = (allStrings + (globalRanki*stride));
+                std::fill_n(slot, stride, '\0');
+                std::copy_n(worldName.data(), worldName.size(), slot);
+            }
 
-                worldIDs_[proci] = worldNames.find(world);
+            // Gather everything into the extended C-string
+            MPI_Allgather
+            (
+                MPI_IN_PLACE, 0, MPI_CHAR,
+                allStrings, stride, MPI_CHAR,
+                mpiGlobalComm
+            );
+
+            worldIDs_.resize_nocopy(numProcs);
+
+            // Transcribe and compact (unique world names)
+            DynamicList<word> uniqWorlds(numProcs);
+
+            for (label proci = 0; proci < numProcs; ++proci)
+            {
+                // Create from C-string at slot=(rank*stride),
+                // relying on the embedded nul chars
+                word world(allStrings + (proci*stride));
+
+                worldIDs_[proci] = uniqWorlds.find(world);
 
                 if (worldIDs_[proci] == -1)
                 {
-                    worldIDs_[proci] = worldNames.size();
-                    worldNames.push_back(world);
+                    worldIDs_[proci] = uniqWorlds.size();
+                    uniqWorlds.push_back(std::move(world));
                 }
             }
 
-            allWorlds_.transfer(worldNames);
+            allWorlds_ = std::move(uniqWorlds);
         }
-        Pstream::broadcasts(UPstream::commGlobal(), allWorlds_, worldIDs_);
 
-        const label myWorldId =
-            worldIDs_[UPstream::myProcNo(UPstream::commGlobal())];
+        const label myWorldId = worldIDs_[globalRanki];
 
         DynamicList<label> subRanks;
         forAll(worldIDs_, proci)
@@ -358,54 +395,107 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
             }
         }
 
-        // Allocate new communicator with comm-global as its parent
-        const label subComm =
-            UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks);
+        // New local-world communicator with comm-global as its parent.
+        // - the updated (const) world comm does not change after this.
+
+        UPstream::constWorldComm_ =
+            UPstream::newCommunicator(UPstream::commGlobal(), subRanks);
 
+        UPstream::worldComm = UPstream::constWorldComm_;
+        UPstream::warnComm = UPstream::constWorldComm_;
 
-        // Override worldComm
-        UPstream::worldComm = subComm;
-        // For testing: warn use of non-worldComm
-        UPstream::warnComm = UPstream::worldComm;
+        const int worldRanki = UPstream::myProcNo(UPstream::constWorldComm_);
 
         // MPI_COMM_SELF : the processor number wrt the new world communicator
         if (procIDs_[UPstream::commSelf()].size())
         {
-            procIDs_[UPstream::commSelf()].front() =
-                UPstream::myProcNo(subComm);
+            procIDs_[UPstream::commSelf()].front() = worldRanki;
+        }
+
+        // Name the old world communicator as '<openfoam:global>'
+        // - it is the inter-world communicator
+        if (MPI_COMM_NULL != mpiGlobalComm)
+        {
+            MPI_Comm_set_name(mpiGlobalComm, "<openfoam:global>");
+        }
+
+        const auto mpiWorldComm =
+            PstreamGlobals::MPICommunicators_[UPstream::constWorldComm_];
+
+        if (MPI_COMM_NULL != mpiWorldComm)
+        {
+            MPI_Comm_set_name(mpiWorldComm, ("world=" + worldName).data());
         }
 
         if (UPstream::debug)
         {
             // Check
-            int subNumProcs, subRank;
-            MPI_Comm_size
-            (
-                PstreamGlobals::MPICommunicators_[subComm],
-                &subNumProcs
-            );
-            MPI_Comm_rank
-            (
-                PstreamGlobals::MPICommunicators_[subComm],
-                &subRank
-            );
+            int newRanki, newSize;
+            MPI_Comm_rank(mpiWorldComm, &newRanki);
+            MPI_Comm_size(mpiWorldComm, &newSize);
 
-            Perr<< "UPstream::init : in world:" << world
-                << " using local communicator:" << subComm
-                << " rank " << subRank
-                << " of " << subNumProcs
-                << endl;
+            Perr<< "UPstream::init : in world:" << worldName
+                << " using local communicator:" << constWorldComm_
+                << " rank " << newRanki << " of " << newSize << endl;
         }
 
         // Override Pout prefix (move to setParRun?)
-        Pout.prefix() = '[' + world + '/' +  name(myProcNo(subComm)) + "] ";
+        Pout.prefix() = '[' + worldName + '/' + Foam::name(worldRanki) + "] ";
         Perr.prefix() = Pout.prefix();
     }
     else
     {
         // All processors use world 0
-        worldIDs_.resize_nocopy(numprocs);
+        worldIDs_.resize_nocopy(numProcs);
         worldIDs_ = 0;
+
+        const auto mpiWorldComm =
+            PstreamGlobals::MPICommunicators_[UPstream::constWorldComm_];
+
+        // Name the world communicator as '<openfoam:world>'
+        if (MPI_COMM_NULL != mpiWorldComm)
+        {
+            MPI_Comm_set_name(mpiWorldComm, "<openfoam:world>");
+        }
+    }
+
+
+    // Define inter-node and intra-node communicators
+    if (UPstream::nodeCommsControl_ >= 4)
+    {
+        // Debugging: split with given number per node
+        setHostCommunicators(UPstream::nodeCommsControl_);
+    }
+    #ifndef MSMPI_VER  /* Uncertain if this would work with MSMPI */
+    else if (UPstream::nodeCommsControl_ == 2)
+    {
+        // Defined based on shared-memory hardware information
+        setSharedMemoryCommunicators();
+    }
+    #endif
+    else
+    {
+        // Defined based on hostname, even if nominally disabled
+        setHostCommunicators();
+    }
+
+
+    // Provide some names for these communicators
+    if (MPI_COMM_NULL != PstreamGlobals::MPICommunicators_[commInterNode_])
+    {
+        MPI_Comm_set_name
+        (
+            PstreamGlobals::MPICommunicators_[commInterNode_],
+            "<openfoam:inter-node>"
+        );
+    }
+    if (MPI_COMM_NULL != PstreamGlobals::MPICommunicators_[commLocalNode_])
+    {
+        MPI_Comm_set_name
+        (
+            PstreamGlobals::MPICommunicators_[commLocalNode_],
+            "<openfoam:local-node>"
+        );
     }
 
     attachOurBuffers();
@@ -455,7 +545,7 @@ void Foam::UPstream::shutdown(int errNo)
 
     if (errNo != 0)
     {
-        MPI_Abort(MPI_COMM_WORLD, errNo);
+        UPstream::abort(errNo);
         return;
     }
 
@@ -515,9 +605,26 @@ void Foam::UPstream::exit(int errNo)
 }
 
 
-void Foam::UPstream::abort()
+void Foam::UPstream::abort(int errNo)
 {
-    MPI_Abort(MPI_COMM_WORLD, 1);
+    // TBD: only abort on our own communicator?
+    #if 0
+    MPI_Comm abortComm = MPI_COMM_WORLD;
+
+    const label index = UPstream::commGlobal();
+
+    if (index > 0 && index < PstreamGlobals::MPICommunicators_.size())
+    {
+        abortComm = PstreamGlobals::MPICommunicators_[index];
+        if (MPI_COMM_NULL == abortComm)
+        {
+            abortComm = MPI_COMM_WORLD;
+        }
+    }
+    MPI_Abort(abortComm, errNo);
+    #endif
+
+    MPI_Abort(MPI_COMM_WORLD, errNo);
 }
 
 
@@ -529,19 +636,9 @@ void Foam::UPstream::allocateCommunicatorComponents
     const label index
 )
 {
-    if (index == PstreamGlobals::MPICommunicators_.size())
-    {
-        // Extend storage with null values
-        PstreamGlobals::pendingMPIFree_.emplace_back(false);
-        PstreamGlobals::MPICommunicators_.emplace_back(MPI_COMM_NULL);
-    }
-    else if (index > PstreamGlobals::MPICommunicators_.size())
-    {
-        FatalErrorInFunction
-            << "PstreamGlobals out of sync with UPstream data. Problem."
-            << Foam::exit(FatalError);
-    }
+    PstreamGlobals::initCommunicator(index);
 
+    int returnCode = MPI_SUCCESS;
 
     if (parentIndex == -1)
     {
@@ -554,27 +651,19 @@ void Foam::UPstream::allocateCommunicatorComponents
                 << UPstream::commGlobal()
                 << Foam::exit(FatalError);
         }
+        auto& mpiNewComm = PstreamGlobals::MPICommunicators_[index];
 
-        PstreamGlobals::pendingMPIFree_[index] = false;
-        PstreamGlobals::MPICommunicators_[index] = MPI_COMM_WORLD;
+        // PstreamGlobals::pendingMPIFree_[index] = false;
+        // PstreamGlobals::MPICommunicators_[index] = MPI_COMM_WORLD;
 
-        // TBD: MPI_Comm_dup(MPI_COMM_WORLD, ...);
-        // with pendingMPIFree_[index] = true
-        // Note: freeCommunicatorComponents() may need an update
+        PstreamGlobals::pendingMPIFree_[index] = true;
+        MPI_Comm_dup(MPI_COMM_WORLD, &mpiNewComm);
 
-        MPI_Comm_rank
-        (
-            PstreamGlobals::MPICommunicators_[index],
-           &myProcNo_[index]
-        );
+        MPI_Comm_rank(mpiNewComm, &myProcNo_[index]);
 
         // Set the number of ranks to the actual number
-        int numProcs;
-        MPI_Comm_size
-        (
-            PstreamGlobals::MPICommunicators_[index],
-           &numProcs
-        );
+        int numProcs = 0;
+        MPI_Comm_size(mpiNewComm, &numProcs);
 
         // identity [0-numProcs], as 'int'
         procIDs_[index].resize_nocopy(numProcs);
@@ -589,21 +678,6 @@ void Foam::UPstream::allocateCommunicatorComponents
 
         MPI_Comm_rank(MPI_COMM_SELF, &myProcNo_[index]);
 
-        // Number of ranks is always 1 (self communicator)
-
-        #ifdef FULLDEBUG
-        int numProcs;
-        MPI_Comm_size(MPI_COMM_SELF, &numProcs);
-
-        if (numProcs != 1)
-        {
-            // Already finalized - this is an error
-            FatalErrorInFunction
-                << "MPI_COMM_SELF had " << numProcs << " != 1 ranks!\n"
-                << Foam::abort(FatalError);
-        }
-        #endif
-
         // For MPI_COMM_SELF : the process IDs within the world communicator.
         // Uses MPI_COMM_WORLD in case called before UPstream::commGlobal()
         // was initialized
@@ -613,17 +687,20 @@ void Foam::UPstream::allocateCommunicatorComponents
     }
     else
     {
-        // General sub-communicator
+        // General sub-communicator.
+        // Create based on the groupings predefined by procIDs_
+
+        const auto mpiParentComm =
+            PstreamGlobals::MPICommunicators_[parentIndex];
+
+        auto& mpiNewComm =
+            PstreamGlobals::MPICommunicators_[index];
 
         PstreamGlobals::pendingMPIFree_[index] = true;
 
         // Starting from parent
         MPI_Group parent_group;
-        MPI_Comm_group
-        (
-            PstreamGlobals::MPICommunicators_[parentIndex],
-           &parent_group
-        );
+        MPI_Comm_group(mpiParentComm, &parent_group);
 
         MPI_Group active_group;
         MPI_Group_incl
@@ -638,18 +715,18 @@ void Foam::UPstream::allocateCommunicatorComponents
         // ms-mpi (10.0 and others?) does not have MPI_Comm_create_group
         MPI_Comm_create
         (
-            PstreamGlobals::MPICommunicators_[parentIndex],
+            mpiParentComm,
             active_group,
-           &PstreamGlobals::MPICommunicators_[index]
+           &mpiNewComm
         );
         #else
         // Create new communicator for this group
         MPI_Comm_create_group
         (
-            PstreamGlobals::MPICommunicators_[parentIndex],
+            mpiParentComm,
             active_group,
             UPstream::msgType(),
-           &PstreamGlobals::MPICommunicators_[index]
+           &mpiNewComm
         );
         #endif
 
@@ -657,27 +734,34 @@ void Foam::UPstream::allocateCommunicatorComponents
         MPI_Group_free(&parent_group);
         MPI_Group_free(&active_group);
 
-        if (PstreamGlobals::MPICommunicators_[index] == MPI_COMM_NULL)
+        if (MPI_COMM_NULL == mpiNewComm)
         {
-            // No communicator created
+            // This process is not involved in the new communication pattern
             myProcNo_[index] = -1;
             PstreamGlobals::pendingMPIFree_[index] = false;
+
+            // ~~~~~~~~~
+            // IMPORTANT
+            // ~~~~~~~~~
+            // Always retain knowledge of the inter-node leaders,
+            // even if this process is not on that communicator.
+            // This will help when constructing topology-aware communication.
+
+            if (index != commInterNode_)
+            {
+                procIDs_[index].clear();
+            }
         }
         else
         {
-            if
-            (
-                MPI_Comm_rank
-                (
-                    PstreamGlobals::MPICommunicators_[index],
-                   &myProcNo_[index]
-                )
-            )
+            returnCode = MPI_Comm_rank(mpiNewComm, &myProcNo_[index]);
+
+            if (FOAM_UNLIKELY(MPI_SUCCESS != returnCode))
             {
                 FatalErrorInFunction
                     << "Problem :"
                     << " when allocating communicator at " << index
-                    << " from ranks " << procIDs_[index]
+                    << " from ranks " << flatOutput(procIDs_[index])
                     << " of parent " << parentIndex
                     << " cannot find my own rank"
                     << Foam::exit(FatalError);
@@ -687,6 +771,99 @@ void Foam::UPstream::allocateCommunicatorComponents
 }
 
 
+void Foam::UPstream::dupCommunicatorComponents
+(
+    const label parentIndex,
+    const label index
+)
+{
+    PstreamGlobals::initCommunicator(index);
+
+    PstreamGlobals::pendingMPIFree_[index] = true;
+    MPI_Comm_dup
+    (
+        PstreamGlobals::MPICommunicators_[parentIndex],
+       &PstreamGlobals::MPICommunicators_[index]
+    );
+
+    myProcNo_[index] = myProcNo_[parentIndex];
+    procIDs_[index] = procIDs_[parentIndex];
+}
+
+
+void Foam::UPstream::splitCommunicatorComponents
+(
+    const label parentIndex,
+    const label index,
+    int colour
+)
+{
+    PstreamGlobals::initCommunicator(index);
+
+    // ------------------------------------------------------------------------
+    // Create sub-communicator according to its colouring
+    //     => MPI_Comm_split().
+    // Since other parts of OpenFOAM may still need a view of the siblings:
+    //     => MPI_Group_translate_ranks().
+    //
+    // The MPI_Group_translate_ranks() step can be replaced with an
+    // MPI_Allgather() of the involved parent ranks (since we alway maintain
+    // the relative rank order when splitting).
+    //
+    // Since MPI_Comm_split() already does an MPI_Allgather() internally
+    // to pick out the colours (and do any sorting), we can simply
+    // do the same thing:
+    //
+    // Do the Allgather first and pick out identical colours to define the
+    // group and create a communicator based on that.
+    //
+    // This is no worse than the Allgather communication overhead of using
+    // MPI_Comm_split() and saves the extra translate_ranks step.
+    // ------------------------------------------------------------------------
+
+    const auto mpiParentComm = PstreamGlobals::MPICommunicators_[parentIndex];
+
+    int parentRank = 0;
+    int parentSize = 0;
+    MPI_Comm_rank(mpiParentComm, &parentRank);
+    MPI_Comm_size(mpiParentComm, &parentSize);
+
+    // Initialize, first marking the 'procIDs_' with the colours
+    auto& procIds = procIDs_[index];
+
+    myProcNo_[index] = -1;
+    procIds.resize_nocopy(parentSize);
+    procIds[parentRank] = colour;
+
+    MPI_Allgather
+    (
+        MPI_IN_PLACE, 0, MPI_INT,
+        procIds.data(), 1, MPI_INT,
+        mpiParentComm
+    );
+
+    if (colour < 0)
+    {
+        procIds.clear();
+    }
+    else
+    {
+        auto last =
+            std::copy_if
+            (
+                procIds.cbegin(),
+                procIds.cend(),
+                procIds.begin(),
+                [=](int c){ return (c == colour); }
+            );
+
+        procIds.resize(std::distance(procIds.begin(), last));
+    }
+
+    allocateCommunicatorComponents(parentIndex, index);
+}
+
+
 void Foam::UPstream::freeCommunicatorComponents(const label index)
 {
     if (UPstream::debug)
@@ -717,6 +894,164 @@ void Foam::UPstream::freeCommunicatorComponents(const label index)
 }
 
 
+bool Foam::UPstream::setSharedMemoryCommunicators()
+{
+    // Uses the world communicator (not global communicator)
+
+    // Skip if non-parallel
+    if (!UPstream::parRun())
+    {
+        numNodes_ = 1;
+        return false;
+    }
+
+    if (FOAM_UNLIKELY(commInterNode_ >= 0 || commLocalNode_ >= 0))
+    {
+        // Failed sanity check
+        FatalErrorInFunction
+            << "Node communicator(s) already created!" << endl
+            << Foam::abort(FatalError);
+        return false;
+    }
+
+    commInterNode_ = getAvailableCommIndex(constWorldComm_);
+    commLocalNode_ = getAvailableCommIndex(constWorldComm_);
+
+    PstreamGlobals::initCommunicator(commInterNode_);
+    PstreamGlobals::initCommunicator(commLocalNode_);
+
+    // Overwritten later
+    myProcNo_[commInterNode_] = UPstream::masterNo();
+    myProcNo_[commLocalNode_] = UPstream::masterNo();
+
+    // Sorted order, purely cosmetic
+    if (commLocalNode_ < commInterNode_)
+    {
+        std::swap(commLocalNode_, commInterNode_);
+    }
+
+    if (debug)
+    {
+        Perr<< "Allocating node communicators "
+            << commInterNode_ << ", " << commLocalNode_ << nl
+            << "    parent : " << constWorldComm_ << nl
+            << endl;
+    }
+
+
+    const auto mpiParentComm =
+        PstreamGlobals::MPICommunicators_[constWorldComm_];
+
+    auto& mpiLocalNode =
+        PstreamGlobals::MPICommunicators_[commLocalNode_];
+
+    int parentRank = 0;
+    int parentSize = 0;
+    MPI_Comm_rank(mpiParentComm, &parentRank);
+    MPI_Comm_size(mpiParentComm, &parentSize);
+
+    List<int> nodeLeaders(parentSize);
+    nodeLeaders = -1;
+
+    MPI_Comm_split_type
+    (
+        mpiParentComm,
+        MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,
+       &mpiLocalNode
+    );
+
+    if (FOAM_UNLIKELY(MPI_COMM_NULL == mpiLocalNode))
+    {
+        // This process is not involved in an intra-host communication?
+        // - should never happen!
+
+        const label index = commLocalNode_;
+        PstreamGlobals::pendingMPIFree_[index] = false;
+
+        myProcNo_[index] = -1;
+        procIDs_[index].clear();
+
+        FatalErrorInFunction
+            << "Comm_split_type(shared) failed\n"
+            << Foam::abort(FatalError);
+    }
+    else
+    {
+        // This process is involved in intra-host communication
+        const label index = commLocalNode_;
+        auto& procIds = procIDs_[index];
+
+        PstreamGlobals::pendingMPIFree_[index] = true;
+
+        int localRank = 0;
+        int localSize = 0;
+        MPI_Comm_rank(mpiLocalNode, &localRank);
+        MPI_Comm_size(mpiLocalNode, &localSize);
+
+        if (localRank == 0)
+        {
+            // This process is a host leader - mark its position
+            nodeLeaders[parentRank] = parentRank;
+        }
+
+        procIds.resize_nocopy(localSize);
+        procIds[localRank] = UPstream::myProcNo(UPstream::constWorldComm_);
+        // OR: procIds[localRank] = parentRank;
+
+        // Get all of the siblings (within the node)
+        MPI_Allgather
+        (
+            MPI_IN_PLACE, 0, MPI_INT,
+            procIds.data(), 1, MPI_INT,
+            mpiLocalNode
+        );
+    }
+
+
+    // Get all of the host-leader information and find who they are.
+    {
+        auto& procIds = procIDs_[commInterNode_];
+
+        MPI_Allgather
+        (
+            MPI_IN_PLACE, 0, MPI_INT,
+            nodeLeaders.data(), 1, MPI_INT,
+            mpiParentComm
+        );
+
+        // Capture the size (number of nodes) before doing anything further
+        numNodes_ = std::count_if
+        (
+            nodeLeaders.cbegin(),
+            nodeLeaders.cend(),
+            [](int rank){ return (rank >= 0); }
+        );
+
+        // ~~~~~~~~~
+        // IMPORTANT
+        // ~~~~~~~~~
+        // Always retain knowledge of the inter-node leaders,
+        // even if this process is not on that communicator.
+        // This will help when constructing topology-aware communication.
+
+        procIds.resize_nocopy(numNodes_);
+
+        std::copy_if
+        (
+            nodeLeaders.cbegin(),
+            nodeLeaders.cend(),
+            procIds.begin(),
+            [](int rank){ return (rank >= 0); }
+        );
+    }
+
+    // From master to host-leader. Ranks between hosts.
+    allocateCommunicatorComponents(UPstream::worldComm, commInterNode_);
+
+    return true;
+}
+
+
 void Foam::UPstream::barrier(const label communicator, UPstream::Request* req)
 {
     // No-op for non-parallel or not on communicator
diff --git a/src/functionObjects/field/AMIWeights/AMIWeights.C b/src/functionObjects/field/AMIWeights/AMIWeights.C
index 004853d8f517f3f0ce5fb2414e0f0210b945133b..7ec095d55dc73c200ca2fe31597826e1c4e97b6c 100644
--- a/src/functionObjects/field/AMIWeights/AMIWeights.C
+++ b/src/functionObjects/field/AMIWeights/AMIWeights.C
@@ -238,8 +238,7 @@ void Foam::functionObjects::AMIWeights::writeWeightField
     );
 
     // Collect field
-    scalarField mergedWeights;
-    globalFaces().gather(weightSum, mergedWeights);
+    scalarField mergedWeights = globalFaces().gather(weightSum);
 
     const bool isACMI = isA<cyclicACMIPolyPatch>(cpp);
 
@@ -248,7 +247,7 @@ void Foam::functionObjects::AMIWeights::writeWeightField
     {
         const cyclicACMIPolyPatch& pp = refCast<const cyclicACMIPolyPatch>(cpp);
 
-        globalFaces().gather(pp.mask(), mergedMask);
+        mergedMask = globalFaces().gather(pp.mask());
     }
 
     if (Pstream::master())
diff --git a/src/meshTools/multiWorld/multiWorldConnectionsObject.C b/src/meshTools/multiWorld/multiWorldConnectionsObject.C
index caa05034311033daf93bf28f3ab78a446d49f24e..d10def446416d4ffe80b5f986926c14be6ba9f41 100644
--- a/src/meshTools/multiWorld/multiWorldConnectionsObject.C
+++ b/src/meshTools/multiWorld/multiWorldConnectionsObject.C
@@ -162,7 +162,7 @@ Foam::label Foam::multiWorldConnections::createCommunicator(const edge& worlds)
     }
 
     // Allocate new communicator with global world
-    comm = UPstream::allocateCommunicator(UPstream::commGlobal(), subRanks);
+    comm = UPstream::newCommunicator(UPstream::commGlobal(), subRanks);
 
     if (debug & 2)
     {
diff --git a/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C b/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C
index bdb63f152e13184d6d624babf7be5be013612aa8..888e4dbf49c9a74f3b31694e56fba2d7639b86bd 100644
--- a/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C
+++ b/src/parallel/distributed/distributedTriSurfaceMesh/distributedTriSurfaceMesh.C
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2011-2016 OpenFOAM Foundation
-    Copyright (C) 2015-2024 OpenCFD Ltd.
+    Copyright (C) 2015-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -2441,12 +2441,7 @@ void Foam::distributedTriSurfaceMesh::independentlyDistributedBbs
 //        // Gather all borderTris
 //        //globalIndex globalBorderTris(borderTris.size());
 //        //pointField globalBorderCentres(allCentres, borderTris);
-//        //globalBorderTris.gather
-//        //(
-//        //    UPstream::worldComm,
-//        //    UPstream::allProcs(UPstream::worldComm),
-//        //    globalBorderCentres
-//        //);
+//        //globalBorderTris.gatherInplace(globalBorderCentres);
 //        pointField globalBorderCentres(allCentres);
 //        map.distribute(globalBorderCentres);
 //
@@ -2586,12 +2581,7 @@ void Foam::distributedTriSurfaceMesh::independentlyDistributedBbs
             {
                 allCentres[trii] = s[trii].centre(s.points());
             }
-            globalTris().gather
-            (
-                UPstream::worldComm,
-                UPstream::allProcs(UPstream::worldComm),
-                allCentres
-            );
+            globalTris().gatherInplace(allCentres);
         }
 
         // Determine local decomposition
@@ -2635,13 +2625,8 @@ void Foam::distributedTriSurfaceMesh::independentlyDistributedBbs
             }
 
             // Scatter back to processors
-            globalTris().scatter
-            (
-                UPstream::worldComm,
-                UPstream::allProcs(UPstream::worldComm),
-                allDistribution,
-                distribution
-            );
+            globalTris().scatter(allDistribution, distribution);
+
             if (debug)
             {
                 Pout<< "distributedTriSurfaceMesh::"
diff --git a/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C b/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C
index fa4473ee4a6486f2e523963d077aa83d9b602051..d92f86fec018e5615ce1189b3ef465869e386324 100644
--- a/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C
+++ b/src/randomProcesses/noise/noiseModels/surfaceNoise/surfaceNoise.C
@@ -5,7 +5,7 @@
     \\  /    A nd           | www.openfoam.com
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
-    Copyright (C) 2015-2023 OpenCFD Ltd.
+    Copyright (C) 2015-2025 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -274,12 +274,9 @@ scalar surfaceNoise::surfaceAverage
         if (Pstream::parRun())
         {
             // Collect the surface data so that we can output the surfaces
-            scalarField allData;
-
-            procFaceAddr.gather
+            scalarField allData = procFaceAddr.gather
             (
                 data,
-                allData,
                 UPstream::msgType(),
                 commType_,
                 UPstream::worldComm
@@ -343,12 +340,9 @@ scalar surfaceNoise::writeSurfaceData
     if (Pstream::parRun())
     {
         // Collect the surface data so that we can output the surfaces
-        scalarField allData;
-
-        procFaceAddr.gather
+        scalarField allData = procFaceAddr.gather
         (
             data,
-            allData,
             UPstream::msgType(),
             commType_,
             UPstream::worldComm