From 88bd9123748ad0606688c40e6257c928975786a8 Mon Sep 17 00:00:00 2001
From: Henry Weller <http://cfd.direct>
Date: Sat, 2 Apr 2016 18:32:11 +0100
Subject: [PATCH] Pstream: optimisation of data exchange

Contributed by Mattijs Janssens.

1. Any non-blocking data exchange needs to know the receive sizes in
   advance so that it can size the receive buffers.  For "halo" exchanges
   this is not a problem since the sizes are known in advance, but for
   all other data exchanges the sizes themselves have to be communicated
   first.

   Previously this was done by gathering the per-processor send sizes on
   the master and scattering the combined table back, so that all
   processors
   - had the same information
   - could work out who was sending what to where and hence what needed
     to be received.

   This is now changed so that each size is sent only to its destination
   processor (instead of to all processors as before). This means that
   - the list of sizes is now of size nProcs instead of nProcs*nProcs
   - the round trip via the master is cut out by using a native MPI call
     (MPI_Alltoall)

   This causes a small change to the API of exchange and PstreamBuffers:
   they now return only the sizes of the local receive buffers (a
   labelList) and not the sizes of the buffers on all processors (a
   labelListList). See the sketch below.
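
   As an illustration, the new calling pattern is sketched below. The
   function signatures are the ones added to Pstream.H by this patch; the
   variable names (sendBufs, recvBufs, recvSizes) are illustrative only
   and the usual OpenFOAM includes (Pstream.H) are assumed.

       // One send buffer per destination processor in the communicator
       labelListList sendBufs(Pstream::nProcs());
       // ... fill sendBufs[procI] with the labels destined for procI ...

       // Exchange sizes: recvSizes[procI] is the number of labels that
       // procI sends to me - a labelList of size nProcs instead of the
       // old nProcs*nProcs labelListList
       labelList recvSizes;
       Pstream::exchangeSizes(sendBufs, recvSizes);

       // Sized, non-blocking exchange into recvBufs
       labelListList recvBufs;
       Pstream::exchange<labelList, label>(sendBufs, recvSizes, recvBufs);

       // ... or let exchange determine the receive sizes itself
       Pstream::exchange<labelList, label>(sendBufs, recvBufs);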

2. The order in which sends are issued when scattering information from
   the master processor to the other processors is reversed. The scatter
   is done in a tree-like fashion: each processor has a set of processors
   to receive from / send to. When receiving, it first receives from the
   processors with the fewest sub-processors (i.e. the ones that return
   first). When sending it needs to do the opposite: start sending to the
   processor with the largest sub-tree, since that is the critical path.
   See the sketch below.
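
   In code this is simply a reversed loop over the downstairs neighbours
   when scattering (see gatherScatter.C and combineGatherScatter.C in the
   diff below). The sketch assumes the usual tree communication structure
   where myComm is this processor's commsStruct; the stream construction
   is abbreviated.

       // Gather: receive from the downstairs neighbours in forward order
       // (smallest sub-trees first - they finish first)
       forAll(myComm.below(), belowI)
       {
           // ... receive from myComm.below()[belowI] ...
       }

       // Scatter: send to the downstairs neighbours in reverse order so
       // that the largest sub-tree, i.e. the critical path, is started
       // first
       forAllReverse(myComm.below(), belowI)
       {
           label belowID = myComm.below()[belowI];

           OPstream toBelow(UPstream::scheduled, belowID);
           toBelow << value;    // the data being scattered
       }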
---
 applications/test/parallel/Test-parallel.C    | 13 ++--
 .../DelaunayMesh/DistributedDelaunayMesh.C    | 17 ++---
 .../backgroundMeshDecomposition.C             | 15 ++--
 .../IOobjects/IOdictionary/IOdictionaryIO.C   |  6 +-
 src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H  | 35 +++++++--
 .../db/IOstreams/Pstreams/PstreamBuffers.C    | 26 ++-----
 .../db/IOstreams/Pstreams/PstreamBuffers.H    |  4 +-
 src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H | 10 ++-
 .../IOstreams/Pstreams/combineGatherScatter.C |  6 +-
 src/OpenFOAM/db/IOstreams/Pstreams/exchange.C | 71 ++++++++++++++-----
 .../db/IOstreams/Pstreams/gatherScatter.C     |  6 +-
 .../db/IOstreams/Pstreams/gatherScatterList.C |  2 +-
 src/OpenFOAM/db/regIOobject/regIOobjectRead.C |  5 +-
 .../mapPolyMesh/mapDistribute/mapDistribute.C |  4 --
 src/Pstream/dummy/UPstream.C                  | 13 +++-
 src/Pstream/mpi/UPstream.C                    | 48 +++++++++++++
 src/lagrangian/basic/Cloud/Cloud.C            | 14 ++--
 .../basic/InteractionLists/InteractionLists.C | 16 ++---
 src/meshTools/regionSplit/regionSplit.C       | 18 +----
 19 files changed, 206 insertions(+), 123 deletions(-)

diff --git a/applications/test/parallel/Test-parallel.C b/applications/test/parallel/Test-parallel.C
index a2072eadd9e..62d6150ce4f 100644
--- a/applications/test/parallel/Test-parallel.C
+++ b/applications/test/parallel/Test-parallel.C
@@ -54,7 +54,7 @@ int main(int argc, char *argv[])
     // Test mapDistribute
     // ~~~~~~~~~~~~~~~~~~
 
-    if (false)
+    if (true)
     {
         Random rndGen(43544*Pstream::myProcNo());
 
@@ -80,11 +80,6 @@ int main(int argc, char *argv[])
             nSend[procI]++;
         }
 
-        // Sync how many to send
-        labelListList allNTrans(Pstream::nProcs());
-        allNTrans[Pstream::myProcNo()] = nSend;
-        combineReduce(allNTrans, UPstream::listEq());
-
         // Collect items to be sent
         labelListList sendMap(Pstream::nProcs());
         forAll(sendMap, procI)
@@ -98,11 +93,15 @@ int main(int argc, char *argv[])
             sendMap[procI][nSend[procI]++] = i;
         }
 
+        // Sync how many to send
+        labelList nRecv;
+        Pstream::exchangeSizes(sendMap, nRecv);
+
         // Collect items to be received
         labelListList recvMap(Pstream::nProcs());
         forAll(recvMap, procI)
         {
-            recvMap[procI].setSize(allNTrans[procI][Pstream::myProcNo()]);
+            recvMap[procI].setSize(nRecv[procI]);
         }
 
         label constructSize = 0;
diff --git a/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/DelaunayMesh/DistributedDelaunayMesh.C b/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/DelaunayMesh/DistributedDelaunayMesh.C
index 7a672354ecd..a698561822a 100644
--- a/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/DelaunayMesh/DistributedDelaunayMesh.C
+++ b/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/DelaunayMesh/DistributedDelaunayMesh.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2012-2015 OpenFOAM Foundation
+    \\  /    A nd           | Copyright (C) 2012-2016 OpenFOAM Foundation
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -56,14 +56,6 @@ Foam::DistributedDelaunayMesh<Triangulation>::buildMap
         nSend[procI]++;
     }
 
-    // Send over how many I need to receive
-    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    labelListList sendSizes(Pstream::nProcs());
-
-    sendSizes[Pstream::myProcNo()] = nSend;
-
-    combineReduce(sendSizes, UPstream::listEq());
 
     // 2. Size sendMap
     labelListList sendMap(Pstream::nProcs());
@@ -83,6 +75,11 @@ Foam::DistributedDelaunayMesh<Triangulation>::buildMap
         sendMap[procI][nSend[procI]++] = i;
     }
 
+    // 4. Send over how many I need to receive
+    labelList recvSizes;
+    Pstream::exchangeSizes(sendMap, recvSizes);
+
+
     // Determine receive map
     // ~~~~~~~~~~~~~~~~~~~~~
 
@@ -100,7 +97,7 @@ Foam::DistributedDelaunayMesh<Triangulation>::buildMap
     {
         if (procI != Pstream::myProcNo())
         {
-            label nRecv = sendSizes[procI][Pstream::myProcNo()];
+            label nRecv = recvSizes[procI];
 
             constructMap[procI].setSize(nRecv);
 
diff --git a/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/backgroundMeshDecomposition/backgroundMeshDecomposition.C b/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/backgroundMeshDecomposition/backgroundMeshDecomposition.C
index a21e6cecfcc..72a0b76e55d 100644
--- a/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/backgroundMeshDecomposition/backgroundMeshDecomposition.C
+++ b/applications/utilities/mesh/generation/foamyMesh/conformalVoronoiMesh/backgroundMeshDecomposition/backgroundMeshDecomposition.C
@@ -61,14 +61,6 @@ Foam::autoPtr<Foam::mapDistribute> Foam::backgroundMeshDecomposition::buildMap
         nSend[procI]++;
     }
 
-    // Send over how many I need to receive
-    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    labelListList sendSizes(Pstream::nProcs());
-
-    sendSizes[Pstream::myProcNo()] = nSend;
-
-    combineReduce(sendSizes, UPstream::listEq());
 
     // 2. Size sendMap
     labelListList sendMap(Pstream::nProcs());
@@ -88,6 +80,11 @@ Foam::autoPtr<Foam::mapDistribute> Foam::backgroundMeshDecomposition::buildMap
         sendMap[procI][nSend[procI]++] = i;
     }
 
+    // 4. Send over how many I need to receive
+    labelList recvSizes;
+    Pstream::exchangeSizes(sendMap, recvSizes);
+
+
     // Determine receive map
     // ~~~~~~~~~~~~~~~~~~~~~
 
@@ -105,7 +102,7 @@ Foam::autoPtr<Foam::mapDistribute> Foam::backgroundMeshDecomposition::buildMap
     {
         if (procI != Pstream::myProcNo())
         {
-            label nRecv = sendSizes[procI][Pstream::myProcNo()];
+            label nRecv = recvSizes[procI];
 
             constructMap[procI].setSize(nRecv);
 
diff --git a/src/OpenFOAM/db/IOobjects/IOdictionary/IOdictionaryIO.C b/src/OpenFOAM/db/IOobjects/IOdictionary/IOdictionaryIO.C
index 99204510075..5be036fcec5 100644
--- a/src/OpenFOAM/db/IOobjects/IOdictionary/IOdictionaryIO.C
+++ b/src/OpenFOAM/db/IOobjects/IOdictionary/IOdictionaryIO.C
@@ -109,8 +109,10 @@ void Foam::IOdictionary::readFile(const bool masterOnly)
             IOdictionary::readData(fromAbove);
         }
 
-        // Send to my downstairs neighbours
-        forAll(myComm.below(), belowI)
+        // Send to my downstairs neighbours. Note reverse order not
+        // necessary here but just for consistency with other uses
+        // (e.g. gatherScatter.C)
+        forAllReverse(myComm.below(), belowI)
         {
             if (debug)
             {
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
index d501f3d4430..e2ec02e88be 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/Pstream.H
@@ -303,21 +303,42 @@ public:
 
         // Exchange
 
-            //- Exchange data. Sends sendData, receives into recvData, sets
-            //  sizes (not bytes). sizes[p0][p1] is what processor p0 has
-            //  sent to p1. Continuous data only.
-            //  If block=true will wait for all transfers to finish.
+            //- Helper: exchange contiguous data. Sends sendData, receives into
+            //  recvData. If block=true will wait for all transfers to finish.
             template<class Container, class T>
             static void exchange
             (
-                const List<Container >&,
-                List<Container >&,
-                labelListList& sizes,
+                const UList<Container>& sendData,
+                const labelUList& recvSizes,
+                List<Container>& recvData,
                 const int tag = UPstream::msgType(),
                 const label comm = UPstream::worldComm,
                 const bool block = true
             );
 
+            //- Helper: exchange sizes of sendData. sendData is the data per
+            //  processor (in the communicator). Returns sizes of sendData
+            //  on the sending processor.
+            template<class Container>
+            static void exchangeSizes
+            (
+                const Container& sendData,
+                labelList& sizes,
+                const label comm = UPstream::worldComm
+            );
+
+            //- Exchange contiguous data. Sends sendData, receives into
+            //  recvData. Determines sizes to receive.
+            //  If block=true will wait for all transfers to finish.
+            template<class Container, class T>
+            static void exchange
+            (
+                const UList<Container>& sendData,
+                List<Container>& recvData,
+                const int tag = UPstream::msgType(),
+                const label comm = UPstream::worldComm,
+                const bool block = true
+            );
 };
 
 
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C
index f6ab958e7c1..73616e681aa 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2011-2015 OpenFOAM Foundation
+    \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -85,12 +85,10 @@ void Foam::PstreamBuffers::finishedSends(const bool block)
 
     if (commsType_ == UPstream::nonBlocking)
     {
-        labelListList sizes;
         Pstream::exchange<DynamicList<char>, char>
         (
             sendBuf_,
             recvBuf_,
-            sizes,
             tag_,
             comm_,
             block
@@ -99,17 +97,19 @@ void Foam::PstreamBuffers::finishedSends(const bool block)
 }
 
 
-void Foam::PstreamBuffers::finishedSends(labelListList& sizes, const bool block)
+void Foam::PstreamBuffers::finishedSends(labelList& recvSizes, const bool block)
 {
     finishedSendsCalled_ = true;
 
     if (commsType_ == UPstream::nonBlocking)
     {
+        Pstream::exchangeSizes(sendBuf_, recvSizes, comm_);
+
         Pstream::exchange<DynamicList<char>, char>
         (
             sendBuf_,
+            recvSizes,
             recvBuf_,
-            sizes,
             tag_,
             comm_,
             block
@@ -123,22 +123,8 @@ void Foam::PstreamBuffers::finishedSends(labelListList& sizes, const bool block)
             << " since transfers already in progress. Use non-blocking instead."
             << exit(FatalError);
 
-        // Note: possible only if using different tag from write started
+        // Note: maybe possible only if using different tag from write started
         // by ~UOPstream. Needs some work.
-        //sizes.setSize(UPstream::nProcs(comm));
-        //labelList& nsTransPs = sizes[UPstream::myProcNo(comm)];
-        //nsTransPs.setSize(UPstream::nProcs(comm));
-        //
-        //forAll(sendBuf_, procI)
-        //{
-        //    nsTransPs[procI] = sendBuf_[procI].size();
-        //}
-        //
-        //// Send sizes across.
-        //int oldTag = UPstream::msgType();
-        //UPstream::msgType() = tag_;
-        //combineReduce(sizes, UPstream::listEq());
-        //UPstream::msgType() = oldTag;
     }
 }
 
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H
index 5da9dd218bb..16caa9d7209 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/PstreamBuffers.H
@@ -149,9 +149,9 @@ public:
         void finishedSends(const bool block = true);
 
         //- Mark all sends as having been done. Same as above but also returns
-        //  sizes (bytes) transferred. Note:currently only valid for
+        //  sizes (bytes) received. Note:currently only valid for
         //  non-blocking.
-        void finishedSends(labelListList& sizes, const bool block = true);
+        void finishedSends(labelList& recvSizes, const bool block = true);
 
         //- Clear storage and reset
         void clear();
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
index 67450666d26..9c66b0852c5 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
@@ -489,7 +489,15 @@ public:
         //- Abort program
         static void abort();
 
-
+        //- Exchange label with all processors (in the communicator).
+        //  sendData[procI] is the label to send to procI.
+        //  After return recvData contains the data from the other processors.
+        static void allToAll
+        (
+            const labelUList& sendData,
+            labelUList& recvData,
+            const label communicator = 0
+        );
 };
 
 
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/combineGatherScatter.C b/src/OpenFOAM/db/IOstreams/Pstreams/combineGatherScatter.C
index 345e766aea8..d1ac5499225 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/combineGatherScatter.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/combineGatherScatter.C
@@ -218,7 +218,7 @@ void Foam::Pstream::combineScatter
         }
 
         // Send to my downstairs neighbours
-        forAll(myComm.below(), belowI)
+        forAllReverse(myComm.below(), belowI)
         {
             label belowID = myComm.below()[belowI];
 
@@ -453,7 +453,7 @@ void Foam::Pstream::listCombineScatter
         }
 
         // Send to my downstairs neighbours
-        forAll(myComm.below(), belowI)
+        forAllReverse(myComm.below(), belowI)
         {
             label belowID = myComm.below()[belowI];
 
@@ -651,7 +651,7 @@ void Foam::Pstream::mapCombineScatter
         }
 
         // Send to my downstairs neighbours
-        forAll(myComm.below(), belowI)
+        forAllReverse(myComm.below(), belowI)
         {
             label belowID = myComm.below()[belowI];
 
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/exchange.C b/src/OpenFOAM/db/IOstreams/Pstreams/exchange.C
index a3aff129811..95ad2537a4b 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/exchange.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/exchange.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2011-2015 OpenFOAM Foundation
+    \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -36,9 +36,9 @@ Description
 template<class Container, class T>
 void Foam::Pstream::exchange
 (
-    const List<Container>& sendBufs,
+    const UList<Container>& sendBufs,
+    const labelUList& recvSizes,
     List<Container>& recvBufs,
-    labelListList& sizes,
     const int tag,
     const label comm,
     const bool block
@@ -53,23 +53,13 @@ void Foam::Pstream::exchange
     if (sendBufs.size() != UPstream::nProcs(comm))
     {
         FatalErrorInFunction
-            << "Size of list:" << sendBufs.size()
-            << " does not equal the number of processors:"
+            << "Size of list " << sendBufs.size()
+            << " does not equal the number of processors "
             << UPstream::nProcs(comm)
             << Foam::abort(FatalError);
     }
 
-    sizes.setSize(UPstream::nProcs(comm));
-    labelList& nsTransPs = sizes[UPstream::myProcNo(comm)];
-    nsTransPs.setSize(UPstream::nProcs(comm));
-
-    forAll(sendBufs, procI)
-    {
-        nsTransPs[procI] = sendBufs[procI].size();
-    }
-
-    // Send sizes across. Note: blocks.
-    combineReduce(sizes, UPstream::listEq(), tag, comm);
+    recvBufs.setSize(sendBufs.size());
 
     if (UPstream::nProcs(comm) > 1)
     {
@@ -78,10 +68,9 @@ void Foam::Pstream::exchange
         // Set up receives
         // ~~~~~~~~~~~~~~~
 
-        recvBufs.setSize(sendBufs.size());
-        forAll(sizes, procI)
+        forAll(recvSizes, procI)
         {
-            label nRecv = sizes[procI][UPstream::myProcNo(comm)];
+            label nRecv = recvSizes[procI];
 
             if (procI != Pstream::myProcNo(comm) && nRecv > 0)
             {
@@ -143,4 +132,48 @@ void Foam::Pstream::exchange
 }
 
 
+template<class Container>
+void Foam::Pstream::exchangeSizes
+(
+    const Container& sendBufs,
+    labelList& recvSizes,
+    const label comm
+)
+{
+    if (sendBufs.size() != UPstream::nProcs(comm))
+    {
+        FatalErrorInFunction
+            << "Size of container " << sendBufs.size()
+            << " does not equal the number of processors "
+            << UPstream::nProcs(comm)
+            << Foam::abort(FatalError);
+    }
+
+    labelList sendSizes(sendBufs.size());
+    forAll(sendBufs, procI)
+    {
+        sendSizes[procI] = sendBufs[procI].size();
+    }
+    recvSizes.setSize(sendSizes.size());
+    allToAll(sendSizes, recvSizes, comm);
+}
+
+
+template<class Container, class T>
+void Foam::Pstream::exchange
+(
+    const UList<Container>& sendBufs,
+    List<Container>& recvBufs,
+    const int tag,
+    const label comm,
+    const bool block
+)
+{
+    labelList recvSizes;
+    exchangeSizes(sendBufs, recvSizes, comm);
+
+    exchange<Container, T>(sendBufs, recvSizes, recvBufs, tag, comm, block);
+}
+
+
 // ************************************************************************* //
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatter.C b/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatter.C
index 50a3b7660eb..a270bea29eb 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatter.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatter.C
@@ -185,8 +185,10 @@ void Pstream::scatter
             }
         }
 
-        // Send to my downstairs neighbours
-        forAll(myComm.below(), belowI)
+        // Send to my downstairs neighbours. Note reverse order (compared to
+        // receiving). This is to make sure to send to the critical path
+        // (only when using a tree schedule!) first.
+        forAllReverse(myComm.below(), belowI)
         {
             if (contiguous<T>())
             {
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatterList.C b/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatterList.C
index 930a51bc858..abd4dba8f95 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatterList.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/gatherScatterList.C
@@ -274,7 +274,7 @@ void Pstream::scatterList
         }
 
         // Send to my downstairs neighbours
-        forAll(myComm.below(), belowI)
+        forAllReverse(myComm.below(), belowI)
         {
             label belowID = myComm.below()[belowI];
             const labelList& notBelowLeaves = comms[belowID].allNotBelow();
diff --git a/src/OpenFOAM/db/regIOobject/regIOobjectRead.C b/src/OpenFOAM/db/regIOobject/regIOobjectRead.C
index 83f27c0ccee..0ea3228226b 100644
--- a/src/OpenFOAM/db/regIOobject/regIOobjectRead.C
+++ b/src/OpenFOAM/db/regIOobject/regIOobjectRead.C
@@ -248,8 +248,9 @@ bool Foam::regIOobject::read()
             ok = readData(fromAbove);
         }
 
-        // Send to my downstairs neighbours
-        forAll(myComm.below(), belowI)
+        // Send to my downstairs neighbours. Note reverse order not
+        // necessary here - just for consistency reasons.
+        forAllReverse(myComm.below(), belowI)
         {
             OPstream toBelow
             (
diff --git a/src/OpenFOAM/meshes/polyMesh/mapPolyMesh/mapDistribute/mapDistribute.C b/src/OpenFOAM/meshes/polyMesh/mapPolyMesh/mapDistribute/mapDistribute.C
index bbb4c0ceb88..72d7376b85a 100644
--- a/src/OpenFOAM/meshes/polyMesh/mapPolyMesh/mapDistribute/mapDistribute.C
+++ b/src/OpenFOAM/meshes/polyMesh/mapPolyMesh/mapDistribute/mapDistribute.C
@@ -528,12 +528,10 @@ void Foam::mapDistribute::exchangeAddressing
     }
 
     subMap_.setSize(Pstream::nProcs());
-    labelListList sendSizes;
     Pstream::exchange<labelList, label>
     (
         wantedRemoteElements,
         subMap_,
-        sendSizes,
         tag,
         Pstream::worldComm  //TBD
     );
@@ -608,12 +606,10 @@ void Foam::mapDistribute::exchangeAddressing
     }
 
     subMap_.setSize(Pstream::nProcs());
-    labelListList sendSizes;
     Pstream::exchange<labelList, label>
     (
         wantedRemoteElements,
         subMap_,
-        sendSizes,
         tag,
         Pstream::worldComm      //TBD
     );
diff --git a/src/Pstream/dummy/UPstream.C b/src/Pstream/dummy/UPstream.C
index cd3bc60f39d..3f82f8a4ddd 100644
--- a/src/Pstream/dummy/UPstream.C
+++ b/src/Pstream/dummy/UPstream.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2011-2015 OpenFOAM Foundation
+    \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -81,6 +81,17 @@ void Foam::reduce(scalar&, const sumOp<scalar>&, const int, const label, label&)
 {}
 
 
+void Foam::UPstream::allToAll
+(
+    const labelUList& sendData,
+    labelUList& recvData,
+    const label communicator
+)
+{
+    recvData.assign(sendData);
+}
+
+
 void Foam::UPstream::allocatePstreamCommunicator
 (
     const label,
diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C
index c5407cb52d1..4f95592eccb 100644
--- a/src/Pstream/mpi/UPstream.C
+++ b/src/Pstream/mpi/UPstream.C
@@ -295,6 +295,54 @@ void Foam::reduce
 }
 
 
+void Foam::UPstream::allToAll
+(
+    const labelUList& sendData,
+    labelUList& recvData,
+    const label communicator
+)
+{
+    label np = nProcs(communicator);
+
+    if (sendData.size() != np || recvData.size() != np)
+    {
+        FatalErrorInFunction
+            << "Size of sendData " << sendData.size()
+            << " or size of recvData " << recvData.size()
+            << " is not equal to the number of processors in the domain "
+            << np
+            << Foam::abort(FatalError);
+    }
+
+    if (!UPstream::parRun())
+    {
+        recvData.assign(sendData);
+    }
+    else
+    {
+        if
+        (
+            MPI_Alltoall
+            (
+                sendData.begin(),
+                sizeof(label),
+                MPI_BYTE,
+                recvData.begin(),
+                sizeof(label),
+                MPI_BYTE,
+                PstreamGlobals::MPICommunicators_[communicator]
+            )
+        )
+        {
+            FatalErrorInFunction
+                << "MPI_Alltoall failed for " << sendData
+                << " on communicator " << communicator
+                << Foam::abort(FatalError);
+        }
+    }
+}
+
+
 void Foam::UPstream::allocatePstreamCommunicator
 (
     const label parentIndex,
diff --git a/src/lagrangian/basic/Cloud/Cloud.C b/src/lagrangian/basic/Cloud/Cloud.C
index 4a573a46ee3..0ea3551bb1a 100644
--- a/src/lagrangian/basic/Cloud/Cloud.C
+++ b/src/lagrangian/basic/Cloud/Cloud.C
@@ -322,7 +322,7 @@ void Foam::Cloud<ParticleType>::move(TrackData& td, const scalar trackTime)
 
 
         // Start sending. Sets number of bytes transferred
-        labelListList allNTrans(Pstream::nProcs());
+        labelList allNTrans(Pstream::nProcs());
         pBufs.finishedSends(allNTrans);
 
 
@@ -330,15 +330,13 @@ void Foam::Cloud<ParticleType>::move(TrackData& td, const scalar trackTime)
 
         forAll(allNTrans, i)
         {
-            forAll(allNTrans[i], j)
+            if (allNTrans[i])
             {
-                if (allNTrans[i][j])
-                {
-                    transfered = true;
-                    break;
-                }
+                transfered = true;
+                break;
             }
         }
+        reduce(transfered, orOp<bool>());
 
         if (!transfered)
         {
@@ -350,7 +348,7 @@ void Foam::Cloud<ParticleType>::move(TrackData& td, const scalar trackTime)
         {
             label neighbProci = neighbourProcs[i];
 
-            label nRec = allNTrans[neighbProci][Pstream::myProcNo()];
+            label nRec = allNTrans[neighbProci];
 
             if (nRec)
             {
diff --git a/src/lagrangian/basic/InteractionLists/InteractionLists.C b/src/lagrangian/basic/InteractionLists/InteractionLists.C
index 4551a403f51..4b250dd7b8f 100644
--- a/src/lagrangian/basic/InteractionLists/InteractionLists.C
+++ b/src/lagrangian/basic/InteractionLists/InteractionLists.C
@@ -844,15 +844,6 @@ void Foam::InteractionLists<ParticleType>::buildMap
         nSend[procI]++;
     }
 
-    // Send over how many I need to receive
-    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    labelListList sendSizes(Pstream::nProcs());
-
-    sendSizes[Pstream::myProcNo()] = nSend;
-
-    combineReduce(sendSizes, UPstream::listEq());
-
     // 2. Size sendMap
     labelListList sendMap(Pstream::nProcs());
 
@@ -871,6 +862,11 @@ void Foam::InteractionLists<ParticleType>::buildMap
         sendMap[procI][nSend[procI]++] = i;
     }
 
+    // 4. Send over how many I need to receive
+    labelList recvSizes;
+    Pstream::exchangeSizes(sendMap, recvSizes);
+
+
     // Determine receive map
     // ~~~~~~~~~~~~~~~~~~~~~
 
@@ -888,7 +884,7 @@ void Foam::InteractionLists<ParticleType>::buildMap
     {
         if (procI != Pstream::myProcNo())
         {
-            label nRecv = sendSizes[procI][Pstream::myProcNo()];
+            label nRecv = recvSizes[procI];
 
             constructMap[procI].setSize(nRecv);
 
diff --git a/src/meshTools/regionSplit/regionSplit.C b/src/meshTools/regionSplit/regionSplit.C
index 27b438cd679..5c5ef0487c1 100644
--- a/src/meshTools/regionSplit/regionSplit.C
+++ b/src/meshTools/regionSplit/regionSplit.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2011-2015 OpenFOAM Foundation
+    \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -623,13 +623,7 @@ Foam::autoPtr<Foam::globalIndex> Foam::regionSplit::calcRegionSplit
 
     // Get the wanted region labels into recvNonLocal
     labelListList recvNonLocal;
-    labelListList sizes;
-    Pstream::exchange<labelList, label>
-    (
-        sendNonLocal,
-        recvNonLocal,
-        sizes
-    );
+    Pstream::exchange<labelList, label>(sendNonLocal, recvNonLocal);
 
     // Now we have the wanted compact region labels that procI wants in
     // recvNonLocal[procI]. Construct corresponding list of compact
@@ -650,13 +644,7 @@ Foam::autoPtr<Foam::globalIndex> Foam::regionSplit::calcRegionSplit
 
     // Send back (into recvNonLocal)
     recvNonLocal.clear();
-    sizes.clear();
-    Pstream::exchange<labelList, label>
-    (
-        sendWantedLocal,
-        recvNonLocal,
-        sizes
-    );
+    Pstream::exchange<labelList, label>(sendWantedLocal, recvNonLocal);
     sendWantedLocal.clear();
 
     // Now recvNonLocal contains for every element in setNonLocal the
-- 
GitLab