From de133af526ae290e1b4d17daf6b230c8a8b26159 Mon Sep 17 00:00:00 2001
From: Mark Olesen <Mark.Olesen@esi-group.com>
Date: Wed, 20 Dec 2023 14:59:36 +0100
Subject: [PATCH] FIX: redistributePar problems with lagrangian

- the fileHandler changes included setting cacheLevel(0) to avoid
  blocking with redistributePar. However, if clouds were not uniformly
  present on all ranks, the fileHandler would then follow different
  code paths on different ranks and block.

  Now switch the fileHandler to distributed mode for the lagrangian
  operations within redistributePar, based on the cacheLevel
  information, and restore the previous mode afterwards.
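
  The toggle applied around each lagrangian read/distribute step
  (a sketch of the pattern used in the hunks below):

      auto& handler = Foam::fileHandler();

      // Use distributed (per-rank) reading when caching is disabled,
      // so ranks without the cloud do not wait on masterRead+broadcast
      const bool oldDistributed = handler.distributed
      (
          !fileOperation::cacheLevel() || handler.distributed()
      );

      // ... load / distribute the cloud ...

      // Restore the previous distributed flag
      handler.distributed(oldDistributed);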

FIX: avoid triggering a false processor check in argList

- the check on the number of processor directories falsely triggered
  when redistributing onto fewer ranks than the original decomposition
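
  For example, a case decomposed for 5 ranks but redistributed with
  only 2 ranks still has 5 processor directories; the old equality
  test treated this as an error. The relaxed check (as in the
  argList.C hunk below) only errors when there are too few
  processor directories:

      if (nProcDirs < UPstream::nProcs())
      {
          FatalError
              << "number of processor directories = "
              << nProcDirs
              << " is not equal to the number of processors = "
              << UPstream::nProcs()
              << exit(FatalError);
      }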
---
 .../parLagrangianDistributor.C                | 39 +++++++++++++-----
 .../redistributePar/redistributeLagrangian.H  | 40 ++++++++++++++++---
 .../redistributePar/redistributePar.C         | 17 ++++----
 src/OpenFOAM/global/argList/argList.C         |  8 ++--
 tutorials/mesh/parallel/filter/Allrun         | 29 ++++++++++++--
 .../parallel/filter/system/decomposeParDict.5 | 32 +++++++++++++++
 6 files changed, 133 insertions(+), 32 deletions(-)
 create mode 100644 tutorials/mesh/parallel/filter/system/decomposeParDict.5

diff --git a/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C b/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C
index 908a7e36102..de9968fbfb0 100644
--- a/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C
+++ b/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C
@@ -29,6 +29,7 @@ License
 #include "ListOps.H"
 #include "parLagrangianDistributor.H"
 #include "passivePositionParticleCloud.H"
+#include "fileOperation.H"
 
 // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
 
@@ -76,9 +77,10 @@ void Foam::parLagrangianDistributor::findClouds
     (
         cloud::prefix,
         mesh.time().timeName(),
-        mesh,
+        mesh.thisDb(),
         IOobjectOption::MUST_READ,
-        IOobjectOption::NO_WRITE
+        IOobjectOption::NO_WRITE,
+        IOobjectOption::NO_REGISTER
     );
 
     // Using the fileHandler:
@@ -110,9 +112,10 @@ void Foam::parLagrangianDistributor::findClouds
     Pstream::combineReduce(cloudNames, ListOps::uniqueEqOp<word>());
     Foam::sort(cloudNames);  // Consistent order
 
+    const label nClouds = cloudNames.size();
 
     // See which of the global cloudNames I have
-    haveClouds.resize_nocopy(cloudNames.size());
+    haveClouds.resize_nocopy(nClouds);
     haveClouds = false;
 
     for (const fileName& localCloudName : localCloudDirs)
@@ -125,17 +128,21 @@ void Foam::parLagrangianDistributor::findClouds
     }
 
     // Collect fields per cloud
-    objectNames.resize(cloudNames.size());
+    objectNames.resize_nocopy(nClouds);
 
-    for (const fileName& localCloudName : localCloudDirs)
+    for (label cloudi = 0; cloudi < nClouds; ++cloudi)
     {
+        objectNames[cloudi].clear();
+
+        if (!haveClouds[cloudi]) continue;
+
         // Do local scan for valid cloud objects
         const bool oldParRun = UPstream::parRun(false);
         IOobjectList localObjs
         (
             mesh,
             mesh.time().timeName(),
-            cloud::prefix/localCloudName
+            cloud::prefix/cloudNames[cloudi]
         );
         UPstream::parRun(oldParRun);
 
@@ -152,9 +159,6 @@ void Foam::parLagrangianDistributor::findClouds
         if (isCloud)
         {
             // Has coordinates/positions - so must be a valid cloud
-
-            const label cloudi = cloudNames.find(localCloudName);
-
             objectNames[cloudi] = localObjs.sortedNames();
         }
     }
@@ -333,9 +337,24 @@ Foam::parLagrangianDistributor::distributeLagrangianPositions
     const word& cloudName
 ) const
 {
-    // Load cloud and send particle
+    // Mixed exists/missing on various ranks?
+    // Avoid masterRead+broadcast (can cause blocking)
+
+    auto& handler = Foam::fileHandler();
+    const bool oldDistributed =
+        handler.distributed
+        (
+            !fileOperation::cacheLevel() || handler.distributed()
+        );
+
+
+    // Load cloud
     passivePositionParticleCloud lpi(srcMesh_, cloudName, false);
 
+    // Restore distributed flag
+    handler.distributed(oldDistributed);
+
+    // Distribute particles to other ranks
     return distributeLagrangianPositions(lpi);
 }
 
diff --git a/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H b/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H
index bb118558f56..8fa5d1143b9 100644
--- a/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H
+++ b/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H
@@ -34,6 +34,7 @@ Description
 
 #include "parLagrangianDistributor.H"
 #include "unmappedPassivePositionParticleCloud.H"
+#include "fileOperation.H"
 
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
@@ -59,6 +60,17 @@ readLagrangian
         (void)mesh.tetBasePtIs();
     }
 
+    // Mixed exists/missing on various ranks?
+    // Avoid masterRead+broadcast (can cause blocking)
+
+    auto& handler = Foam::fileHandler();
+    const bool oldDistributed =
+        handler.distributed
+        (
+            !fileOperation::cacheLevel() || handler.distributed()
+        );
+
+
     // Setup clouds
     forAll(cloudNames, i)
     {
@@ -88,6 +100,9 @@ readLagrangian
         );
     }
 
+    // Restore distributed flag
+    handler.distributed(oldDistributed);
+
     return clouds;
 }
 
@@ -164,6 +179,16 @@ void reconstructLagrangian
     }
     const auto& distributor = *distributorPtr;
 
+    // Mixed exists/missing on various ranks?
+    // Avoid masterRead+broadcast (can cause blocking)
+
+    auto& handler = Foam::fileHandler();
+    const bool oldDistributed =
+        handler.distributed
+        (
+            !fileOperation::cacheLevel() || handler.distributed()
+        );
+
     forAll(cloudNames, cloudi)
     {
         const word& cloudName = cloudNames[cloudi];
@@ -171,6 +196,12 @@ void reconstructLagrangian
         Info<< "Reconstructing lagrangian fields for cloud "
             << cloudName << nl << endl;
 
+        autoPtr<mapDistributeBase> lagrangianMapPtr =
+            distributor.distributeLagrangianPositions
+            (
+                cloudName
+            );
+
         IOobjectList cloudObjs
         (
             mesh,
@@ -178,12 +209,6 @@ void reconstructLagrangian
             cloud::prefix/cloudName
         );
 
-        autoPtr<mapDistributeBase> lagrangianMapPtr =
-            distributor.distributeLagrangianPositions
-            (
-                cloudName
-            );
-
         distributor.distributeAllFields
         (
             lagrangianMapPtr(),
@@ -193,6 +218,9 @@ void reconstructLagrangian
             selectedFields
         );
     }
+
+    // Restore distributed flag
+    handler.distributed(oldDistributed);
 }
 
 
diff --git a/applications/utilities/parallelProcessing/redistributePar/redistributePar.C b/applications/utilities/parallelProcessing/redistributePar/redistributePar.C
index a2f3227bdbf..41c45095bbe 100644
--- a/applications/utilities/parallelProcessing/redistributePar/redistributePar.C
+++ b/applications/utilities/parallelProcessing/redistributePar/redistributePar.C
@@ -1440,24 +1440,25 @@ int main(int argc, char *argv[])
     else
     {
         // Directory does not exist. If this happens on master -> decompose mode
-        if (UPstream::master() && !reconstruct)
+        if (UPstream::master() && !reconstruct && !decompose)
         {
            decompose = true;
            InfoOrPout
                 << "No processor directories; switching on decompose mode"
-               << nl << endl;
+                << nl << endl;
         }
     }
     // If master changed to decompose mode make sure all nodes know about it
     Pstream::broadcast(decompose);
     if (decompose)
     {
-         // The UPstream::nProcs is either the source or destination procs
-         fileOperation::nProcsFilter(UPstream::nProcs());
-         InfoOrPout<< "Switching to exact matching for "
-             << fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs())
-             << " processor directories"
-             << nl << endl;
+        // The UPstream::nProcs is either the source or destination procs
+        fileOperation::nProcsFilter(UPstream::nProcs());
+        InfoOrPout
+            << "Switching to exact matching for "
+            << fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs())
+            << " processor directories"
+            << nl << endl;
      }
 
 
diff --git a/src/OpenFOAM/global/argList/argList.C b/src/OpenFOAM/global/argList/argList.C
index 68a67b2c095..a94437514a6 100644
--- a/src/OpenFOAM/global/argList/argList.C
+++ b/src/OpenFOAM/global/argList/argList.C
@@ -1711,21 +1711,21 @@ void Foam::argList::parse
                     }
 
 
-                    if (nProcDirs != Pstream::nProcs())
+                    if (nProcDirs < UPstream::nProcs())
                     {
                         FatalError
                             << "number of processor directories = "
                             << nProcDirs
                             << " is not equal to the number of processors = "
-                            << Pstream::nProcs()
+                            << UPstream::nProcs()
                             << exit(FatalError);
                     }
                 }
 
                 // Distribute the master's argument list (unaltered)
-                for (const int subproci : Pstream::subProcs())
+                for (const int proci : UPstream::subProcs())
                 {
-                    OPstream toProc(Pstream::commsTypes::scheduled, subproci);
+                    OPstream toProc(UPstream::commsTypes::scheduled, proci);
 
                     toProc
                         << args_ << options_
diff --git a/tutorials/mesh/parallel/filter/Allrun b/tutorials/mesh/parallel/filter/Allrun
index 8a56887610c..96035668dc2 100755
--- a/tutorials/mesh/parallel/filter/Allrun
+++ b/tutorials/mesh/parallel/filter/Allrun
@@ -3,6 +3,10 @@ cd "${0%/*}" || exit                                # Run from this directory
 . ${WM_PROJECT_DIR:?}/bin/tools/RunFunctions        # Tutorial run functions
 #------------------------------------------------------------------------------
 
+fileHandler="-fileHandler collated"
+unset fileHandler
+## decompDict5="-decomposeParDict system/decomposeParDict.5"
+
 # Create mesh
 runApplication blockMesh
 
@@ -18,12 +22,29 @@ runApplication createBaffles -overwrite
 runApplication $(getApplication)
 
 #- RedistributePar to do decomposition
-runParallel redistributePar -decompose -cellDist
+runParallel redistributePar -decompose -cellDist $fileHandler
 
 #- Continue running for a bit more
-runParallel -s parallel $(getApplication)
+runParallel -s parallel $(getApplication) $fileHandler
+
+if :
+then
+    #- Reconstruct all times
+    runParallel -s reconstruct \
+        redistributePar -reconstruct $fileHandler
+
+else
+    # Not yet entirely working...
+
+    #- Send to more ranks
+    runParallel -s more-ranks $decompDict5 redistributePar $fileHandler
+
+    #- Continue running for a bit more
+    runParallel -s more-ranks $decompDict5 $(getApplication) $fileHandler
 
-#- Reconstruct all times
-runParallel -s 1 redistributePar -reconstruct
+    #- Reconstruct all times
+    runParallel -s reconstruct $decompDict5 \
+        redistributePar -reconstruct $fileHandler -latestTime
+fi
 
 #------------------------------------------------------------------------------
diff --git a/tutorials/mesh/parallel/filter/system/decomposeParDict.5 b/tutorials/mesh/parallel/filter/system/decomposeParDict.5
new file mode 100644
index 00000000000..daa707df3d1
--- /dev/null
+++ b/tutorials/mesh/parallel/filter/system/decomposeParDict.5
@@ -0,0 +1,32 @@
+/*--------------------------------*- C++ -*----------------------------------*\
+| =========                 |                                                 |
+| \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox           |
+|  \\    /   O peration     | Version:  v2312                                 |
+|   \\  /    A nd           | Website:  www.openfoam.com                      |
+|    \\/     M anipulation  |                                                 |
+\*---------------------------------------------------------------------------*/
+FoamFile
+{
+    version     2.0;
+    format      ascii;
+    class       dictionary;
+    object      decomposeParDict;
+}
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+numberOfSubdomains 5;
+
+method          scotch;
+
+constraints
+{
+    //- Keep owner and neighbour on same processor for faces in zones:
+    faces
+    {
+        type    preserveFaceZones;
+        zones   (cycLeft cycRight);
+    }
+}
+
+
+// ************************************************************************* //
-- 
GitLab