From de133af526ae290e1b4d17daf6b230c8a8b26159 Mon Sep 17 00:00:00 2001 From: Mark Olesen <Mark.Olesen@esi-group.com> Date: Wed, 20 Dec 2023 14:59:36 +0100 Subject: [PATCH] FIX: redistributePar problems with lagrangian - the fileHandler changes included setting cacheLevel(0) to avoid blocking with redistributePar. However, this meant if clouds were not uniformly present on all ranks the fileHandler would follow different code paths and lead to blocking. Now switch to distributed mode for the lagrangian operations within redistributePar based on the cacheLevel information. FIX: avoid triggering a false processor check in argList - when redistributing to few ranks --- .../parLagrangianDistributor.C | 39 +++++++++++++----- .../redistributePar/redistributeLagrangian.H | 40 ++++++++++++++++--- .../redistributePar/redistributePar.C | 17 ++++---- src/OpenFOAM/global/argList/argList.C | 8 ++-- tutorials/mesh/parallel/filter/Allrun | 29 ++++++++++++-- .../parallel/filter/system/decomposeParDict.5 | 32 +++++++++++++++ 6 files changed, 133 insertions(+), 32 deletions(-) create mode 100644 tutorials/mesh/parallel/filter/system/decomposeParDict.5 diff --git a/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C b/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C index 908a7e36102..de9968fbfb0 100644 --- a/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C +++ b/applications/utilities/parallelProcessing/redistributePar/parLagrangianDistributor.C @@ -29,6 +29,7 @@ License #include "ListOps.H" #include "parLagrangianDistributor.H" #include "passivePositionParticleCloud.H" +#include "fileOperation.H" // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * // @@ -76,9 +77,10 @@ void Foam::parLagrangianDistributor::findClouds ( cloud::prefix, mesh.time().timeName(), - mesh, + mesh.thisDb(), IOobjectOption::MUST_READ, - IOobjectOption::NO_WRITE + IOobjectOption::NO_WRITE, + IOobjectOption::NO_REGISTER ); // Using the fileHandler: @@ -110,9 +112,10 @@ void Foam::parLagrangianDistributor::findClouds Pstream::combineReduce(cloudNames, ListOps::uniqueEqOp<word>()); Foam::sort(cloudNames); // Consistent order + const label nClouds = cloudNames.size(); // See which of the global cloudNames I have - haveClouds.resize_nocopy(cloudNames.size()); + haveClouds.resize_nocopy(nClouds); haveClouds = false; for (const fileName& localCloudName : localCloudDirs) @@ -125,17 +128,21 @@ void Foam::parLagrangianDistributor::findClouds } // Collect fields per cloud - objectNames.resize(cloudNames.size()); + objectNames.resize_nocopy(nClouds); - for (const fileName& localCloudName : localCloudDirs) + for (label cloudi = 0; cloudi < nClouds; ++cloudi) { + objectNames[cloudi].clear(); + + if (!haveClouds[cloudi]) continue; + // Do local scan for valid cloud objects const bool oldParRun = UPstream::parRun(false); IOobjectList localObjs ( mesh, mesh.time().timeName(), - cloud::prefix/localCloudName + cloud::prefix/cloudNames[cloudi] ); UPstream::parRun(oldParRun); @@ -152,9 +159,6 @@ void Foam::parLagrangianDistributor::findClouds if (isCloud) { // Has coordinates/positions - so must be a valid cloud - - const label cloudi = cloudNames.find(localCloudName); - objectNames[cloudi] = localObjs.sortedNames(); } } @@ -333,9 +337,24 @@ Foam::parLagrangianDistributor::distributeLagrangianPositions const word& cloudName ) const { - // Load cloud and send particle + // Mixed exists/missing on various ranks? + // Avoid masterRead+broadcast (can cause blocking) + + auto& handler = Foam::fileHandler(); + const bool oldDistributed = + handler.distributed + ( + !fileOperation::cacheLevel() || handler.distributed() + ); + + + // Load cloud passivePositionParticleCloud lpi(srcMesh_, cloudName, false); + // Restore distributed flag + handler.distributed(oldDistributed); + + // Distribute particles to other ranks return distributeLagrangianPositions(lpi); } diff --git a/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H b/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H index bb118558f56..8fa5d1143b9 100644 --- a/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H +++ b/applications/utilities/parallelProcessing/redistributePar/redistributeLagrangian.H @@ -34,6 +34,7 @@ Description #include "parLagrangianDistributor.H" #include "unmappedPassivePositionParticleCloud.H" +#include "fileOperation.H" // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // @@ -59,6 +60,17 @@ readLagrangian (void)mesh.tetBasePtIs(); } + // Mixed exists/missing on various ranks? + // Avoid masterRead+broadcast (can cause blocking) + + auto& handler = Foam::fileHandler(); + const bool oldDistributed = + handler.distributed + ( + !fileOperation::cacheLevel() || handler.distributed() + ); + + // Setup clouds forAll(cloudNames, i) { @@ -88,6 +100,9 @@ readLagrangian ); } + // Restore distributed flag + handler.distributed(oldDistributed); + return clouds; } @@ -164,6 +179,16 @@ void reconstructLagrangian } const auto& distributor = *distributorPtr; + // Mixed exists/missing on various ranks? + // Avoid masterRead+broadcast (can cause blocking) + + auto& handler = Foam::fileHandler(); + const bool oldDistributed = + handler.distributed + ( + !fileOperation::cacheLevel() || handler.distributed() + ); + forAll(cloudNames, cloudi) { const word& cloudName = cloudNames[cloudi]; @@ -171,6 +196,12 @@ void reconstructLagrangian Info<< "Reconstructing lagrangian fields for cloud " << cloudName << nl << endl; + autoPtr<mapDistributeBase> lagrangianMapPtr = + distributor.distributeLagrangianPositions + ( + cloudName + ); + IOobjectList cloudObjs ( mesh, @@ -178,12 +209,6 @@ void reconstructLagrangian cloud::prefix/cloudName ); - autoPtr<mapDistributeBase> lagrangianMapPtr = - distributor.distributeLagrangianPositions - ( - cloudName - ); - distributor.distributeAllFields ( lagrangianMapPtr(), @@ -193,6 +218,9 @@ void reconstructLagrangian selectedFields ); } + + // Restore distributed flag + handler.distributed(oldDistributed); } diff --git a/applications/utilities/parallelProcessing/redistributePar/redistributePar.C b/applications/utilities/parallelProcessing/redistributePar/redistributePar.C index a2f3227bdbf..41c45095bbe 100644 --- a/applications/utilities/parallelProcessing/redistributePar/redistributePar.C +++ b/applications/utilities/parallelProcessing/redistributePar/redistributePar.C @@ -1440,24 +1440,25 @@ int main(int argc, char *argv[]) else { // Directory does not exist. If this happens on master -> decompose mode - if (UPstream::master() && !reconstruct) + if (UPstream::master() && !reconstruct && !decompose) { decompose = true; InfoOrPout << "No processor directories; switching on decompose mode" - << nl << endl; + << nl << endl; } } // If master changed to decompose mode make sure all nodes know about it Pstream::broadcast(decompose); if (decompose) { - // The UPstream::nProcs is either the source or destination procs - fileOperation::nProcsFilter(UPstream::nProcs()); - InfoOrPout<< "Switching to exact matching for " - << fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs()) - << " processor directories" - << nl << endl; + // The UPstream::nProcs is either the source or destination procs + fileOperation::nProcsFilter(UPstream::nProcs()); + InfoOrPout + << "Switching to exact matching for " + << fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs()) + << " processor directories" + << nl << endl; } diff --git a/src/OpenFOAM/global/argList/argList.C b/src/OpenFOAM/global/argList/argList.C index 68a67b2c095..a94437514a6 100644 --- a/src/OpenFOAM/global/argList/argList.C +++ b/src/OpenFOAM/global/argList/argList.C @@ -1711,21 +1711,21 @@ void Foam::argList::parse } - if (nProcDirs != Pstream::nProcs()) + if (nProcDirs < UPstream::nProcs()) { FatalError << "number of processor directories = " << nProcDirs << " is not equal to the number of processors = " - << Pstream::nProcs() + << UPstream::nProcs() << exit(FatalError); } } // Distribute the master's argument list (unaltered) - for (const int subproci : Pstream::subProcs()) + for (const int proci : UPstream::subProcs()) { - OPstream toProc(Pstream::commsTypes::scheduled, subproci); + OPstream toProc(UPstream::commsTypes::scheduled, proci); toProc << args_ << options_ diff --git a/tutorials/mesh/parallel/filter/Allrun b/tutorials/mesh/parallel/filter/Allrun index 8a56887610c..96035668dc2 100755 --- a/tutorials/mesh/parallel/filter/Allrun +++ b/tutorials/mesh/parallel/filter/Allrun @@ -3,6 +3,10 @@ cd "${0%/*}" || exit # Run from this directory . ${WM_PROJECT_DIR:?}/bin/tools/RunFunctions # Tutorial run functions #------------------------------------------------------------------------------ +fileHandler="-fileHandler collated" +unset fileHandler +## decompDict5="-decomposeParDict system/decomposeParDict.5" + # Create mesh runApplication blockMesh @@ -18,12 +22,29 @@ runApplication createBaffles -overwrite runApplication $(getApplication) #- RedistributePar to do decomposition -runParallel redistributePar -decompose -cellDist +runParallel redistributePar -decompose -cellDist $fileHandler #- Continue running for a bit more -runParallel -s parallel $(getApplication) +runParallel -s parallel $(getApplication) $fileHandler + +if : +then + #- Reconstruct all times + runParallel -s reconstruct \ + redistributePar -reconstruct $fileHandler + +else + # Not yet entirely working... + + #- Send to more ranks + runParallel -s more-ranks $decompDict5 redistributePar $fileHandler + + #- Continue running for a bit more + runParallel -s more-ranks $decompDict5 $(getApplication) $fileHandler -#- Reconstruct all times -runParallel -s 1 redistributePar -reconstruct + #- Reconstruct all times + runParallel -s reconstruct $decompDict5 \ + redistributePar -reconstruct $fileHandler -latestTime +fi #------------------------------------------------------------------------------ diff --git a/tutorials/mesh/parallel/filter/system/decomposeParDict.5 b/tutorials/mesh/parallel/filter/system/decomposeParDict.5 new file mode 100644 index 00000000000..daa707df3d1 --- /dev/null +++ b/tutorials/mesh/parallel/filter/system/decomposeParDict.5 @@ -0,0 +1,32 @@ +/*--------------------------------*- C++ -*----------------------------------*\ +| ========= | | +| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox | +| \\ / O peration | Version: v2312 | +| \\ / A nd | Website: www.openfoam.com | +| \\/ M anipulation | | +\*---------------------------------------------------------------------------*/ +FoamFile +{ + version 2.0; + format ascii; + class dictionary; + object decomposeParDict; +} +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +numberOfSubdomains 5; + +method scotch; + +constraints +{ + //- Keep owner and neighbour on same processor for faces in zones: + faces + { + type preserveFaceZones; + zones (cycLeft cycRight); + } +} + + +// ************************************************************************* // -- GitLab