diff --git a/etc/caseDicts/annotated/decomposeParDict b/etc/caseDicts/annotated/decomposeParDict
index 5fcea6439e5a94b8874c936dd6044b494b64583a..9a685736cc73eb15576312c4619126791958dd6c 100644
--- a/etc/caseDicts/annotated/decomposeParDict
+++ b/etc/caseDicts/annotated/decomposeParDict
@@ -208,9 +208,7 @@ constraints
     patches
     {
         //- Keep owner and neighbour on same processor for faces in patches
-        //  (only makes sense for cyclic patches. Not suitable for e.g.
-        //   cyclicAMI since these are not coupled on the patch level. Use
-        //   singleProcessorFaceSets for those)
+        //  (only makes sense for cyclic patches and cyclicAMI)
         type    preservePatches;
         patches (".*");
         enabled false;
@@ -271,9 +269,7 @@ constraints
 // preserveFaceZones (heater solid1 solid3);
 
 //- Keep owner and neighbour on same processor for faces in patches:
-//  (makes sense only for cyclic patches. Not suitable for e.g. cyclicAMI
-//   since these are not coupled on the patch level. Use
-//   singleProcessorFaceSets for those)
+//  (only makes sense for cyclic patches and cyclicAMI)
 //preservePatches (cyclic_half0 cyclic_half1);
 
 //- Keep all of faceSet on a single processor. This puts all cells
diff --git a/etc/controlDict b/etc/controlDict
index f039ec1736c2385702ab5d81e615e16ab66635e6..fa8474e573afb634cfc58127f7c5ea3b235963ee 100644
--- a/etc/controlDict
+++ b/etc/controlDict
@@ -106,6 +106,10 @@ OptimisationSwitches
 
     //- collated: thread buffer size for queued file writes.
     //  If set to 0 or not sufficient for the file size, threading is not used.
+    //  A negative value is a special setting: the buffer (sized with the
+    //  magnitude of the value) is assumed large enough to hold all
+    //  outstanding writes, so the Pstream is not initialised with
+    //  threading support.
     //  Default: 1e9
     maxThreadFileBufferSize 0;
 
diff --git a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C
index ee795672bf8416483154f7e3bdd23ff562b09633..ac00d96cca4c98f5b71148a847f89f75a6b88b66 100644
--- a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C
+++ b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C
@@ -984,7 +984,8 @@ bool Foam::decomposedBlockData::writeData(Ostream& os) const
             io.headerClassName(),
             io.note(),
             masterLocation,
-            name()
+            name(),
+            dictionary()
         );
     }
 
diff --git a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H
index 6e1f8e5df870bfb589e997db6cfc3232155df071..247a7006f2bb1ead542d904462ea913161b664f1 100644
--- a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H
+++ b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H
@@ -213,7 +213,7 @@ public:
             const string& note,
             const fileName& location,
             const word& objectName,
-            const dictionary* extraEntries = nullptr
+            const dictionary& extraEntries
         );
 
         //- Helper: write FoamFile IOobject header
diff --git a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockDataHeader.C b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockDataHeader.C
index 51313bc9726b0873e83ed7e572f5bcd7b943cc47..e4bdbf47324669f88f97266f7f0a4017f613e6c4 100644
--- a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockDataHeader.C
+++ b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockDataHeader.C
@@ -141,7 +141,7 @@ void Foam::decomposedBlockData::writeHeader
     const string& note,
     const fileName& location,
     const word& objectName,
-    const dictionary* extraEntries
+    const dictionary& extraEntries
 )
 {
     if (IOobject::bannerEnabled())
@@ -161,9 +161,9 @@ void Foam::decomposedBlockData::writeHeader
         objectName
     );
 
-    if (extraEntries)
+    if (!extraEntries.empty())
     {
-        extraEntries->writeEntries(os);
+        extraEntries.writeEntries(os);
     }
 
     os.endBlock();
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
index b87f219426b2894d70e8065e18dc9c008b7b9d89..936a50db55e0dc0603fc7f13309e97de5bf46537 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.C
@@ -93,6 +93,14 @@ void Foam::UPstream::setParRun(const label nProcs, const bool haveThreads)
         Pout.prefix() = '[' +  name(myProcNo(comm)) + "] ";
         Perr.prefix() = '[' +  name(myProcNo(comm)) + "] ";
     }
+
+    if (debug)
+    {
+        Pout<< "UPstream::setParRun :"
+            << " nProcs:" << nProcs
+            << " haveThreads:" << haveThreads
+            << endl;
+    }
 }
 
 
diff --git a/src/OpenFOAM/global/argList/argList.C b/src/OpenFOAM/global/argList/argList.C
index 4199abcdaa431306bfb39a44a5e2a75f9742ac35..4a5d6e2537eb9d74ab82e34c7fa34c59f71602a0 100644
--- a/src/OpenFOAM/global/argList/argList.C
+++ b/src/OpenFOAM/global/argList/argList.C
@@ -778,7 +778,7 @@ Foam::argList::argList
     }
 
     // Detect any parallel options
-    bool needsThread = fileOperations::fileOperationInitialise::New
+    const bool needsThread = fileOperations::fileOperationInitialise::New
     (
         handlerType,
         argc,
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
index b5a3ced736ee1852c22dabe53197b1e352befa89..1fa2baca74a92f6edeaea31cfdd6f7057e6a3386 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
@@ -52,7 +52,7 @@ bool Foam::OFstreamCollator::writeFile
     const PtrList<SubList<char>>& slaveData,    // optional slave data
     IOstreamOption streamOpt,
     const bool append,
-    const dictionary* headerEntriesPtr
+    const dictionary& headerEntries
 )
 {
     if (debug)
@@ -96,7 +96,7 @@ bool Foam::OFstreamCollator::writeFile
                 "",             // note
                 "",             // location (leave empty instead inaccurate)
                 fName.name(),   // object name
-                headerEntriesPtr
+                headerEntries
             );
         }
     }
@@ -350,7 +350,7 @@ bool Foam::OFstreamCollator::write
     IOstreamOption streamOpt,
     const bool append,
     const bool useThread,
-    const dictionary* headerEntriesPtr
+    const dictionary& headerEntries
 )
 {
     // Determine (on master) sizes to receive. Note: do NOT use thread
@@ -389,7 +389,7 @@ bool Foam::OFstreamCollator::write
             dummySlaveData,
             streamOpt,
             append,
-            headerEntriesPtr
+            headerEntries
         );
     }
     else if (totalSize <= maxBufferSize_)
@@ -427,7 +427,7 @@ bool Foam::OFstreamCollator::write
                 recvSizes,
                 streamOpt,
                 append,
-                headerEntriesPtr
+                headerEntries
             )
         );
         writeData& fileAndData = fileAndDataPtr();
@@ -552,7 +552,7 @@ bool Foam::OFstreamCollator::write
                     recvSizes,
                     streamOpt,
                     append,
-                    headerEntriesPtr
+                    headerEntries
                 )
             );
 
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.H b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.H
index 1146cff2b4e0ff0dd0a78840ce36709fdbfd27dc..dfcfdae23671dc244d661d1f4d04bfb940bbaf4b 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.H
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.H
@@ -57,15 +57,13 @@ SourceFiles
 #include "labelList.H"
 #include "FIFOStack.H"
 #include "SubList.H"
+#include "dictionary.H"
 
 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
 
 namespace Foam
 {
 
-// Forward Declarations
-class dictionary;
-
 /*---------------------------------------------------------------------------*\
                         Class OFstreamCollator Declaration
 \*---------------------------------------------------------------------------*/
@@ -84,7 +82,7 @@ class OFstreamCollator
             PtrList<List<char>> slaveData_;
             const IOstreamOption streamOpt_;
             const bool append_;
-            const dictionary* headerEntries_;
+            const dictionary headerEntries_;
 
             writeData
             (
@@ -95,7 +93,7 @@ class OFstreamCollator
                 const labelList& sizes,
                 IOstreamOption streamOpt,
                 const bool append,
-                const dictionary* headerEntriesPtr = nullptr
+                const dictionary& headerEntries
             )
             :
                 comm_(comm),
@@ -106,7 +104,7 @@ class OFstreamCollator
                 slaveData_(),
                 streamOpt_(streamOpt),
                 append_(append),
-                headerEntries_(headerEntriesPtr)
+                headerEntries_(headerEntries)
             {}
 
             //- The (approximate) size of master + any optional slave data
@@ -160,7 +158,7 @@ class OFstreamCollator
             const PtrList<SubList<char>>& slaveData,
             IOstreamOption streamOpt,
             const bool append,
-            const dictionary* headerEntriesPtr
+            const dictionary& headerEntries
         );
 
         //- Write all files in stack
@@ -204,7 +202,7 @@ public:
             IOstreamOption streamOpt,
             const bool append,
             const bool useThread = true,
-            const dictionary* headerEntriesPtr = nullptr
+            const dictionary& headerEntries = dictionary::null
         );
 
         //- Wait for all thread actions to have finished
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C
index 85c1536cc443d9bd137c1265314547512e3b5a22..3464f119aa6d43cb468196689301b397b2c931e6 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.C
@@ -274,7 +274,7 @@ Foam::fileOperations::collatedFileOperation::collatedFileOperation
         false
     ),
     myComm_(comm_),
-    writer_(maxThreadFileBufferSize, comm_),
+    writer_(mag(maxThreadFileBufferSize), comm_),
     nProcs_(Pstream::nProcs()),
     ioRanks_(ioRanks())
 {
@@ -295,7 +295,7 @@ Foam::fileOperations::collatedFileOperation::collatedFileOperation
 :
     masterUncollatedFileOperation(comm, false),
     myComm_(-1),
-    writer_(maxThreadFileBufferSize, comm),
+    writer_(mag(maxThreadFileBufferSize), comm),
     nProcs_(Pstream::nProcs()),
     ioRanks_(ioRanks)
 {
@@ -310,6 +310,9 @@ Foam::fileOperations::collatedFileOperation::collatedFileOperation
 
 Foam::fileOperations::collatedFileOperation::~collatedFileOperation()
 {
+    // Wait for any outstanding file operations
+    flush();
+
     if (myComm_ != -1 && myComm_ != UPstream::worldComm)
     {
         UPstream::freeCommunicator(myComm_);
@@ -460,7 +463,7 @@ bool Foam::fileOperations::collatedFileOperation::writeObject
         {
             // Re-check static maxThreadFileBufferSize variable to see
             // if needs to use threading
-            const bool useThread = (maxThreadFileBufferSize > 0);
+            const bool useThread = (maxThreadFileBufferSize != 0);
 
             if (debug)
             {
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.H b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.H
index 6d635ccb1809f699cd7db470a8c16d59c6a208b6..23d5335646200329718935fad73f95855d8ad384 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.H
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/collatedFileOperation.H
@@ -6,7 +6,7 @@
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
     Copyright (C) 2017 OpenFOAM Foundation
-    Copyright (C) 2019-2020 OpenCFD Ltd.
+    Copyright (C) 2019-2021 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -31,7 +31,12 @@ Description
     Version of masterUncollatedFileOperation that collates regIOobjects
     into a container in the processors/ subdirectory.
 
-    Uses threading if maxThreadFileBufferSize > 0.
+    Uses threading if maxThreadFileBufferSize != 0.
+        > 0 : Can use MPI inside the thread to collect data if the buffer is
+              not large enough. Requires full thread support inside MPI.
+
+        < 0 : special : -maxThreadFileBufferSize is assumed large enough
+              for all writing. Initialises MPI without thread support.
 
 See also
     masterUncollatedFileOperation
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/threadedCollatedOFstream.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/threadedCollatedOFstream.C
index bc62facfd2d5943bd198a36186918f1fe60be645..c00ab582138fc8dd77a8491ec8c1b56770fbde93 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/threadedCollatedOFstream.C
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/threadedCollatedOFstream.C
@@ -61,7 +61,7 @@ Foam::threadedCollatedOFstream::~threadedCollatedOFstream()
         IOstreamOption(IOstream::BINARY, version(), compression_),
         false,  // append=false
         useThread_,
-        &headerEntries_
+        headerEntries_
     );
 }
 
diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C
index edd9315e47a0d878dcc350800df12d61ef4f3c71..f36485840bda226d59b4e52abdd8b3ab0ee26d64 100644
--- a/src/Pstream/mpi/UPstream.C
+++ b/src/Pstream/mpi/UPstream.C
@@ -279,7 +279,15 @@ bool Foam::UPstream::init(int& argc, char**& argv, const bool needsThread)
 
     if (debug)
     {
-        Pout<< "UPstream::init : procs:" << numprocs
+        Pout<< "UPstream::init :"
+            << " thread-support : wanted:" << needsThread
+            << " obtained:"
+            <<  (
+                    provided_thread_support == MPI_THREAD_MULTIPLE
+                  ? "MPI_THREAD_MULTIPLE"
+                  : "MPI_THREAD_SINGLE"
+                )
+            << " procs:" << numprocs
             << " rank:" << myRank
             << " world:" << world << endl;
     }
diff --git a/src/overset/cellCellStencil/inverseDistance/waveMethod.C b/src/overset/cellCellStencil/inverseDistance/waveMethod.C
index ef54ca9de4fcaef79f20d588a2be95f396cf957a..e3dd8a9c5567489d8ac1d6c7e45dec205fe773e6 100644
--- a/src/overset/cellCellStencil/inverseDistance/waveMethod.C
+++ b/src/overset/cellCellStencil/inverseDistance/waveMethod.C
@@ -5,7 +5,7 @@
     \\  /    A nd           | www.openfoam.com
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
-    Copyright (C) 2017-2020 OpenCFD Ltd.
+    Copyright (C) 2017-2021 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -129,7 +129,7 @@ void Foam::waveMethod::calculate
                 changedFacesInfo,
                 faceData,
                 cellData,
-                src.globalData().nTotalCells(),   // max iterations
+                src.globalData().nTotalCells()+1,   // max iterations
                 td
             );
         }
diff --git a/tutorials/basic/laplacianFoam/implicitAMI/system/decomposeParDict b/tutorials/basic/laplacianFoam/implicitAMI/system/decomposeParDict
index a1ee695090139c706ea390e32d9ee2c6eb0d796f..fa80348b2e897ca123a5d6e9adc0b56197f467b6 100644
--- a/tutorials/basic/laplacianFoam/implicitAMI/system/decomposeParDict
+++ b/tutorials/basic/laplacianFoam/implicitAMI/system/decomposeParDict
@@ -22,4 +22,16 @@ numberOfSubdomains  2;
 method          hierarchical;
 n               (2 1 1);
 
+constraints
+{
+    patches
+    {
+        //- Keep owner and neighbour on same processor for faces in patches
+        //  (only makes sense for cyclic patches and cyclicAMI)
+        type    preservePatches;
+        patches (".*");
+    }
+}
+
+
 // ************************************************************************* //