diff --git a/src/petsc4Foam/solvers/petscSolver.C b/src/petsc4Foam/solvers/petscSolver.C
index d7e6cdb453c228849727c4040c22c963eccc01df..c22d93b8eaa63278e342fd6e77fb31c295c1362c 100644
--- a/src/petsc4Foam/solvers/petscSolver.C
+++ b/src/petsc4Foam/solvers/petscSolver.C
@@ -7,6 +7,7 @@
 -------------------------------------------------------------------------------
     Copyright (C) 2018-2020 OpenCFD Ltd.
     Copyright (C) 2019-2020 Simone Bna
+    Copyright (C) 2021 Stefano Zampini
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -473,92 +474,206 @@ void Foam::petscSolver::buildMat
     MatSetOptionsPrefix(Amat, prefix_.c_str());
     MatSetFromOptions(Amat);
 
-    // On-processor non-zero entries, per row.
-    // Initialise with 1 (includes diagonal).
-    integerList nonZero_(nrows_, 1);
-    lowNonZero.resize(nrows_, 0);
-
-    // Number of on-processor non-zeros
-    // set the matrix options
-
-    if (!is_sbaij(Amat))
+    // Query for fast COO support
+    PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],const PetscInt[]) = NULL;
+    PetscObjectQueryFunction((PetscObject)Amat,"MatSetPreallocationCOO_C",&f);
+    if (f)
     {
-        for (const label celli : upp)
+        const globalIndex globalNumbering_(nrows_);
+
+        // Number of non-zeros from interfaces (on-processor or off-processor)
+        const label nReq = Pstream::nRequests();
+        label nProcValues = 0;
+        labelList globalCells
+        (
+            identity(globalNumbering_.range())
+        );
+        forAll(interfaces, patchi)
         {
-            ++nonZero_[celli];
+            const lduInterface* intf = interfaces.set(patchi);
+
+            if (intf)
+            {
+                if (isA<cyclicAMILduInterface>(*intf))
+                {
+                    FatalErrorInFunction
+                        << "cyclicAMI is not supported" << nl
+                        << exit(FatalError);
+                }
+                nProcValues += lduAddr.patchAddr(patchi).size();
+                interfaces[patchi].initInternalFieldTransfer
+                (
+                    Pstream::commsTypes::nonBlocking,
+                    globalCells
+                );
+            }
         }
-    }
+        PetscInt toti = nrows_+low.size()+upp.size()+nProcValues;
 
-    for (const label celli : low)
-    {
-        ++nonZero_[celli];
-        ++lowNonZero[celli];
-    }
+        PetscInt *coo_i, *coo_j;
+        PetscMalloc1(toti, &coo_i);
+        PetscMalloc1(toti, &coo_j);
 
-    maxLowNonZeroPerRow = max(lowNonZero);
+        PetscInt *ptr_i = coo_i;
+        PetscInt *ptr_j = coo_j;
+        label off = globalNumbering_.localStart();
+        for (label celli = off; celli < off + nrows_; ++celli)
+        {
+            const PetscInt globRow = celli;
+            *ptr_i++ = globRow;
+            *ptr_j++ = globRow;
+        }
+        for (label idx=0; idx < upp.size(); ++idx)
+        {
+            PetscInt globRow = low[idx] + off;
+            PetscInt globCol = upp[idx] + off;
+            *ptr_i++ = globRow;
+            *ptr_j++ = globCol;
+        }
+        for (label idx=0; idx < low.size(); ++idx)
+        {
+            PetscInt globRow = upp[idx] + off;
+            PetscInt globCol = low[idx] + off;
+            *ptr_i++ = globRow;
+            *ptr_j++ = globCol;
+        }
+        if (Pstream::parRun())
+        {
+            Pstream::waitRequests(nReq);
+        }
+        forAll(interfaces, patchi)
+        {
+            if (interfaces.set(patchi))
+            {
+                // Processor-local values
+                const labelUList& faceCells = lduAddr.patchAddr(patchi);
+                labelField nbrCells
+                (
+                    interfaces[patchi].internalFieldTransfer
+                    (
+                        Pstream::commsTypes::nonBlocking,
+                        globalCells
+                    )
+                );
 
-    // Off-processor non-zero entries, per row.
+                if (faceCells.size() != nbrCells.size())
+                {
+                    FatalErrorInFunction
+                        << "Mismatch in interface sizes (AMI?)" << nl
+                        << "Have " << faceCells.size() << " != "
+                        << nbrCells.size() << nl
+                        << exit(FatalError);
+                }
 
-    integerList nonZeroNeigh_(nrows_, 0);
+                forAll(faceCells, i)
+                {
+                    PetscInt globRow = faceCells[i] + off;
+                    PetscInt globCol = nbrCells[i];
+                    *ptr_i++ = globRow;
+                    *ptr_j++ = globCol;
+                }
+            }
+        }
 
-    // Number of non-zeros from interfaces (on-processor or off-processor)
-    forAll(interfaces, patchi)
+        // Preallocate with COO information
+        // Use interface call to get the timing logged
+#if PETSC_VERSION_GE(3,15,0)
+        MatSetPreallocationCOO(Amat,toti,coo_i,coo_j);
+#else
+        (*f)(Amat,toti,coo_i,coo_j);
+#endif
+        PetscFree(coo_i);
+        PetscFree(coo_j);
+    }
+    else
     {
-        const lduInterface* intf = interfaces.set(patchi);
+        // On-processor non-zero entries, per row.
+        // Initialise with 1 (includes diagonal).
+        integerList nonZero_(nrows_, 1);
+        lowNonZero.resize(nrows_, 0);
 
-        if (intf)
-        {
-            const labelUList& faceCells = lduAddr.patchAddr(patchi);
+        // Number of on-processor non-zeros
+        // set the matrix options
 
-            if (isA<cyclicAMILduInterface>(*intf))
+        if (!is_sbaij(Amat))
+        {
+            for (const label celli : upp)
             {
-                FatalErrorInFunction
-                    << "cyclicAMI is not supported" << nl
-                    << exit(FatalError);
+                ++nonZero_[celli];
             }
-            else if (isA<cyclicLduInterface>(*intf))
+        }
+
+        for (const label celli : low)
+        {
+            ++nonZero_[celli];
+            ++lowNonZero[celli];
+        }
+
+        maxLowNonZeroPerRow = max(lowNonZero);
+
+        // Off-processor non-zero entries, per row.
+
+        integerList nonZeroNeigh_(nrows_, 0);
+
+        // Number of non-zeros from interfaces (on-processor or off-processor)
+        forAll(interfaces, patchi)
+        {
+            const lduInterface* intf = interfaces.set(patchi);
+
+            if (intf)
             {
-                // Cyclic is on-processor
-                for (const label celli : faceCells)
+                const labelUList& faceCells = lduAddr.patchAddr(patchi);
+
+                if (isA<cyclicAMILduInterface>(*intf))
                 {
-                    ++nonZero_[celli];
+                    FatalErrorInFunction
+                        << "cyclicAMI is not supported" << nl
+                        << exit(FatalError);
                 }
-            }
-            else
-            {
-                // Off-processor
-                for (const label celli : faceCells)
+                else if (isA<cyclicLduInterface>(*intf))
                 {
-                    ++nonZeroNeigh_[celli];
+                    // Cyclic is on-processor
+                    for (const label celli : faceCells)
+                    {
+                        ++nonZero_[celli];
+                    }
+                }
+                else
+                {
+                    // Off-processor
+                    for (const label celli : faceCells)
+                    {
+                        ++nonZeroNeigh_[celli];
+                    }
                 }
             }
         }
-    }
 
-    // preallocate the matrix
-    MatXAIJSetPreallocation
-    (
-        Amat,
-        1,
-        nonZero_.data(),        // on-processor
-        nonZeroNeigh_.data(),   // off-processor
-        nonZero_.data(),
-        nonZeroNeigh_.data()
-    );
+        // preallocate the matrix
+        MatXAIJSetPreallocation
+        (
+            Amat,
+            1,
+            nonZero_.data(),        // on-processor
+            nonZeroNeigh_.data(),   // off-processor
+            nonZero_.data(),
+            nonZeroNeigh_.data()
+        );
 
-    // The general AIJ preallocator does not handle SELL
-    MatSeqSELLSetPreallocation
-    (
-        Amat,
-        0, nonZero_.data()        // on-processor
-    );
+        // The general AIJ preallocator does not handle SELL
+        MatSeqSELLSetPreallocation
+        (
+            Amat,
+            0, nonZero_.data()        // on-processor
+        );
 
-    MatMPISELLSetPreallocation
-    (
-        Amat,
-        0, nonZero_.data(),       // on-processor
-        0, nonZeroNeigh_.data()   // off-processor
-    );
+        MatMPISELLSetPreallocation
+        (
+            Amat,
+            0, nonZero_.data(),       // on-processor
+            0, nonZeroNeigh_.data()   // off-processor
+        );
+    }
 
     // set the matrix options
     if (matrix_.symmetric())
@@ -607,167 +722,218 @@ void Foam::petscSolver::updateMat
     // Local degrees-of-freedom i.e. number of local rows
     const label nrows_ = lduAddr.size();
 
-    // Number of internal faces (connectivity)
-    const label nIntFaces_ = upp.size();
-
-    const globalIndex globalNumbering_(nrows_);
-
-    // The diagonal
-    {
-        PetscInt globRow = globalNumbering_.toGlobal(0);
-
-        for (label celli = 0; celli < nrows_; ++celli)
-        {
-            PetscScalar val = diagVal[celli];
-
-            MatSetValues
-            (
-                Amat,
-                1, &globRow, // row
-                1, &globRow, // col
-                &val,
-                INSERT_VALUES
-            );
-            ++globRow;
-        }
-    }
-
-    // Connections to higher numbered cells (on-processor) (optimized form)
-    PetscInt globRow;
-    PetscInt globCols[maxLowNonZeroPerRow];
-    PetscScalar globVals[maxLowNonZeroPerRow];
-    label kidx = 0;
-
-    for (label idx=0; idx < nrows_; ++idx)
-    {
-        if (lowNonZero[idx] > 0)
-        {
-            globRow = globalNumbering_.toGlobal(low[kidx]);
-
-            for (label jidx=0; jidx<lowNonZero[idx]; ++jidx)
-            {
-                // The row is sorted, col is unsorted
-                globCols[jidx] = globalNumbering_.toGlobal(upp[kidx]);
-                globVals[jidx] = uppVal[kidx];
-                ++kidx;
-            }
-
-            MatSetValues
-            (
-                Amat,
-                1, &globRow,
-                lowNonZero[idx], globCols,
-                globVals,
-                INSERT_VALUES
-            );
-        }
-    }
-
-    if (!is_sbaij(Amat))
-    {
-        // Connections to lower numbered cells (on-processor)
-        for (label idx=0; idx < nIntFaces_; ++idx)
-        {
-            // The row is unsorted, col is sorted
-            PetscInt globRow = globalNumbering_.toGlobal(upp[idx]);
-            PetscInt globCol = globalNumbering_.toGlobal(low[idx]);
-            PetscScalar val = lowVal[idx];
-
-            MatSetValues
-            (
-                Amat,
-                1, &globRow,
-                1, &globCol,
-                &val,
-                INSERT_VALUES
-            );
-        }
-    }
-
-    labelList globalCells
-    (
-        identity
-        (
-            globalNumbering_.localSize(),
-            globalNumbering_.localStart()
-        )
-    );
-
-    // Connections to neighbouring processors
+    // Query for fast COO support
+    PetscErrorCode (*f)(Mat,const PetscScalar[],InsertMode) = NULL;
+    PetscObjectQueryFunction((PetscObject)Amat,"MatSetValuesCOO_C",&f);
+    if (f)
     {
-        // Initialise transfer of global cells
-        forAll(interfaces, patchi)
-        {
-            if (interfaces.set(patchi))
-            {
-                interfaces[patchi].initInternalFieldTransfer
-                (
-                    Pstream::commsTypes::nonBlocking,
-                    globalCells
-                );
-            }
-        }
-
-        if (Pstream::parRun())
-        {
-            Pstream::waitRequests();
-        }
-
-        forAll(interfaces, patchi)
-        {
-            if (interfaces.set(patchi))
-            {
-                // Processor-local values
+       label nProcValues = 0;
+       forAll(interfaces, patchi)
+       {
+          const lduInterface* intf = interfaces.set(patchi);
+
+          if (intf)
+          {
+             nProcValues += lduAddr.patchAddr(patchi).size();
+          }
+       }
+       PetscInt toti = nrows_+low.size()+upp.size()+nProcValues;
+
+       PetscScalar *coo_v;
+       PetscMalloc1(toti,&coo_v);
+       PetscArraycpy(coo_v,diagVal.cdata(),nrows_);
+       PetscArraycpy(coo_v+nrows_,uppVal.cdata(),upp.size());
+       PetscArraycpy(coo_v+nrows_+upp.size(),lowVal.cdata(),low.size());
+       if (nProcValues)
+       {
+          PetscScalar *ptr_v = coo_v+nrows_+low.size()+upp.size();
+          forAll(interfaces, patchi)
+          {
+             const lduInterface* intf = interfaces.set(patchi);
+
+             if (intf)
+             {
                 const labelUList& faceCells = lduAddr.patchAddr(patchi);
-
                 const scalarField& bCoeffs = interfaceBouCoeffs_[patchi];
-
-                labelField nbrCells
-                (
-                    interfaces[patchi].internalFieldTransfer
-                    (
-                        Pstream::commsTypes::nonBlocking,
-                        globalCells
-                    )
-                );
-
-
-                if (faceCells.size() != nbrCells.size())
-                {
-                    FatalErrorInFunction
-                        << "Mismatch in interface sizes (AMI?)" << nl
-                        << "Have " << faceCells.size() << " != "
-                        << nbrCells.size() << nl
-                        << exit(FatalError);
-                }
-
-                const label off = globalNumbering_.localStart();
-
                 // NB:opposite sign since we using this sides'
                 //    coefficients (from discretising our face; not the
                 //    neighbouring, reversed face)
-
                 forAll(faceCells, i)
                 {
-                    PetscInt globRow = faceCells[i] + off; // row is sorted
-                    PetscInt globCol = nbrCells[i];  // <- already global
-                    PetscScalar val = -bCoeffs[i];
-
-                    MatSetValues
-                    (
-                        Amat,
-                        1, &globRow,
-                        1, &globCol,
-                        &(val),
-                        INSERT_VALUES
-                    );
+                   *ptr_v++ = -bCoeffs[i];
                 }
-            }
-        }
+             }
+          }
+
+       }
+#if PETSC_VERSION_GE(3,15,0)
+       MatSetValuesCOO(Amat,coo_v,INSERT_VALUES);
+#else
+       (*f)(Amat,coo_v,INSERT_VALUES);
+#endif
+       PetscFree(coo_v);
+    } else {
+       // Number of internal faces (connectivity)
+       const label nIntFaces_ = upp.size();
+
+       const globalIndex globalNumbering_(nrows_);
+
+       // The diagonal
+       {
+           PetscInt globRow = globalNumbering_.toGlobal(0);
+
+           for (label celli = 0; celli < nrows_; ++celli)
+           {
+               PetscScalar val = diagVal[celli];
+
+               MatSetValues
+               (
+                   Amat,
+                   1, &globRow, // row
+                   1, &globRow, // col
+                   &val,
+                   INSERT_VALUES
+               );
+               ++globRow;
+           }
+       }
+
+       // Connections to higher numbered cells (on-processor) (optimized form)
+       PetscInt globRow;
+       PetscInt globCols[maxLowNonZeroPerRow];
+       PetscScalar globVals[maxLowNonZeroPerRow];
+       label kidx = 0;
+
+       for (label idx=0; idx < nrows_; ++idx)
+       {
+           if (lowNonZero[idx] > 0)
+           {
+               globRow = globalNumbering_.toGlobal(low[kidx]);
+
+               for (label jidx=0; jidx<lowNonZero[idx]; ++jidx)
+               {
+                   // The row is sorted, col is unsorted
+                   globCols[jidx] = globalNumbering_.toGlobal(upp[kidx]);
+                   globVals[jidx] = uppVal[kidx];
+                   ++kidx;
+               }
+
+               MatSetValues
+               (
+                   Amat,
+                   1, &globRow,
+                   lowNonZero[idx], globCols,
+                   globVals,
+                   INSERT_VALUES
+               );
+           }
+       }
+
+       if (!is_sbaij(Amat))
+       {
+           // Connections to lower numbered cells (on-processor)
+           for (label idx=0; idx < nIntFaces_; ++idx)
+           {
+               // The row is unsorted, col is sorted
+               PetscInt globRow = globalNumbering_.toGlobal(upp[idx]);
+               PetscInt globCol = globalNumbering_.toGlobal(low[idx]);
+               PetscScalar val = lowVal[idx];
+
+               MatSetValues
+               (
+                   Amat,
+                   1, &globRow,
+                   1, &globCol,
+                   &val,
+                   INSERT_VALUES
+               );
+           }
+       }
+
+       labelList globalCells
+       (
+           identity
+           (
+               globalNumbering_.localSize(),
+               globalNumbering_.localStart()
+           )
+       );
+
+       // Connections to neighbouring processors
+       {
+           // Initialise transfer of global cells
+           forAll(interfaces, patchi)
+           {
+               if (interfaces.set(patchi))
+               {
+                   interfaces[patchi].initInternalFieldTransfer
+                   (
+                       Pstream::commsTypes::nonBlocking,
+                       globalCells
+                   );
+               }
+           }
+
+           if (Pstream::parRun())
+           {
+               Pstream::waitRequests();
+           }
+
+           forAll(interfaces, patchi)
+           {
+               if (interfaces.set(patchi))
+               {
+                   // Processor-local values
+                   const labelUList& faceCells = lduAddr.patchAddr(patchi);
+
+                   const scalarField& bCoeffs = interfaceBouCoeffs_[patchi];
+
+                   labelField nbrCells
+                   (
+                       interfaces[patchi].internalFieldTransfer
+                       (
+                           Pstream::commsTypes::nonBlocking,
+                           globalCells
+                       )
+                   );
+
+
+                   if (faceCells.size() != nbrCells.size())
+                   {
+                       FatalErrorInFunction
+                           << "Mismatch in interface sizes (AMI?)" << nl
+                           << "Have " << faceCells.size() << " != "
+                           << nbrCells.size() << nl
+                           << exit(FatalError);
+                   }
+
+                   const label off = globalNumbering_.localStart();
+
+                   // NB:opposite sign since we using this sides'
+                   //    coefficients (from discretising our face; not the
+                   //    neighbouring, reversed face)
+
+                   forAll(faceCells, i)
+                   {
+                       PetscInt globRow = faceCells[i] + off; // row is sorted
+                       PetscInt globCol = nbrCells[i];  // <- already global
+                       PetscScalar val = -bCoeffs[i];
+
+                       MatSetValues
+                       (
+                           Amat,
+                           1, &globRow,
+                           1, &globCol,
+                           &(val),
+                           INSERT_VALUES
+                       );
+                   }
+               }
+           }
+       }
+       MatAssemblyBegin(Amat, MAT_FINAL_ASSEMBLY);
+       MatAssemblyEnd(Amat, MAT_FINAL_ASSEMBLY);
     }
-
-    MatAssemblyBegin(Amat, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(Amat, MAT_FINAL_ASSEMBLY);
 }