From 95a33c2f68f3a7bcc723c0d349bf44dbf318223b Mon Sep 17 00:00:00 2001
From: Mark Olesen <Mark.Olesen@esi-group.com>
Date: Thu, 14 Feb 2019 11:03:04 +0100
Subject: [PATCH] ENH: wordRes::uniq() removes all duplicates

- Previously only duplicate literals were removed; now all duplicate
  entries (literals and regular expressions alike) are removed.

- Replace the previous wordHashSet implementation with a linear search.
  The lists are normally fairly small and mostly contain unique entries
  anyhow, so this reduces the overall overhead.
---
 applications/test/wordRe/Test-wordRe.C        | 31 +++++++++-
 .../primitives/strings/wordRes/wordRes.C      | 61 ++++++++++++++-----
 .../primitives/strings/wordRes/wordRes.H      | 10 +--
 3 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/applications/test/wordRe/Test-wordRe.C b/applications/test/wordRe/Test-wordRe.C
index ca05244c5ef..8917e847648 100644
--- a/applications/test/wordRe/Test-wordRe.C
+++ b/applications/test/wordRe/Test-wordRe.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2017-2018 OpenCFD Ltd.
+    \\  /    A nd           | Copyright (C) 2017-2019 OpenCFD Ltd.
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
                             | Copyright (C) 2011-2016 OpenFOAM Foundation
@@ -36,6 +36,7 @@ Description
 #include "keyType.H"
 #include "wordRes.H"
 #include "predicates.H"
+#include "Random.H"
 
 using namespace Foam;
 
@@ -139,6 +140,34 @@ int main(int argc, char *argv[])
     Info<< "string match: "  << string("x.*")("xyz") << nl;
     Info<< "string match: "  << string("x.*")(keyre) << nl;
 
+
+    // Test uniq
+    {
+        Random rnd;
+        const label last = wres1.size()-1;
+
+        for (label i = 0; i < 8; ++i)
+        {
+            // Make a copy
+            wordRe wre(wres1[rnd.position<label>(0,last)]);
+
+            // Append
+            wres1.append(wre);
+        }
+
+        // Add some entropy
+        Foam::shuffle(wres1);
+
+        Info<< nl
+            << "Test uniq on " << wres1
+            << "  ==  " << wordRes::uniq(wres1) << nl;
+
+        // Inplace
+        wres1.uniq();
+        Info<< nl << "Inplace: " << wres1 << nl;
+    }
+    Info<< nl;
+
     wordRe(s1, wordRe::DETECT).info(Info) << nl;
     wordRe(s2).info(Info) << nl;
     wordRe(s2, wordRe::DETECT).info(Info) << nl;
diff --git a/src/OpenFOAM/primitives/strings/wordRes/wordRes.C b/src/OpenFOAM/primitives/strings/wordRes/wordRes.C
index 580753e73f0..79bdcdfe960 100644
--- a/src/OpenFOAM/primitives/strings/wordRes/wordRes.C
+++ b/src/OpenFOAM/primitives/strings/wordRes/wordRes.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2016-2018 OpenCFD Ltd.
+    \\  /    A nd           | Copyright (C) 2016-2019 OpenCFD Ltd.
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -24,26 +24,41 @@ License
 \*---------------------------------------------------------------------------*/
 
 #include "wordRes.H"
-#include "HashSet.H"
 
 // * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * //
 
 Foam::wordRes Foam::wordRes::uniq(const UList<wordRe>& input)
 {
     wordRes output(input.size());
-    wordHashSet uniqWord;
+
+    // Use linear List search instead of HashSet, since the lists are
+    // normally fairly small and mostly just have unique entries
+    // anyhow. This reduces the overall overhead.
+
+    List<bool> duplicate(input.size(), false);  // Track duplicates
 
     label count = 0;
-    for (const wordRe& select : input)
+
+    forAll(input, i)
     {
-        if (select.isPattern() || uniqWord.insert(select))
+        const wordRe& val = input[i];
+
+        const label next = input.find(val, i+1);
+
+        if (next > i)
         {
-            output[count] = select;
+            duplicate[next] = true;  // Duplicate
+        }
+
+        if (!duplicate[i])
+        {
+            output[count] = val;
             ++count;
         }
     }
 
     output.resize(count);
+
     return output;
 }
 
@@ -52,23 +67,37 @@ Foam::wordRes Foam::wordRes::uniq(const UList<wordRe>& input)
 
 void Foam::wordRes::uniq()
 {
-    wordHashSet uniqWord;
+    List<wordRe> input = *this;
+
+    wordRes& output = *this;
 
-    label i = 0, count = 0;
-    for (wordRe& select : *this)
+    // Use linear List search instead of HashSet, since the lists are
+    // normally fairly small and mostly just have unique entries
+    // anyhow. This reduces the overall overhead.
+
+    List<bool> duplicate(input.size(), false);  // Track duplicates
+
+    label count = 0;
+
+    forAll(input, i)
     {
-        if (select.isPattern() || uniqWord.insert(select))
+        wordRe& val = input[i];
+
+        const label next = input.find(val, i+1);
+
+        if (next > i)
         {
-            if (count != i)
-            {
-                (*this)[count] = std::move(select);
-            }
+            duplicate[next] = true;  // Duplicate
+        }
+
+        if (!duplicate[i])
+        {
+            output[count] = std::move(val);
             ++count;
         }
-        ++i;
     }
 
-    resize(count);
+    output.resize(count);
 }
 
 
diff --git a/src/OpenFOAM/primitives/strings/wordRes/wordRes.H b/src/OpenFOAM/primitives/strings/wordRes/wordRes.H
index ac0fa9c9c9f..c8385451b05 100644
--- a/src/OpenFOAM/primitives/strings/wordRes/wordRes.H
+++ b/src/OpenFOAM/primitives/strings/wordRes/wordRes.H
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2016-2018 OpenCFD Ltd.
+    \\  /    A nd           | Copyright (C) 2016-2019 OpenCFD Ltd.
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -101,8 +101,8 @@ public:
         //- Return a null wordRes - a reference to the NullObject
         inline static const wordRes& null();
 
-        //- Return a wordRes with duplicate words filtered out.
-        //  No filtering attempted on regular expressions.
+        //- Return a wordRes with duplicate entries filtered out.
+        //  No distinction made between literals and regular expressions.
         static wordRes uniq(const UList<wordRe>& input);
 
 
@@ -118,8 +118,8 @@ public:
 
     // Member Functions
 
-        //- Filter out duplicate words (inplace).
-        //  No filtering attempted on regular expressions.
+        //- Filter out duplicate entries (inplace).
+        //  No distinction made between literals and regular expressions.
         void uniq();
 
         //- Smart match as literal or regex, stopping on the first match.
-- 
GitLab