From 93b51a524cea1b4b7938bd677526beea7ea9bcb3 Mon Sep 17 00:00:00 2001
From: mark <mark@opencfd>
Date: Tue, 10 Jan 2017 12:42:40 +0100
Subject: [PATCH] ENH: optionally eliminate duplicates on hashedWordList
 construction (issue #375)

- makes it easier to use as a wordHashSet replacement for situations
  where we want to avoid duplicates but retain the input order.

- support construction from a HashTable. The result is like that of
  HashTable::sortedToc, but with its own hashed lookup of the keys.

- expose the rehash() method to the user. There is normally no need to
  call it directly, but also no reason to lock it away as private.
---
 applications/test/HashSet/Test-hashSet.C      |  22 ++-
 applications/test/mesh/Test-mesh.C            |   1 -
 .../primitives/strings/lists/hashedWordList.C | 141 +++++-------------
 .../primitives/strings/lists/hashedWordList.H |  98 ++++++++----
 .../strings/lists/hashedWordListI.H           | 120 ++++++++++++++-
 5 files changed, 245 insertions(+), 137 deletions(-)

diff --git a/applications/test/HashSet/Test-hashSet.C b/applications/test/HashSet/Test-hashSet.C
index bc219e9b968..dcfa0aec7a3 100644
--- a/applications/test/HashSet/Test-hashSet.C
+++ b/applications/test/HashSet/Test-hashSet.C
@@ -42,7 +42,7 @@ int main(int argc, char *argv[])
         "def",
         "ghi"
     };
-    words = { "def", "ghi", "xy", "all", "begin", "all" };
+    words = { "def", "ghi", "xy", "all", "end", "all" };
 
     wordHashSet setA
     {
@@ -84,6 +84,26 @@ int main(int argc, char *argv[])
     Info<< "hashedWordList: " << words << nl
         << "with lookup: "  << words.lookup() << endl;
 
+    {
+        List<word> input = { "def", "ghi", "xy", "all", "end", "all", "def" };
+        hashedWordList words1(input, true);
+
+        Info<< "input word list: " << input << nl
+            << "without dup: "  << words1 << endl;
+
+        Info<< "from wordHashSet: " << hashedWordList(setA) << endl;
+        Info<< "from HashTable: " << hashedWordList(tableA) << endl;
+        Info<< "from HashTable: " << hashedWordList(tableB) << endl;
+
+        // Even a temporary union of several hash sets is accepted
+        Info<< "from hashSet: "
+            << hashedWordList
+               (
+                   wordHashSet(setA)
+                 | wordHashSet(tableA) | wordHashSet(tableB)
+               ) << endl;
+    }
+
     Info<< "wordHashSet: "    << setA << endl;
     Info<< "Table-HashSet: "  << tableA << endl;
     Info<< "Map<label>: "     << mapA << endl;
diff --git a/applications/test/mesh/Test-mesh.C b/applications/test/mesh/Test-mesh.C
index 8aa69045ef8..7af9de9f14d 100644
--- a/applications/test/mesh/Test-mesh.C
+++ b/applications/test/mesh/Test-mesh.C
@@ -52,7 +52,6 @@ int main(int argc, char *argv[])
     );
 
 
-    
     Info<< "Cell centres" << nl << mesh.cellCentres() << endl;
     Info<< "Cell volumes" << nl << mesh.cellVolumes() << endl;
     Info<< "Cell shapes" << nl << mesh.cellShapes() << endl;
diff --git a/src/OpenFOAM/primitives/strings/lists/hashedWordList.C b/src/OpenFOAM/primitives/strings/lists/hashedWordList.C
index ea744b0036c..d1a8a8ac9fe 100644
--- a/src/OpenFOAM/primitives/strings/lists/hashedWordList.C
+++ b/src/OpenFOAM/primitives/strings/lists/hashedWordList.C
@@ -3,7 +3,7 @@
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
     \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
-     \\/     M anipulation  | Copyright (C) 2016 OpenCFD Ltd.
+     \\/     M anipulation  | Copyright (C) 2016-2017 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -25,94 +25,46 @@ License
 
 #include "hashedWordList.H"
 
-// * * * * * * * * * * * * * Private Member Functions  * * * * * * * * * * * //
-
-void Foam::hashedWordList::rehash()
-{
-    indices_.clear();
-    forAll(*this, i)
-    {
-        indices_.insert(List<word>::operator[](i), i);
-    }
-}
-
-
 // * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
 
-Foam::hashedWordList::hashedWordList()
-:
-    List<word>()
-{}
-
-
-Foam::hashedWordList::hashedWordList(const UList<word>& names)
-:
-    List<word>(names)
-{
-    rehash();
-}
-
-
-Foam::hashedWordList::hashedWordList(const hashedWordList& names)
-:
-    List<word>(static_cast<const UList<word>&>(names))
-{
-    rehash();
-}
-
-
-Foam::hashedWordList::hashedWordList(const Xfer<List<word>>& names)
-:
-    List<word>(names)
-{
-    rehash();
-}
-
-
-Foam::hashedWordList::hashedWordList(std::initializer_list<word> lst)
-:
-    List<word>(lst)
-{
-    rehash();
-}
-
-
 Foam::hashedWordList::hashedWordList
 (
-    const label nNames,
-    const char** names
+    const label count,
+    const char** lst,
+    const bool removeDuplicates
 )
 :
-    List<word>(nNames)
+    List<word>(count)
 {
     forAll(*this, i)
     {
-        List<word>::operator[](i) = names[i];
+        List<word>::operator[](i) = lst[i];
     }
 
-    rehash();
+    rehash(removeDuplicates);
 }
 
 
 Foam::hashedWordList::hashedWordList
 (
-    const char** names
+    const char** lst,
+    const bool removeDuplicates
 )
 {
-    // count names
-    label nNames = 0;
-    for (unsigned i = 0; names[i] && *(names[i]); ++i)
+    // Determine the number of entries
+    label count = 0;
+    for (unsigned i = 0; lst[i] && *(lst[i]); ++i)
     {
-        ++nNames;
+        ++count;
     }
 
-    List<word>::setSize(nNames);
+    List<word>::setSize(count);
     forAll(*this, i)
     {
-        List<word>::operator[](i) = names[i];
+        List<word>::operator[](i) = lst[i];
     }
 
-    rehash();
+    rehash(removeDuplicates);
 }
 
 
@@ -124,59 +76,48 @@ Foam::hashedWordList::hashedWordList(Istream& is)
 
 // * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * * //
 
-void Foam::hashedWordList::clear()
-{
-    List<word>::clear();
-    indices_.clear();
-}
-
-
-void Foam::hashedWordList::append(const word& name)
-{
-    const label idx = size();
-    List<word>::append(name);
-    indices_.insert(name, idx);
-}
-
-
-void Foam::hashedWordList::transfer(List<word>& lst)
+void Foam::hashedWordList::transfer
+(
+    List<word>& lst,
+    const bool removeDuplicates
+)
 {
     List<word>::transfer(lst);
-    rehash();
+    rehash(removeDuplicates);
 }
 
 
-void Foam::hashedWordList::sort()
+void Foam::hashedWordList::rehash() const
 {
-    Foam::sort(*this);
-    rehash();
+    indices_.clear();
+
+    forAll(*this, i)
+    {
+        indices_.insert(List<word>::operator[](i), i);
+    }
 }
 
 
 void Foam::hashedWordList::uniq()
 {
-    if (size() != indices_.size())
+    indices_.clear();
+
+    label nElem = 0;
+    forAll(*this, i)
     {
-        // sizes don't match, which means there appear to be duplicates
+        const word& item = List<word>::operator[](i);
 
-        indices_.clear();
-        label nElem = 0;
-        forAll(*this, i)
+        if (indices_.insert(item, nElem))
         {
-            const word& item = List<word>::operator[](i);
-
-            if (indices_.insert(item, nElem))
+            if (nElem != i)
             {
-                if (nElem != i)
-                {
-                    List<word>::operator[](nElem) = item;
-                }
-                ++nElem;
+                List<word>::operator[](nElem) = item;
             }
+            ++nElem;
         }
-
-        List<word>::setSize(nElem);
     }
+
+    List<word>::setSize(nElem);
 }
 
 
@@ -193,7 +134,7 @@ Foam::Istream& Foam::operator>>(Istream& is, hashedWordList& lst)
 
 Foam::Ostream& Foam::operator<<(Ostream& os, const hashedWordList& lst)
 {
-    os  << static_cast<const List<word>&>(lst);
+    os  << static_cast<const UList<word>&>(lst);
     return os;
 }
 
diff --git a/src/OpenFOAM/primitives/strings/lists/hashedWordList.H b/src/OpenFOAM/primitives/strings/lists/hashedWordList.H
index d36b9721b20..733d012b92b 100644
--- a/src/OpenFOAM/primitives/strings/lists/hashedWordList.H
+++ b/src/OpenFOAM/primitives/strings/lists/hashedWordList.H
@@ -3,7 +3,7 @@
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
     \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
-     \\/     M anipulation  | Copyright (C) 2016 OpenCFD Ltd.
+     \\/     M anipulation  | Copyright (C) 2016-2017 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -25,7 +25,7 @@ Class
     Foam::hashedWordList
 
 Description
-    A wordList with hashed indices for faster lookup by name.
+    A wordList with additional hashed indices for fast lookup by name.
 
 SourceFiles
     hashedWordListI.H
@@ -62,38 +62,64 @@ class hashedWordList
     // Private data
 
         //- Hash of words/indices
-        HashTable<label,word> indices_;
+        mutable HashTable<label,word> indices_;
 
 
     // Private Member Functions
 
-        //- Rebuild the hash of indices
-        void rehash();
+        //- Rebuild the lookup hash, eliminating duplicates first if requested.
+        inline void rehash(const bool unique);
 
 public:
 
     // Constructors
 
         //- Construct null
-        hashedWordList();
+        inline hashedWordList();
 
         //- Copy constructor.
-        hashedWordList(const hashedWordList&);
-
-        //- Construct from list of words
-        hashedWordList(const UList<word>&);
+        inline hashedWordList(const hashedWordList& lst);
+
+        //- Construct from list of words,
+        //  optionally eliminating duplicates
+        inline hashedWordList
+        (
+            const UList<word>& lst,
+            const bool removeDuplicates=false
+        );
+
+        //- Construct by transferring the parameter contents,
+        //  optionally eliminating duplicates
+        inline hashedWordList
+        (
+            const Xfer<List<word>>& lst,
+            const bool removeDuplicates=false
+        );
 
         //- Construct from an initializer list
-        hashedWordList(std::initializer_list<word>);
-
-        //- Construct by transferring the parameter contents
-        hashedWordList(const Xfer<List<word>>&);
-
-        //- Construct from number and list of names
-        hashedWordList(const label nNames, const char** names);
-
-        //- Construct from a nullptr-terminated list of names
-        hashedWordList(const char** names);
+        inline hashedWordList(std::initializer_list<word>);
+
+        //- Construct from the word keys of any HashTable, sorting immediately.
+        //  This also handles a wordHashSet, which is derived from a HashTable.
+        //  The result is similar to a HashTable::sortedToc.
+        template<class AnyType, class AnyHash>
+        explicit inline hashedWordList
+        (
+            const HashTable<AnyType, word, AnyHash>& h
+        );
+
+        //- Construct from number and list of words,
+        //  optionally eliminating duplicates
+        hashedWordList
+        (
+            const label count,
+            const char** lst,
+            const bool removeDuplicates=false
+        );
+
+        //- Construct from a nullptr-terminated list of words,
+        //  optionally eliminating duplicates
+        hashedWordList(const char** lst, const bool removeDuplicates=false);
 
         //- Construct from Istream
         hashedWordList(Istream&);
@@ -102,47 +128,53 @@ public:
     // Member Functions
 
         //- Clear the list, i.e. set size to zero.
-        void clear();
+        inline void clear();
 
-        //- Append an element at the end of the list
-        void append(const word&);
+        //- Append an element at the end of the list,
+        //  optionally skipping the append if it would be a duplicate entry
+        inline void append(const word& name, const bool avoidDuplicates=false);
 
         //- Does the list contain the specified name
-        inline bool found(const word&) const;
+        inline bool found(const word& name) const;
 
         //- Does the list contain the specified name
-        inline bool contains(const word&) const;
+        inline bool contains(const word& name) const;
 
         //- Return the hash of words/indices for inspection
         inline const HashTable<label,word>& lookup() const;
 
         //- Transfer the contents of the argument List into this list
-        //  and annul the argument list.
-        void transfer(List<word>&);
+        //  and annul the argument list,
+        //  optionally eliminating duplicates
+        void transfer(List<word>& lst, const bool removeDuplicates=false);
+
+        //- Rebuild the lookup hash indices
+        void rehash() const;
 
         //- Sort the list and rehash the indices
-        void sort();
+        inline void sort();
 
-        //- Adjust the list if necessary to eliminate duplicate entries
+        //- Adjust the list if necessary to eliminate duplicate entries,
+        //  and rehash the indices
         void uniq();
 
 
     // Member Operators
 
         //- Assignment operator from list of words
-        inline void operator=(const UList<word>&);
+        inline void operator=(const UList<word>& lst);
 
         //- Assignment operator from initializer list
-        inline void operator=(std::initializer_list<word>);
+        inline void operator=(std::initializer_list<word> lst);
 
         //- Assignment operator.
-        inline void operator=(const hashedWordList&);
+        inline void operator=(const hashedWordList& lst);
 
         //- Return name corresponding to specified index
         inline const word& operator[](const label index) const;
 
         //- Return index corresponding to specified name
-        inline label operator[](const word&) const;
+        inline label operator[](const word& name) const;
 
 
     // Istream operators
diff --git a/src/OpenFOAM/primitives/strings/lists/hashedWordListI.H b/src/OpenFOAM/primitives/strings/lists/hashedWordListI.H
index 9dc7e5790d9..b011b26ecd7 100644
--- a/src/OpenFOAM/primitives/strings/lists/hashedWordListI.H
+++ b/src/OpenFOAM/primitives/strings/lists/hashedWordListI.H
@@ -3,7 +3,7 @@
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
     \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
-     \\/     M anipulation  | Copyright (C) 2016 OpenCFD Ltd.
+     \\/     M anipulation  | Copyright (C) 2016-2017 OpenCFD Ltd.
 -------------------------------------------------------------------------------
 License
     This file is part of OpenFOAM.
@@ -23,8 +23,117 @@ License
 
 \*---------------------------------------------------------------------------*/
 
+// * * * * * * * * * * * * * Private Member Functions  * * * * * * * * * * * //
+
+inline void Foam::hashedWordList::rehash(const bool unique)
+{
+    if (unique)
+    {
+        uniq();
+    }
+    else
+    {
+        rehash();
+    }
+}
+
+
+// * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
+
+inline Foam::hashedWordList::hashedWordList()
+:
+    List<word>(),
+    indices_()
+{}
+
+
+inline Foam::hashedWordList::hashedWordList(const hashedWordList& lst)
+:
+    List<word>(static_cast<const UList<word>&>(lst))
+{
+    rehash();
+}
+
+
+inline Foam::hashedWordList::hashedWordList
+(
+    const UList<word>& lst,
+    const bool removeDuplicates
+)
+:
+    List<word>(lst)
+{
+    rehash(removeDuplicates);
+}
+
+
+inline Foam::hashedWordList::hashedWordList
+(
+    const Xfer<List<word>>& lst,
+    const bool removeDuplicates
+)
+:
+    List<word>(lst)
+{
+    rehash(removeDuplicates);
+}
+
+
+inline Foam::hashedWordList::hashedWordList(std::initializer_list<word> lst)
+:
+    List<word>(lst)
+{
+    rehash();
+}
+
+
+template<class AnyType, class AnyHash>
+inline Foam::hashedWordList::hashedWordList
+(
+    const HashTable<AnyType, word, AnyHash>& h
+)
+:
+    List<word>(h.size())
+{
+    label nElem = 0;
+    for
+    (
+        typename HashTable<AnyType, word, AnyHash>::const_iterator
+        iter = h.cbegin();
+        iter != h.cend();
+        ++iter
+    )
+    {
+        List<word>::operator[](nElem++) = iter.key();
+    }
+
+    this->sort();
+}
+
+
 // * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
 
+inline void Foam::hashedWordList::clear()
+{
+    List<word>::clear();
+    indices_.clear();
+}
+
+
+inline void Foam::hashedWordList::append
+(
+    const word& name,
+    const bool avoidDuplicates
+)
+{
+    // Hash insert is always attempted; append if new or duplicates allowed
+    if (indices_.insert(name, size()) || !avoidDuplicates)
+    {
+        List<word>::append(name);
+    }
+}
+
+
 inline const Foam::HashTable<Foam::label,Foam::word>&
 Foam::hashedWordList::lookup() const
 {
@@ -44,6 +153,13 @@ inline bool Foam::hashedWordList::contains(const word& name) const
 }
 
 
+inline void Foam::hashedWordList::sort()
+{
+    Foam::sort(*this);
+    rehash();
+}
+
+
 // * * * * * * * * * * * * * * * Member Operators  * * * * * * * * * * * * * //
 
 inline void Foam::hashedWordList::operator=(const UList<word>& lst)
@@ -75,9 +191,9 @@ inline const Foam::word& Foam::hashedWordList::operator[]
 }
 
 
-// could return -1 instead of bombing out
 inline Foam::label Foam::hashedWordList::operator[](const word& name) const
 {
+    // Could return -1 instead of bombing out
     return indices_[name];
 }
 
-- 
GitLab