Commit 48787737 authored by Mark Olesen, committed by Andrew Heather
Browse files

ENH: reorganize regular expressions and add C++11 regex support

- new regExpCxx wrapper for C++11 regex support with drop-in
  compatibility with existing code.

- regExpPosix (was regExp), for future phase out in favour of regExpCxx.

- The regExp header will continue to be used for defining an
  appropriate typedef corresponding to the preferred implementation.
parent e0e04147
Test-regex.C
EXE = $(FOAM_USER_APPBIN)/Test-regex
Test-regex1.C
EXE = $(FOAM_USER_APPBIN)/Test-regex1
......@@ -2,10 +2,8 @@
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2017-2018 OpenCFD Ltd.
\\ / A nd | Copyright (C) 2017-2019 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
| Copyright (C) 2011-2016 OpenFOAM Foundation
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
......@@ -28,13 +26,14 @@ Description
\*---------------------------------------------------------------------------*/
#include "argList.H"
#include "IOstreams.H"
#include "IOobject.H"
#include "IFstream.H"
#include "regExp.H"
#include "SubStrings.H"
#include "Switch.H"
#include "regExpCxx.H"
#include "regExpPosix.H"
using namespace Foam;
......@@ -63,14 +62,16 @@ struct regexTest
}
};
// Inequality comparison, needed for list output.
// No comparison of contents is done: any two entries are reported as unequal.
bool operator!=(const struct regexTest& /*a*/, const struct regexTest& /*b*/)
{
    constexpr bool alwaysUnequal = true;
    return alwaysUnequal;
}
// Simple output of match groups
static Ostream& operator<<(Ostream& os, const regExp::results_type& sm)
static Ostream& operator<<(Ostream& os, const regExpCxx::results_type& sm)
{
for (std::smatch::size_type i = 1; i < sm.size(); ++i)
{
......@@ -81,83 +82,31 @@ static Ostream& operator<<(Ostream& os, const regExp::results_type& sm)
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
// Main program:
int main(int argc, char *argv[])
// Simple output of match groups
static Ostream& operator<<(Ostream& os, const regExpPosix::results_type& sm)
{
List<regexTest> rawList(IFstream("testRegexps")());
Info<< "Test expressions:" << rawList << endl;
IOobject::writeDivider(Info) << endl;
regExp::results_type match;
// Expect some failures:
const bool throwingError = FatalError.throwExceptions();
// Report matches:
for (const auto& testseq : rawList)
for (std::smatch::size_type i = 1; i < sm.size(); ++i)
{
const bool expected = testseq.expected;
const string& pat = testseq.pattern;
const string& str = testseq.text;
Info<< "Test " << Switch(expected) << ": "
<< str << " =~ m/" << pat.c_str() << "/ == ";
regExp re;
try
{
re = pat;
if (re.match(str, match))
{
Info<< "true";
if (re.ngroups())
{
Info<< " (" << re.ngroups() << " groups):" << match;
}
}
else if (re.search(str))
{
Info<< "partial match";
}
else
{
Info<< "false";
}
Info<< endl;
}
catch (const Foam::error& err)
{
Info<< "Caught FatalError " << err << nl << endl;
continue;
}
if (false)
{
regExp re2(std::move(re));
Info<<"move construct: " << re.exists() << "/" << re2.exists()
<< endl;
os << " " << sm.str(i);
}
re = std::move(re2);
Info<<"move assign: " << re.exists() << "/" << re2.exists()
<< endl;
return os;
}
re.swap(re2);
Info<<"swap: " << re.exists() << "/" << re2.exists()
<< endl;
}
}
template<class RegexType>
void generalTests()
{
Info<< nl << "test regExp(const char*) ..." << endl;
string me("Mark");
// Expect some failures:
const bool throwingError = FatalError.throwExceptions();
try
{
// Handling of null strings
if (regExp(nullptr).match(me))
if (RegexType(nullptr).match(me))
{
Info<< "fail - matched: " << me << endl;
}
......@@ -174,7 +123,7 @@ int main(int argc, char *argv[])
try
{
// Normal match
if (regExp("[Mm]ar[ck]").match(me))
if (RegexType("[Mm]ar[ck]").match(me))
{
Info<< "pass - matched: " << me << endl;
}
......@@ -191,7 +140,7 @@ int main(int argc, char *argv[])
try
{
// Match ignore case
if (regExp("mar[ck]", true).match(me))
if (RegexType("mar[ck]", true).match(me))
{
Info<< "pass - matched: " << me << endl;
}
......@@ -208,7 +157,7 @@ int main(int argc, char *argv[])
try
{
// Embedded prefix for match ignore case
if (regExp("(?i)mar[ck]").match(me))
if (RegexType("(?i)mar[ck]").match(me))
{
Info<< "pass - matched: " << me << endl;
}
......@@ -225,7 +174,7 @@ int main(int argc, char *argv[])
try
{
// Handling of empty expression
if (regExp("").match(me))
if (RegexType("").match(me))
{
Info<< "fail - matched: " << me << endl;
}
......@@ -242,7 +191,7 @@ int main(int argc, char *argv[])
try
{
// Embedded prefix - but expression is empty
if (regExp("(?i)").match(me))
if (RegexType("(?i)").match(me))
{
Info<< "fail - matched: " << me << endl;
}
......@@ -257,6 +206,144 @@ int main(int argc, char *argv[])
}
FatalError.throwExceptions(throwingError);
}
template<class RegexType>
void testExpressions(const UList<regexTest>& tests)
{
typename RegexType::results_type match;
// Expect some failures:
const bool throwingError = FatalError.throwExceptions();
// Report matches:
for (const auto& testseq : tests)
{
const bool expected = testseq.expected;
const string& pat = testseq.pattern;
const string& str = testseq.text;
Info<< "Test " << Switch(expected) << ": "
<< str << " =~ m/" << pat.c_str() << "/ == ";
RegexType re;
try
{
re = pat;
if (re.match(str, match))
{
Info<< "true";
if (re.ngroups())
{
Info<< " (" << re.ngroups() << " groups):" << match;
}
}
else if (re.search(str))
{
Info<< "partial match";
}
else
{
Info<< "false";
}
Info<< endl;
}
catch (const Foam::error& err)
{
Info<< "Caught FatalError " << err << nl << endl;
continue;
}
if (false)
{
RegexType re2(std::move(re));
Info<<"move construct: " << re.exists() << "/" << re2.exists()
<< endl;
re = std::move(re2);
Info<<"move assign: " << re.exists() << "/" << re2.exists()
<< endl;
re.swap(re2);
Info<<"swap: " << re.exists() << "/" << re2.exists()
<< endl;
}
}
FatalError.throwExceptions(throwingError);
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
// Main program:
int main(int argc, char *argv[])
{
argList::noBanner();
argList::noFunctionObjects();
argList::noParallel();
argList::addBoolOption
(
"cxx",
"Test C++11 regular expressions"
);
argList::addBoolOption
(
"posix",
"Test POSIX regular expressions"
);
argList::addArgument("file");
argList::addArgument("...");
argList::addArgument("fileN");
argList::noMandatoryArgs();
#include "setRootCase.H"
if (!args.count({"cxx", "posix"}))
{
Info<< "Specified one or more of -cxx, -posix" << nl;
return 1;
}
if (args.size() < 2)
{
Info<< "No test files specified .. restrict to general tests" << nl;
if (args.found("cxx"))
{
generalTests<regExpCxx>();
}
if (args.found("posix"))
{
generalTests<regExpPosix>();
}
}
for (label argi = 1; argi < args.size(); ++argi)
{
List<regexTest> tests(IFstream(args[argi])());
Info<< "Test expressions:" << tests << endl;
IOobject::writeDivider(Info) << endl;
if (args.found("cxx"))
{
testExpressions<regExpCxx>(tests);
}
if (args.found("posix"))
{
testExpressions<regExpPosix>(tests);
}
}
Info<< "\nDone" << nl << endl;
......
......@@ -4,7 +4,7 @@ signals/sigInt.C
signals/sigQuit.C
signals/sigStopAtWriteNow.C
signals/sigWriteNow.C
regExp.C
regExpPosix.C
timer.C
fileStat.C
POSIX.C
......
......@@ -2,10 +2,8 @@
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2017-2018 OpenCFD Ltd.
\\ / A nd | Copyright (C) 2019 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
| Copyright (C) 2011-2017 OpenFOAM Foundation
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
......@@ -23,176 +21,19 @@ License
You should have received a copy of the GNU General Public License
along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
Class
Typedef
Foam::regExp
Description
Wrapper around POSIX extended regular expressions.
The PCRE '(?i)' extension is provided to compile the regular expression
as being case-insensitive.
See also
The manpage regex(7) for more information about POSIX regular expressions.
These differ somewhat from \c Perl and \c sed regular expressions.
SourceFiles
regExpI.H
regExp.C
Selection of preferred regular expression implementation
\*---------------------------------------------------------------------------*/
#ifndef regExp_H
#define regExp_H
#include <regex.h>
#include <string>
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
// Forward declarations
template<class String> class SubStrings;
/*---------------------------------------------------------------------------*\
Class regExp Declaration
\*---------------------------------------------------------------------------*/
//- Wrapper around POSIX extended regular expressions.
//  Holds a pointer to a precompiled regex_t. Copy construction and copy
//  assignment are deleted; move construction, move assignment and swap
//  are declared instead.
class regExp
{
// Private data
//- Precompiled regular expression
regex_t* preg_;
public:
//- Type for matches
typedef SubStrings<std::string> results_type;
// Static Member Functions
//- Test if character appears to be a regular expression meta-character
// \return true if character is one of the following:
// - any character: '.' \n
// - quantifiers: '*', '+', '?' \n
// - grouping: '(', '|', ')' \n
// - range: '[', ']' \n
//
// \note The presence of '{', '}' regex bounds is not considered
inline static bool meta(char c);
// Constructors
//- Construct null
inline regExp();
//- Copy construct - disallowed
regExp(const regExp&) = delete;
//- Move construct
inline regExp(regExp&& rgx);
//- Construct from character array
//  Declared explicit: no implicit conversion from a pattern to a regExp
inline explicit regExp(const char* pattern);
//- Construct from string
//  Declared explicit: no implicit conversion from a pattern to a regExp
inline explicit regExp(const std::string& pattern);
//- Construct from character array, optionally ignore case
inline regExp(const char* pattern, bool ignoreCase);
//- Construct from string, optionally ignore case
inline regExp(const std::string& pattern, bool ignoreCase);
//- Destructor
inline ~regExp();
// Member functions
// Access
//- Return true if a precompiled expression does not exist
inline bool empty() const;
//- Return true if a precompiled expression exists
inline bool exists() const;
//- The number of capture groups for a non-empty expression
inline unsigned ngroups() const;
// Editing
//- Clear expression.
// \return True if expression had existed prior to the clear.
bool clear();
//- Swap contents
inline void swap(regExp& rgx);
//- Compile pattern into a regular expression, optionally ignore case.
// \param pattern the expression, as a character array
// \param ignoreCase ignore case when matching (default: false)
// \return True if the pattern was compiled
bool set(const char* pattern, bool ignoreCase=false);
//- Compile pattern into a regular expression, optionally ignore case.
// \param pattern the expression, as a string
// \param ignoreCase ignore case when matching (default: false)
// \return True if the pattern was compiled
bool set(const std::string& pattern, bool ignoreCase=false);
// Matching/Searching
//- Find position within the text.
// \return The index where it begins or string::npos if not found
std::string::size_type find(const std::string& text) const;
//- True if the regex matches the entire text.
// The begin-of-line (^) and end-of-line ($) anchors are implicit
bool match(const std::string& text) const;
//- True if the regex matches the text, set the matches.
// The first group starts at index 1 (0 is the entire match).
// The begin-of-line (^) and end-of-line ($) anchors are implicit
// \param text the text to test
// \param[out] matches receives the match groups
bool match(const std::string& text, results_type& matches) const;
//- Return true if the regex was found within the text
inline bool search(const std::string& text) const;
// Member Operators
//- Perform match on text
// NOTE(review): presumably the same as match(text) - confirm in regExpI.H
inline bool operator()(const std::string& text) const;
//- Copy assignment - disallowed
void operator=(const regExp&) = delete;
//- Move assignment
inline void operator=(regExp&& rgx);
//- Assign and compile pattern from a character array.
// Matching is case sensitive.
inline void operator=(const char* pattern);
//- Assign and compile pattern from string.
// Matching is case sensitive.
inline void operator=(const std::string& pattern);
};
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
} // End namespace Foam
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
#include "regExpI.H"
#include "regExpPosix.H"
#include "regExpFwd.H"
#endif
......
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2019 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
Typedef
Foam::regExp
Description
Selection of preferred regular expression implementation
\*---------------------------------------------------------------------------*/
#ifndef regExpFwd_H
#define regExpFwd_H
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
    // Forward declarations of the available implementations
    class regExpCxx;
    class regExpPosix;

    //- The regular expression implementation selected as Foam::regExp
    using regExp = regExpPosix;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
#endif
// ************************************************************************* //
......@@ -25,22 +25,32 @@ License
\*---------------------------------------------------------------------------*/
#include "regExp.H"
#include "regExpPosix.H"
#include "SubStrings.H"
#include "error.H"
// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
//- Definition of the static regExpPosix::grammar member, initialised to 0.
//  NOTE(review): the meaning of the value is not visible here - confirm
//  against the declaration in regExpPosix.H
int Foam::regExpPosix::grammar(0);
// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
namespace
{

//- Check that a match covers the complete text.
//  \param m    the match offsets to check
//  \param len  the length that must have been matched
//  \return true if the match starts at offset 0 and ends at offset len
static inline bool fullMatch(const regmatch_t& m, const regoff_t len)
{
    // Must begin at the very start ...
    if (m.rm_so != 0)
    {
        return false;
    }

    // ... and finish exactly at the end
    return (m.rm_eo == len);
}

} // End anonymous namespace
// * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * * //