# Patch overview
# Touches four files under applications/test/regex and src/OSspecific/POSIX:
#   * Test-regex.C  - test driver: relabelled output plus new labelled checks
#   * testRegexps   - test data: expected results noted, new cases added
#   * regExp.C      - "(?i)" prefix handling in set(); shared matchGrouping()
#   * regExp.H      - matching declarations and class documentation
#
# NOTE(review): runs of whitespace inside lines look collapsed (banner art,
# aligned columns, possibly the quoted test strings). Leading indentation and
# blank context lines are laid out to agree with the @@ hunk counts; confirm
# against the repository before applying.
#
# [1/4] Test-regex.C
#   - The echoed list is relabelled "Test expressions:" and match groups are
#     printed after a newline.
#   - "false" is now printed only when search() also fails; when search()
#     succeeds only " partial match" is printed.
#   - New labelled checks: NULL pattern, ignoreCase=true, embedded "(?i)"
#     prefix, empty pattern, and a bare "(?i)" prefix.
#   NOTE(review): the else-branches of the three positive checks still print
#   plain "no match" with no "fail - " label, unlike the negative checks;
#   looks like an oversight, confirm.
diff --git a/applications/test/regex/Test-regex.C b/applications/test/regex/Test-regex.C
index 711de2a913adb36ff721a9211ab0a96eff931314..9e74c36f2489d207a589e95b311bb2f6a6c8f12c 100644
--- a/applications/test/regex/Test-regex.C
+++ b/applications/test/regex/Test-regex.C
@@ -2,7 +2,7 @@
   ========= |
   \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
    \\ / O peration |
-    \\ / A nd | Copyright (C) 2011 OpenFOAM Foundation
+    \\ / A nd | Copyright (C) 2011-2015 OpenFOAM Foundation
      \\/ M anipulation |
 -------------------------------------------------------------------------------
 License
@@ -22,6 +22,7 @@ License
     along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.

 Description
+    Tests for regular expressions

 \*---------------------------------------------------------------------------*/

@@ -39,14 +40,13 @@ using namespace Foam;

 int main(int argc, char *argv[])
 {
-
     List<Tuple2<string, string> > rawList(IFstream("testRegexps")());
-    Info<< "input list:" << rawList << endl;
+    Info<< "Test expressions:" << rawList << endl;
     IOobject::writeDivider(Info) << endl;

     List<string> groups;

-    // report matches:
+    // Report matches:
     forAll(rawList, elemI)
     {
         const string& pat = rawList[elemI].first();
@@ -60,50 +60,87 @@ int main(int argc, char *argv[])
             Info<< "true";
             if (re.ngroups())
             {
-                Info<< " groups:" << groups;
+                Info<< nl << "groups: " << groups;
             }
         }
         else
         {
-            Info<< "false";
             if (re.search(str))
             {
                 Info<< " partial match";
             }
+            else
+            {
+                Info<< "false";
+            }
         }
         Info<< endl;
     }

-    Info<<"test regExp(const char*) ..." << endl;
+    Info<< nl << "test regExp(const char*) ..." << endl;
     string me("Mark");

+    // Handling of null strings
+    if (regExp(NULL).match(me))
+    {
+        Info<< "fail - matched: " << me << endl;
+    }
+    else
+    {
+        Info<< "pass - null pointer is no expression" << endl;
+    }
+
+    // Normal match
     if (regExp("[Mm]ar[ck]").match(me))
     {
-        Info<< "matched: " << me << endl;
+        Info<< "pass - matched: " << me << endl;
     }
     else
     {
         Info<< "no match" << endl;
     }

-    if (regExp("").match(me))
+    // Match ignore case
+    if (regExp("mar[ck]", true).match(me))
     {
-        Info<< "matched: " << me << endl;
+        Info<< "pass - matched: " << me << endl;
     }
     else
     {
         Info<< "no match" << endl;
     }

-    if (regExp(NULL).match(me))
+    // Embedded prefix for match ignore case
+    if (regExp("(?i)mar[ck]").match(me))
     {
-        Info<< "matched: " << me << endl;
+        Info<< "pass - matched: " << me << endl;
     }
     else
     {
         Info<< "no match" << endl;
     }

+    // Handling of empty expression
+    if (regExp("").match(me))
+    {
+        Info<< "fail - matched: " << me << endl;
+    }
+    else
+    {
+        Info<< "pass - no match on empty expression" << endl;
+    }
+
+    // Embedded prefix - but expression is empty
+    if (regExp("(?i)").match(me))
+    {
+        Info<< "fail - matched: " << me << endl;
+    }
+    else
+    {
+        Info<< "pass - no match on empty expression" << endl;
+    }
+
+
     Info<< endl;

     return 0;
# [2/4] testRegexps
#   Each (pattern string) pair now carries its expected result as a trailing
#   comment. New cases: a case-sensitive miss (".*a.*" against "Abc"), the
#   same pattern with the "(?i)" prefix, and two [[:digit:]] character-class
#   cases (one partial match, one full match).
diff --git a/applications/test/regex/testRegexps b/applications/test/regex/testRegexps
index 598ab0b0cf68a110388694ce469fd2ba18f285e8..904eb5d8614c6bd59f850455b66a8a92c40a2e3f 100644
--- a/applications/test/regex/testRegexps
+++ b/applications/test/regex/testRegexps
@@ -8,14 +8,30 @@

 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

-// pattern, string
+// Pattern, String
 (
-    ( "a.*" "abc" )
-    ( "a.*" "bac" )
-    ( "a.*" "abcd" )
-    ( "a.*" "def" )
-    ( "d(.*)f" "def" )
-    ( " *([A-Za-z]+) *= *([^ /]+) *(//.*)?" " keyword = value // settings" )
+    ( "a.*" "abc" ) // true
+    ( "a.*" "bac" ) // false - partial match only
+    ( "a.*" "abcd" ) // true
+    ( "a.*" "def" ) // false
+    ( ".*a.*" "Abc" ) // false
+    ( "(?i).*a.*" "Abc" ) // true
+    ( "d(.*)f" "def" ) // true
+    (
+        " *([A-Za-z]+) *= *([^ /]+) *(//.*)?"
+        " keyword = value // comment"
+    ) // true
+
+    (
+        "[[:digit:]]"
+        "contains 1 or more digits"
+    ) // false - partial match only
+
+    (
+        "[[:digit:]]+-[[:digit:]]+-[[:digit:]]+-[[:digit:]]+"
+        "1-905-123-2234"
+    ) // true
+
 )

 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
# [3/4] regExp.C
#   - set(): a leading "(?i)" now adds REG_ICASE and is skipped before
#     regcomp(); if nothing follows the prefix the function returns before
#     any regex_t is allocated. The allocation moves after that check.
#   - The group-capturing match() body moves into the private template
#     matchGrouping(), which both the List<std::string> and the
#     List<Foam::string> overloads now call.
#   NOTE(review): strncmp(), label and FatalErrorIn are used, yet the patch
#   drops <sys/types.h>, label.H and IOstreams.H and adds no <cstring>;
#   presumably covered by the remaining includes, confirm.
#   NOTE(review): "regmatch_t pmatch[nmatch]" is sized at run time, i.e. a
#   variable-length array, which ISO C++ does not provide; it relies on a
#   compiler extension (carried over unchanged from the old match() body).
diff --git a/src/OSspecific/POSIX/regExp.C b/src/OSspecific/POSIX/regExp.C
index 2023f9ce59870df5786c1b58638c9c3f24ee882f..0bd52fc5d8cd9b588d77ee6dd8084db8e27bb145 100644
--- a/src/OSspecific/POSIX/regExp.C
+++ b/src/OSspecific/POSIX/regExp.C
@@ -2,7 +2,7 @@
   ========= |
   \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
    \\ / O peration |
-    \\ / A nd | Copyright (C) 2011-2012 OpenFOAM Foundation
+    \\ / A nd | Copyright (C) 2011-2015 OpenFOAM Foundation
      \\/ M anipulation |
 -------------------------------------------------------------------------------
 License
@@ -23,13 +23,61 @@ License

 \*---------------------------------------------------------------------------*/

-#include <sys/types.h>
-
 #include "regExp.H"
-#include "label.H"
 #include "string.H"
 #include "List.H"
-#include "IOstreams.H"
+
+// * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * //
+
+template<class StringType>
+bool Foam::regExp::matchGrouping
+(
+    const std::string& str,
+    List<StringType>& groups
+) const
+{
+    if (preg_ && str.size())
+    {
+        size_t nmatch = ngroups() + 1;
+        regmatch_t pmatch[nmatch];
+
+        // Also verify that the entire string was matched.
+        // pmatch[0] is the entire match
+        // pmatch[1..] are the (...) sub-groups
+        if
+        (
+            regexec(preg_, str.c_str(), nmatch, pmatch, 0) == 0
+         && (pmatch[0].rm_so == 0 && pmatch[0].rm_eo == label(str.size()))
+        )
+        {
+            groups.setSize(ngroups());
+            label groupI = 0;
+
+            for (size_t matchI = 1; matchI < nmatch; matchI++)
+            {
+                if (pmatch[matchI].rm_so != -1 && pmatch[matchI].rm_eo != -1)
+                {
+                    groups[groupI] = str.substr
+                    (
+                        pmatch[matchI].rm_so,
+                        pmatch[matchI].rm_eo - pmatch[matchI].rm_so
+                    );
+                }
+                else
+                {
+                    groups[groupI].clear();
+                }
+                groupI++;
+            }
+
+            return true;
+        }
+    }
+
+    groups.clear();
+    return false;
+}
+

 // * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //

@@ -69,18 +117,34 @@ void Foam::regExp::set(const char* pattern, const bool ignoreCase) const
 {
     clear();

-    // avoid NULL pointer and zero-length patterns
+    // Avoid NULL pointer and zero-length patterns
     if (pattern && *pattern)
     {
-        preg_ = new regex_t;
-
         int cflags = REG_EXTENDED;
         if (ignoreCase)
         {
             cflags |= REG_ICASE;
         }

-        int err = regcomp(preg_, pattern, cflags);
+        const char* pat = pattern;
+
+        // Check for embedded prefix for ignore-case
+        // this is the only embedded prefix we support
+        // - a simple check is sufficient
+        if (!strncmp(pattern, "(?i)", 4))
+        {
+            cflags |= REG_ICASE;
+            pat += 4;
+
+            // avoid zero-length patterns
+            if (!*pat)
+            {
+                return;
+            }
+        }
+
+        preg_ = new regex_t;
+        int err = regcomp(preg_, pat, cflags);

         if (err != 0)
         {
@@ -89,7 +153,7 @@ void Foam::regExp::set(const char* pattern, const bool ignoreCase) const

             FatalErrorIn
             (
-                "regExp::set(const char*)"
+                "regExp::set(const char*, const bool ignoreCase)"
             ) << "Failed to compile regular expression '" << pattern << "'"
                 << nl << errbuf
                 << exit(FatalError);
@@ -143,7 +207,7 @@ bool Foam::regExp::match(const std::string& str) const
         size_t nmatch = 1;
         regmatch_t pmatch[1];

-        // also verify that the entire string was matched
+        // Also verify that the entire string was matched
         // pmatch[0] is the entire match
         if
         (
@@ -159,48 +223,23 @@ bool Foam::regExp::match(const std::string& str) const
 }


-bool Foam::regExp::match(const string& str, List<string>& groups) const
+bool Foam::regExp::match
+(
+    const std::string& str,
+    List<std::string>& groups
+) const
 {
-    if (preg_ && str.size())
-    {
-        size_t nmatch = ngroups() + 1;
-        regmatch_t pmatch[nmatch];
-
-        // also verify that the entire string was matched
-        // pmatch[0] is the entire match
-        // pmatch[1..] are the (...) sub-groups
-        if
-        (
-            regexec(preg_, str.c_str(), nmatch, pmatch, 0) == 0
-         && (pmatch[0].rm_so == 0 && pmatch[0].rm_eo == label(str.size()))
-        )
-        {
-            groups.setSize(ngroups());
-            label groupI = 0;
-
-            for (size_t matchI = 1; matchI < nmatch; matchI++)
-            {
-                if (pmatch[matchI].rm_so != -1 && pmatch[matchI].rm_eo != -1)
-                {
-                    groups[groupI] = str.substr
-                    (
-                        pmatch[matchI].rm_so,
-                        pmatch[matchI].rm_eo - pmatch[matchI].rm_so
-                    );
-                }
-                else
-                {
-                    groups[groupI].clear();
-                }
-                groupI++;
-            }
+    return matchGrouping(str, groups);
+}

-            return true;
-        }
-    }

-    groups.clear();
-    return false;
+bool Foam::regExp::match
+(
+    const std::string& str,
+    List<Foam::string>& groups
+) const
+{
+    return matchGrouping(str, groups);
 }


# [4/4] regExp.H
#   Documents the "(?i)" prefix, declares the private matchGrouping()
#   template, re-wraps the set() comments, and replaces
#   match(const string&, List<string>&) with two overloads taking
#   const std::string& plus either List<std::string>& or List<string>&.
#   NOTE(review): the last context line of this file's diff has no closing
#   ';', so the patch text may be cut short there; confirm.
diff --git a/src/OSspecific/POSIX/regExp.H b/src/OSspecific/POSIX/regExp.H
index 1edb856564ef870ae794970eaa4f57a2ea41a542..b4f4554790b0b365b51492ab75b524dbb1915d0f 100644
--- a/src/OSspecific/POSIX/regExp.H
+++ b/src/OSspecific/POSIX/regExp.H
@@ -2,7 +2,7 @@
   ========= |
   \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
    \\ / O peration |
-    \\ / A nd | Copyright (C) 2011 OpenFOAM Foundation
+    \\ / A nd | Copyright (C) 2011-2015 OpenFOAM Foundation
      \\/ M anipulation |
 -------------------------------------------------------------------------------
 License
@@ -27,6 +27,9 @@ Class
 Description
     Wrapper around POSIX extended regular expressions.

+    The PCRE '(?i)' extension is provided to compile the regular expression
+    as being case-insensitive.
+
 SeeAlso
     The manpage regex(7) for more information about POSIX regular expressions.
     These differ somewhat from \c Perl and \c sed regular expressions.
@@ -51,6 +54,7 @@ namespace Foam
 class string;
 template<class T> class List;

+
 /*---------------------------------------------------------------------------*\
                            Class regExp Declaration
 \*---------------------------------------------------------------------------*/
@@ -71,6 +75,16 @@ class regExp
         //- Disallow default bitwise assignment
         void operator=(const regExp&);

+        //- Return true if it matches and sets the sub-groups matched.
+        // Templated to support both std::string and Foam::string
+        template<class StringType>
+        bool matchGrouping
+        (
+            const std::string&,
+            List<StringType>& groups
+        ) const;
+
+
 public:

     // Static Member Functions
@@ -135,15 +149,14 @@ public:

     // Editing

-        //- Compile pattern into a regular expression, optionally ignoring
-        // case
+        //- Compile pattern into a regular expression,
+        // optionally ignoring case
         void set(const char*, const bool ignoreCase=false) const;

-        //- Compile pattern into a regular expression, optionally ignoring
-        // case
+        //- Compile pattern into a regular expression,
+        // optionally ignoring case
         void set(const std::string&, const bool ignoreCase=false) const;

-
         //- Release precompiled expression.
         // Returns true if precompiled expression existed before clear
         bool clear() const;
@@ -161,7 +174,11 @@ public:

         //- Return true if it matches and sets the sub-groups matched
         // The begin-of-line (^) and end-of-line ($) anchors are implicit
-        bool match(const string&, List<string>& groups) const;
+        bool match(const std::string&, List<std::string>& groups) const;
+
+        //- Return true if it matches and sets the sub-groups matched
+        // The begin-of-line (^) and end-of-line ($) anchors are implicit
+        bool match(const std::string&, List<string>& groups) const;

         //- Return true if the regex was found within string
         bool search(const std::string& str) const