Commit 3c5852eb authored by Mark Olesen's avatar Mark Olesen
Browse files

reworked regExp + wordRe a bit, minor change to keyType

regExp:
- added optional ignoreCase for constructor.
- the compile() method is now exposed as a set(...) method with an optional
  ignoreCase argument.  Not currently much use for the other regex compile
  flags though. The set() method can be used directly instead of the
  operator=() assignment.

keyType + wordRe:
- it's not clear that any particular characters are valid/invalid (compared
  to string or word), so just drop the valid(char) method for now

wordRe:
- a bool doesn't suffice, added enum compOption (compile-option)
- most constructors now have a compOption. In *all* cases it defaults to
  LITERAL - ie, the same behaviour for std::string and Foam::string
- added set(...) methods that do much the same as operator=(...), but the
  compOption can be specified.  In all cases, it defaults to DETECT.

 In Summary
    By default the constructors will generally preserve the argument as
    string literal and the assignment operators will use the wordRe::DETECT
    compOption to scan the string for regular expression meta characters
    and/or invalid word characters and react accordingly.

    The exceptions are when constructing/assigning from another
    Foam::wordRe (preserve the same type) or from a Foam::word (always
    literal).
parent 19503c93
......@@ -12,7 +12,8 @@
(
( "a.*" "abc" )
( "a.*" "bac" )
( "a.*" "abcd" )
( "A.*" "abcd" )
( "a.*" "ABCD" )
( "a.*" "def" )
( "d(.*)f" "def" )
( "plain" "def" )
......
......@@ -45,36 +45,42 @@ int main(int argc, char *argv[])
Foam::string s2("this .* file");
const char * s3 = "this .* file";
Info<< wordRe(s1).info() << endl;
Info<< wordRe(s2, false).info() << endl;
Info<< wordRe(s2).info() << endl;
Info<< wordRe(s3, true).info() << endl;
wordRe(s1, wordRe::DETECT).info(Info) << endl;
wordRe(s2).info(Info) << endl;
wordRe(s2, wordRe::DETECT).info(Info) << endl;
wordRe(s3, wordRe::REGEXP).info(Info) << endl;
wre = "this .* file";
Info<< wre.info() << endl;
wre.info(Info) << endl;
wre = s1;
Info<< wre.info() << endl;
wre.info(Info) << endl;
wre.uncompile();
Info<< wre.info() << " uncompiled" << endl;
wre.info(Info) << endl;
wre = "something";
Info<< wre.info() << " before" << endl;
wre.info(Info) << " before" << endl;
wre.uncompile();
Info<< wre.info() << " uncompiled" << endl;
wre.compile(true);
Info<< wre.info() << " after auto-detect" << endl;
wre.info(Info) << " uncompiled" << endl;
wre.compile(wordRe::DETECT);
wre.info(Info) << " after DETECT" << endl;
wre.compile(wordRe::NOCASE);
wre.info(Info) << " after NOCASE" << endl;
wre.compile(wordRe::DETECT_NOCASE);
wre.info(Info) << " after DETECT_NOCASE" << endl;
wre = "something .* value";
Info<< wre.info() << " before" << endl;
wre.info(Info) << " before" << endl;
wre.uncompile();
Info<< wre.info() << " uncompiled" << endl;
wre.compile(true);
Info<< wre.info() << " after auto-detect" << endl;
wre.info(Info) << " uncompiled" << endl;
wre.compile(wordRe::DETECT);
wre.info(Info) << " after DETECT" << endl;
wre.uncompile();
Info<< wre.info() << " uncompiled" << endl;
wre.info(Info) << " uncompiled" << endl;
wre.recompile();
Info<< wre.info() << " recompiled" << endl;
wre.info(Info) << " recompiled" << endl;
wre.set("something .* value", wordRe::LITERAL);
wre.info(Info) << " set as LITERAL" << endl;
IOobject::writeDivider(Info);
......@@ -88,12 +94,21 @@ int main(int argc, char *argv[])
const wordRe& wre = rawList[elemI].first();
const string& str = rawList[elemI].second();
Info<< wre.info()
wre.info(Info)
<< " equals:" << (wre == str)
<< "(" << wre.match(str, true) << ")"
<< " match:" << wre.match(str)
<< " str=" << str
<< endl;
wordRe wre2;
wre2.set(wre, wordRe::NOCASE);
wre2.info(Info)
<< " match:" << wre2.match(str)
<< " str=" << str
<< endl;
}
Info<< endl;
......
......@@ -32,30 +32,6 @@ License
#include "List.H"
#include "IOstreams.H"
// * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * * //
void Foam::regExp::compile(const char* pattern) const
{
clear();
// avoid NULL pointer and zero-length patterns
if (pattern && *pattern)
{
preg_ = new regex_t;
if (regcomp(preg_, pattern, REG_EXTENDED) != 0)
{
FatalErrorIn
(
"regExp::compile(const char*)"
) << "Failed to compile regular expression '" << pattern << "'"
<< exit(FatalError);
}
}
}
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
Foam::regExp::regExp()
......@@ -64,19 +40,19 @@ Foam::regExp::regExp()
{}
Foam::regExp::regExp(const char* pattern)
Foam::regExp::regExp(const char* pattern, const bool ignoreCase)
:
preg_(0)
{
compile(pattern);
set(pattern, ignoreCase);
}
Foam::regExp::regExp(const std::string& pattern)
Foam::regExp::regExp(const std::string& pattern, const bool ignoreCase)
:
preg_(0)
{
compile(pattern.c_str());
set(pattern.c_str(), ignoreCase);
}
......@@ -90,6 +66,39 @@ Foam::regExp::~regExp()
// * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * * //
void Foam::regExp::set(const char* pattern, const bool ignoreCase) const
{
clear();
// avoid NULL pointer and zero-length patterns
if (pattern && *pattern)
{
preg_ = new regex_t;
int cflags = REG_EXTENDED;
if (ignoreCase)
{
cflags |= REG_ICASE;
}
if (regcomp(preg_, pattern, cflags) != 0)
{
FatalErrorIn
(
"regExp::set(const char*)"
) << "Failed to compile regular expression '" << pattern << "'"
<< exit(FatalError);
}
}
}
//- Compile a pattern held in a std::string, optionally ignoring case.
//  Forwards to the character-array overload.
void Foam::regExp::set(const std::string& pattern, const bool ignoreCase) const
{
    const char* const rawPattern = pattern.c_str();

    set(rawPattern, ignoreCase);
}
bool Foam::regExp::clear() const
{
if (preg_)
......@@ -194,13 +203,13 @@ bool Foam::regExp::match(const string& str, List<string>& groups) const
void Foam::regExp::operator=(const char* pat)
{
compile(pat);
set(pat);
}
void Foam::regExp::operator=(const std::string& pat)
{
compile(pat.c_str());
set(pat);
}
......
......@@ -65,9 +65,6 @@ class regExp
// Private member functions
//- Compile into a regular expression
void compile(const char*) const;
//- Disallow default bitwise copy construct
regExp(const regExp&);
......@@ -100,11 +97,11 @@ public:
//- Construct null
regExp();
//- Construct from character array
regExp(const char*);
//- Construct from character array, optionally ignoring case
regExp(const char*, const bool ignoreCase=false);
//- Construct from std::string (or string)
regExp(const std::string&);
//- Construct from std::string (or string), optionally ignoring case
regExp(const std::string&, const bool ignoreCase=false);
// Destructor
......@@ -113,7 +110,9 @@ public:
// Member functions
//- Is the precompiled expression set?
//- Access
//- Does a precompiled expression exist?
inline bool exists() const
{
return preg_ ? true : false;
......@@ -125,10 +124,23 @@ public:
return preg_ ? preg_->re_nsub : 0;
}
//- Editing
//- Compile pattern into a regular expression, optionally ignoring case
void set(const char*, const bool ignoreCase=false) const;
//- Compile pattern into a regular expression, optionally ignoring case
void set(const std::string&, const bool ignoreCase=false) const;
//- Release precompiled expression.
// Returns true if precompiled expression existed before clear
bool clear() const;
//- Searching
//- Find position within string.
// Returns the index where it begins or string::npos if not found
std::string::size_type find(const std::string& str) const;
......@@ -150,12 +162,14 @@ public:
// Member Operators
//- Assign from a string and compile regular expression
void operator=(const std::string&);
//- Assign from a character array and compile regular expression
//- Assign and compile pattern from a character array
// Always case sensitive
void operator=(const char*);
//- Assign and compile pattern from string
// Always case sensitive
void operator=(const std::string&);
};
......
......@@ -99,9 +99,6 @@ public:
// Member functions
//- Is this character valid for a keyType
inline static bool valid(char c);
//- Should be treated as a match rather than a literal string
inline bool isPattern() const;
......
......@@ -81,13 +81,7 @@ inline Foam::keyType::keyType
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
//- Is this character valid for a keyType?
//  Every character except the double quote is accepted.
inline bool Foam::keyType::valid(char c)
{
return c != '"';
}
bool Foam::keyType::isPattern() const
inline bool Foam::keyType::isPattern() const
{
return isPattern_;
}
......
......@@ -29,13 +29,20 @@ Description
A wordRe is a word, but can also have a regular expression for matching
words.
By default the constructors will generally preserve the argument as
string literal and the assignment operators will use the wordRe::DETECT
compOption to scan the string for regular expression meta characters
and/or invalid word characters and react accordingly.
The exceptions are when constructing/assigning from another
Foam::wordRe (preserve the same type) or from a Foam::word (always
literal).
Note
If the string contents are changed - eg, by the operator+=() or by
string::replace(), etc - it will be necessary to use compile() or
recompile() to synchronize the regular expression.
THIS IS STILL A DRAFT -- NOT YET RELEASED FOR GENERAL USE
SourceFiles
wordRe.C
wordReIO.C
......@@ -47,7 +54,6 @@ SourceFiles
#include "word.H"
#include "regExp.H"
#include "InfoProxy.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
......@@ -79,12 +85,24 @@ class wordRe
public:
// Public data types
//- Enumeration with compile options
// The values are bit flags that may be combined.
// Note that 'REGEXP' is implicit if 'NOCASE' is specified alone.
enum compOption
{
LITERAL = 0, /*!< treat as a string literal */
DETECT = 1, /*!< detect if the string contains meta-characters */
REGEXP = 2, /*!< treat as regular expression */
NOCASE = 4, /*!< ignore case in regular expression */
DETECT_NOCASE = DETECT | NOCASE, /*!< DETECT combined with NOCASE */
REGEXP_NOCASE = REGEXP | NOCASE /*!< REGEXP combined with NOCASE */
};
//- Is this a meta character?
static inline bool meta(char);
//- Is this character valid for a wordRe
inline static bool valid(char);
//- Test string for regular expression meta characters
static inline bool isPattern(const string&);
......@@ -100,64 +118,67 @@ public:
inline wordRe(const word&);
//- Construct as copy of character array
// Treat as regular expression specified explicitly.
inline wordRe(const char*, const bool asPattern=false);
//- Construct as copy of string.
// Treat as regular expression specified explicitly.
inline wordRe(const string&, const bool asPattern);
// Optionally specify how it should be treated.
inline wordRe(const char*, const compOption=LITERAL);
//- Construct as copy of string.
// Auto-test for regular expression
inline wordRe(const string&);
// Optionally specify how it should be treated.
inline wordRe(const string&, const compOption=LITERAL);
//- Construct as copy of std::string
// Treat as regular expression specified explicitly.
inline wordRe(const std::string&, const bool asPattern);
//- Construct as copy of std::string
// Auto-test for regular expression
inline wordRe(const std::string&);
// Optionally specify how it should be treated.
inline wordRe(const std::string&, const compOption=LITERAL);
//- Construct from Istream
// Words are treated as literals, strings with an auto-test
wordRe(Istream&);
// Destructor
~wordRe();
// Member functions
//- Access
//- Should be treated as a match rather than a literal string?
inline bool isPattern() const;
//- Create and compile the regular expression
// Optionally detect if it appears to be a regular expression
inline bool compile(const bool detect=false) const;
//- Infrastructure
//- Compile the regular expression
inline bool compile() const;
//- Possibly compile the regular expression, with greater control
inline bool compile(const compOption) const;
//- Recompile an existing regular expression
inline bool recompile() const;
//- Frees precompiled regular expression and makes is a literal string.
//- Frees precompiled regular expression, making wordRe a literal.
// Optionally strips invalid word characters
inline void uncompile(const bool doStripInvalid=false) const;
//- Editing
//- Copy string, auto-test for regular expression or other options
inline void set(const std::string&, const compOption=DETECT);
//- Copy string, auto-test for regular expression or other options
inline void set(const char*, const compOption=DETECT);
//- Clear string and precompiled regular expression
inline void clear();
//- Searching
//- Smart match as regular expression or as a string
// Optionally specify a literal match only
inline bool match(const string&, bool literalMatch=false) const;
//- Miscellaneous
//- Return a string with quoted meta-characters
inline string quotemeta() const;
//- Return info proxy.
InfoProxy<wordRe> info() const
{
return *this;
}
//- Output some basic info
Ostream& info(Ostream&) const;
// Member operators
......@@ -165,18 +186,22 @@ public:
// Assignment
//- Assign copy
// Always case sensitive
inline void operator=(const wordRe&);
//- Copy word, never a regular expression
inline void operator=(const word&);
//- Copy string, auto-test for regular expression
// Always case sensitive
inline void operator=(const string&);
//- Copy string, auto-test for regular expression
// Always case sensitive
inline void operator=(const std::string&);
//- Copy string, auto-test for regular expression
// Always case sensitive
inline void operator=(const char*);
......@@ -187,10 +212,6 @@ public:
};
template<>
Ostream& operator<<(Ostream&, const InfoProxy<wordRe>&);
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
} // End namespace Foam
......
......@@ -32,16 +32,6 @@ inline bool Foam::wordRe::meta(char c)
}
//- Is this character valid for a wordRe?
//  Whitespace, the double quote and the forward slash are rejected.
inline bool Foam::wordRe::valid(char c)
{
    // The <cctype> classifiers require a value representable as
    // unsigned char (or EOF); a plain char may be negative, so convert
    // before calling isspace().
    return
    (
        !isspace(static_cast<unsigned char>(c))
     && c != '"'
     && c != '/'
    );
}
inline bool Foam::wordRe::isPattern(const string& str)
{
return string::meta<regExp>(str);
......@@ -76,66 +66,33 @@ inline Foam::wordRe::wordRe(const word& str)
{}
//- Construct as copy of character array.
//  The regular expression is compiled only when asPattern is true;
//  otherwise the regular expression member is left default-constructed.
inline Foam::wordRe::wordRe(const char* str, const bool asPattern)
:
// NOTE(review): the 'false' argument presumably disables stripping of
// invalid word characters - confirm against word.H
word(str, false),
re_()
{
if (asPattern)
{
// caller explicitly requested treatment as a regular expression
compile();
}
}
//- Construct as copy of string.
//  The regular expression is compiled only when asPattern is true;
//  otherwise the regular expression member is left default-constructed.
inline Foam::wordRe::wordRe(const string& str, const bool asPattern)
:
// NOTE(review): the 'false' argument presumably disables stripping of
// invalid word characters - confirm against word.H
word(str, false),
re_()
{
if (asPattern)
{
// caller explicitly requested treatment as a regular expression
compile();
}
}
inline Foam::wordRe::wordRe(const string& str)
inline Foam::wordRe::wordRe(const char* str, const compOption opt)
:
word(str, false),
re_()
{
compile(true); // auto-detect regex
compile(opt);
}
inline Foam::wordRe::wordRe(const std::string& str, const bool asPattern)
inline Foam::wordRe::wordRe(const string& str, const compOption opt)
:
word(str, false),
re_()
{
if (asPattern)
{
compile();
}
compile(opt);
}
inline Foam::wordRe::wordRe(const std::string& str)
inline Foam::wordRe::wordRe(const std::string& str, const compOption opt)
:
word(str, false),
re_()
{
compile(true); // auto-detect regex
compile(opt);
}
// * * * * * * * * * * * * * * * * Destructor * * * * * * * * * * * * * * * //
//- Destructor. The body is empty; no explicit cleanup is done here.
Foam::wordRe::~wordRe()
{}
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
inline bool Foam::wordRe::isPattern() const
......@@ -144,22 +101,47 @@ inline bool Foam::wordRe::isPattern() const
}
inline bool Foam::wordRe::compile(const bool detect) const
inline bool Foam::wordRe::compile(const compOption opt) const
{
// appears to be a plain word and not a regex
if (detect && string::valid<word>(*this) && !string::meta<regExp>(*this))
bool doCompile = false;
if (opt & wordRe::REGEXP)
{
re_.clear();
doCompile = true;
}
else if (opt & wordRe::DETECT)
{
if (string::meta<regExp>(*this) || !string::valid<word>(*this))
{
doCompile = true;
}
}
else if (opt & wordRe::NOCASE)
{
doCompile = true;
}
if (doCompile)
{
re_.set(*this