From 49a70d832d25ca29232cb918b57420a0b2fb1b8d Mon Sep 17 00:00:00 2001 From: Mark Olesen <Mark.Olesen@Germany> Date: Sun, 7 Feb 2010 14:31:40 +0100 Subject: [PATCH] ENH: add C++-based wmkdepend parser (uses Coco/R grammar). - This avoids dependency on lex/flex and provides better encapsulation for buffer switching. As a result, the maximum number of open files only corresponds to the include depth. --- wmake/src/Makefile | 16 +- wmake/src/wmkdepend.cpp | 164 +++++ wmake/src/wmkdependParser.atg | 497 ++++++++++++++++ wmake/src/wmkdependParser.cpp | 575 ++++++++++++++++++ wmake/src/wmkdependParser.h | 310 ++++++++++ wmake/src/wmkdependScanner.cpp | 1022 ++++++++++++++++++++++++++++++++ wmake/src/wmkdependScanner.h | 477 +++++++++++++++ 7 files changed, 3058 insertions(+), 3 deletions(-) create mode 100644 wmake/src/wmkdepend.cpp create mode 100644 wmake/src/wmkdependParser.atg create mode 100644 wmake/src/wmkdependParser.cpp create mode 100644 wmake/src/wmkdependParser.h create mode 100644 wmake/src/wmkdependScanner.cpp create mode 100644 wmake/src/wmkdependScanner.h diff --git a/wmake/src/Makefile b/wmake/src/Makefile index a74c250f881..cac2a639a52 100644 --- a/wmake/src/Makefile +++ b/wmake/src/Makefile @@ -2,7 +2,7 @@ # ========= | # \\ / F ield | OpenFOAM: The Open Source CFD Toolbox # \\ / O peration | -# \\ / A nd | Copyright (C) 1991-2009 OpenCFD Ltd. +# \\ / A nd | Copyright (C) 1991-2010 OpenCFD Ltd. 
# \\/ M anipulation | #------------------------------------------------------------------------------ # License @@ -60,16 +60,17 @@ include $(RULES)/$(WM_LINK_LANGUAGE) # targets #------------------------------------------------------------------------------ -all: $(BIN)/dirToString $(BIN)/wmkdep +all: $(BIN)/dirToString $(BIN)/wmkdep $(BIN)/wmkdepend clean: - rm -f $(BIN)/dirToString $(BIN)/wmkdep 2>/dev/null + rm -f $(BIN)/dirToString $(BIN)/wmkdep $(BIN)/wmkdepend 2>/dev/null $(BIN)/dirToString: dirToString.c @mkdir -p $(BIN) $(cc) $(cFLAGS) dirToString.c -o $(BIN)/dirToString + $(BIN)/wmkdep: wmkdep.l @mkdir -p $(BIN) flex wmkdep.l @@ -77,4 +78,13 @@ $(BIN)/wmkdep: wmkdep.l @rm -f lex.yy.c 2>/dev/null +# for bootstrapping - use generated files directly (instead of from .atg file) +$(BIN)/wmkdepend: wmkdepend.cpp \ + wmkdependParser.cpp wmkdependScanner.cpp \ + wmkdependParser.h wmkdependScanner.h + @mkdir -p $(BIN) + $(CC) $(c++FLAGS) \ + wmkdepend.cpp wmkdependParser.cpp wmkdependScanner.cpp \ + -o $(BIN)/wmkdepend + #------------------------------------------------------------------------------ diff --git a/wmake/src/wmkdepend.cpp b/wmake/src/wmkdepend.cpp new file mode 100644 index 00000000000..159fb3849b9 --- /dev/null +++ b/wmake/src/wmkdepend.cpp @@ -0,0 +1,164 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd. + \\/ M anipulation | +------------------------------------------------------------------------------ +License + This file is part of OpenFOAM. + + OpenFOAM is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. 
+ + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Application + wmkdepend + +Description + A fast dependency list generator that emulates the behaviour and + output of cpp -M. However, the output contains no duplications and + is ~40% faster than cpp. + + The algorithm uses a Coco/R scanner to find includes and searches the files + found. Each file is entered into a hash table so that files are scanned + only once. This is why this program is faster than cpp. + +Usage + wmkdepend [ -Idirectory ... -Idirectory ] filename + +\*---------------------------------------------------------------------------*/ + +#include <cstdio> +#include <stdlib.h> +#include <cstring> + +#include "wmkdependParser.h" + +// Note: since we use the Coco/R default error messages, we must use +// wide streams for stderr. + +void printUsage(const char* message = NULL) +{ + if (message) + { + fwprintf(stderr, L"\nError: %s\n\n", message); + } + + fwprintf + ( + stderr, + L"Usage: wmkdepend [ -Idirectory ... 
-Idirectory ] filename\n" + ); +} + + +int main(int argc, char* argv[]) +{ + if (argc == 1) + { + printUsage("input file not supplied"); // prefix added by printUsage + ::exit(1); + } + + for (int i=1; i < argc; i++) + { + if (strncmp(argv[i], "-I", 2) == 0 && strlen(argv[i]) > 2) + { + std::string dirName(argv[i] + 2); + + // add trailing slash if required + if (dirName.rfind('/') != dirName.size()-1) + { + dirName += '/'; + } + + wmake::Parser::includeDirs.push_back(dirName); + } + } + + std::string sourceFile(argv[argc-1]); + + fwprintf + ( + stderr, + L"Making dependency list for source file %s\n", + sourceFile.c_str() + ); + + std::string::size_type basePos = sourceFile.rfind('/'); + if (basePos == std::string::npos) + { + basePos = 0; + } + else + { + basePos++; + } + + std::string::size_type dotPos = sourceFile.rfind('.'); + if + ( + dotPos == std::string::npos + || dotPos == sourceFile.size()-1 + || dotPos <= basePos + ) + { + fwprintf + ( + stderr, + L"Cannot find extension in source file name %s\n", + sourceFile.c_str() + ); + ::exit(1); + } + + std::string depFile = sourceFile.substr(0, dotPos); + depFile += ".dep"; + + const std::string sourceExt = sourceFile.substr(dotPos); + if (sourceExt == ".java") + { + // import directories to ignore + wmake::Parser::ignoreDir("java.*"); + wmake::Parser::ignoreDir("org.*"); + wmake::Parser::ignoreDir("com.*"); + wmake::Parser::ignoreDir("sunw.*"); + wmake::Parser::ignoreDir("sun.*"); + wmake::Parser::ignoreDir("launcher.*"); + + std::cout + << "$(CLASSES_DIR)/" + << sourceFile.substr(basePos, dotPos - basePos) << ".class: " + << depFile << "\n"; + } + else + { + std::cout + << "$(OBJECTS_DIR)/" + << sourceFile.substr(basePos, dotPos - basePos) << ".o: " + << depFile << "\n"; + } + + + wmake::Parser::sourceFile = sourceFile; + wmake::Parser::depFile = depFile; + + wmake::Parser::includeFile(sourceFile); + + return 0; +} + + + +/*****************************************************************************/ diff --git 
a/wmake/src/wmkdependParser.atg b/wmake/src/wmkdependParser.atg new file mode 100644 index 00000000000..2da6ebd3133 --- /dev/null +++ b/wmake/src/wmkdependParser.atg @@ -0,0 +1,497 @@ +/*---------------------------------------------------------------------------*\ + Attributed Grammar for Coco/R (-*- C++ -*- version) + compile with: + coco-cpp wmkdependParser.atg +\*---------------------------------------------------------------------------*/ +[copy] +/*---------------------------------*- C++ -*---------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd. + \\/ M anipulation | +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +@file wmkdependParser.atg + +Description + An attributed Coco/R grammar to parse C/C++, Fortran and Java files + for include and import statements. + +SourceFiles + generated + +\*---------------------------------------------------------------------------*/ +[/copy] +#include <iostream> +#include <string> +#include <list> + +//! @brief A simple HashTable implementation +/** + * @note This hash table is only vaguely STL-like. 
In accordance with + * its present purpose, this hash table only supports a constIterator + * and no deletions. For simplicity, the constIterator increment is + * simply via a next() method. Instead of comparing to an end value, + * the constIterator valid() method is used. + * For example, + * @code + * for + * ( + * HashTable<foo>::constIterator iter = myHash.begin(); + * iter.valid(); + * iter.next() + * ) + * { + * std::cerr<< "key: " << iter.key() << "\n"; + * } + * @endcode + * + */ +class StringHashSet +{ + //! An entry within the HashTable + struct hashedEntry + { + const std::string key_; //<! The lookup key + hashedEntry *next_; //<! Pointer to next hashedEntry in sub-list + + hashedEntry(const std::string& key, hashedEntry *next=0) + : + key_(key), next_(next) + {} + }; + + const int size_; //<! fixed HashTable size + hashedEntry** table_; + +public: + + //! Construct with a default size + StringHashSet(int size = 500) + : + size_(size), + table_(new hashedEntry*[size_]) + { + memset(table_, 0, size_ * sizeof(hashedEntry*)); + } + + //! Destructor + ~StringHashSet() + { + for (int hashIdx = 0; hashIdx < size_; ++hashIdx) + { + hashedEntry* ep = table_[hashIdx]; + while (ep) + { + hashedEntry* del = ep; + ep = ep->next_; + delete del; + } + } + delete[] table_; + table_ = 0; + } + + //! Return hash index (bucket number) for lookup name in hash table + int hashKeyIndex(const std::string& name) const + { + int hashIdx = 0; + + // calculate hash index + for + ( + std::string::const_iterator iter = name.begin(); + iter != name.end(); + ++iter + ) + { + hashIdx = hashIdx << 1 ^ *iter; + } + + if (hashIdx < 0) + { + hashIdx = -hashIdx; + } + + return hashIdx % size_; + } + + + //! 
Return true if name is found in hash table + bool found(const std::string& name) const + { + const int hashIdx = hashKeyIndex(name); + + for (hashedEntry* ep = table_[hashIdx]; ep; ep = ep->next_) + { + if (name == ep->key_) + { + // found + return true; + } + } + + // entry not found + return false; + } + + + //! Return true if name is found in hash table, insert if not found + bool foundOrInsert(const std::string& name) + { + const int hashIdx = hashKeyIndex(name); + + for (hashedEntry* ep = table_[hashIdx]; ep; ep = ep->next_) + { + if (name == ep->key_) + { + // found - return true + return true; + } + } + + // not found - insert it + table_[hashIdx] = new hashedEntry(name, table_[hashIdx]); + + // entry not found (but was added) - return false + return false; + } + +}; + + +/*---------------------------------------------------------------------------*/ + +COMPILER wmkdepend + // grammar pragmas: + $namespace=wmake + $prefix=wmkdepend + $define=FORCE_UTF8 + +/*---------------------------------------------------------------------------*/ +private: + + //! Hash of files already visited + static StringHashSet visitedFiles_; + + //! Hash of (java) directories already visited + static StringHashSet visitedDirs_; + + //! Replace all '.' with '/' + static void dotToSlash(std::string& name); + + //! Import (java) directories + static void importDir(const std::string& dirName); + + //! Import (java) file + static void importFile(const std::string& name); + +public: + //! Include directories to search + static std::list<std::string> includeDirs; + + //! The name of the top-level source file + static std::string sourceFile; + + //! The name of the top-level dep file + static std::string depFile; + + //! Add directory to list of visited dirs, thus effectively ignoring it + static void ignoreDir(const std::string& name); + + //! 
Include file + static void includeFile(const std::string& name); + +/*---------------------------------------------------------------------------*/ +[code] +#include <sys/types.h> +#include <dirent.h> + +StringHashSet Parser::visitedFiles_; +StringHashSet Parser::visitedDirs_; + +std::list<std::string> Parser::includeDirs; +std::string Parser::sourceFile; +std::string Parser::depFile; + + +void Parser::dotToSlash(std::string& name) +{ + std::string::size_type start = 0; + + while ((start = name.find('.', start)) != std::string::npos) + { + name.replace(start, 1, 1, '/'); + start++; + } +} + + +void Parser::ignoreDir(const std::string& name) +{ + visitedDirs_.foundOrInsert(name); +} + + +void Parser::includeFile(const std::string& name) +{ + if (visitedFiles_.foundOrInsert(name)) + { + return; + } + + // use stdio and buffering within Coco/R -- (faster) + FILE *fh = fopen(name.c_str(), "r"); + if (fh) + { + std::cout << depFile << ": " << name << "\n"; + } + else + { + for + ( + std::list<std::string>::const_iterator iter = includeDirs.begin(); + iter != includeDirs.end(); + ++iter + ) + { + const std::string pathName = *iter + name; + + fh = fopen(pathName.c_str(), "r"); + if (fh) + { + std::cout << depFile << ": " << pathName << "\n"; + break; + } + } + } + + if (fh) + { + Scanner scanner(fh); + Parser parser(&scanner); + + parser.Parse(); + fclose(fh); + } + else + { + fwprintf + ( + stderr, + L"could not open file %s for source file %s\n", + name.c_str(), sourceFile.c_str() + ); + } +} + + +void Parser::importFile(const std::string& name) +{ + // check if a globbed form was already visited + std::string::size_type dotPos = name.find('.'); + if (dotPos != std::string::npos) + { + std::string dirGlob = name.substr(0, dotPos); + dirGlob += ".*"; + + if (visitedDirs_.found(dirGlob)) + { + return; + } + } + + std::string javaFileName = name; + + dotToSlash(javaFileName); + javaFileName += ".java"; + + includeFile(javaFileName); +} + + +void Parser::importDir(const 
std::string& name) +{ + if (visitedDirs_.foundOrInsert(name)) + { + return; + } + + std::string dirName = name; + dotToSlash(dirName); + + DIR *source = opendir(dirName.c_str()); + + if (source) + { + struct dirent *list; + + // Read and parse all the entries in the directory + while ((list = readdir(source)) != NULL) + { + const char* ext = strstr(list->d_name, ".java"); + + // avoid matching on something like '.java~' + if (ext && strlen(ext) == 5) + { + std::string pathName = dirName + list->d_name; + includeFile(pathName); + } + } + + closedir(source); + } + else + { + fwprintf + ( + stderr, + L"could not open directory %s\n", + dirName.c_str() + ); + return; + } +} + +[/code] + + +/*---------------------------------------------------------------------------*/ + +CHARACTERS + letter = 'A'..'Z' + 'a'..'z' + '_'. + digit = "0123456789". + cr = '\r'. + lf = '\n'. + tab = '\t'. + stringCh = ANY - '"' - '\\' - cr - lf. + printable = '\u0020' .. '\u007e'. + java_letter = letter + '$'. + +// * * * * * * * * * * * * * * * * TOKENS * * * * * * * * * * * * * * * * * // + +TOKENS + +// string +string = + '"' { stringCh | '\\' printable } '"'. + +// single-quoted string (eg, Fortran) +sqstring = + '\'' { stringCh | '\\' printable } '\''. + +// for java import +package_name = + java_letter { java_letter | digit } + { '.' java_letter { java_letter | digit } } . + +// for java import +package_dir = + java_letter { java_letter | digit } + { '.' java_letter { java_letter | digit } } ".*" . + + +// * * * * * * * * * * * PRAGMAS / COMMENTS / IGNORE * * * * * * * * * * * // + +COMMENTS FROM "/*" TO "*/" NESTED +COMMENTS FROM "//" TO lf + +IGNORE tab + +// * * * * * * * * * * * * * * * PRODUCTIONS * * * * * * * * * * * * * * * // + +PRODUCTIONS + +wmkdepend += +{ + // C/C++-style includes + '#' + [ + "include" + [ + string (. + if (isUTF8()) + { + includeFile(t->toStringUTF8(1, t->length()-2)); + } + else + { + includeFile(t->toString(1, t->length()-2)); + } + .) 
+ ] + ] + [ ANY { ANY } ] '\n' // skip trailing junk + + // Fortran-style includes + | "include" + [ + sqstring (. + if (isUTF8()) + { + includeFile(t->toStringUTF8(1, t->length()-2)); + } + else + { + includeFile(t->toString(1, t->length()-2)); + } + .) + ] + [ ANY { ANY } ] '\n' // skip trailing junk + + // Java imports + | "import" + ( + package_dir (. + if (isUTF8()) + { + importDir(t->toStringUTF8()); + } + else + { + importDir(t->toString()); + } + .) + | package_name (. + if (isUTF8()) + { + importFile(t->toStringUTF8()); + } + else + { + importFile(t->toString()); + } + .) + ) + ';' + [ ANY { ANY } ] '\n' // skip trailing junk + + | [ ANY { ANY } ] '\n' // skip any other lines + +} +. + + +/*---------------------------------------------------------------------------*/ + +END wmkdepend. + +// ************************************************************************* // diff --git a/wmake/src/wmkdependParser.cpp b/wmake/src/wmkdependParser.cpp new file mode 100644 index 00000000000..b6d5e4e71a1 --- /dev/null +++ b/wmake/src/wmkdependParser.cpp @@ -0,0 +1,575 @@ +/*---------------------------------*- C++ -*---------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd. + \\/ M anipulation | +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. 
+ + You should have received a copy of the GNU General Public License + along with OpenFOAM; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +@file wmkdependParser.atg + +Description + An attributed Coco/R grammar to parse C/C++, Fortran and Java files + for include and import statements. + +SourceFiles + generated + +\*---------------------------------------------------------------------------*/ +// This file was generated with Coco/R C++ (7 Feb 2010) +// http://www.ssw.uni-linz.ac.at/coco/ +// with these defines: +// - FORCE_UTF8 + + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <cwchar> +#include <sstream> + +#include "wmkdependParser.h" + +namespace wmake { + + +#include <sys/types.h> +#include <dirent.h> + +StringHashSet Parser::visitedFiles_; +StringHashSet Parser::visitedDirs_; + +std::list<std::string> Parser::includeDirs; +std::string Parser::sourceFile; +std::string Parser::depFile; + + +void Parser::dotToSlash(std::string& name) +{ + std::string::size_type start = 0; + + while ((start = name.find('.', start)) != std::string::npos) + { + name.replace(start, 1, 1, '/'); + start++; + } +} + + +void Parser::ignoreDir(const std::string& name) +{ + visitedDirs_.foundOrInsert(name); +} + + +void Parser::includeFile(const std::string& name) +{ + if (visitedFiles_.foundOrInsert(name)) + { + return; + } + + // use stdio and buffering within Coco/R -- (faster) + FILE *fh = fopen(name.c_str(), "r"); + if (fh) + { + std::cout << depFile << ": " << name << "\n"; + } + else + { + for + ( + std::list<std::string>::const_iterator iter = includeDirs.begin(); + iter != includeDirs.end(); + ++iter + ) + { + const std::string pathName = *iter + name; + + fh = fopen(pathName.c_str(), "r"); + if (fh) + { + std::cout << depFile << ": " << pathName << "\n"; + break; + } + } + } + + if (fh) + { + Scanner scanner(fh); + Parser parser(&scanner); + + parser.Parse(); + fclose(fh); + } + else + { + 
fwprintf + ( + stderr, + L"could not open file %s for source file %s\n", + name.c_str(), sourceFile.c_str() + ); + } +} + + +void Parser::importFile(const std::string& name) +{ + // check if a globbed form was already visited + std::string::size_type dotPos = name.find('.'); + if (dotPos != std::string::npos) + { + std::string dirGlob = name.substr(0, dotPos); + dirGlob += ".*"; + + if (visitedDirs_.found(dirGlob)) + { + return; + } + } + + std::string javaFileName = name; + + dotToSlash(javaFileName); + javaFileName += ".java"; + + includeFile(javaFileName); +} + + +void Parser::importDir(const std::string& name) +{ + if (visitedDirs_.foundOrInsert(name)) + { + return; + } + + std::string dirName = name; + dotToSlash(dirName); + + DIR *source = opendir(dirName.c_str()); + + if (source) + { + struct dirent *list; + + // Read and parse all the entries in the directory + while ((list = readdir(source)) != NULL) + { + const char* ext = strstr(list->d_name, ".java"); + + // avoid matching on something like '.java~' + if (ext && strlen(ext) == 5) + { + std::string pathName = dirName + list->d_name; + includeFile(pathName); + } + } + + closedir(source); + } + else + { + fwprintf + ( + stderr, + L"could not open directory %s\n", + dirName.c_str() + ); + return; + } +} + + + + +//! @cond fileScope +// + +// +// Create by copying str - only used locally +inline static wchar_t* coco_string_create(const wchar_t* str) +{ + const int len = wcslen(str); + wchar_t* dst = new wchar_t[len + 1]; + wcsncpy(dst, str, len); + dst[len] = 0; + return dst; +} + + +// Free storage and nullify the argument +inline static void coco_string_delete(wchar_t* &str) +{ + delete[] str; + str = NULL; +} +// +//! 
@endcond fileScope + + +// ---------------------------------------------------------------------------- +// Parser Implementation +// ---------------------------------------------------------------------------- + +void Parser::SynErr(int n) +{ + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + + +void Parser::SemErr(const std::wstring& msg) +{ + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + + +bool Parser::isUTF8() const +{ + return scanner && scanner->buffer && scanner->buffer->isUTF8(); +} + + +void Parser::Get() +{ + for (;;) + { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) + { + ++errDist; + break; + } + if (dummyToken != t) + { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + + +void Parser::Expect(int n) +{ + if (la->kind == n) + { + Get(); + } + else + { + SynErr(n); + } +} + + +void Parser::ExpectWeak(int n, int follow) +{ + if (la->kind == n) + { + Get(); + } + else + { + SynErr(n); + while (!StartOf(follow)) + { + Get(); + } + } +} + + +bool Parser::WeakSeparator(int n, int syFol, int repFol) +{ + if (la->kind == n) + { + Get(); + return true; + } + else if (StartOf(repFol)) + { + return false; + } + else + { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) + { + Get(); + } + return StartOf(syFol); + } +} + + +void Parser::wmkdepend() +{ + while (StartOf(1)) { + if (la->kind == 5) { + Get(); + if (la->kind == 6) { + Get(); + if (la->kind == 1) { + Get(); + if (isUTF8()) + { + includeFile(t->toStringUTF8(1, t->length()-2)); + } + else + { + includeFile(t->toString(1, t->length()-2)); + } + + } + } + if (StartOf(2)) { + Get(); + while (StartOf(3)) { + Get(); + } + } + Expect(7); + } else if (la->kind == 6) { + Get(); + if 
(la->kind == 2) { + Get(); + if (isUTF8()) + { + includeFile(t->toStringUTF8(1, t->length()-2)); + } + else + { + includeFile(t->toString(1, t->length()-2)); + } + + } + if (StartOf(4)) { + Get(); + while (StartOf(3)) { + Get(); + } + } + Expect(7); + } else if (la->kind == 8) { + Get(); + if (la->kind == 4) { + Get(); + if (isUTF8()) + { + importDir(t->toStringUTF8()); + } + else + { + importDir(t->toString()); + } + + } else if (la->kind == 3) { + Get(); + if (isUTF8()) + { + importFile(t->toStringUTF8()); + } + else + { + importFile(t->toString()); + } + + } else SynErr(11); + Expect(9); + if (StartOf(3)) { + Get(); + while (StartOf(3)) { + Get(); + } + } + Expect(7); + } else { + if (StartOf(5)) { + Get(); + while (StartOf(3)) { + Get(); + } + } + Expect(7); + } + } +} + + + +void Parser::Parse() +{ + t = NULL; + // might call Parse() twice + if (dummyToken) { + coco_string_delete(dummyToken->val); + delete dummyToken; + } + dummyToken = new Token(coco_string_create(L"Dummy Token")); + la = dummyToken; + Get(); + wmkdepend(); + Expect(0); // expect end-of-file automatically added +} + + +Parser::Parser(Scanner* scan, Errors* err) +: + dummyToken(NULL), + deleteErrorsDestruct_(!err), + errDist(minErrDist), + scanner(scan), + errors(err), + t(NULL), + la(NULL) +{ + if (!errors) // add in default error handling + { + errors = new Errors(); + } + // user-defined initializations: +} + + +bool Parser::StartOf(int s) +{ + const bool T = true; + const bool x = false; + + static const bool set[6][12] = + { + {T,x,x,x, x,x,x,x, x,x,x,x}, + {x,T,T,T, T,T,T,T, T,T,T,x}, + {x,x,T,T, T,T,x,x, T,T,T,x}, + {x,T,T,T, T,T,T,x, T,T,T,x}, + {x,T,x,T, T,T,T,x, T,T,T,x}, + {x,T,T,T, T,x,x,x, x,T,T,x} + }; + + return set[s][la->kind]; +} + + +Parser::~Parser() +{ + if (deleteErrorsDestruct_) { delete errors; } // delete default error handling + if (dummyToken) { + coco_string_delete(dummyToken->val); + delete dummyToken; + } + // user-defined destruction: +} + + +// 
---------------------------------------------------------------------------- +// Errors Implementation +// ---------------------------------------------------------------------------- + +Errors::Errors() +: + count(0) +{} + + +Errors::~Errors() +{} + + +void Errors::clear() +{ + count = 0; +} + + +std::wstring Errors::strerror(int n) +{ + switch (n) { + case 0: return L"EOF expected"; break; + case 1: return L"string expected"; break; + case 2: return L"sqstring expected"; break; + case 3: return L"package_name expected"; break; + case 4: return L"package_dir expected"; break; + case 5: return L"\"#\" expected"; break; + case 6: return L"\"include\" expected"; break; + case 7: return L"\"\\n\" expected"; break; + case 8: return L"\"import\" expected"; break; + case 9: return L"\";\" expected"; break; + case 10: return L"??? expected"; break; + case 11: return L"invalid wmkdepend"; break; + default: + { + // std::wostringstream buf; (this typedef might be missing) + std::basic_ostringstream<wchar_t> buf; + buf << "error " << n; + return buf.str(); + } + break; + } +} + + +void Errors::Warning(const std::wstring& msg) +{ + fwprintf(stderr, L"%ls\n", msg.c_str()); +} + + +void Errors::Warning(int line, int col, const std::wstring& msg) +{ + fwprintf(stderr, L"-- line %d col %d: %ls\n", line, col, msg.c_str()); +} + + +void Errors::Error(int line, int col, const std::wstring& msg) +{ + fwprintf(stderr, L"-- line %d col %d: %ls\n", line, col, msg.c_str()); + count++; +} + + +void Errors::SynErr(int line, int col, int n) +{ + this->Error(line, col, this->strerror(n)); +} + + +void Errors::Exception(const std::wstring& msg) +{ + fwprintf(stderr, L"%ls", msg.c_str()); + ::exit(1); +} + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +} // End namespace + +// ************************************************************************* // diff --git a/wmake/src/wmkdependParser.h b/wmake/src/wmkdependParser.h new file mode 100644 index 
00000000000..8c7703adbd6 --- /dev/null +++ b/wmake/src/wmkdependParser.h @@ -0,0 +1,310 @@ +/*---------------------------------*- C++ -*---------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd. + \\/ M anipulation | +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +@file wmkdependParser.atg + +Description + An attributed Coco/R grammar to parse C/C++, Fortran and Java files + for include and import statements. + +SourceFiles + generated + +\*---------------------------------------------------------------------------*/ +// This file was generated with Coco/R C++ (7 Feb 2010) +// http://www.ssw.uni-linz.ac.at/coco/ +// with these defines: +// - FORCE_UTF8 + + +#ifndef COCO_wmkdependPARSER_H__ +#define COCO_wmkdependPARSER_H__ + +#include <iostream> +#include <string> +#include <list> + +//! @brief A simple HashTable implementation +/** + * @note This hash table is only vaguely STL-like. In accordance with + * its present purpose, this hash table only supports a constIterator + * and no deletions. For simplicity, the constIterator increment is + * simply via a next() method. 
Instead of comparing to an end value, + * the constIterator valid() method is used. + * For example, + * @code + * for + * ( + * HashTable<foo>::constIterator iter = myHash.begin(); + * iter.valid(); + * iter.next() + * ) + * { + * std::cerr<< "key: " << iter.key() << "\n"; + * } + * @endcode + * + */ +class StringHashSet +{ + //! An entry within the HashTable + struct hashedEntry + { + const std::string key_; //<! The lookup key + hashedEntry *next_; //<! Pointer to next hashedEntry in sub-list + + hashedEntry(const std::string& key, hashedEntry *next=0) + : + key_(key), next_(next) + {} + }; + + const int size_; //<! fixed HashTable size + hashedEntry** table_; + +public: + + //! Construct with a default size + StringHashSet(int size = 500) + : + size_(size), + table_(new hashedEntry*[size_]) + { + memset(table_, 0, size_ * sizeof(hashedEntry*)); + } + + //! Destructor + ~StringHashSet() + { + for (int hashIdx = 0; hashIdx < size_; ++hashIdx) + { + hashedEntry* ep = table_[hashIdx]; + while (ep) + { + hashedEntry* del = ep; + ep = ep->next_; + delete del; + } + } + delete[] table_; + table_ = 0; + } + + //! Return hash index (bucket number) for lookup name in hash table + int hashKeyIndex(const std::string& name) const + { + int hashIdx = 0; + + // calculate hash index + for + ( + std::string::const_iterator iter = name.begin(); + iter != name.end(); + ++iter + ) + { + hashIdx = hashIdx << 1 ^ *iter; + } + + if (hashIdx < 0) + { + hashIdx = -hashIdx; + } + + return hashIdx % size_; + } + + + //! Return true if name is found in hash table + bool found(const std::string& name) const + { + const int hashIdx = hashKeyIndex(name); + + for (hashedEntry* ep = table_[hashIdx]; ep; ep = ep->next_) + { + if (name == ep->key_) + { + // found + return true; + } + } + + // entry not found + return false; + } + + + //! 
Return true if name is found in hash table, insert if not found + bool foundOrInsert(const std::string& name) + { + const int hashIdx = hashKeyIndex(name); + + for (hashedEntry* ep = table_[hashIdx]; ep; ep = ep->next_) + { + if (name == ep->key_) + { + // found - return true + return true; + } + } + + // not found - insert it + table_[hashIdx] = new hashedEntry(name, table_[hashIdx]); + + // entry not found (but was added) - return false + return false; + } + +}; + + +/*---------------------------------------------------------------------------*/ + + + +#include "wmkdependScanner.h" + +namespace wmake { + + +/*---------------------------------------------------------------------------*\ + Class Errors Declaration +\*---------------------------------------------------------------------------*/ +//! Parser error handing +class Errors +{ +public: + int count; //!< The number of errors detected + + //! Return a string describing the given error code. + static std::wstring strerror(int n); + + Errors(); //!< Construct null - start with no errors + virtual ~Errors(); //!< Destructor + virtual void clear(); //!< Clear the error count + + //! Handle a general warning 'msg' + virtual void Warning(const std::wstring& msg); + //! Handle a general warning 'msg' + virtual void Warning(int line, int col, const std::wstring& msg); + //! Handle general error 'msg' (eg, a semantic error) + virtual void Error(int line, int col, const std::wstring& msg); + //! Handle syntax error 'n', uses strerror for the message, calls Error() + virtual void SynErr(int line, int col, int n); + //! Handle a general exception 'msg' + virtual void Exception(const std::wstring& msg); + +}; // Errors + + + +/*---------------------------------------------------------------------------*\ + Class Parser Declaration +\*---------------------------------------------------------------------------*/ +//! 
//! A Coco/R Parser
/*!
 * Declares the parser for the wmkdepend grammar. Only the declaration is
 * visible here; the member functions are defined in another file.
 */
class Parser
{
    //! Token kinds used by the parser.
    //  NOTE(review): the values 1-4 match the kinds assigned to string,
    //  single-quoted string, package name and package dir tokens in
    //  Scanner::NextToken - confirm against the grammar if regenerating.
    enum {
        _EOF=0,
        _string=1,
        _sqstring=2,
        _package_name=3,
        _package_dir=4,
        maxT = 10 //!< max term (w/o pragmas)
    };
    static const int minErrDist = 2; //!< min. distance before reporting errors

    Token *dummyToken;
    bool deleteErrorsDestruct_; //!< delete the 'errors' member in destructor
    int errDist;

    void SynErr(int n); //!< Handle syntax error 'n'
    // Parser helpers; their definitions are not visible in this chunk.
    void Get();
    void Expect(int n);
    bool StartOf(int s);
    void ExpectWeak(int n, int follow);
    bool WeakSeparator(int n, int syFol, int repFol);

public:
    Scanner *scanner;
    Errors *errors;

    Token *t; //!< last recognized token
    Token *la; //!< lookahead token

private:

    //! Hash of files already visited
    static StringHashSet visitedFiles_;

    //! Hash of (java) directories already visited
    static StringHashSet visitedDirs_;

    //! Replace all '.' with '/'
    static void dotToSlash(std::string& name);

    //! Import (java) directories
    static void importDir(const std::string& dirName);

    //! Import (java) file
    static void importFile(const std::string& name);

public:
    //! Include directories to search
    static std::list<std::string> includeDirs;

    //! The name of the top-level source file
    static std::string sourceFile;

    //! The name of the top-level dep file
    static std::string depFile;

    //! Add directory to list of visited dirs, thus effectively ignoring it
    static void ignoreDir(const std::string& name);

    //! Include file
    static void includeFile(const std::string& name);

/*---------------------------------------------------------------------------*/

    //! Construct for the specified scanner
    /*!
     * Use the default error handling, or optionally provide an error
     * handler, which will not be deleted upon destruction.
     */
    Parser(Scanner* scan, Errors* err = 0);
    ~Parser();
    void Parse(); //!< Execute the parse operation
    void SemErr(const std::wstring& msg); //!< Handle semantic error
    bool isUTF8() const; //!< Return true if scanner buffer is UTF8

    // NOTE(review): presumably the start production of the wmkdepend
    // grammar - confirm in the parser implementation.
    void wmkdepend();

}; // end Parser
//
// Encode 'length' wide characters of 'str', beginning at 'index', as a
// UTF-8 byte string.
//
// A null or empty 'str' gives an empty result. Values that fit in 7, 11,
// 16 or 21 bits are written as 1, 2, 3 or 4 byte sequences respectively;
// anything else is written as the replacement character U+FFFD.
// The 5- and 6-byte forms are deliberately not generated.
//
std::string coco_stdStringUTF8(const wchar_t* str, unsigned index, unsigned length)
{
    std::string utf8;

    const unsigned count = (str && *str) ? length : 0;
    utf8.reserve(count);

    for (unsigned n = 0; n != count; ++n)
    {
        const wchar_t c = str[index + n];

        if ((c & ~0x0000007F) == 0)
        {
            // 0aaaaaaa
            utf8.push_back(char(c));
            continue;
        }

        if ((c & ~0x000007FF) == 0)
        {
            // 110bbbaa 10aaaaaa
            utf8.push_back(char(0xC0 | ((c >> 6) & 0x1F)));
            utf8.push_back(char(0x80 | ((c) & 0x3F)));
            continue;
        }

        if ((c & ~0x0000FFFF) == 0)
        {
            // 1110bbbb 10bbbbaa 10aaaaaa
            utf8.push_back(char(0xE0 | ((c >> 12) & 0x0F)));
            utf8.push_back(char(0x80 | ((c >> 6) & 0x3F)));
            utf8.push_back(char(0x80 | ((c) & 0x3F)));
            continue;
        }

        if ((c & ~0x001FFFFF) == 0)
        {
            // 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa
            utf8.push_back(char(0xF0 | ((c >> 18) & 0x07)));
            utf8.push_back(char(0x80 | ((c >> 12) & 0x3F)));
            utf8.push_back(char(0x80 | ((c >> 6) & 0x3F)));
            utf8.push_back(char(0x80 | ((c) & 0x3F)));
            continue;
        }

        // unknown/invalid: emit the replacement character U+FFFD
        utf8.push_back(char(0xEF));
        utf8.push_back(char(0xBF));
        utf8.push_back(char(0xBD));
    }

    return utf8;
}
buf(NULL), + bufCapacity(0), + bufLen(0), + bufPos(0), + bufStart(0), + fileLen(0), + cStream(ifh), + stdStream(NULL), + isUserStream_(isUserStream) +{ +// ensure binary read on windows +#ifdef _WIN32 + _setmode(_fileno(cStream), _O_BINARY); +#endif + + if (CanSeek()) + { + fseek(cStream, 0, SEEK_END); + fileLen = ftell(cStream); + fseek(cStream, 0, SEEK_SET); + bufLen = (fileLen < MAX_BUFFER_LENGTH) ? fileLen : MAX_BUFFER_LENGTH; + bufStart = INT_MAX; // nothing in the buffer so far + } + + bufCapacity = (bufLen > 0) ? bufLen : MIN_BUFFER_LENGTH; + buf = new unsigned char[bufCapacity]; + if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) + else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid + if (bufLen == fileLen && CanSeek()) Close(); +} + + +Buffer::Buffer(std::istream* istr, bool isUserStream) +: + buf(NULL), + bufCapacity(0), + bufLen(0), + bufPos(0), + bufStart(0), + fileLen(0), + cStream(NULL), + stdStream(istr), + isUserStream_(isUserStream) +{ +#if _WIN32 + // TODO: ensure binary read on windows? 
+#endif +} + + +Buffer::~Buffer() +{ + Close(); + if (buf) + { + delete[] buf; + buf = NULL; + } +} + + +void Buffer::Close() +{ + if (!isUserStream_) + { + if (cStream) + { + fclose(cStream); + cStream = NULL; + } + if (stdStream) + { + delete stdStream; + stdStream = 0; + } + } +} + + +int Buffer::Read() +{ + if (stdStream) + { + int ch = stdStream->get(); + if (stdStream->eof()) + { + return EoF; + } + return ch; + } + + if (bufPos < bufLen) { + return buf[bufPos++]; + } + else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[bufPos++]; + } + else if (cStream && !CanSeek() && (ReadNextStreamChunk() > 0)) { + return buf[bufPos++]; + } + + return EoF; +} + +bool Buffer::isUTF8() const +{ + return false; +} + +int UTF8Buffer::Read() +{ + int ch; + do { + ch = Buffer::Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while (ch != EoF && ch >= 128 && ((ch & 0xC0) != 0xC0)); + if (ch < 128 || ch == EoF) { + // nothing to do, first 127 chars are identical in ASCII and UTF8 + // 0xxxxxxx or end of file character + } + else if ((ch & 0xF0) == 0xF0) { + // 0x00010000 - 0x001FFFFF [min. 24bit storage, 4-byte encoding] + // 11110ccc 10ccbbbb 10bbbbaa 10aaaaaa + // CAUTION: this should probably be disallowed since it overflows + // wchar_t on windows and overflows the max (0xFFFF) used here + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } + else if ((ch & 0xE0) == 0xE0) { + // 0x00000800 - 0x0000FFFF [min. 16bit storage, 3-byte encoding] + // 1110bbbb 10bbbbaa 10aaaaaa + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } + else if ((ch & 0xC0) == 0xC0) { + // 0x00000080 - 0x000007FF [min. 
16bit storage, 2-byte encoding] + // 110bbbaa 10aaaaaa + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + + +bool UTF8Buffer::isUTF8() const +{ + return true; +} + + +int Buffer::Peek() +{ + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + + +int Buffer::GetPos() const +{ + if (stdStream) + { + return stdStream->tellg(); + } + + return bufPos + bufStart; +} + + +void Buffer::SetPos(int value) +{ + if (stdStream) + { + stdStream->seekg(value, std::ios::beg); + return; + } + + if ((value >= fileLen) && cStream && !CanSeek()) + { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while ((value >= fileLen) && (ReadNextStreamChunk() > 0)) + {} + } + + if ((value < 0) || (value > fileLen)) + { + fwprintf(stderr, L"--- buffer out of bounds access, position: %d\n", value); + ::exit(1); + } + + if ((value >= bufStart) && (value < (bufStart + bufLen))) // already in buffer + { + bufPos = value - bufStart; + } + else if (cStream) // must be swapped in + { + fseek(cStream, value, SEEK_SET); + bufLen = fread(buf, sizeof(char), bufCapacity, cStream); + bufStart = value; bufPos = 0; + } + else + { + bufPos = fileLen - bufStart; // make Pos return fileLen + } +} + + +// +// Read the next chunk of bytes from the stream, increases the buffer +// if needed and updates the fields fileLen and bufLen. +// Returns the number of bytes read. +// +int Buffer::ReadNextStreamChunk() +{ + int freeLen = bufCapacity - bufLen; + if (freeLen == 0) + { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. 
+ bufCapacity = bufLen * 2; + unsigned char *newBuf = new unsigned char[bufCapacity]; + memcpy(newBuf, buf, bufLen*sizeof(char)); + delete[] buf; + buf = newBuf; + freeLen = bufLen; + } + int read = fread(buf + bufLen, sizeof(char), freeLen, cStream); + if (read > 0) + { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; +} + + +bool Buffer::CanSeek() const +{ + return cStream && (ftell(cStream) != -1); +} + +// ---------------------------------------------------------------------------- +// Scanner Implementation +// ---------------------------------------------------------------------------- + +Scanner::Scanner(const char* buf, int len) +: + buffer(new Buffer(buf, len)) +{ + Init(); +} + + +Scanner::Scanner(const unsigned char* buf, int len) +: + buffer(new Buffer(buf, len)) +{ + Init(); +} + + +Scanner::Scanner(FILE* ifh) +: + buffer(new Buffer(ifh, true)) +{ + Init(); +} + + +#ifdef _WIN32 +Scanner::Scanner(const std::wstring& fileName) +{ + FILE* ifh; + + if ((ifh = _wfopen(fileName.c_str(), L"rb")) == NULL) + { + fwprintf(stderr, L"--- Cannot open file %ls\n", fileName.c_str()); + ::exit(1); + } + buffer = new Buffer(ifh, false); + Init(); +} +#endif + + +Scanner::Scanner(const std::string& fileName) +{ + FILE* ifh; + if ((ifh = fopen(fileName.c_str(), "rb")) == NULL) + { + fwprintf(stderr, L"--- Cannot open file %s\n", fileName.c_str()); + ::exit(1); + } + buffer = new Buffer(ifh, false); + Init(); +} + + +Scanner::Scanner(std::istream& istr) +: + buffer(new Buffer(&istr, true)) +{ + Init(); +} + + +Scanner::~Scanner() +{ + char* cur = reinterpret_cast<char*>(firstHeap); + +#ifdef COCO_DEBUG_HEAP + fwprintf(stderr, L"~Scanner:\n"); +#endif + + while (cur) + { + cur = *(reinterpret_cast<char**>(cur + HEAP_BLOCK_SIZE)); + free(firstHeap); +#ifdef COCO_DEBUG_HEAP + fwprintf + ( + stderr, L" free %p -> %p\n", + firstHeap, + reinterpret_cast<char*>(firstHeap) + HEAP_BLOCK_SIZE + ); +#endif + firstHeap = cur; + } + 
delete[] tval; + delete buffer; +} + + +void Scanner::Init() +{ + for (int i = 36; i <= 36; ++i) start.set(i, 7); + for (int i = 65; i <= 90; ++i) start.set(i, 7); + for (int i = 95; i <= 95; ++i) start.set(i, 7); + for (int i = 97; i <= 122; ++i) start.set(i, 7); + start.set(34, 1); + start.set(39, 4); + start.set(35, 11); + start.set(10, 12); + start.set(59, 13); + start.set(Buffer::EoF, -1); + + keywords.set(L"include", 6); + keywords.set(L"import", 8); + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + tlen = 0; + tval[tlen] = 0; + + // HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = + reinterpret_cast<void**> + (reinterpret_cast<char*>(heap) + HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > HEAP_BLOCK_SIZE) + { + fwprintf(stderr, L"--- Too small HEAP_BLOCK_SIZE\n"); + ::exit(1); + } +#ifdef COCO_DEBUG_HEAP + fwprintf + ( + stderr, L"Scanner::init: firstHeap %p -> %p\n", + firstHeap, + reinterpret_cast<char*>(firstHeap) + HEAP_BLOCK_SIZE + ); +#endif + + pos = -1; line = 1; col = 0; + oldEols = 0; + NextCh(); + if (ch == 0xEF) // check optional byte order mark for UTF-8 + { // Windows-specific magic + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) + { + fwprintf(stderr, L"Illegal byte order mark at start of file"); + ::exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(oldBuf); col = 0; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + else + { + // FORCE_UTF8 was defined + // use UTF8Buffer without relying on a byte order mark. 
+ Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(oldBuf); col = 0; + delete oldBuf; oldBuf = NULL; + } + + pt = tokens = CreateToken(); // first token is a dummy +} + + +void Scanner::NextCh() +{ + if (oldEols > 0) + { + ch = EOL; + oldEols--; + } + else + { + pos = buffer->GetPos(); + ch = buffer->Read(); col++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == '\r' && buffer->Peek() != '\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } +} + + +void Scanner::AddCh() +{ + if (tlen >= tvalLength) + { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete[] tval; + tval = newBuf; + } + if (ch != Buffer::EoF) + { + tval[tlen++] = ch; + NextCh(); + } +} + + + +bool Scanner::Comment0() { + int level = 1, pos0 = pos, line0 = line, col0 = col; + NextCh(); + if (ch == '/') { + NextCh(); + while (true) { + if (ch == 10) { + level--; + if (level == 0) { oldEols = line - line0; NextCh(); return true; } + NextCh(); + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } else { + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; + } + return false; +} + +bool Scanner::Comment1() { + int level = 1, pos0 = pos, line0 = line, col0 = col; + NextCh(); + if (ch == '*') { + NextCh(); + while (true) { + if (ch == '*') { + NextCh(); + if (ch == '/') { + level--; + if (level == 0) { oldEols = line - line0; NextCh(); return true; } + NextCh(); + } + } else if (ch == '/') { + NextCh(); + if (ch == '*') { + level++; NextCh(); + } + } else if (ch == buffer->EoF) return false; + else NextCh(); + } + } else { + buffer->SetPos(pos0); NextCh(); line = line0; col = col0; + } + return false; +} + +void Scanner::CreateHeapBlock() +{ + char* cur = reinterpret_cast<char*>(firstHeap); + +#ifdef COCO_DEBUG_HEAP + fwprintf(stderr, L"CreateHeapBlock: tokens %p\n", tokens); +#endif + + // release unused blocks + while + ( + 
(reinterpret_cast<char*>(tokens) < cur) + || (reinterpret_cast<char*>(tokens) > (cur + HEAP_BLOCK_SIZE)) + ) + { + cur = *(reinterpret_cast<char**>(cur + HEAP_BLOCK_SIZE)); +#ifdef COCO_DEBUG_HEAP + fwprintf + ( + stderr, L" free %p -> %p\n", + firstHeap, + reinterpret_cast<char*>(firstHeap) + HEAP_BLOCK_SIZE + ); +#endif + free(firstHeap); + firstHeap = cur; + } + + // HEAP_BLOCK_SIZE byte heap + pointer to next heap block + void* newHeap = malloc(HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = + reinterpret_cast<void**> + (reinterpret_cast<char*>(newHeap) + HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +#ifdef COCO_DEBUG_HEAP + fwprintf + ( + stderr, L" malloc %p -> %p\n", + newHeap, + reinterpret_cast<char*>(newHeap) + HEAP_BLOCK_SIZE + ); +#endif +} + + +Token* Scanner::CreateToken() +{ + const int reqMem = sizeof(Token); + if + ( + (reinterpret_cast<char*>(heapTop) + reqMem) + >= reinterpret_cast<char*>(heapEnd) + ) + { + CreateHeapBlock(); + } + // token 'occupies' heap starting at heapTop + Token* tok = reinterpret_cast<Token*>(heapTop); + // increment past this part of the heap, which is now used + heapTop = + reinterpret_cast<void*> + (reinterpret_cast<char*>(heapTop) + reqMem); + tok->val = NULL; + tok->next = NULL; + return tok; +} + + +void Scanner::AppendVal(Token* tok) +{ + const int reqMem = (tlen + 1) * sizeof(wchar_t); + if + ( + (reinterpret_cast<char*>(heapTop) + reqMem) + >= reinterpret_cast<char*>(heapEnd) + ) + { + if (reqMem > HEAP_BLOCK_SIZE) + { + fwprintf(stderr, L"--- Too long token value\n"); + ::exit(1); + } + CreateHeapBlock(); + } + + // add text value from heap + tok->val = reinterpret_cast<wchar_t*>(heapTop); + + // increment past this part of the heap, which is now used + heapTop = + reinterpret_cast<void*> + (reinterpret_cast<char*>(heapTop) + reqMem); + + // copy the currently parsed tval into the token + wcsncpy(tok->val, tval, tlen); + tok->val[tlen] = '\0'; +} + + +Token* 
Scanner::NextToken() +{ + while + ( + ch == ' ' + || ch == 9 + ) NextCh(); + if ((ch == '/' && Comment0()) || (ch == '/' && Comment1())) return NextToken(); + int recKind = noSym; + int recEnd = pos; + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) + { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { + case_0: + if (recKind != noSym) { + tlen = recEnd - t->pos; + SetScannerBehindT(); + } + t->kind = recKind; break; + } // NextCh already done + case 1: + case_1: + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= '!') || (ch >= '#' && ch <= '[') || (ch >= ']' && ch <= 65535)) {AddCh(); goto case_1;} + else if (ch == '"') {AddCh(); goto case_3;} + else if (ch == 92) {AddCh(); goto case_2;} + else {goto case_0;} + case 2: + case_2: + if ((ch >= ' ' && ch <= '~')) {AddCh(); goto case_1;} + else {goto case_0;} + case 3: + case_3: + {t->kind = 1; break;} + case 4: + case_4: + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= '!') || (ch >= '#' && ch <= '&') || (ch >= '(' && ch <= '[') || (ch >= ']' && ch <= 65535)) {AddCh(); goto case_4;} + else if (ch == 39) {AddCh(); goto case_8;} + else if (ch == 92) {AddCh(); goto case_5;} + else {goto case_0;} + case 5: + case_5: + if ((ch >= ' ' && ch <= '~')) {AddCh(); goto case_4;} + else {goto case_0;} + case 6: + case_6: + {t->kind = 4; break;} + case 7: + case_7: + recEnd = pos; recKind = 3; + if (ch == '$' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_7;} + else if (ch == '.') {AddCh(); goto case_9;} + else {t->kind = 3; std::wstring literal(tval, tlen); t->kind = keywords.get(literal, t->kind); break;} + case 8: + case_8: + recEnd = pos; recKind = 2; + if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= '!') || (ch >= '#' && ch <= '&') || (ch >= '(' && ch <= '[') || (ch >= ']' && ch <= 65535)) {AddCh(); goto 
case_4;} + else if (ch == 39) {AddCh(); goto case_8;} + else if (ch == 92) {AddCh(); goto case_5;} + else {t->kind = 2; break;} + case 9: + case_9: + if (ch == '$' || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_10;} + else if (ch == '*') {AddCh(); goto case_6;} + else {goto case_0;} + case 10: + case_10: + recEnd = pos; recKind = 3; + if (ch == '$' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || ch == '_' || (ch >= 'a' && ch <= 'z')) {AddCh(); goto case_10;} + else if (ch == '.') {AddCh(); goto case_9;} + else {t->kind = 3; std::wstring literal(tval, tlen); t->kind = keywords.get(literal, t->kind); break;} + case 11: + {t->kind = 5; break;} + case 12: + {t->kind = 7; break;} + case 13: + {t->kind = 9; break;} + } + AppendVal(t); + return t; +} + + +void Scanner::SetScannerBehindT() +{ + buffer->SetPos(t->pos); + NextCh(); + line = t->line; col = t->col; + for (int i = 0; i < tlen; i++) NextCh(); +} + + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() +{ + if (tokens->next == NULL) { + pt = tokens = NextToken(); + } + else { + pt = tokens = tokens->next; + } + return tokens; +} + + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() +{ + do + { + if (pt->next == NULL) + { + pt->next = NextToken(); + } + pt = pt->next; + } while (pt->kind > maxT); // skip pragmas + + return pt; +} + + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() +{ + pt = tokens; +} + + +int Scanner::Line() const +{ + return line; +} + + +void Scanner::Line(int lineNo) +{ + line = lineNo; +} + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +} // End namespace + +// ************************************************************************* // diff --git a/wmake/src/wmkdependScanner.h b/wmake/src/wmkdependScanner.h new file mode 100644 index 00000000000..a72bf4b9684 --- /dev/null +++ 
b/wmake/src/wmkdependScanner.h @@ -0,0 +1,477 @@ +/*---------------------------------*- C++ -*---------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd. + \\/ M anipulation | +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +@file wmkdependParser.atg + +Description + An attributed Coco/R grammar to parse C/C++, Fortran and Java files + for include and import statements. + +SourceFiles + generated + +\*---------------------------------------------------------------------------*/ +// This file was generated with Coco/R C++ (7 Feb 2010) +// http://www.ssw.uni-linz.ac.at/coco/ +// with these defines: +// - FORCE_UTF8 + + +#ifndef COCO_wmkdependSCANNER_H__ +#define COCO_wmkdependSCANNER_H__ + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <cwchar> +#include <string> +#include <fstream> +#include <iostream> + +namespace wmake { + +// * * * * * * * * * * * Miscellaneous String Routines * * * * * * * * * * * // + +//! 
//! Simple string hashing function
/*!
 * @param str  any string-like container providing begin()/end()
 * @return a non-negative hash value, suitable for use with '% tableSize'
 *
 * The hash is accumulated in unsigned arithmetic so that long strings wrap
 * around instead of overflowing a signed int. The final value is the
 * magnitude of the accumulated value seen as a signed int; the single
 * value with no positive counterpart folds to 0.
 */
template<class StringT>
inline int coco_string_hash(const StringT& str)
{
    unsigned h = 0;
    for
    (
        typename StringT::const_iterator iter = str.begin();
        iter != str.end();
        ++iter
    )
    {
        h = (h * 7u) ^ static_cast<unsigned>(*iter);
    }

    const unsigned signBit = ~(~0u >> 1);
    if (h & signBit)
    {
        h = 0u - h;     // magnitude of the "negative" value
    }
    return static_cast<int>(h & ~signBit);
}
/*!
 * @brief Scanner Token
 *
 * @note since each Token is allocated by the internal heap mechanism,
 * the destructor does not clean up the val member.
 *
 * @note Scanner::CreateToken places tokens directly in the scanner heap
 * and assigns only val and next; kind, pos, col and line are filled in
 * afterwards by Scanner::NextToken.
 */
class Token
{
public:
    int kind;       //!< token kind
    int pos;        //!< token position in the source text (starting at 0)
    int col;        //!< token column (starting at 1)
    int line;       //!< token line (starting at 1)
    wchar_t* val;   //!< token value (normally allocated from the internal heap)
    Token *next;    //!< Peek tokens are kept in linked list

    int length() const;    //!< The length of val, or 0 if val is NULL

    //! Construct null Token, optionally with pointer to a string value
    Token(wchar_t* value = 0);
    ~Token();   //!< Destructor - does not cleanup val member

    //! Token val as byte string (restricted to 8bit values)
    inline std::string toString() const
    {
        return coco_stdString(val);
    }

    //! Token val as byte string (restricted to 8bit values), up to length characters long
    inline std::string toString(unsigned length) const
    {
        return coco_stdString(val, length);
    }

    //! Token val as byte string (restricted to 8bit values), starting at index and length characters long
    inline std::string toString(unsigned index, unsigned length) const
    {
        return coco_stdString(val, index, length);
    }

    //! Token val as UTF8 byte string
    inline std::string toStringUTF8() const
    {
        return coco_stdStringUTF8(val);
    }

    //! Token val as UTF8 byte string, up to length characters long
    inline std::string toStringUTF8(unsigned length) const
    {
        return coco_stdStringUTF8(val, length);
    }

    //! Token val as UTF8 byte substring, starting at index and length characters long
    inline std::string toStringUTF8(unsigned index, unsigned length) const
    {
        return coco_stdStringUTF8(this->val, index, length);
    }

};
@brief Attach buffer to an STL standard stream + //! User streams are not closed in the destructor + explicit Buffer(std::istream*, bool isUserStream = true); + + //! Close stream (but not user streams) and free buf (if any) + virtual ~Buffer(); + + virtual void Close(); //!< Close stream (but not user streams) + virtual int Read(); //!< Get character from stream or buffer + virtual int Peek(); //!< Peek character from stream or buffer + + virtual int GetPos() const; + virtual void SetPos(int value); + virtual bool isUTF8() const; //!< Return false - buffer is not UTF8 +}; + + +/*---------------------------------------------------------------------------*\ + Class UTF8Buffer Declaration +\*---------------------------------------------------------------------------*/ +//! A Scanner Buffer variant that decodes UTF-8 characters into 16bit unicode +class UTF8Buffer : public Buffer +{ +public: + UTF8Buffer(Buffer* b) : Buffer(b) {} + virtual int Read(); + virtual bool isUTF8() const; //!< Return true - buffer is UTF8 +}; + + +/*---------------------------------------------------------------------------*\ + Class StartStates Declaration +\*---------------------------------------------------------------------------*/ +//! maps characters (integers) to start states of tokens as a HashTable +class StartStates +{ + //! HashTable entry + struct Entry + { + int key; //<! The lookup key + int val; //<! The data + Entry *next; //<! Pointer next Entry in sub-list + + Entry(int k, int v, Entry *n=0) + : + key(k), val(v), next(n) + {} + }; + + static const int size_ = 128; //<! 
fixed HashTable size + Entry **table_; + +public: + StartStates() + : + table_(new Entry*[size_]) + { + memset(table_, 0, size_*sizeof(Entry*)); + } + + virtual ~StartStates() + { + for (int i = 0; i < size_; ++i) + { + Entry *e = table_[i]; + while (e) + { + Entry *next = e->next; + delete e; + e = next; + } + } + delete[] table_; + } + + void set(int key, int val) + { + const int hashIndex = unsigned(key) % size_; + table_[hashIndex] = new Entry(key, val, table_[hashIndex]); + } + + int state(int key) + { + Entry *e = table_[unsigned(key) % size_]; + while (e && e->key != key) e = e->next; + return e ? e->val : 0; + } +}; + + +/*---------------------------------------------------------------------------*\ + Class KeywordMap Declaration +\*---------------------------------------------------------------------------*/ +//! maps strings to integers (identifiers to keyword kinds) as a HashTable +class KeywordMap +{ + //! HashTable entry + struct Entry + { + const std::wstring key; //<! The lookup key + int val; //<! The data + Entry *next; //<! Pointer next Entry in sub-list + + Entry(const std::wstring& k, int v, Entry *n=0) + : + key(k), val(v), next(n) + {} + }; + + static const int size_ = 128; //<! fixed HashTable size + Entry **table_; + +public: + KeywordMap() + : + table_(new Entry*[size_]) + { + memset(table_, 0, size_*sizeof(Entry*)); + } + + virtual ~KeywordMap() + { + for (int i = 0; i < size_; ++i) + { + Entry *e = table_[i]; + while (e) + { + Entry *next = e->next; + delete e; + e = next; + } + } + delete[] table_; + } + + void set(const std::wstring& key, int val) + { + const int hashIndex = coco_string_hash(key) % size_; + table_[hashIndex] = new Entry(key, val, table_[hashIndex]); + } + + int get(const std::wstring& key, int defaultVal) + { + Entry *e = table_[coco_string_hash(key) % size_]; + while (e && e->key != key) e = e->next; + return e ? 
e->val : defaultVal; + } +}; + + +/*---------------------------------------------------------------------------*\ + Class Scanner Declaration +\*---------------------------------------------------------------------------*/ +//! A Coco/R Scanner +class Scanner +{ + static const int maxT = 10; + static const int noSym = 10; + static const int eofSym = 0; //!< end-of-file token id + static const char EOL = '\n'; //!< end-of-line character + + void *firstHeap; //!< the start of the heap management + void *heap; //!< the currently active block + void *heapTop; //!< the top of the heap + void **heapEnd; //!< the end of the last heap block + + StartStates start; //!< A map of start states for particular characters + KeywordMap keywords; //!< A hash of keyword literals to token kind + + Token *t; //!< current token + wchar_t *tval; //!< text of current token + int tvalLength; //!< maximum capacity (length) for tval + int tlen; //!< length of tval + + Token *tokens; //!< list of tokens already peeked (first token is a dummy) + Token *pt; //!< current peek token + + int ch; //!< current input character + int pos; //!< byte position of current character + int line; //!< line number of current character + int col; //!< column number of current character + int oldEols; //!< the number of EOLs that appeared in a comment + + void CreateHeapBlock(); //!< add a heap block, freeing unused ones + Token* CreateToken(); //!< fit token on the heap + void AppendVal(Token* tok); //!< adjust tok->val to point to the heap and copy tval into it + void SetScannerBehindT(); + + void Init(); //!< complete the initialization for the constructors + void NextCh(); //!< get the next input character into ch + void AddCh(); //!< append the character ch to tval + bool Comment0(); + bool Comment1(); + Token* NextToken(); //!< get the next token + +public: + //! The scanner buffer + Buffer *buffer; + + //! Attach scanner to an existing character buffer + Scanner(const char* chars, int len); + + //! 
Attach scanner to an existing character buffer + Scanner(const unsigned char* chars, int len); + + //! Attach scanner to an existing open file handle + Scanner(FILE*); + +#ifdef _WIN32 + //! Open a file for reading and attach scanner - Windows only + explicit Scanner(const std::wstring& fileName); +#endif + + //! Open a file for reading and attach scanner + explicit Scanner(const std::string& fileName); + + //! Attach scanner to an existing open STL standard stream + explicit Scanner(std::istream&); + + ~Scanner(); //!< free heap and allocated memory + Token* Scan(); //!< get the next token (possibly a token already seen during peeking) + Token* Peek(); //!< peek for the next token, ignore pragmas + void ResetPeek(); //!< ensure that peeking starts at the current scan position + + int Line() const; //!< Return the current line + void Line(int lineNo); //!< Define the starting line for reporting errors + +}; // end Scanner + +} // End namespace + +#endif // COCO_wmkdependSCANNER_H__ + +// ************************************************************************* // -- GitLab