diff --git a/QuicMaf/EquationAdv.cpp b/QuicMaf/EquationAdv.cpp deleted file mode 100644 index 0cb8380..0000000 --- a/QuicMaf/EquationAdv.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "EquationAdv.h" - - - -EquationAdv::EquationAdv() -{ -} - - -EquationAdv::~EquationAdv() -{ -} diff --git a/QuicMaf/EquationAdv.h b/QuicMaf/EquationAdv.h deleted file mode 100644 index 4fab2a7..0000000 --- a/QuicMaf/EquationAdv.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once -class EquationAdv -{ -public: - EquationAdv(); - ~EquationAdv(); -}; - diff --git a/QuicMaf/QuicMaf.vcxproj b/QuicMaf/QuicMaf.vcxproj index f1f9820..42de025 100644 --- a/QuicMaf/QuicMaf.vcxproj +++ b/QuicMaf/QuicMaf.vcxproj @@ -119,19 +119,19 @@ - - - + + + diff --git a/QuicMaf/QuicMaf.vcxproj.filters b/QuicMaf/QuicMaf.vcxproj.filters index a0aa6f2..6514295 100644 --- a/QuicMaf/QuicMaf.vcxproj.filters +++ b/QuicMaf/QuicMaf.vcxproj.filters @@ -18,39 +18,39 @@ Source Files - - Source Files - - + Header Files - - Header Files - - + Header Files Header Files + + Header Files + + + Header Files + Header Files - - Header Files - - - Header Files - Header Files - + Header Files - + + Header Files + + + Header Files + + Header Files diff --git a/QuicMaf/app.cpp b/QuicMaf/app.cpp index 6579b62..6f9bbc1 100644 --- a/QuicMaf/app.cpp +++ b/QuicMaf/app.cpp @@ -1,8 +1,68 @@ +#pragma once +#define DEBUG_MODE +#include +#include + +#include "vendor/lexertk.hpp" + +#include "maths/tokenizer.h" + +#define MAIN_APP +//#define APP_TEST + + +#ifdef MAIN_APP + +using namespace std; -#include "maths/Equation.h" int main() { - auto terms = split_terms("(4 + 2(2)) + 123 = 131x"); - return true; -} \ No newline at end of file + while (true) { + cout << "Enter an equation: "; + string input; + cin >> input; + + lexertk::generator generator; + + if (!generator.process(input)) { + cout << "Failed to lex: " << input << endl; + system("PAUSE"); + system("CLS"); + return true; + } + + //lexertk::helper::commutative_inserter ci; + 
//ci.process(generator); + + lexertk::helper::bracket_checker bc; + bc.process(generator); + + if (!bc.result()) { + cout << "Failed Bracket Check!" << endl; + system("PAUSE"); + system("CLS"); + return 1; + } + +#ifdef DEBUG_MODE + lexertk::helper::dump(generator); +#endif // DEBUG_MODE + + + auto result = tokenize(generator); + + system("PAUSE"); + system("CLS"); + } + + + return true; +} +#endif // MAIN_APP + +#ifdef APP_TEST + + + +#endif \ No newline at end of file diff --git a/QuicMaf/diagrams/core_parser.ddd b/QuicMaf/diagrams/core_parser.ddd new file mode 100644 index 0000000..3f2f47c Binary files /dev/null and b/QuicMaf/diagrams/core_parser.ddd differ diff --git a/QuicMaf/maths/Equation.h b/QuicMaf/maths/Equation.h deleted file mode 100644 index be5a4f7..0000000 --- a/QuicMaf/maths/Equation.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef EQUATION_H -#define EQUATION_H -#pragma once - -using namespace std; -#include "terms/Term.h" -#include "terms/Brackets.h" -#include "terms/Constant.h" -#include "terms/Operator.h" -#include "terms/Variable.h" -#include "terms/Equal.h" - -#include "terms/term_parser.h" - -class Equation { -public: - vector lwing; - vector rwing; - - string Parse(std::string equation) { - - /* ALL OF THE COMMENTS GOT FROM THE NOTES.PARSING.ALGORITHM */ - - // - split the terms in the term of spaces - auto semi_terms = split(equation, " "); - - bool lwing_turn = true; - // - iterate over the terms - for (int i = 0; i < semi_terms.size(); i++) { - // , call the iterator mTerm: - auto term = semi_terms[i]; - // - if there exists an opening bracket in the mTerm - if (isContain(term, '(')) { - // find the closing. - vector terms_with_cbrack; - for (; i < semi_terms.size(); i++) { - if (isContain(semi_terms[i], ')')) { - terms_with_cbrack.push_back(semi_terms[i]); - break; - } - terms_with_cbrack.push_back(semi_terms[i]); - } - auto bracket = parseBracket(term, terms_with_cbrack); - (lwing_turn) ? 
lwing.push_back(bracket) : rwing.push_back(bracket); - continue; - } - auto _term = convertNormalToTerm(term); - if (_term->GetType() == TermTypes::Equ) { - lwing_turn = !lwing_turn; - continue; - } - - (lwing_turn) ? lwing.push_back(_term) : rwing.push_back(_term); - } - - return equation; - } -public: - Bracket* parseBracket(string str1, vector terms_with_cbrack) { - Bracket *bracket = nullptr; - bracket = new Bracket(); - - auto splitted = split(str1, "("); - string coefficient = splitted[0]; - - // convert coefficient to a term - if (coefficient != "") - bracket->setConstant((Term*)convertNormalToTerm(coefficient)); - - if (splitted.size() > 1) { - for (int i = 1; i < splitted.size(); i++) { - auto term = (Term*)convertNormalToTerm(splitted[i]); - bracket->mTerms.push_back(term); - } - } - - // terms: - for (int i = 1; i < terms_with_cbrack.size(); i++) { - auto term = terms_with_cbrack[i]; - if (isContain(term, ')')) { - term.erase(indexofChar(term, ')')); - bracket->mTerms.push_back(convertNormalToTerm(term)); - break; - } - bracket->mTerms.push_back(convertNormalToTerm(term)); - } - - return bracket; - } - - Term* convertNormalToTerm(string str) { - if (isContain(str, '^')) { - auto coeffic_terms = split(str, "^"); - if (isContainAlph(str)) { - auto terms = split(str, "^"); - Variable *var = nullptr; - var = new Variable(atof(&terms[0][0]), terms[0][indexofAlph(terms[0])], atof(&terms[1][0])); - return var; - } - else if (is_all_digits(coeffic_terms[0]) && is_all_digits(coeffic_terms[1])) { - Constant *Const = nullptr; - Const = new Constant(atof(&coeffic_terms[0][0]), atof(&coeffic_terms[1][0])); - return Const; - } - } - else if (is_all_digits(str)) { - Constant *Const = nullptr; - Const = new Constant(atof(&str[0])); - return Const; - } - else if (isContainAlph(str)) { - Variable *var = nullptr; - var = new Variable(atof(&str[0]), str[indexofAlph(str)]); - return var; - } - else if (isArithmitic(str[0])) { - Operator *op = nullptr; - op = new 
// Binding strength of a binary arithmetic operator character.
// Returns 1 for '*' and '/', 0 for '+' and '-', and -1 for any other character.
//
// Declared `static` like every other helper in this header: a plain (external
// linkage) definition in a header is emitted once per including translation unit
// and fails to link as soon as the header is included twice.
static int getPrecedence(char c) {
	switch (c) {
	case '*':
	case '/':
		return 1;
	case '+':
	case '-':
		return 0;
	default:
		return -1; // not an arithmetic operator
	}
}


// Returns true only when c1 binds strictly tighter than c2.
// Returns false when c2 binds tighter AND when both have the same precedence.
static bool compPrecedence(char c1, char c2) {
	return getPrecedence(c1) > getPrecedence(c2);
}
+ if (c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c== '}') return true; return false; } -bool isContain(string str, char c) { +static bool isBracketsOpening(char c) { + if (c == '(' || c == '[' || c == '{') return true; + return false; +} + +static bool isBracketsClosing(char c) { + if (c == ')' || c == ']' || c == '}') return true; + return false; +} + +static bool isContain(string str, char c) { for (int i = 0; i < str.size(); i++) if (str[i] == c) return true; return false; } -bool isContainAlph(string str) { +static bool isContainAlph(string str) { for (int i = 0; i < str.size(); i++) if (isalpha(str[i])) return true; return false; } -bool isContainDigit(string str) { +static bool isContainDigit(string str) { for (int i = 0; i < str.size(); i++) if (isdigit(str[i])) return true; return false; } -int indexofAlph(string str) { +static int indexofAlph(string str) { if (!isContainAlph(str)) return -1; for (int i = 0; i < str.size(); i++) @@ -91,19 +126,19 @@ int indexofAlph(string str) { return-1; } -int indexofChar(string str, char c) { +static int indexofChar(string str, char c) { for (int i = 0; i < str.size(); i++) if (str[i] == c) return i; return -1; } -bool is_all_digits(const std::string &str) +static bool is_all_digits(const std::string &str) { return std::all_of(str.begin(), str.end(), ::isdigit); // C++11 } -int getTermType(string str) { +static int getTermType(string str) { if (isContain(str, '=')) return Equ; if (isContain(str, '+')) return Op; if (isContain(str, '-')) return Op; diff --git a/QuicMaf/maths/terms/Paranthesis.h b/QuicMaf/maths/terms/Paranthesis.h new file mode 100644 index 0000000..9374bea --- /dev/null +++ b/QuicMaf/maths/terms/Paranthesis.h @@ -0,0 +1,31 @@ +#ifndef PARANTHESIS_H +#define PARANTHESIS_H +#pragma once + +using namespace std; +#include "Term.h" +#include "../defines.h" + +enum ParanTypes { + NORM_PARAN = 1, + BRAK_PARAN = 2, + CURL_PARAN = 3 +}; + +class Paranthesis : Term { + Paranthesis(); + 
Paranthesis(bool isopening, int type); + + bool isOpening() { return __isopening; } + int getType() { return __type; } + + void setOpening(bool is) { __isopening == is; } + void setType(char type) { __type == type; } + +private: + bool __isopening = true; + int __type = 0; +}; + + +#endif // !PARANTHESIS_H diff --git a/QuicMaf/maths/terms/term_parser.h b/QuicMaf/maths/terms/term_parser.h index 7679a4e..1d7cd0d 100644 --- a/QuicMaf/maths/terms/term_parser.h +++ b/QuicMaf/maths/terms/term_parser.h @@ -21,6 +21,7 @@ using namespace std; struct Token { unsigned int begin = 0; unsigned int end = 0; + }; string retrieveSubString(string str, Token token) { diff --git a/QuicMaf/maths/tokenizer.h b/QuicMaf/maths/tokenizer.h new file mode 100644 index 0000000..b263dce --- /dev/null +++ b/QuicMaf/maths/tokenizer.h @@ -0,0 +1,237 @@ +#ifndef TOKENIZER_H +#define TOKENIZER_H +#pragma once +#include "defines.h" +#include "terms/Brackets.h" +#include "terms/Constant.h" +#include "terms/Equal.h" +#include "terms/Operator.h" +#include "terms/Term.h" +#include "terms/Variable.h" +#include "terms/Paranthesis.h" + +#include "../vendor/lexertk.hpp" + +using namespace std; + +struct Token { + int begin = 0; + int end = 0; + Token(int b, int e) : begin(b), end(e) {} + Token() {} +}; + +static string retrieveSubString(string str, Token token) { + string res; + for (int i = token.begin; i <= token.end; i++) + if (str[i] != '\0') // ignore null terminator + res.push_back(str[i]); + return res; +} + +static lexertk::generator retriveSubLexer(lexertk::generator gen, Token tok) { + lexertk::generator res; + vector str; + for (int i = tok.begin; i <= tok.end; i++) + str.push_back(gen[i].value); + + string val; + for (int i = 0; i < str.size(); i++) + val.append(str[i]); + + res.process(val); + return res; +} + +//struct Paranthesis { +// bool isOpening = true; +// int pos = -1; +// +// Paranthesis(bool is, int _pos) : isOpening(is), pos(_pos) {} +//}; + +vector tokenize(lexertk::generator 
lexed); + +static Bracket* tokenize_bracket(lexertk::generator gen, Token* token, string coefficient) { + Bracket* result = nullptr; + result = new Bracket(); + + lexertk::generator bracks; + + + // DETERMINE THE ENDING OF THE BRACKETS + int counter = 0; + bool state = false; + int index = token->begin; + do { + if (isBracketsOpening(gen[index].value[0])) { + counter++; + state = true; + } + else if (isBracketsClosing(gen[index].value[0])) { + counter--; + state = false; + } + index++; + } while (!(!state && counter == 0)); + + bracks = retriveSubLexer(gen, Token(token->begin, index-1)); + token->end = index - 1; // to make sure we move the token pointer to the end of bracks + + // DELETE THIS BRACKET PARANTHESIS + bracks = retriveSubLexer(bracks, Token(1, bracks.size()-2)); + + // Tokenize its term + // first make sure it is not empty + if (bracks.empty()) { + cout << "Brackets can't be empty!" << endl; + system("PAUSE"); + exit(0); + } + // tokenize terms + auto terms = tokenize(bracks); + + result->mTerms = terms; + + // ADD COEFFICIENT TO RESULT + if (coefficient != "") { + lexertk::generator lex; + lex.process(coefficient); + result->setConstant(tokenize(lex)[0]); + } + return result; +} + +static vector tokenize(lexertk::generator lexed) { + vector result; + + for (int i = 0; i < lexed.size(); i++) { + auto lex = lexed[i]; + auto after_lex = lexed[i + 1]; + Token tok; + tok.begin = i; + + if (is_all_digits(lex.value)) { + // number + + // check for variables + if (isalpha(after_lex.value[0])) { + // variable detected + + // check for power + // if so read the power and its constant + if (isPower(lexed[i + 2].value[0])) { + // powers ONLY can be numbers no evaluation is done in the power + // ex: 5^2*3 // the expression will be 5 by 5 then multiply 3 + if (!is_all_digits(lexed[i + 3].value)) { + cout << "ONLY numbers are allowed in powers!" 
<< endl; + system("PAUSE"); + exit(0); + /////// ENDING OF TREE + } + + Variable* Var = nullptr; + Var = new Variable(atof(&lex.value[0]), after_lex.value[0], atof(&lexed[i + 3].value[0])); + result.push_back(Var); + tok.end = i + 3; + /////// ENDING OF TREE + } + else { + // The variable is simple! + Variable* Var = nullptr; + Var = new Variable(atof(&lex.value[0]), after_lex.value[0]); + result.push_back(Var); + tok.end = i + 1; + } + } + else if (isBrackets(after_lex.value[0]) && isBracketsOpening(after_lex.value[0])) { + // check for brackets, + // if so tokenize the brackets + tok.begin++; // consume the coefficient + result.push_back(tokenize_bracket(lexed, &tok, lex.value)); + /////// ENDING OF TREE + } + else if (isPower(after_lex.value[0])) { + // check for powers, + // if so read the power and its constant + // powers ONLY can be numbers no evaluation is done in the power + // ex: 5^2*3 // the expression will be 5 by 5 then multiply 3 + if (!is_all_digits(lexed[i + 2].value)) { + cout << "ONLY numbers are allowed in powers!" << endl; + system("PAUSE"); + exit(0); + /////// ENDING OF TREE + } + + Constant* Const = nullptr; + Const = new Constant(atof(&lex.value[0]), atof(&lexed[i + 2].value[0])); + result.push_back(Const); + tok.end = i + 2; + /////// ENDING OF TREE + } + else { + // The number is simple! + Constant* Const = nullptr; + Const = new Constant(atof(&lex.value[0])); + result.push_back(Const); + tok.end = i; + /////// ENDING OF TREE + } + } + else if (isalpha(lex.value[0])) { + // variable + + // check for power + // if so read the power and its constant + if (isPower(after_lex.value[0])) { + // powers ONLY can be numbers no evaluation is done in the power + // ex: 5^2*3 // the expression will be 5 by 5 then multiply 3 + if (!is_all_digits(lexed[i + 2].value)) { + cout << "ONLY numbers are allowed in powers!" 
<< endl; + system("PAUSE"); + exit(0); + /////// ENDING OF TREE + } + + Variable* Var = nullptr; + Var = new Variable(1.0, lex.value[0], lexed[i + 2].value[0]); + result.push_back(Var); + tok.end = i + 2; + /////// ENDING OF TREE + } + else { + // The variable is simple! + Variable* Var = nullptr; + Var = new Variable(1.0, lex.value[0]); + result.push_back(Var); + tok.end = i; + /////// ENDING OF TREE + } + } + else if (isBracketsOpening(lex.value[0])) { + // bracket + result.push_back(tokenize_bracket(lexed, &tok, "")); + } + else if (isArithmitic(lex.value[0])) { + // operator + + Operator *op = nullptr; + op = new Operator(lex.value[0]); + result.push_back(op); + tok.end = i; + } + else if (isEqualChar(lex.value[0])) { + // equal sign + + Equal* equ = nullptr; + equ = new Equal(); + result.push_back(equ); + tok.end = i; + } + + i = tok.end; // no need to increment, automatically done in loop statment + } + + return result; +} +#endif // !TOKENIZER_H diff --git a/QuicMaf/vendor/lexertk.hpp b/QuicMaf/vendor/lexertk.hpp new file mode 100644 index 0000000..3edb30f --- /dev/null +++ b/QuicMaf/vendor/lexertk.hpp @@ -0,0 +1,1831 @@ +/* + ***************************************************************** + * Simple C++ Lexer Toolkit Library * + * * + * Author: Arash Partow (2001) * + * URL: http://www.partow.net/programming/lexertk/index.html * + * * + * Copyright notice: * + * Free use of the Simple C++ Lexer Toolkit Library is permitted * + * under the guidelines and in accordance with the MIT License. 
* + * http://www.opensource.org/licenses/MIT * + * * + * * + * The lexer will tokenize input against the following BNF: * + * * + * expression ::= term { +|- term } * + * term ::= (symbol | factor) {operator symbol | factor} * + * factor ::= symbol | ( '(' {-} expression ')' ) * + * symbol ::= number | gensymb | string * + * gensymb ::= alphabet {alphabet | digit} * + * string ::= '"' {alphabet | digit | operator } '"' * + * operator ::= * | / | % | ^ | < | > | <= | >= | << | >> != * + * alphabet ::= a | b | .. | z | A | B | .. | Z * + * digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 * + * sign ::= + | - * + * edef ::= e | E * + * decimal ::= {digit} (digit [.] | [.] digit) {digit} * + * exponent ::= edef [sign] digit {digit} * + * real ::= [sign] decimal [exponent] * + * integer ::= [sign] {digit} * + * number ::= real | integer * + * * + * * + * Note: This lexer has been taken from the ExprTk Library. * + * * + ***************************************************************** +*/ + + +#ifndef INCLUDE_LEXERTK_HPP +#define INCLUDE_LEXERTK_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace lexertk +{ + + namespace details + { + + inline bool is_whitespace(const char c) + { + return (' ' == c) || ('\n' == c) || + ('\r' == c) || ('\t' == c) || + ('\b' == c) || ('\v' == c) || + ('\f' == c); + } + + inline bool is_operator_char(const char c) + { + return ('+' == c) || ('-' == c) || + ('*' == c) || ('/' == c) || + ('^' == c) || ('<' == c) || + ('>' == c) || ('=' == c) || + (',' == c) || ('!' == c) || + ('(' == c) || (')' == c) || + ('[' == c) || (']' == c) || + ('{' == c) || ('}' == c) || + ('%' == c) || (':' == c) || + ('?' 
// Lower-cases one character for case-insensitive comparison.
// std::tolower has undefined behaviour when its argument is negative (other than
// EOF) and plain char may be signed, so the value goes through unsigned char.
inline int safe_tolower(const char c)
{
   return std::tolower(static_cast<unsigned char>(c));
}

// Case-insensitive equality of two characters.
inline bool imatch(const char c1, const char c2)
{
   return safe_tolower(c1) == safe_tolower(c2);
}

// Case-insensitive equality of two strings; strings of different length never match.
inline bool imatch(const std::string& s1, const std::string& s2)
{
   if (s1.size() != s2.size())
   {
      return false;
   }

   for (std::size_t i = 0; i < s1.size(); ++i)
   {
      if (safe_tolower(s1[i]) != safe_tolower(s2[i]))
      {
         return false;
      }
   }

   return true;
}

// Strict weak ordering for case-insensitive string keys: compares character by
// character ignoring case; when one string is a prefix of the other, the shorter
// one orders first.
struct ilesscompare
{
   inline bool operator()(const std::string& s1, const std::string& s2) const
   {
      const std::size_t length = std::min(s1.size(), s2.size());

      for (std::size_t i = 0; i < length; ++i)
      {
         const int c1 = safe_tolower(s1[i]);
         const int c2 = safe_tolower(s2[i]);

         if (c1 > c2)
            return false;
         else if (c1 < c2)
            return true;
      }

      return s1.size() < s2.size();
   }
};
// One lexical token: its kind, its text and the offset of that text in the
// lexed input. `position` is std::size_t max when no offset has been recorded.
struct token
{

   // Kinds 0-15 are symbolic; single-character operators use their own
   // character code as the enumerator value.
   enum token_type
   {
      e_none        =   0, e_error       =   1, e_err_symbol  =   2,
      e_err_number  =   3, e_err_string  =   4, e_err_sfunc   =   5,
      e_eof         =   6, e_number      =   7, e_symbol      =   8,
      e_string      =   9, e_assign      =  10, e_shr         =  11,
      e_shl         =  12, e_lte         =  13, e_ne          =  14,
      e_gte         =  15, e_lt          = '<', e_gt          = '>',
      e_eq          = '=', e_rbracket    = ')', e_lbracket    = '(',
      e_rsqrbracket = ']', e_lsqrbracket = '[', e_rcrlbracket = '}',
      e_lcrlbracket = '{', e_comma       = ',', e_add         = '+',
      e_sub         = '-', e_div         = '/', e_mul         = '*',
      e_mod         = '%', e_pow         = '^', e_colon       = ':'
   };

   token()
   : type(e_none),
     value(""),
     position(std::numeric_limits<std::size_t>::max())
   {}

   // Resets the token to the same state as a default-constructed one.
   void clear()
   {
      type     = e_none;
      value    = "";
      position = std::numeric_limits<std::size_t>::max();
   }

   // The set_* functions store the text [begin, end) under the given kind.
   // When base_begin is non-null, position becomes the distance from base_begin
   // to begin; otherwise position is left unchanged. All return *this.

   template <typename Iterator>
   inline token& set_operator(const token_type tt, const Iterator begin, const Iterator end, const Iterator base_begin = Iterator(0))
   {
      return assign_range(tt, begin, end, base_begin);
   }

   template <typename Iterator>
   inline token& set_symbol(const Iterator begin, const Iterator end, const Iterator base_begin = Iterator(0))
   {
      return assign_range(e_symbol, begin, end, base_begin);
   }

   template <typename Iterator>
   inline token& set_numeric(const Iterator begin, const Iterator end, const Iterator base_begin = Iterator(0))
   {
      return assign_range(e_number, begin, end, base_begin);
   }

   template <typename Iterator>
   inline token& set_string(const Iterator begin, const Iterator end, const Iterator base_begin = Iterator(0))
   {
      return assign_range(e_string, begin, end, base_begin);
   }

   // Stores an already-built string (e.g. after escape clean-up) at offset p.
   inline token& set_string(const std::string& s, const std::size_t p)
   {
      type     = e_string;
      value    = s;
      position = p;
      return *this;
   }

   // Marks the token as an error. A recognised error kind is kept as given so
   // that to_str() can report it; anything else is normalised to e_error.
   // (Both branches used to assign e_error, so the specific kind was always lost.)
   template <typename Iterator>
   inline token& set_error(const token_type et, const Iterator begin, const Iterator end, const Iterator base_begin = Iterator(0))
   {
      const bool known_error_kind = (e_error      == et) ||
                                    (e_err_symbol == et) ||
                                    (e_err_number == et) ||
                                    (e_err_string == et);

      return assign_range(known_error_kind ? et : e_error, begin, end, base_begin);
   }

   // Printable name of a token kind; "UNKNOWN" for values outside the enum.
   static inline std::string to_str(token_type t)
   {
      switch (t)
      {
         case e_none        : return "NONE";
         case e_error       : return "ERROR";
         case e_err_symbol  : return "ERROR_SYMBOL";
         case e_err_number  : return "ERROR_NUMBER";
         case e_err_string  : return "ERROR_STRING";
         case e_err_sfunc   : return "ERROR_SFUNC";
         case e_eof         : return "EOF";
         case e_number      : return "NUMBER";
         case e_symbol      : return "SYMBOL";
         case e_string      : return "STRING";
         case e_assign      : return ":=";
         case e_shr         : return ">>";
         case e_shl         : return "<<";
         case e_lte         : return "<=";
         case e_ne          : return "!=";
         case e_gte         : return ">=";
         case e_lt          : return "<";
         case e_gt          : return ">";
         case e_eq          : return "=";
         case e_rbracket    : return ")";
         case e_lbracket    : return "(";
         case e_rsqrbracket : return "]";
         case e_lsqrbracket : return "[";
         case e_rcrlbracket : return "}";
         case e_lcrlbracket : return "{";
         case e_comma       : return ",";
         case e_add         : return "+";
         case e_sub         : return "-";
         case e_div         : return "/";
         case e_mul         : return "*";
         case e_mod         : return "%";
         case e_pow         : return "^";
         case e_colon       : return ":";
         default            : return "UNKNOWN";
      }
   }

   // True for every error kind that set_error() can produce.
   inline bool is_error() const
   {
      return (
               (e_error      == type) ||
               (e_err_symbol == type) ||
               (e_err_number == type) ||
               (e_err_string == type)
             );
   }

   token_type type;
   std::string value;
   std::size_t position;

private:

   // Shared body of the iterator-based setters.
   template <typename Iterator>
   inline token& assign_range(const token_type tt, const Iterator begin, const Iterator end, const Iterator base_begin)
   {
      type = tt;
      value.assign(begin, end);
      if (base_begin)
         position = static_cast<std::size_t>(std::distance(base_begin, begin));
      return *this;
   }
};
(finished()) + return ""; + else if (token_list_.begin() != token_itr_) + return std::string(base_itr_ + (token_itr_ - 1)->position, s_end_); + else + return std::string(base_itr_ + token_itr_->position, s_end_); + } + + private: + + inline bool is_end(const char* itr) + { + return (s_end_ == itr); + } + + inline void skip_whitespace() + { + while (!is_end(s_itr_) && details::is_whitespace(*s_itr_)) + { + ++s_itr_; + } + } + + inline void skip_comments() + { + //The following comment styles are supported: + // 1. // .... \n + // 2. # .... \n + // 3. /* .... */ + struct test + { + static inline bool comment_start(const char c0, const char c1, int& mode, int& incr) + { + mode = 0; + if ('#' == c0) { mode = 1; incr = 1; } + else if ('/' == c0) + { + if ('/' == c1) { mode = 1; incr = 2; } + else if ('*' == c1) { mode = 2; incr = 2; } + } + return (mode != 0); + } + + static inline bool comment_end(const char c0, const char c1, const int mode) + { + return ((1 == mode) && ('\n' == c0)) || + ((2 == mode) && ('*' == c0) && ('/' == c1)); + } + }; + + int mode = 0; + int increment = 0; + + if (is_end(s_itr_) || is_end((s_itr_ + 1))) + return; + else if (!test::comment_start(*s_itr_, *(s_itr_ + 1), mode, increment)) + return; + + s_itr_ += increment; + + while (!is_end(s_itr_) && !test::comment_end(*s_itr_, *(s_itr_ + 1), mode)) + { + ++s_itr_; + } + + if (!is_end(s_itr_)) + { + s_itr_ += mode; + skip_whitespace(); + skip_comments(); + } + } + + inline void scan_token() + { + skip_whitespace(); + + skip_comments(); + + if (is_end(s_itr_)) + { + return; + } + else if (details::is_operator_char(*s_itr_)) + { + scan_operator(); + return; + } + else if (details::is_letter(*s_itr_)) + { + scan_symbol(); + return; + } + else if (details::is_digit((*s_itr_)) || ('.' 
== (*s_itr_))) + { + scan_number(); + return; + } + else if ('\'' == (*s_itr_)) + { + scan_string(); + return; + } + else + { + token_t t; + t.set_error(token::e_error, s_itr_, s_itr_ + 2, base_itr_); + token_list_.push_back(t); + ++s_itr_; + } + } + + inline void scan_operator() + { + token_t t; + + if (!is_end(s_itr_ + 1)) + { + token_t::token_type ttype = token_t::e_none; + + char c0 = s_itr_[0]; + char c1 = s_itr_[1]; + + if ((c0 == '<') && (c1 == '=')) ttype = token_t::e_lte; + else if ((c0 == '>') && (c1 == '=')) ttype = token_t::e_gte; + else if ((c0 == '<') && (c1 == '>')) ttype = token_t::e_ne; + else if ((c0 == '!') && (c1 == '=')) ttype = token_t::e_ne; + else if ((c0 == '=') && (c1 == '=')) ttype = token_t::e_eq; + else if ((c0 == ':') && (c1 == '=')) ttype = token_t::e_assign; + else if ((c0 == '<') && (c1 == '<')) ttype = token_t::e_shl; + else if ((c0 == '>') && (c1 == '>')) ttype = token_t::e_shr; + + if (token_t::e_none != ttype) + { + t.set_operator(ttype, s_itr_, s_itr_ + 2, base_itr_); + token_list_.push_back(t); + s_itr_ += 2; + return; + } + } + + if ('<' == *s_itr_) + t.set_operator(token_t::e_lt, s_itr_, s_itr_ + 1, base_itr_); + else if ('>' == *s_itr_) + t.set_operator(token_t::e_gt, s_itr_, s_itr_ + 1, base_itr_); + else if (';' == *s_itr_) + t.set_operator(token_t::e_eof, s_itr_, s_itr_ + 1, base_itr_); + else if ('&' == *s_itr_) + t.set_symbol(s_itr_, s_itr_ + 1, base_itr_); + else if ('|' == *s_itr_) + t.set_symbol(s_itr_, s_itr_ + 1, base_itr_); + else + t.set_operator(token_t::token_type(*s_itr_), s_itr_, s_itr_ + 1, base_itr_); + + token_list_.push_back(t); + + ++s_itr_; + } + + inline void scan_symbol() + { + const char* begin = s_itr_; + while ( + (!is_end(s_itr_)) && + (details::is_letter_or_digit(*s_itr_) || ((*s_itr_) == '_')) + ) + { + ++s_itr_; + } + token_t t; + t.set_symbol(begin, s_itr_, base_itr_); + token_list_.push_back(t); + } + + inline void scan_number() + { + /* + Attempt to match a valid numeric value in one of the 
following formats: + 01. 123456 + 02. 123.456 + 03. 123.456e3 + 04. 123.456E3 + 05. 123.456e+3 + 06. 123.456E+3 + 07. 123.456e-3 + 08. 123.456E-3 + 09. .1234 + 10. .1234e3 + 11. .1234E+3 + 12. .1234e+3 + 13. .1234E-3 + 14. .1234e-3 + */ + const char* begin = s_itr_; + bool dot_found = false; + bool e_found = false; + bool post_e_sign_found = false; + bool post_e_digit_found = false; + token_t t; + + while (!is_end(s_itr_)) + { + if ('.' == (*s_itr_)) + { + if (dot_found) + { + t.set_error(token::e_err_number, begin, s_itr_, base_itr_); + token_list_.push_back(t); + + return; + } + + dot_found = true; + ++s_itr_; + + continue; + } + else if (details::imatch('e', (*s_itr_))) + { + const char& c = *(s_itr_ + 1); + + if (is_end(s_itr_ + 1)) + { + t.set_error(token::e_err_number, begin, s_itr_, base_itr_); + token_list_.push_back(t); + + return; + } + else if ( + ('+' != c) && + ('-' != c) && + !details::is_digit(c) + ) + { + t.set_error(token::e_err_number, begin, s_itr_, base_itr_); + token_list_.push_back(t); + + return; + } + + e_found = true; + ++s_itr_; + + continue; + } + else if (e_found && details::is_sign(*s_itr_) && !post_e_digit_found) + { + if (post_e_sign_found) + { + t.set_error(token::e_err_number, begin, s_itr_, base_itr_); + token_list_.push_back(t); + + return; + } + + post_e_sign_found = true; + ++s_itr_; + + continue; + } + else if (e_found && details::is_digit(*s_itr_)) + { + post_e_digit_found = true; + ++s_itr_; + + continue; + } + else if (('.' 
!= (*s_itr_)) && !details::is_digit(*s_itr_)) + break; + else + ++s_itr_; + } + + t.set_numeric(begin, s_itr_, base_itr_); + + token_list_.push_back(t); + + return; + } + + inline void scan_string() + { + const char* begin = s_itr_ + 1; + + token_t t; + + if (std::distance(s_itr_, s_end_) < 2) + { + t.set_error(token::e_err_string, s_itr_, s_end_, base_itr_); + token_list_.push_back(t); + + return; + } + + ++s_itr_; + + bool escaped_found = false; + bool escaped = false; + + while (!is_end(s_itr_)) + { + if (!escaped && ('\\' == *s_itr_)) + { + escaped_found = true; + escaped = true; + ++s_itr_; + + continue; + } + else if (!escaped) + { + if ('\'' == *s_itr_) + break; + } + else if (escaped) + escaped = false; + + ++s_itr_; + } + + if (is_end(s_itr_)) + { + t.set_error(token::e_err_string, begin, s_itr_, base_itr_); + token_list_.push_back(t); + + return; + } + + if (!escaped_found) + t.set_string(begin, s_itr_, base_itr_); + else + { + std::string parsed_string(begin, s_itr_); + details::cleanup_escapes(parsed_string); + t.set_string(parsed_string, std::distance(base_itr_, begin)); + } + + token_list_.push_back(t); + ++s_itr_; + + return; + } + + private: + + token_list_t token_list_; + token_list_itr_t token_itr_; + token_list_itr_t store_token_itr_; + token_t eof_token_; + const char* base_itr_; + const char* s_itr_; + const char* s_end_; + + friend class token_scanner; + friend class token_modifier; + friend class token_inserter; + friend class token_joiner; + }; + + class helper_interface + { + public: + + virtual void init() { } + virtual void reset() { } + virtual bool result() { return true; } + virtual std::size_t process(generator&) { return 0; } + virtual ~helper_interface() { } + }; + + class token_scanner : public helper_interface + { + public: + + virtual ~token_scanner() + {} + + explicit token_scanner(const std::size_t& stride) + : stride_(stride) + { + if (stride > 4) + { + throw std::invalid_argument("token_scanner() - Invalid stride value"); + } 
+ } + + inline std::size_t process(generator& g) + { + if (!g.token_list_.empty()) + { + for (std::size_t i = 0; i < (g.token_list_.size() - stride_ + 1); ++i) + { + token t; + switch (stride_) + { + case 1: + { + const token& t0 = g.token_list_[i]; + + if (!operator()(t0)) return i; + } + break; + + case 2: + { + const token& t0 = g.token_list_[i]; + const token& t1 = g.token_list_[i + 1]; + + if (!operator()(t0, t1)) return i; + } + break; + + case 3: + { + const token& t0 = g.token_list_[i]; + const token& t1 = g.token_list_[i + 1]; + const token& t2 = g.token_list_[i + 2]; + + if (!operator()(t0, t1, t2)) return i; + } + break; + + case 4: + { + const token& t0 = g.token_list_[i]; + const token& t1 = g.token_list_[i + 1]; + const token& t2 = g.token_list_[i + 2]; + const token& t3 = g.token_list_[i + 3]; + + if (!operator()(t0, t1, t2, t3)) return i; + } + break; + } + } + } + + return (g.token_list_.size() - stride_ + 1); + } + + virtual bool operator()(const token&) + { + return false; + } + + virtual bool operator()(const token&, const token&) + { + return false; + } + + virtual bool operator()(const token&, const token&, const token&) + { + return false; + } + + virtual bool operator()(const token&, const token&, const token&, const token&) + { + return false; + } + + private: + + std::size_t stride_; + }; + + class token_modifier : public helper_interface + { + public: + + inline std::size_t process(generator& g) + { + std::size_t changes = 0; + + for (std::size_t i = 0; i < g.token_list_.size(); ++i) + { + if (modify(g.token_list_[i])) changes++; + } + + return changes; + } + + virtual bool modify(token& t) = 0; + }; + + class token_inserter : public helper_interface + { + public: + + explicit token_inserter(const std::size_t& stride) + : stride_(stride) + { + if (stride > 5) + { + throw std::invalid_argument("token_inserter() - Invalid stride value"); + } + } + + inline std::size_t process(generator& g) + { + if (g.token_list_.empty()) + return 0; + + 
std::size_t changes = 0; + + for (std::size_t i = 0; i < (g.token_list_.size() - stride_ + 1); ++i) + { + token t; + int insert_index = -1; + + switch (stride_) + { + case 1: insert_index = insert(g.token_list_[i], t); + break; + + case 2: insert_index = insert(g.token_list_[i], g.token_list_[i + 1], t); + break; + + case 3: insert_index = insert(g.token_list_[i], g.token_list_[i + 1], g.token_list_[i + 2], t); + break; + + case 4: insert_index = insert(g.token_list_[i], g.token_list_[i + 1], g.token_list_[i + 2], g.token_list_[i + 3], t); + break; + + case 5: insert_index = insert(g.token_list_[i], g.token_list_[i + 1], g.token_list_[i + 2], g.token_list_[i + 3], g.token_list_[i + 4], t); + break; + } + + if ((insert_index >= 0) && (insert_index <= (static_cast(stride_) + 1))) + { + g.token_list_.insert(g.token_list_.begin() + (i + insert_index), t); + changes++; + } + } + + return changes; + } + + virtual inline int insert(const token&, token&) + { + return -1; + } + + virtual inline int insert(const token&, const token&, token&) + { + return -1; + } + + virtual inline int insert(const token&, const token&, const token&, token&) + { + return -1; + } + + virtual inline int insert(const token&, const token&, const token&, const token&, token&) + { + return -1; + } + + virtual inline int insert(const token&, const token&, const token&, const token&, const token&, token&) + { + return -1; + } + + private: + + std::size_t stride_; + }; + + class token_joiner : public helper_interface + { + public: + + inline std::size_t process(generator& g) + { + if (g.token_list_.empty()) + return 0; + + std::size_t changes = 0; + + for (std::size_t i = 0; i < g.token_list_.size() - 1; ++i) + { + token t; + + if (join(g.token_list_[i], g.token_list_[i + 1], t)) + { + g.token_list_[i] = t; + g.token_list_.erase(g.token_list_.begin() + (i + 1)); + + ++changes; + } + } + + return changes; + } + + virtual bool join(const token&, const token&, token&) = 0; + }; + + namespace helper + { + 
+ inline void dump(lexertk::generator& generator) + { + for (std::size_t i = 0; i < generator.size(); ++i) + { + lexertk::token t = generator[i]; + printf("Token[%02d] @ %03d %6s --> '%s'\n", + static_cast(i), + static_cast(t.position), + t.to_str(t.type).c_str(), + t.value.c_str()); + } + } + + class commutative_inserter : public token_inserter + { + public: + + commutative_inserter() + : lexertk::token_inserter(2) + {} + + inline void ignore_symbol(const std::string& symbol) + { + ignore_set_.insert(symbol); + } + + inline int insert(const lexertk::token& t0, const lexertk::token& t1, lexertk::token& new_token) + { + new_token.type = lexertk::token::e_mul; + new_token.value = "*"; + new_token.position = t1.position; + bool match = false; + + if (t0.type == lexertk::token::e_symbol) + { + if (ignore_set_.end() != ignore_set_.find(t0.value)) + { + return -1; + } + else if (!t0.value.empty() && ('$' == t0.value[0])) + { + return -1; + } + } + + if (t1.type == lexertk::token::e_symbol) + { + if (ignore_set_.end() != ignore_set_.find(t1.value)) + { + return -1; + } + } + + if ((t0.type == lexertk::token::e_number) && (t1.type == lexertk::token::e_symbol)) match = true; + else if ((t0.type == lexertk::token::e_number) && (t1.type == lexertk::token::e_lbracket)) match = true; + else if ((t0.type == lexertk::token::e_number) && (t1.type == lexertk::token::e_lcrlbracket)) match = true; + else if ((t0.type == lexertk::token::e_number) && (t1.type == lexertk::token::e_lsqrbracket)) match = true; + else if ((t0.type == lexertk::token::e_symbol) && (t1.type == lexertk::token::e_number)) match = true; + else if ((t0.type == lexertk::token::e_rbracket) && (t1.type == lexertk::token::e_number)) match = true; + else if ((t0.type == lexertk::token::e_rcrlbracket) && (t1.type == lexertk::token::e_number)) match = true; + else if ((t0.type == lexertk::token::e_rsqrbracket) && (t1.type == lexertk::token::e_number)) match = true; + else if ((t0.type == lexertk::token::e_rbracket) && 
(t1.type == lexertk::token::e_symbol)) match = true; + else if ((t0.type == lexertk::token::e_rcrlbracket) && (t1.type == lexertk::token::e_symbol)) match = true; + else if ((t0.type == lexertk::token::e_rsqrbracket) && (t1.type == lexertk::token::e_symbol)) match = true; + + return (match) ? 1 : -1; + } + + private: + + std::set ignore_set_; + }; + + class operator_joiner : public token_joiner + { + public: + + inline bool join(const lexertk::token& t0, const lexertk::token& t1, lexertk::token& t) + { + //': =' --> ':=' + if ((t0.type == lexertk::token::e_colon) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_assign; + t.value = ":="; + t.position = t0.position; + + return true; + } + //'> =' --> '>=' + else if ((t0.type == lexertk::token::e_gt) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_gte; + t.value = ">="; + t.position = t0.position; + + return true; + } + //'< =' --> '<=' + else if ((t0.type == lexertk::token::e_lt) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_lte; + t.value = "<="; + t.position = t0.position; + + return true; + } + //'= =' --> '==' + else if ((t0.type == lexertk::token::e_eq) && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_eq; + t.value = "=="; + t.position = t0.position; + + return true; + } + //'! 
=' --> '!=' + else if ((static_cast(t0.type) == '!') && (t1.type == lexertk::token::e_eq)) + { + t.type = lexertk::token::e_ne; + t.value = "!="; + t.position = t0.position; + + return true; + } + //'< >' --> '<>' + else if ((t0.type == lexertk::token::e_lt) && (t1.type == lexertk::token::e_gt)) + { + t.type = lexertk::token::e_ne; + t.value = "<>"; + t.position = t0.position; + + return true; + } + else + return false; + } + }; + + class bracket_checker : public token_scanner + { + public: + + bracket_checker() + : token_scanner(1), + state_(true) + {} + + bool result() + { + return state_ && stack_.empty(); + } + + lexertk::token error_token() + { + return error_token_; + } + + void reset() + { + //why? because msvc doesn't support swap properly. + stack_ = std::stack(); + state_ = true; + error_token_.clear(); + } + + bool operator()(const lexertk::token& t) + { + if ( + !t.value.empty() && + (lexertk::token::e_string != t.type) && + (lexertk::token::e_symbol != t.type) && + details::is_bracket(t.value[0]) + ) + { + char c = t.value[0]; + + if (t.type == lexertk::token::e_lbracket) stack_.push(')'); + else if (t.type == lexertk::token::e_lcrlbracket) stack_.push('}'); + else if (t.type == lexertk::token::e_lsqrbracket) stack_.push(']'); + else if (details::is_right_bracket(c)) + { + if (stack_.empty()) + { + state_ = false; + error_token_ = t; + + return false; + } + else if (c != stack_.top()) + { + state_ = false; + error_token_ = t; + + return false; + } + else + stack_.pop(); + } + } + + return true; + } + + private: + + bool state_; + std::stack stack_; + lexertk::token error_token_; + }; + + class symbol_replacer : public token_modifier + { + private: + + typedef std::map, details::ilesscompare> replace_map_t; + + public: + + bool remove(const std::string& target_symbol) + { + replace_map_t::iterator itr = replace_map_.find(target_symbol); + + if (replace_map_.end() == itr) + return false; + + replace_map_.erase(itr); + + return true; + } + + bool 
add_replace(const std::string& target_symbol, + const std::string& replace_symbol, + const lexertk::token::token_type token_type = lexertk::token::e_symbol) + { + replace_map_t::iterator itr = replace_map_.find(target_symbol); + + if (replace_map_.end() != itr) + { + return false; + } + + replace_map_[target_symbol] = std::make_pair(replace_symbol, token_type); + + return true; + } + + void clear() + { + replace_map_.clear(); + } + + private: + + bool modify(lexertk::token& t) + { + if (lexertk::token::e_symbol == t.type) + { + if (replace_map_.empty()) + return false; + + replace_map_t::iterator itr = replace_map_.find(t.value); + + if (replace_map_.end() != itr) + { + t.value = itr->second.first; + t.type = itr->second.second; + + return true; + } + } + + return false; + } + + replace_map_t replace_map_; + }; + + class sequence_validator : public token_scanner + { + private: + + typedef std::pair token_pair_t; + typedef std::set set_t; + + public: + + sequence_validator() + : lexertk::token_scanner(2) + { + add_invalid(lexertk::token::e_number, lexertk::token::e_number); + add_invalid(lexertk::token::e_string, lexertk::token::e_string); + add_invalid(lexertk::token::e_number, lexertk::token::e_string); + add_invalid(lexertk::token::e_string, lexertk::token::e_number); + add_invalid(lexertk::token::e_string, lexertk::token::e_colon); + add_invalid(lexertk::token::e_colon, lexertk::token::e_string); + add_invalid_set1(lexertk::token::e_assign); + add_invalid_set1(lexertk::token::e_shr); + add_invalid_set1(lexertk::token::e_shl); + add_invalid_set1(lexertk::token::e_lte); + add_invalid_set1(lexertk::token::e_ne); + add_invalid_set1(lexertk::token::e_gte); + add_invalid_set1(lexertk::token::e_lt); + add_invalid_set1(lexertk::token::e_gt); + add_invalid_set1(lexertk::token::e_eq); + add_invalid_set1(lexertk::token::e_comma); + add_invalid_set1(lexertk::token::e_add); + add_invalid_set1(lexertk::token::e_sub); + add_invalid_set1(lexertk::token::e_div); + 
add_invalid_set1(lexertk::token::e_mul); + add_invalid_set1(lexertk::token::e_mod); + add_invalid_set1(lexertk::token::e_pow); + add_invalid_set1(lexertk::token::e_colon); + } + + bool result() + { + return error_list_.empty(); + } + + bool operator()(const lexertk::token& t0, const lexertk::token& t1) + { + set_t::value_type p = std::make_pair(t0.type, t1.type); + + if (invalid_bracket_check(t0.type, t1.type)) + { + error_list_.push_back(std::make_pair(t0, t1)); + } + else if (invalid_comb_.find(p) != invalid_comb_.end()) + error_list_.push_back(std::make_pair(t0, t1)); + + return true; + } + + std::size_t error_count() + { + return error_list_.size(); + } + + std::pair error(const std::size_t index) + { + if (index < error_list_.size()) + { + return error_list_[index]; + } + else + { + static const lexertk::token error_token; + return std::make_pair(error_token, error_token); + } + } + + void clear_errors() + { + error_list_.clear(); + } + + private: + + void add_invalid(lexertk::token::token_type base, lexertk::token::token_type t) + { + invalid_comb_.insert(std::make_pair(base, t)); + } + + void add_invalid_set1(lexertk::token::token_type t) + { + add_invalid(t, lexertk::token::e_assign); + add_invalid(t, lexertk::token::e_shr); + add_invalid(t, lexertk::token::e_shl); + add_invalid(t, lexertk::token::e_lte); + add_invalid(t, lexertk::token::e_ne); + add_invalid(t, lexertk::token::e_gte); + add_invalid(t, lexertk::token::e_lt); + add_invalid(t, lexertk::token::e_gt); + add_invalid(t, lexertk::token::e_eq); + add_invalid(t, lexertk::token::e_comma); + add_invalid(t, lexertk::token::e_div); + add_invalid(t, lexertk::token::e_mul); + add_invalid(t, lexertk::token::e_mod); + add_invalid(t, lexertk::token::e_pow); + add_invalid(t, lexertk::token::e_colon); + } + + bool invalid_bracket_check(lexertk::token::token_type base, lexertk::token::token_type t) + { + if (details::is_right_bracket(static_cast(base))) + { + switch (t) + { + case lexertk::token::e_string: 
return true; + case lexertk::token::e_assign: return true; + default: return false; + } + } + else if (details::is_left_bracket(static_cast(base))) + { + if (details::is_right_bracket(static_cast(t))) + return false; + else if (details::is_left_bracket(static_cast(t))) + return false; + else + { + switch (t) + { + case lexertk::token::e_number: return false; + case lexertk::token::e_symbol: return false; + case lexertk::token::e_string: return false; + case lexertk::token::e_add: return false; + case lexertk::token::e_sub: return false; + case lexertk::token::e_colon: return false; + default: return true; + } + } + } + else if (details::is_right_bracket(static_cast(t))) + { + switch (base) + { + case lexertk::token::e_number: return false; + case lexertk::token::e_symbol: return false; + case lexertk::token::e_string: return false; + case lexertk::token::e_eof: return false; + case lexertk::token::e_colon: return false; + default: return true; + } + } + else if (details::is_left_bracket(static_cast(t))) + { + switch (base) + { + case lexertk::token::e_rbracket: return true; + case lexertk::token::e_rsqrbracket: return true; + case lexertk::token::e_rcrlbracket: return true; + default: return false; + } + } + + return false; + } + + set_t invalid_comb_; + std::deque > error_list_; + + }; + + struct helper_assembly + { + inline bool register_scanner(lexertk::token_scanner* scanner) + { + if (token_scanner_list.end() != std::find(token_scanner_list.begin(), + token_scanner_list.end(), + scanner)) + { + return false; + } + + token_scanner_list.push_back(scanner); + + return true; + } + + inline bool register_modifier(lexertk::token_modifier* modifier) + { + if (token_modifier_list.end() != std::find(token_modifier_list.begin(), + token_modifier_list.end(), + modifier)) + { + return false; + } + + token_modifier_list.push_back(modifier); + + return true; + } + + inline bool register_joiner(lexertk::token_joiner* joiner) + { + if (token_joiner_list.end() != 
std::find(token_joiner_list.begin(), + token_joiner_list.end(), + joiner)) + { + return false; + } + + token_joiner_list.push_back(joiner); + + return true; + } + + inline bool register_inserter(lexertk::token_inserter* inserter) + { + if (token_inserter_list.end() != std::find(token_inserter_list.begin(), + token_inserter_list.end(), + inserter)) + { + return false; + } + + token_inserter_list.push_back(inserter); + + return true; + } + + inline bool run_modifiers(lexertk::generator& g) + { + error_token_modifier = reinterpret_cast(0); + + for (std::size_t i = 0; i < token_modifier_list.size(); ++i) + { + lexertk::token_modifier& modifier = (*token_modifier_list[i]); + + modifier.reset(); + modifier.process(g); + + if (!modifier.result()) + { + error_token_modifier = token_modifier_list[i]; + + return false; + } + } + + return true; + } + + inline bool run_joiners(lexertk::generator& g) + { + error_token_joiner = reinterpret_cast(0); + + for (std::size_t i = 0; i < token_joiner_list.size(); ++i) + { + lexertk::token_joiner& joiner = (*token_joiner_list[i]); + + joiner.reset(); + joiner.process(g); + + if (!joiner.result()) + { + error_token_joiner = token_joiner_list[i]; + + return false; + } + } + + return true; + } + + inline bool run_inserters(lexertk::generator& g) + { + error_token_inserter = reinterpret_cast(0); + + for (std::size_t i = 0; i < token_inserter_list.size(); ++i) + { + lexertk::token_inserter& inserter = (*token_inserter_list[i]); + + inserter.reset(); + inserter.process(g); + + if (!inserter.result()) + { + error_token_inserter = token_inserter_list[i]; + + return false; + } + } + + return true; + } + + inline bool run_scanners(lexertk::generator& g) + { + error_token_scanner = reinterpret_cast(0); + + for (std::size_t i = 0; i < token_scanner_list.size(); ++i) + { + lexertk::token_scanner& scanner = (*token_scanner_list[i]); + + scanner.reset(); + scanner.process(g); + + if (!scanner.result()) + { + error_token_scanner = token_scanner_list[i]; 
+ + return false; + } + } + + return true; + } + + std::deque token_scanner_list; + std::deque token_modifier_list; + std::deque token_joiner_list; + std::deque token_inserter_list; + + lexertk::token_scanner* error_token_scanner; + lexertk::token_modifier* error_token_modifier; + lexertk::token_joiner* error_token_joiner; + lexertk::token_inserter* error_token_inserter; + }; + } + + class parser_helper + { + public: + + typedef token token_t; + typedef generator generator_t; + + inline bool init(const std::string& str) + { + if (!lexer_.process(str)) + { + return false; + } + + lexer_.begin(); + + next_token(); + + return true; + } + + inline generator_t& lexer() + { + return lexer_; + } + + inline const generator_t& lexer() const + { + return lexer_; + } + + inline void next_token() + { + current_token_ = lexer_.next_token(); + } + + inline const token_t& current_token() const + { + return current_token_; + } + + enum token_advance_mode + { + e_hold = 0, + e_advance = 1 + }; + + inline void advance_token(const token_advance_mode mode) + { + if (e_advance == mode) + { + next_token(); + } + } + + inline bool token_is(const token_t::token_type& ttype, const token_advance_mode mode = e_advance) + { + if (current_token().type != ttype) + { + return false; + } + + advance_token(mode); + + return true; + } + + inline bool token_is(const token_t::token_type& ttype, + const std::string& value, + const token_advance_mode mode = e_advance) + { + if ( + (current_token().type != ttype) || + !details::imatch(value, current_token().value) + ) + { + return false; + } + + advance_token(mode); + + return true; + } + + inline bool token_is_then_assign(const token_t::token_type& ttype, + std::string& token, + const token_advance_mode mode = e_advance) + { + if (current_token_.type != ttype) + { + return false; + } + + token = current_token_.value; + + advance_token(mode); + + return true; + } + + template class Container> + inline bool token_is_then_assign(const token_t::token_type& 
ttype, + Container& token_list, + const token_advance_mode mode = e_advance) + { + if (current_token_.type != ttype) + { + return false; + } + + token_list.push_back(current_token_.value); + + advance_token(mode); + + return true; + } + + inline bool peek_token_is(const token_t::token_type& ttype) + { + return (lexer_.peek_next_token().type == ttype); + } + + inline bool peek_token_is(const std::string& s) + { + return (details::imatch(lexer_.peek_next_token().value, s)); + } + + private: + + generator_t lexer_; + token_t current_token_; + }; +} + +#endif \ No newline at end of file diff --git a/bin/Debug-x64/QuicMaf.exe b/bin/Debug-x64/QuicMaf.exe index 69f577b..5a07a84 100644 Binary files a/bin/Debug-x64/QuicMaf.exe and b/bin/Debug-x64/QuicMaf.exe differ diff --git a/bin/Debug-x64/QuicMaf.ilk b/bin/Debug-x64/QuicMaf.ilk index 64296b1..8832296 100644 Binary files a/bin/Debug-x64/QuicMaf.ilk and b/bin/Debug-x64/QuicMaf.ilk differ diff --git a/bin/Debug-x64/QuicMaf.pdb b/bin/Debug-x64/QuicMaf.pdb index be83069..a90f8be 100644 Binary files a/bin/Debug-x64/QuicMaf.pdb and b/bin/Debug-x64/QuicMaf.pdb differ