Tokenizer Finished

2019-02-22 10:29:49 +03:00 · 2019-02-22 10:29:49 +03:00 · 99afb00916
parent 8e1228bc35
commit 99afb00916
15 changed files with 2232 additions and 185 deletions
--- a/QuicMaf/EquationAdv.cpp
+++ b/QuicMaf/EquationAdv.cpp
@ -1,12 +0,0 @@
-#include "EquationAdv.h"
-
-
-
-EquationAdv::EquationAdv()
-{
-}
-
-
-EquationAdv::~EquationAdv()
-{
-}
--- a/QuicMaf/EquationAdv.h
+++ b/QuicMaf/EquationAdv.h
@ -1,8 +0,0 @@
-#pragma once
-class EquationAdv
-{
-public:
-	EquationAdv();
-	~EquationAdv();
-};
-
--- a/QuicMaf/QuicMaf.vcxproj
+++ b/QuicMaf/QuicMaf.vcxproj
@ -119,19 +119,19 @@
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="app.cpp" />
-    <ClCompile Include="EquationAdv.cpp" />
  </ItemGroup>
  <ItemGroup>
-    <ClInclude Include="EquationAdv.h" />
    <ClInclude Include="maths\defines.h" />
-    <ClInclude Include="maths\Equation.h" />
    <ClInclude Include="maths\terms\Brackets.h" />
    <ClInclude Include="maths\terms\Constant.h" />
    <ClInclude Include="maths\terms\Equal.h" />
    <ClInclude Include="maths\terms\Operator.h" />
+    <ClInclude Include="maths\terms\Paranthesis.h" />
    <ClInclude Include="maths\terms\Term.h" />
    <ClInclude Include="maths\terms\term_parser.h" />
    <ClInclude Include="maths\terms\Variable.h" />
+    <ClInclude Include="maths\tokenizer.h" />
+    <ClInclude Include="vendor\lexertk.hpp" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
--- a/QuicMaf/QuicMaf.vcxproj.filters
+++ b/QuicMaf/QuicMaf.vcxproj.filters
@ -18,39 +18,39 @@
    <ClCompile Include="app.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="EquationAdv.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
  </ItemGroup>
  <ItemGroup>
-    <ClInclude Include="maths\defines.h">
+    <ClInclude Include="maths\tokenizer.h">
      <Filter>Header Files</Filter>
    </ClInclude>
-    <ClInclude Include="maths\terms\Term.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="maths\terms\Constant.h">
+    <ClInclude Include="vendor\lexertk.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="maths\terms\Variable.h">
      <Filter>Header Files</Filter>
    </ClInclude>
+    <ClInclude Include="maths\terms\term_parser.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="maths\terms\Term.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
    <ClInclude Include="maths\terms\Operator.h">
      <Filter>Header Files</Filter>
    </ClInclude>
-    <ClInclude Include="maths\Equation.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="maths\terms\Brackets.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
    <ClInclude Include="maths\terms\Equal.h">
      <Filter>Header Files</Filter>
    </ClInclude>
-    <ClInclude Include="maths\terms\term_parser.h">
+    <ClInclude Include="maths\terms\Constant.h">
      <Filter>Header Files</Filter>
    </ClInclude>
-    <ClInclude Include="EquationAdv.h">
+    <ClInclude Include="maths\terms\Brackets.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="maths\defines.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="maths\terms\Paranthesis.h">
      <Filter>Header Files</Filter>
    </ClInclude>
  </ItemGroup>
--- a/QuicMaf/app.cpp
+++ b/QuicMaf/app.cpp
@ -1,8 +1,68 @@
+#pragma once
+#define DEBUG_MODE
+#include <iostream>
+#include <string>
+
+#include "vendor/lexertk.hpp"
+
+#include "maths/tokenizer.h"
+
+#define MAIN_APP
+//#define APP_TEST
+
+
+#ifdef MAIN_APP
+
+using namespace std;

-#include "maths/Equation.h"

 int main() {
-	auto terms = split_terms("(4 + 2(2)) + 123 = 131x");

-	return true; 
-}
+	// Prompt loop: lex one line, check its brackets, tokenize it, then wait and clear the console.
+	while (true) {
+		cout << "Enter an equation: ";
+		string input;
+		// Read the whole line: `cin >> input` stops at the first space, so "2x + 3 = 7" would arrive
+		// as five separate inputs. Leave the loop once stdin is exhausted instead of spinning on it.
+		if (!getline(cin, input)) break;
+
+		lexertk::generator generator;
+		if (!generator.process(input)) {
+			cout << "Failed to lex: " << input << endl;
+			system("PAUSE");
+			system("CLS");
+			return 1; // non-zero exit status, matching the bracket-check failure below
+		}
+
+		//lexertk::helper::commutative_inserter ci;
+		//ci.process(generator);
+
+		lexertk::helper::bracket_checker bc;
+		bc.process(generator);
+		if (!bc.result()) {
+			cout << "Failed Bracket Check!" << endl;
+			system("PAUSE");
+			system("CLS");
+			return 1;
+		}
+
+#ifdef DEBUG_MODE
+		lexertk::helper::dump(generator);
+#endif // DEBUG_MODE
+
+		// The terms are not used yet and are never freed here; this only exercises the tokenizer.
+		auto result = tokenize(generator);
+
+		system("PAUSE");
+		system("CLS");
+	}
+
+	return 0; // reached only when input ends; `return true` would report exit status 1
+}
+#endif // MAIN_APP
+
+#ifdef APP_TEST
+
+
+
+#endif
--- a/QuicMaf/diagrams/core_parser.ddd
+++ b/QuicMaf/diagrams/core_parser.ddd
--- a/QuicMaf/maths/Equation.h
+++ b/QuicMaf/maths/Equation.h
@ -1,128 +0,0 @@
-#ifndef EQUATION_H
-#define EQUATION_H
-#pragma once
-
-using namespace std;
-#include "terms/Term.h"
-#include "terms/Brackets.h"
-#include "terms/Constant.h"
-#include "terms/Operator.h"
-#include "terms/Variable.h"
-#include "terms/Equal.h"
-
-#include "terms/term_parser.h"
-
-class Equation {
-public:
-	vector<Term*> lwing;
-	vector<Term*> rwing;
-
-	string Parse(std::string equation) {
-
-		/* ALL OF THE COMMENTS GOT FROM THE NOTES.PARSING.ALGORITHM */
-
-		// - split the terms in the term of spaces
-		auto semi_terms = split(equation, " ");
-
-		bool lwing_turn = true;
-		// - iterate over the terms
-		for (int i = 0; i < semi_terms.size(); i++) {
-			// , call the iterator mTerm:
-			auto term = semi_terms[i];
-			// - if there exists an opening bracket in the mTerm
-			if (isContain(term, '(')) {
-				// find the closing.
-				vector<string> terms_with_cbrack;
-				for (; i < semi_terms.size(); i++) {
-					if (isContain(semi_terms[i], ')')) {
-						terms_with_cbrack.push_back(semi_terms[i]);
-						break;
-					}
-					terms_with_cbrack.push_back(semi_terms[i]);
-				}
-				auto bracket = parseBracket(term, terms_with_cbrack);
-				(lwing_turn) ? lwing.push_back(bracket) : rwing.push_back(bracket);
-				continue;
-			}
-			auto _term = convertNormalToTerm(term);
-			if (_term->GetType() == TermTypes::Equ) {
-				lwing_turn = !lwing_turn;
-				continue;
-			}
-
-			(lwing_turn) ? lwing.push_back(_term) : rwing.push_back(_term);
-		}
-
-		return equation;
-	}
-public:
-	Bracket* parseBracket(string str1, vector<string> terms_with_cbrack) {
-		Bracket *bracket = nullptr;
-		bracket = new Bracket();
-
-		auto splitted = split(str1, "(");
-		string coefficient = splitted[0];
-
-		// convert coefficient to a term
-		if (coefficient != "")
-		bracket->setConstant((Term*)convertNormalToTerm(coefficient));
-
-		if (splitted.size() > 1) {
-			for (int i = 1; i < splitted.size(); i++) {
-				auto term = (Term*)convertNormalToTerm(splitted[i]);
-				bracket->mTerms.push_back(term);
-			}
-		}
-
-		// terms:
-		for (int i = 1; i < terms_with_cbrack.size(); i++) {
-			auto term = terms_with_cbrack[i];
-			if (isContain(term, ')')) {
-				term.erase(indexofChar(term, ')'));
-				bracket->mTerms.push_back(convertNormalToTerm(term));
-				break;
-			}
-			bracket->mTerms.push_back(convertNormalToTerm(term));
-		}
-
-		return bracket;
-	}
-
-	Term* convertNormalToTerm(string str) {
-		if (isContain(str, '^')) {
-			auto coeffic_terms = split(str, "^");
-			if (isContainAlph(str)) {
-				auto terms = split(str, "^");
-				Variable *var = nullptr;
-				var = new Variable(atof(&terms[0][0]), terms[0][indexofAlph(terms[0])], atof(&terms[1][0]));
-				return var;
-			}
-			else if (is_all_digits(coeffic_terms[0]) && is_all_digits(coeffic_terms[1])) {
-				Constant *Const = nullptr;
-				Const = new Constant(atof(&coeffic_terms[0][0]), atof(&coeffic_terms[1][0]));
-				return Const;
-			}
-		}
-		else if (is_all_digits(str)) {
-			Constant *Const = nullptr;
-			Const = new Constant(atof(&str[0]));
-			return Const;
-		}
-		else if (isContainAlph(str)) {
-			Variable *var = nullptr;
-			var = new Variable(atof(&str[0]), str[indexofAlph(str)]);
-			return var;
-		}
-		else if (isArithmitic(str[0])) {
-			Operator *op = nullptr;
-			op = new Operator(str[0]);
-			return op;
-		}
-		else if (isEqualChar(str[0])) {
-			Equal* equ = nullptr;
-			equ = new Equal();
-			return equ;
-		}
-	}
-};
-#endif // !EQUATION_H
--- a/QuicMaf/maths/defines.h
+++ b/QuicMaf/maths/defines.h
@ -2,13 +2,18 @@
 #define DEFINES_H

 #include <vector>
+#include <deque>
+#include <stack>
+
 #include <string>
 #include <algorithm>
 #include <iostream>

+using namespace std;
+
 #define DEF_C -999
 #define DEF_N 0.0
-typedef double NValue;
+typedef long double NValue;
 typedef char CValue;

 enum TermTypes {
@ -22,7 +27,7 @@ enum TermTypes {


 // for string delimiter
-vector<string> split(string s, string delimiter) {
+static vector<string> split(string s, string delimiter) {
 	size_t pos_start = 0, pos_end, delim_len = delimiter.length();
 	string token;
 	vector<string> res;
@ -38,7 +43,7 @@ vector<string> split(string s, string delimiter) {
 }


-void removeSubstrs(string& s, string& p) {
+static void removeSubstrs(string& s, string& p) {
 	string::size_type n = p.length();
 	for (string::size_type i = s.find(p);
 		i != string::npos;
@ -46,8 +51,24 @@ void removeSubstrs(string& s, string& p) {
 		s.erase(i, n);
 }

+static int getPrecedence(char c) {
+	// Rank of a binary operator: 1 for '*' and '/', 0 for '+' and '-', -1 for any other character.

-bool isArithmitic(char c) {
+	if (c == '*') return 1;
+	if (c == '/') return 1;
+	if (c == '+') return 0;
+	if (c == '-') return 0;
+	return -1;
+}
+
+
+// Returns true only when c1 ranks strictly above c2 in getPrecedence();
+// equal ranks (e.g. '+' vs '-') and c2 ranking higher both return false.
+static bool compPrecedence(char c1, char c2) {
+	return getPrecedence(c1) > getPrecedence(c2);
+}
+
+static bool isArithmitic(char c) {
 	if (c == '-') return true;
 	if (c == '+') return true;
 	if (c == '/') return true;
@ -55,33 +76,47 @@ bool isArithmitic(char c) {
 	return false;
 }

-bool isEqualChar(char c) {
+static bool isEqualChar(char c) {
 	if (c == '=') return true;
 	return false;
 }
-bool isBrackets(char c) {
-	if (c == '(') return true;
+// Reports whether c is the exponent marker '^'.
+static bool isPower(char c) {
+	return c == '^';
+}
+static bool isBrackets(char c) {
+	if (c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c== '}') return true;
 	return false;
 }

-bool isContain(string str, char c) {
+// Reports whether c opens a bracket pair: '(', '[' or '{'.
+static bool isBracketsOpening(char c) {
+	return c == '(' || c == '[' || c == '{';
+}
+
+// Reports whether c closes a bracket pair: ')', ']' or '}'.
+static bool isBracketsClosing(char c) {
+	return c == ')' || c == ']' || c == '}';
+}
+
+static bool isContain(string str, char c) {
 	for (int i = 0; i < str.size(); i++) if (str[i] == c) return true;
 	return false;
 }

-bool isContainAlph(string str) {
+static bool isContainAlph(string str) {
 	for (int i = 0; i < str.size(); i++)
 		if (isalpha(str[i])) return true;
 	return false;
 }

-bool isContainDigit(string str) {
+static bool isContainDigit(string str) {
 	for (int i = 0; i < str.size(); i++)
 		if (isdigit(str[i])) return true;
 	return false;
 }

-int indexofAlph(string str) {
+static int indexofAlph(string str) {
 	if (!isContainAlph(str)) return -1;

 	for (int i = 0; i < str.size(); i++)
@ -91,19 +126,19 @@ int indexofAlph(string str) {
 	return-1;
 }

-int indexofChar(string str, char c) {
+static int indexofChar(string str, char c) {
 	for (int i = 0; i < str.size(); i++)
 		if (str[i] == c) return i;
 	return -1;
 }


-bool is_all_digits(const std::string &str)
+static bool is_all_digits(const std::string &str)
 {
 	return std::all_of(str.begin(), str.end(), ::isdigit); // C++11
 }

-int getTermType(string str) {
+static int getTermType(string str) {
 	if (isContain(str, '=')) return Equ;
 	if (isContain(str, '+')) return Op;
 	if (isContain(str, '-')) return Op;
--- a/QuicMaf/maths/terms/Paranthesis.h
+++ b/QuicMaf/maths/terms/Paranthesis.h
@ -0,0 +1,31 @@
+#ifndef PARANTHESIS_H
+#define PARANTHESIS_H
+#pragma once
+
+using namespace std;
+#include "Term.h"
+#include "../defines.h"  
+
+enum ParanTypes { // bracket-kind codes; no code in this header refers to them yet
+	NORM_PARAN = 1, // presumably round "( )" - inferred from the name, confirm
+	BRAK_PARAN = 2, // presumably square "[ ]" - inferred from the name, confirm
+	CURL_PARAN = 3 // presumably curly "{ }" - inferred from the name, confirm
+};
+
+// One bracket character as a Term: records whether it opens or closes and an integer kind code.
+class Paranthesis : public Term {
+public:
+	Paranthesis() = default;
+	Paranthesis(bool isopening, int type) : m_isOpening(isopening), m_type(type) {}
+	bool isOpening() const { return m_isOpening; }
+	int getType() const { return m_type; } // kind code; stays 0 until a constructor or setType stores one
+	// Both setters must assign; written with `==` they would compare and discard the result.
+	void setOpening(bool is) { m_isOpening = is; }
+	void setType(int type) { m_type = type; }
+private:
+	bool m_isOpening = true; // no "__" in member names: such identifiers are reserved in C++
+	int m_type = 0;
+};
+
+
+#endif // !PARANTHESIS_H
--- a/QuicMaf/maths/terms/term_parser.h
+++ b/QuicMaf/maths/terms/term_parser.h
@ -21,6 +21,7 @@ using namespace std;
 struct Token {
 	unsigned int begin = 0;
 	unsigned int end = 0;
+
 };

 string retrieveSubString(string str, Token token) {
--- a/QuicMaf/maths/tokenizer.h
+++ b/QuicMaf/maths/tokenizer.h
@ -0,0 +1,237 @@
+#ifndef TOKENIZER_H
+#define TOKENIZER_H
+#pragma once
+#include "defines.h"
+#include "terms/Brackets.h"
+#include "terms/Constant.h"
+#include "terms/Equal.h"
+#include "terms/Operator.h"
+#include "terms/Term.h"
+#include "terms/Variable.h"
+#include "terms/Paranthesis.h"
+
+#include "../vendor/lexertk.hpp"
+
+using namespace std;
+
+struct Token { // inclusive [begin, end] index range into a string or a lexer's token list
+	int begin{0};
+	int end{0};
+	Token() {}
+	Token(int first, int last) : begin(first), end(last) {}
+};
+
+static string retrieveSubString(string str, Token token) {
+	// Copies str[token.begin..token.end] (inclusive, '\0' skipped), clamped so no index falls outside str.
+	string res;
+	for (int i = (token.begin < 0 ? 0 : token.begin); i <= token.end && i < static_cast<int>(str.size()); i++)
+		if (str[i] != '\0') res.push_back(str[i]);
+	return res;
+}
+
+static lexertk::generator retriveSubLexer(lexertk::generator gen, Token tok) {
+	// Glues the text of tokens gen[tok.begin..tok.end] (inclusive) together with no separator
+	// and lexes that string again, returning the fresh generator.
+	string joined;
+	int pos = tok.begin;
+	while (pos <= tok.end) {
+		joined.append(gen[pos].value);
+		++pos;
+	}
+	lexertk::generator sub;
+	sub.process(joined);
+	return sub;
+}
+
+//struct Paranthesis {
+//	bool isOpening = true;
+//	int pos = -1;
+//
+//	Paranthesis(bool is, int _pos) : isOpening(is), pos(_pos) {}
+//};
+
+static vector<Term*> tokenize(lexertk::generator lexed); // must be static like the definition below
+
+// Builds a Bracket from the group whose opening bracket is gen[token->begin]; sets token->end to the
+// matching closing token. A non-empty coefficient is tokenized and passed to Bracket::setConstant().
+static Bracket* tokenize_bracket(lexertk::generator gen, Token* token, string coefficient) {
+	Bracket* result = new Bracket();
+
+	// DETERMINE THE ENDING OF THE BRACKETS
+	// counter is the nesting depth; running out of tokens first means a bracket was never closed.
+	int counter = 0;
+	bool state = false;
+	int index = token->begin;
+	do {
+		if (index >= static_cast<int>(gen.size())) {
+			cout << "Brackets are not balanced!" << endl;
+			system("PAUSE");
+			exit(0);
+		}
+		if (isBracketsOpening(gen[index].value[0])) {
+			counter++;
+			state = true;
+		}
+		else if (isBracketsClosing(gen[index].value[0])) {
+			counter--;
+			state = false;
+		}
+		index++;
+	} while (state || counter != 0);
+
+	lexertk::generator bracks = retriveSubLexer(gen, Token(token->begin, index - 1));
+	token->end = index - 1; // lets the caller resume right after the closing bracket
+
+	// DELETE THIS BRACKET PARANTHESIS (drop the first and last token)
+	bracks = retriveSubLexer(bracks, Token(1, static_cast<int>(bracks.size()) - 2));
+
+	// Tokenize its terms, but first make sure it is not empty
+	if (bracks.empty()) {
+		cout << "Brackets can't be empty!" << endl;
+		system("PAUSE");
+		exit(0);
+	}
+	result->mTerms = tokenize(bracks);
+
+	// ADD COEFFICIENT TO RESULT
+	if (coefficient != "") {
+		lexertk::generator lex;
+		lex.process(coefficient);
+		result->setConstant(tokenize(lex)[0]);
+	}
+	return result;
+}
+
+// Converts a lexed expression into Term objects: Constant, Variable, Bracket, Operator and Equal.
+// Each pass starts at token i and records in tok.end the last token it consumed, so a coefficient,
+// variable and power that belong together become one Term. Tokens matching no branch are skipped.
+// The returned pointers are allocated with new; nothing in this function frees them.
+// NOTE(review): lexed[i + n] is read without a size check - confirm lexertk tolerates that.
+static vector<Term*> tokenize(lexertk::generator lexed) {
+	vector<Term*> result;
+
+	for (int i = 0; i < lexed.size(); i++) {
+		auto lex = lexed[i];
+		auto after_lex = lexed[i + 1];
+		// end must start at i: with Token's default of 0, a token no branch handles would send the loop back to index 0
+		Token tok(i, i);
+
+		if (is_all_digits(lex.value)) {
+			// number
+			// check for variables
+			if (isalpha(after_lex.value[0])) {
+				// variable detected
+
+				// check for power
+				// if so read the power and its constant
+				if (isPower(lexed[i + 2].value[0])) {
+					// powers ONLY can be numbers no evaluation is done in the power
+					// ex: 5^2*3 // the expression will be 5 by 5 then multiply 3
+					if (!is_all_digits(lexed[i + 3].value)) {
+						cout << "ONLY numbers are allowed in powers!" << endl;
+						system("PAUSE");
+						exit(0);
+					}
+
+					Variable* Var = nullptr;
+					Var = new Variable(atof(&lex.value[0]), after_lex.value[0], atof(&lexed[i + 3].value[0]));
+					result.push_back(Var);
+					tok.end = i + 3;
+					/////// ENDING OF TREE
+				}
+				else {
+					// The variable is simple!
+					Variable* Var = nullptr;
+					Var = new Variable(atof(&lex.value[0]), after_lex.value[0]);
+					result.push_back(Var);
+					tok.end = i + 1;
+				}
+			}
+			else if (isBrackets(after_lex.value[0]) && isBracketsOpening(after_lex.value[0])) {
+				// check for brackets,
+				// if so tokenize the brackets
+				tok.begin++; // consume the coefficient
+				result.push_back(tokenize_bracket(lexed, &tok, lex.value));
+				/////// ENDING OF TREE
+			}
+			else if (isPower(after_lex.value[0])) {
+				// check for powers,
+				// if so read the power and its constant
+				// powers ONLY can be numbers no evaluation is done in the power
+				// ex: 5^2*3 // the expression will be 5 by 5 then multiply 3
+				if (!is_all_digits(lexed[i + 2].value)) {
+					cout << "ONLY numbers are allowed in powers!" << endl;
+					system("PAUSE");
+					exit(0);
+				}
+
+				Constant* Const = nullptr;
+				Const = new Constant(atof(&lex.value[0]), atof(&lexed[i + 2].value[0]));
+				result.push_back(Const);
+				tok.end = i + 2;
+				/////// ENDING OF TREE
+			}
+			else {
+				// The number is simple!
+				Constant* Const = nullptr;
+				Const = new Constant(atof(&lex.value[0]));
+				result.push_back(Const);
+				tok.end = i;
+				/////// ENDING OF TREE
+			}
+		}
+		else if (isalpha(lex.value[0])) {
+			// variable
+			// check for power
+			// if so read the power and its constant
+			if (isPower(after_lex.value[0])) {
+				// powers ONLY can be numbers no evaluation is done in the power
+				// ex: 5^2*3 // the expression will be 5 by 5 then multiply 3
+				if (!is_all_digits(lexed[i + 2].value)) {
+					cout << "ONLY numbers are allowed in powers!" << endl;
+					system("PAUSE");
+					exit(0);
+				}
+
+				Variable* Var = nullptr;
+				Var = new Variable(1.0, lex.value[0], atof(&lexed[i + 2].value[0])); // parse the digits; value[0] alone is a character code
+				result.push_back(Var);
+				tok.end = i + 2;
+				/////// ENDING OF TREE
+			}
+			else {
+				// The variable is simple!
+				Variable* Var = nullptr;
+				Var = new Variable(1.0, lex.value[0]);
+				result.push_back(Var);
+				tok.end = i;
+				/////// ENDING OF TREE
+			}
+		}
+		else if (isBracketsOpening(lex.value[0])) {
+			// bracket
+			result.push_back(tokenize_bracket(lexed, &tok, ""));
+		}
+		else if (isArithmitic(lex.value[0])) {
+			// operator
+
+			Operator *op = nullptr;
+			op = new Operator(lex.value[0]);
+			result.push_back(op);
+			tok.end = i;
+		}
+		else if (isEqualChar(lex.value[0])) {
+			// equal sign
+
+			Equal* equ = nullptr;
+			equ = new Equal();
+			result.push_back(equ);
+			tok.end = i;
+		}
+
+		i = tok.end; // no need to increment, automatically done in loop statement
+	}
+
+	return result;
+}
+#endif // !TOKENIZER_H
--- a/QuicMaf/vendor/lexertk.hpp
+++ b/QuicMaf/vendor/lexertk.hpp
--- a/bin/Debug-x64/QuicMaf.exe
+++ b/bin/Debug-x64/QuicMaf.exe
--- a/bin/Debug-x64/QuicMaf.ilk
+++ b/bin/Debug-x64/QuicMaf.ilk
--- a/bin/Debug-x64/QuicMaf.pdb
+++ b/bin/Debug-x64/QuicMaf.pdb