QuicMaf/QuicMaf/maths/tokenizer.h

329 lines
8.5 KiB
C++

/*
***************************************************************************
The goal of this class is to tokenize an equation string and split
it into mathematical terms, with each bracketed group being one complete term on its own.
Ex:
2(5 + 9) - 56=90
If the above input was given to the main function in this class it must give an output of:
vector:
- 2(5 + 9)
- -
- 56
- =
- 90
***************************************************************************
*/
#ifndef TOKENIZER_H
#define TOKENIZER_H
#pragma once
#include "defines.h"
#include "terms/Brackets.h"
#include "terms/Constant.h"
#include "terms/Equal.h"
#include "terms/Operator.h"
#include "terms/Term.h"
#include "terms/Variable.h"
#include "terms/Paranthesis.h"
#include "../vendor/lexertk.hpp"
using namespace std;
// Inclusive [begin, end] index range, used to address a run of characters
// in a string or a run of tokens in a lexer.
struct Token {
	int begin;
	int end;

	// Range [0, 0].
	Token() : begin(0), end(0) {}

	// Range [b, e].
	Token(int b, int e) : begin(b), end(e) {}
};
// Returns the characters of `str` at indices token.begin .. token.end
// (inclusive), leaving out any '\0' characters found in that range.
//
// The requested range is clamped to the valid indices of `str`, so a token
// that starts before the string or runs past its end yields only the part
// that overlaps the string (possibly an empty result) instead of reading
// out of bounds.
static string retrieveSubString(string str, Token token) {
	const int lastIndex = static_cast<int>(str.size()) - 1;
	const int first = token.begin < 0 ? 0 : token.begin;
	const int last = token.end > lastIndex ? lastIndex : token.end;

	string res;
	for (int i = first; i <= last; i++)
		if (str[i] != '\0') // ignore null characters
			res.push_back(str[i]);
	return res;
}
// Builds a new lexer from the tokens gen[tok.begin] .. gen[tok.end]
// (inclusive): their text values are concatenated with nothing inserted
// between them, and the resulting string is lexed again from scratch.
static lexertk::generator retriveSubLexer(lexertk::generator gen, Token tok) {
	// Join the text of every token in the requested range.
	string joined;
	for (int pos = tok.begin; pos <= tok.end; ++pos)
		joined.append(gen[pos].value);

	// Lex the joined text; the result of process() is not inspected.
	lexertk::generator sub;
	sub.process(joined);
	return sub;
}
//struct Paranthesis {
// bool isOpening = true;
// int pos = -1;
//
// Paranthesis(bool is, int _pos) : isOpening(is), pos(_pos) {}
//};
// Forward declaration: tokenize_bracket() recurses into tokenize(), which is
// defined further down in this header. It is declared static here so that the
// declaration has the same (internal) linkage as that static definition.
static vector<Term*> tokenize(lexertk::generator lexed);
// Parses the bracketed group that starts at lexer token `token->begin` and
// returns it as a newly allocated Bracket.
//
//   gen         - the lexed expression that contains the bracket.
//   token       - in:  begin is the index of the opening bracket in `gen`.
//                 out: end is set to the index of the token that closes it, so
//                      the caller can continue right after the bracket.
//   coefficient - text of the coefficient written in front of the bracket, or
//                 "" when there is none. When non-empty it is tokenized and the
//                 first resulting term becomes the bracket's constant.
//
// Prints a message and terminates the program when the bracket is never
// closed or when there is nothing between the brackets.
static Bracket* tokenize_bracket(lexertk::generator gen, Token* token, string coefficient) {
	// DETERMINE THE ENDING OF THE BRACKETS
	// `depth` counts brackets that are still open; `lastWasOpening` records
	// whether the most recent bracket seen was an opening one. The scan is done
	// once the most recent bracket was a closing one and nothing is left open.
	const int tokenCount = static_cast<int>(gen.size());
	int depth = 0;
	bool lastWasOpening = false;
	int index = token->begin;
	for (;;) {
		const char symbol = gen[index].value[0];
		if (isBracketsOpening(symbol)) {
			depth++;
			lastWasOpening = true;
		}
		else if (isBracketsClosing(symbol)) {
			depth--;
			lastWasOpening = false;
		}
		index++;

		if (!lastWasOpening && depth == 0)
			break;

		// Ran out of tokens while a bracket is still open: without this check
		// the scan would keep walking past the end of the token list forever.
		if (index >= tokenCount) {
			cout << "Brackets are not balanced!" << endl;
			system("PAUSE");
			exit(0);
		}
	}

	// `index` is one past the closing bracket.
	const int closing = index - 1;
	token->end = closing; // to make sure we move the token pointer to the end of bracks

	// Re-lex the bracket on its own, then drop its first and last token
	// (the surrounding brackets) to keep only the content.
	lexertk::generator bracks = retriveSubLexer(gen, Token(token->begin, closing));
	bracks = retriveSubLexer(bracks, Token(1, bracks.size() - 2));

	// first make sure it is not empty
	if (bracks.empty()) {
		cout << "Brackets can't be empty!" << endl;
		system("PAUSE");
		exit(0);
	}

	// Tokenize the content (recursively handles nested brackets).
	Bracket* result = new Bracket();
	result->mTerms = tokenize(bracks);

	// ADD COEFFICIENT TO RESULT
	if (coefficient != "") {
		lexertk::generator lex;
		lex.process(coefficient);
		result->setConstant(tokenize(lex)[0]);
	}
	return result;
}
// Folds a leading '+' or '-' operator into the bracket that follows it.
//
// Two shapes are rewritten while copying `terms` into the result:
//   [+/-] [Bracket]                      -> one Bracket
//   [+/-] [Constant|Variable] [Bracket]  -> one Bracket
// In both cases the new Bracket receives the old bracket's terms and its
// constant, after that constant's mValue has been multiplied by +1 ('+') or
// -1 ('-'). Every other term is copied through unchanged, including a '+' or
// '-' that is the very last term.
//
// NOTE(review): in the three-term shape the Constant/Variable between the sign
// and the bracket is not carried into the result - confirm that is intended.
// NOTE(review): the value returned by GetConstant() is dereferenced without a
// null check; verify that a Bracket built without a coefficient still has one.
static vector<Term*> combineBrackets(vector<Term*> terms) {
	// Builds the signed replacement for `bracketTerm`. The constant is modified
	// in place and then handed to the new bracket.
	auto mergeSign = [](Term* sign, Term* bracketTerm) -> Bracket* {
		Bracket* source = static_cast<Bracket*>(bracketTerm);
		Bracket* merged = new Bracket();
		merged->mTerms = source->mTerms;
		auto constant = source->GetConstant();
		constant->mValue *= (sign->mOperator == '+') ? +1 : -1;
		merged->setConstant(constant);
		return merged;
	};

	vector<Term*> result;
	const size_t count = terms.size();
	for (size_t i = 0; i < count; i++) {
		Term* term = terms[i];
		const bool isSign = term->mType == TermTypes::Op &&
			(term->mOperator == '-' || term->mOperator == '+');

		// Only look ahead when there really is a following term; a trailing
		// sign used to read one element past the end of the vector.
		if (isSign && i + 1 < count) {
			Term* next = terms[i + 1];
			if (next->mType == TermTypes::Const || next->mType == TermTypes::Var) {
				// sign, number/variable, bracket
				if (i + 2 < count && terms[i + 2]->mType == TermTypes::Brack) {
					result.push_back(mergeSign(term, terms[i + 2]));
					i += 2; // skip the middle term and the bracket
					continue;
				}
			}
			else if (next->mType == TermTypes::Brack) {
				// sign, bracket
				result.push_back(mergeSign(term, next));
				i += 1; // skip the bracket
				continue;
			}
		}
		result.push_back(term);
	}
	return result;
}
// Converts a lexed expression into a flat list of newly allocated terms.
//
// Token patterns recognised, scanning left to right:
//   number                      -> Constant
//   number ^ number             -> Constant with a power
//   number letter               -> Variable with a coefficient
//   number letter ^ number      -> Variable with a coefficient and a power
//   letter                      -> Variable with coefficient 1.0
//   letter ^ number             -> Variable with coefficient 1.0 and a power
//   any numeric form above immediately followed by "(" (or, for a plain
//   number, by any opening bracket) -> Bracket, with the text in front of it
//                                      passed on as its coefficient
//   opening bracket             -> Bracket without a coefficient
//   arithmetic operator         -> Operator
//   equal sign                  -> Equal
// A token that matches none of these is skipped.
//
// Powers must be plain numbers; otherwise a message is printed and the
// program terminates. The finished list is passed through combineBrackets()
// before it is returned.
//
// NOTE(review): the look-ahead reads lexed[i + 1] .. lexed[i + 4] without
// checking against lexed.size(); this presumably relies on
// lexertk::generator::operator[] being safe for out-of-range indices -
// confirm against vendor/lexertk.hpp.
static vector<Term*> tokenize(lexertk::generator lexed) {
	// Terminates the program unless `power` is made up of digits only.
	// powers ONLY can be numbers, no evaluation is done in the power
	// ex: 5^2*3 // the expression will be 5 by 5 then multiply 3
	auto requireNumericPower = [](string power) {
		if (!is_all_digits(power)) {
			cout << "ONLY numbers are allowed in powers!" << endl;
			system("PAUSE");
			exit(0);
		}
	};

	vector<Term*> result;
	const int count = static_cast<int>(lexed.size());
	for (int i = 0; i < count; i++) {
		auto lex = lexed[i];
		auto after_lex = lexed[i + 1];

		// tok.end is the index of the last token consumed by this iteration.
		// It starts at i so that a token no branch recognises is simply
		// stepped over; leaving it at 0 used to rewind the loop to the start
		// of the expression and never finish.
		Token tok(i, i);

		if (is_all_digits(lex.value)) {
			// number
			if (isalpha(after_lex.value[0])) {
				// number followed by a variable
				if (isPower(lexed[i + 2].value[0])) {
					// number, variable, '^', power
					requireNumericPower(lexed[i + 3].value);
					if (lexed[i + 4].value == "(") {
						// bracket detected
						tok.begin += 4; // consume coefficient
						result.push_back(tokenize_bracket(lexed, &tok, lex.value + after_lex.value + lexed[i + 2].value + lexed[i + 3].value));
					}
					else {
						// simple power variable
						result.push_back(new Variable(atof(lex.value.c_str()), after_lex.value[0], atof(lexed[i + 3].value.c_str())));
						tok.end = i + 3;
					}
				}
				else if (lexed[i + 2].value == "(") {
					// bracket detected
					tok.begin += 2; // consume coefficient
					result.push_back(tokenize_bracket(lexed, &tok, lex.value + after_lex.value));
				}
				else {
					// The variable is simple!
					result.push_back(new Variable(atof(lex.value.c_str()), after_lex.value[0]));
					tok.end = i + 1;
				}
			}
			else if (isBrackets(after_lex.value[0]) && isBracketsOpening(after_lex.value[0])) {
				// number directly followed by a bracket
				tok.begin++; // consume the coefficient
				result.push_back(tokenize_bracket(lexed, &tok, lex.value));
			}
			else if (isPower(after_lex.value[0])) {
				// number, '^', power
				requireNumericPower(lexed[i + 2].value);
				if (lexed[i + 3].value == "(") {
					tok.begin += 3; // consume coefficient
					result.push_back(tokenize_bracket(lexed, &tok, lex.value + after_lex.value + lexed[i + 2].value));
				}
				else {
					result.push_back(new Constant(atof(lex.value.c_str()), atof(lexed[i + 2].value.c_str())));
					tok.end = i + 2;
				}
			}
			else {
				// The number is simple!
				result.push_back(new Constant(atof(lex.value.c_str())));
				tok.end = i;
			}
		}
		else if (isalpha(lex.value[0])) {
			// variable without a written coefficient
			if (isPower(after_lex.value[0])) {
				requireNumericPower(lexed[i + 2].value);
				// Convert the whole exponent text to a number, as the other
				// power branches do; passing its first character would hand
				// over the character code instead of the value.
				result.push_back(new Variable(1.0, lex.value[0], atof(lexed[i + 2].value.c_str())));
				tok.end = i + 2;
			}
			else {
				// The variable is simple!
				result.push_back(new Variable(1.0, lex.value[0]));
				tok.end = i;
			}
		}
		else if (isBracketsOpening(lex.value[0])) {
			// bracket without a coefficient; tokenize_bracket sets tok.end
			result.push_back(tokenize_bracket(lexed, &tok, ""));
		}
		else if (isArithmitic(lex.value[0])) {
			// operator
			result.push_back(new Operator(lex.value[0]));
			tok.end = i;
		}
		else if (isEqualChar(lex.value[0])) {
			// equal sign
			result.push_back(new Equal());
			tok.end = i;
		}

		i = tok.end; // no need to increment, automatically done in loop statement
	}
	return combineBrackets(result);
}
#endif // !TOKENIZER_H