commit 3f2ed01095dcbdfa5808765db92e9de07701e539
Author: fsan
Date:   Fri Oct 1 11:20:03 2021 -0300

    Initial commit

diff --git a/a.out b/a.out
new file mode 100755
index 0000000..42e273c
Binary files /dev/null and b/a.out differ
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..df231ee
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+g++ -g main.cpp interpreter.cpp lexer.cpp parser.cpp
diff --git a/info b/info
new file mode 100644
index 0000000..b57895e
--- /dev/null
+++ b/info
@@ -0,0 +1,11 @@
+lenguaje orga-straight-line
+
+prgm -> stms
+stms -> stm; stm (s = s1;s2) -> compound
+stms -> stms; stm -> compound
+stm -> id = exp (asignacion) -> assign
+exp -> numero (literal) -> number
+exp -> id (idExp) -> id
+exp -> exp op exp (operacion) -> opexp
+op -> + (suma) -> plus
+op -> - (resta) -> minus
\ No newline at end of file
diff --git a/interpreter.cpp b/interpreter.cpp
new file mode 100644
index 0000000..e53bd09
--- /dev/null
+++ b/interpreter.cpp
@@ -0,0 +1,63 @@
+#include "orga-comp.h"
+
+// Prints the interpreter's error message and terminates the process.
+[[noreturn]] static void
+fail() {
+    std::cout << "ERROR \n";
+    exit(-1);
+}
+
+// Executes one statement node of the syntax tree.
+//   STM_ASSIGN   - evaluates stm.rvalue, stores the result in `table` under
+//                  the name in stm.lvalue->value and prints the binding.
+//   STM_COMPOUND - executes stm.lvalue, then stm.rvalue.
+// A node of any other type, or one whose child pointer is null, is reported
+// through fail().
+void
+interpreter::evalSTM(struct token stm) {
+    switch (stm.t) {
+        case token::STM_ASSIGN: {
+            if (stm.lvalue == nullptr || stm.rvalue == nullptr) fail();
+            // Evaluate the expression before touching the table entry.
+            const int result = evalEXP(*stm.rvalue);
+            table[stm.lvalue->value] = result;
+            std::cout << stm.lvalue->value << " igual a " << result << '\n';
+            break;
+        }
+        case token::STM_COMPOUND:
+            if (stm.lvalue == nullptr || stm.rvalue == nullptr) fail();
+            evalSTM(*stm.lvalue);
+            evalSTM(*stm.rvalue);
+            break;
+        default:
+            fail();
+    }
+}
+
+// Computes the integer value of an expression node.
+//   EXP_NUMBER    - exp.value converted with std::stoi; exceptions thrown by
+//                   std::stoi are not caught here.
+//   EXP_ID        - the value stored in `table` for exp.value. operator[]
+//                   inserts the name with value 0 if it was never assigned.
+//   EXP_OPERATION - exp.value is "+" or "-", applied to the values of
+//                   exp.lvalue and exp.rvalue.
+// Anything else (other node types, an unknown operator, a null child) ends
+// in fail(); the call sits after the switch so that no path can leave this
+// non-void function without returning a value.
+int
+interpreter::evalEXP(struct token exp) {
+    switch (exp.t) {
+        case token::EXP_NUMBER:
+            return std::stoi(exp.value);
+        case token::EXP_ID:
+            return table[exp.value];
+        case token::EXP_OPERATION:
+            if (exp.lvalue == nullptr || exp.rvalue == nullptr) break;
+            if (exp.value == "+") return evalEXP(*exp.lvalue) + evalEXP(*exp.rvalue);
+            if (exp.value == "-") return evalEXP(*exp.lvalue) - evalEXP(*exp.rvalue);
+            break;
+        default:
+            break;
+    }
+    fail();
+}
\ No newline at end of file
diff --git a/lexer.cpp b/lexer.cpp
new file mode 100644
index 0000000..21ea6e6
--- /dev/null
+++ b/lexer.cpp
@@ -0,0 +1,77 @@
+#include "orga-comp.h"
+
+// Reads `filename` character by character and returns its tokens:
+//   a letter followed by letters or digits -> TOK_ID
+//   digits                                 -> TOK_NUM
+//   + or -                                 -> TOK_OP (value is the character)
+//   =                                      -> TOK_ASSIGN
+//   ;                                      -> TOK_SEMI
+// A space, tab or line break ends the identifier or number being read;
+// any other character is skipped. A letter directly after a number is
+// reported as an invalid identifier and ends the process, as does a file
+// that cannot be opened. The token list is printed before it is returned.
+std::vector<struct token>
+lexer::lex_file(std::string filename) {
+    std::ifstream file(filename, std::ifstream::in);
+    if (!file) {
+        std::cout << "Could not open " << filename << '\n';
+        exit(-1);
+    }
+
+    bool id = false;    // the pending text is an identifier
+    bool num = false;   // the pending text is a number
+    std::string value;  // text of the identifier or number being read
+
+    // Emits the pending identifier or number, if any, and starts afresh.
+    auto flush = [&]() {
+        if (id) add_token(token::TOK_ID, value);
+        if (num) add_token(token::TOK_NUM, value);
+        id = false;
+        num = false;
+        value.clear();
+    };
+
+    char c;
+    while (file.get(c)) {
+        const bool letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+        const bool digit = c >= '0' && c <= '9';
+
+        if (letter) {
+            if (num) {
+                std::cout << "Invalid identifier " << value << c << '\n';
+                exit(-1);
+            }
+            id = true;
+            value += c;
+        } else if (digit) {
+            // A digit extends an identifier; otherwise it is part of a number.
+            if (!id) num = true;
+            value += c;
+        } else if (c == '+' || c == '-') {
+            flush();
+            add_token(token::TOK_OP, std::string(1, c));
+        } else if (c == '=') {
+            flush();
+            add_token(token::TOK_ASSIGN, "");
+        } else if (c == ';') {
+            flush();
+            add_token(token::TOK_SEMI, "");
+        } else if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
+            flush();
+        }
+    }
+    flush();  // the file may end right after an identifier or a number
+
+    print_tokens(_tokens);
+
+    return _tokens;
+}
+
+// Appends a token with the given type and text to _tokens.
+void
+lexer::add_token(token::type type, std::string value) {
+    struct token tok;
+    tok.t = type;
+    tok.value = value;
+    _tokens.push_back(tok);
+}
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..34d90b3
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,91 @@
+#include "orga-comp.h"
+#include <iostream>
+
+// Evaluates a hand-built tree for `a = 2 + 2`, then lexes tst.cfran, parses
+// the tokens and evaluates the resulting tree with the same interpreter.
+// Returns 0; errors in the lexer, parser or interpreter end the process
+// from inside those components.
+int
+main(){
+    // Hand-built tree for: a = 2 + 2
+    struct token lnum;
+    lnum.t = token::EXP_NUMBER;
+    lnum.value = "2";
+
+    struct token rnum;
+    rnum.t = token::EXP_NUMBER;
+    rnum.value = "2";
+
+    struct token rexp;
+    rexp.t = token::EXP_OPERATION;
+    rexp.value = "+";  // evalEXP selects the operation from `value`
+    rexp.lvalue = &lnum;
+    rexp.rvalue = &rnum;
+
+    struct token lexp;
+    lexp.t = token::EXP_ID;
+    lexp.value = "a";
+
+    struct token test;
+    test.t = token::STM_ASSIGN;
+    test.lvalue = &lexp;
+    test.rvalue = &rexp;
+
+    interpreter inter;
+    inter.evalSTM(test);
+
+    // lex -> parse -> evaluate. tree_parser stays alive while `end` is used
+    // because the tree's child pointers refer to nodes it owns.
+    lexer lex;
+    parser tree_parser(lex.lex_file("tst.cfran"));
+    struct token end = tree_parser.parse_tokens();
+    std::cout << "\n-----------------------\n";
+    std::cout << "EVALUATING RESULTING TREE:\n";
+    inter.evalSTM(end);
+
+    return 0;
+}
+
+// Prints every token in `tokens` on one line, separated by spaces.
+void
+print_tokens(std::vector<struct token> tokens){
+    for (const struct token &tok : tokens) {
+        std::cout << str_token(tok) << " ";
+    }
+    std::cout << '\n';
+}
+
+// Text for a child pointer; "<null>" when the child is missing.
+static std::string
+str_child(const struct token *child) {
+    return child ? str_token(*child) : std::string("<null>");
+}
+
+// Returns a printable form of a token or tree node, including its children.
+std::string
+str_token(struct token tok){
+    switch (tok.t) {
+        case token::TOK_ID:
+            return "ID(" + tok.value + ")";
+        case token::TOK_NUM:
+            return "NUM(" + tok.value + ")";
+        case token::TOK_OP:
+            return "OP(" + tok.value + ")";
+        case token::TOK_ASSIGN:
+            return "ASSIGN ";
+        case token::TOK_SEMI:
+            return "SEMI ";
+        case token::STM_ASSIGN:
+            return "STM_ASSIGN( " + str_child(tok.lvalue) + ", " + str_child(tok.rvalue) + ")";
+        case token::STM_COMPOUND:
+            return "STM_COMPOUND( " + str_child(tok.lvalue) + ", " + str_child(tok.rvalue) + ")";
+        case token::EXP_NUMBER:
+            return "EXP_NUMBER(" + tok.value + ")";
+        case token::EXP_OPERATION:
+            return "EXP_OPERATION( " + str_child(tok.lvalue) + " " + tok.value + " " + str_child(tok.rvalue) + ")";
+        case token::EXP_ID:
+            return "EXP_ID(" + tok.value + ")";
+        default:
+            return "SOMETHING";
+    }
+}
\ No newline at end of file
diff --git a/orga-comp.h b/orga-comp.h
new file mode 100644
index 0000000..c0017b5
--- /dev/null
+++ b/orga-comp.h
@@ -0,0 +1,92 @@
+#pragma once
+
+#include <cassert>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <stack>
+#include <string>
+#include <vector>
+
+// A lexer token and, after parsing, a node of the syntax tree.
+// The lexer produces TOK_ID, TOK_NUM, TOK_OP, TOK_ASSIGN and TOK_SEMI; the
+// parser builds the STM_* and EXP_* nodes.
+struct token {
+    enum type {
+        TOK_ID,
+        TOK_NUM,
+        TOK_OP,
+        TOK_ASSIGN,
+        TOK_DO,
+        TOK_END,
+        TOK_LPAREN,
+        TOK_RPAREN,
+        TOK_COMMA,
+        TOK_SEMI,
+        STM_ASSIGN,
+        STM_COMPOUND,
+        EXP_NUMBER,
+        EXP_ID,
+        EXP_OPERATION
+    };
+    type t = TOK_ID;
+    std::string value;               // identifier name, number text or operator
+    struct token *lvalue = nullptr;  // left child or assignment target; not owned
+    struct token *rvalue = nullptr;  // right child or assigned expression; not owned
+};
+
+void print_tokens(std::vector<struct token> tokens);
+std::string str_token(struct token tok);
+
+// Evaluates syntax trees; `table` maps each variable name to its value.
+class interpreter {
+
+public:
+    void evalSTM(struct token stm);
+    int evalEXP(struct token exp);
+
+private:
+    std::map<std::string, int> table;
+};
+
+// Turns the text of a source file into a list of tokens.
+class lexer {
+    public:
+        std::vector<struct token> lex_file(std::string filename);
+    private:
+        std::vector<struct token> _tokens;
+        void add_token(token::type, std::string value);
+};
+
+// Builds a syntax tree from lexer tokens using a stack of pending elements.
+// The parser owns every child node of the trees it returns, so a tree may
+// only be used while the parser that produced it is alive.
+class parser {
+    public:
+        parser(std::vector<struct token> tokens);
+        struct token parse_tokens();
+    private:
+        void reduce();
+        void reduce_num(struct token num);
+        void reduce_id(struct token id);
+        void reduce_stm(struct token stm);
+        void reduce_exp(struct token exp);
+        void statement();   // shifts the rest of a statement after its identifier
+        void expression();  // shifts an expression up to ';' and reduces it
+        [[noreturn]] void err();
+        void shift();       // pushes the next input token onto the stack
+
+        struct token pop();
+        struct token *keep(const struct token &node);
+        struct token make_operation(struct token left, std::string op, struct token right);
+        std::vector<struct token> _tokens;
+        std::stack<struct token> _stack;
+        std::vector<std::unique_ptr<struct token>> _nodes;  // storage for child nodes
+        size_t pos;
+};
+
+class translate {
+
+};
\ No newline at end of file
diff --git a/parser.cpp b/parser.cpp
new file mode 100644
index 0000000..a8a179d
--- /dev/null
+++ b/parser.cpp
@@ -0,0 +1,248 @@
+#include "orga-comp.h"
+
+// Parses all tokens and returns the root of the syntax tree.
+// Tokens are shifted onto _stack one statement at a time and reduced when
+// the statement's ';' is read, which leaves a single STM_ASSIGN or
+// STM_COMPOUND node on the stack after every statement.
+// The child pointers of the returned node refer to nodes owned by this
+// parser. A syntax error or an empty token list ends the process.
+struct token
+parser::parse_tokens() {
+    while (pos < _tokens.size()) {
+        _stack.push(_tokens[pos]);
+        pos++;
+        switch (_stack.top().t) {
+            case token::TOK_ID:
+                std::cout << "STATEMENT\n";
+                statement();
+                break;
+            case token::STM_COMPOUND:
+                break;
+            default:
+                err();
+        }
+    }
+    if (_stack.empty()) {
+        std::cout << "nothing to parse\n";
+        exit(-1);
+    }
+    return _stack.top();
+}
+
+// Stores the tokens to parse and starts at the first one.
+parser::parser(std::vector<struct token> tokens)
+    : _tokens(tokens), pos(0) {
+}
+
+// Reduces the stack until one element is left.
+// Each step pops the top element and hands it to the matching reduce_*
+// helper, which pushes the reduced node back. A popped ';' is dropped; a
+// token that cannot start a reduction is a syntax error.
+void
+parser::reduce() {
+    while (_stack.size() != 1) {
+        struct token tok = pop();
+
+        std::cout << str_token(tok) << " ";
+        switch (tok.t) {
+            case token::TOK_NUM:
+                std::cout << "reduzco numero\n";
+                reduce_num(tok);
+                break;
+            case token::EXP_ID:
+            case token::EXP_OPERATION:
+            case token::EXP_NUMBER:
+                // The element below is an operator or the assignment sign.
+                reduce_exp(tok);
+                break;
+            case token::TOK_ID:
+                reduce_id(tok);
+                break;
+            case token::TOK_SEMI:
+                break;
+            case token::STM_ASSIGN:
+                reduce_stm(tok);
+                break;
+            default:
+                _stack.push(tok);  // put it back so err() reports this token
+                err();
+        }
+    }
+}
+
+// Combines the statement `stm` with the statement below it on the stack
+// into a STM_COMPOUND node (older statement on the left) and pushes it.
+void
+parser::reduce_stm(struct token stm) {
+    struct token previous = pop();
+    if (previous.t != token::STM_ASSIGN && previous.t != token::STM_COMPOUND) {
+        _stack.push(previous);  // put it back so err() reports this token
+        err();
+    }
+    struct token compound;
+    compound.t = token::STM_COMPOUND;
+    compound.lvalue = keep(previous);
+    compound.rvalue = keep(stm);
+    _stack.push(compound);
+    std::cout << "CHAD STM REDUCER " << str_token(compound);
+}
+
+// Pushes an EXP_ID node carrying the name of the identifier token `id`.
+void
+parser::reduce_id(struct token id) {
+    struct token exp;
+    exp.t = token::EXP_ID;
+    exp.value = id.value;
+    _stack.push(exp);
+}
+
+// Pushes an EXP_NUMBER node carrying the text of the number token `num`.
+void
+parser::reduce_num(struct token num) {
+    struct token exp;
+    exp.t = token::EXP_NUMBER;
+    exp.value = num.value;
+    _stack.push(exp);
+}
+
+// Reduces the expression `exp` together with the element below it:
+//   TOK_ASSIGN - the identifier below becomes the target and a STM_ASSIGN
+//                node is pushed;
+//   TOK_OP     - the element below the operator is the left operand and an
+//                EXP_OPERATION node is pushed.
+// Any other element is a syntax error.
+void
+parser::reduce_exp(struct token exp) {
+    std::cout << "reducing exp: " << str_token(exp) << "\n";
+    struct token modifier = pop();
+    switch (modifier.t) {
+        case token::TOK_ASSIGN: {
+            std::cout << "assign found\n";
+            if (_stack.empty() || _stack.top().t != token::TOK_ID) err();
+            std::cout << str_token(_stack.top()) << "\n";
+            struct token stm;
+            stm.t = token::STM_ASSIGN;
+            stm.rvalue = keep(exp);
+            stm.lvalue = keep(pop());
+            _stack.push(stm);
+            break;
+        }
+        case token::TOK_OP: {
+            // The left operand may still be an unreduced lexer token.
+            if (!_stack.empty() && _stack.top().t == token::TOK_NUM) {
+                reduce_num(pop());
+            } else if (!_stack.empty() && _stack.top().t == token::TOK_ID) {
+                reduce_id(pop());
+            }
+            struct token left = pop();
+            if (left.t != token::EXP_NUMBER && left.t != token::EXP_ID
+                    && left.t != token::EXP_OPERATION) {
+                _stack.push(left);  // put it back so err() reports this token
+                err();
+            }
+            _stack.push(make_operation(left, modifier.value, exp));
+            break;
+        }
+        default:
+            _stack.push(modifier);  // put it back so err() reports this token
+            err();
+    }
+}
+
+// Builds the node for `left op right`.
+// The stack is reduced from right to left, so for a chain like a - b - c
+// `right` already holds (b - c) when a is combined. The lexer has no rule
+// for parentheses, so an operation on the right can only come from such a
+// chain; `left op` is therefore applied to the leftmost operand of `right`,
+// which gives the left-to-right grouping ((a - b) - c).
+struct token
+parser::make_operation(struct token left, std::string op, struct token right) {
+    if (right.t == token::EXP_OPERATION && right.lvalue != nullptr) {
+        right.lvalue = keep(make_operation(left, op, *right.lvalue));
+        return right;
+    }
+    struct token node;
+    node.t = token::EXP_OPERATION;
+    node.value = op;
+    node.lvalue = keep(left);
+    node.rvalue = keep(right);
+    return node;
+}
+
+// Stores a copy of `node` that lives as long as this parser and returns a
+// pointer to it; used for the children of tree nodes.
+struct token *
+parser::keep(const struct token &node) {
+    _nodes.push_back(std::unique_ptr<struct token>(new token(node)));
+    return _nodes.back().get();
+}
+
+// Removes and returns the top of the stack; an empty stack is a syntax error.
+struct token
+parser::pop() {
+    if (_stack.empty()) err();
+    struct token top = _stack.top();
+    _stack.pop();
+    return top;
+}
+
+// Pushes the next input token onto the stack.
+// Running out of tokens in the middle of a statement ends the process.
+void
+parser::shift() {
+    if (pos >= _tokens.size()) {
+        std::cout << "unexpected end of input\n";
+        exit(-1);
+    }
+    _stack.push(_tokens[pos]);
+    pos++;
+}
+
+// Shifts the '=' that must follow a statement's identifier, then the
+// expression after it.
+void
+parser::statement() {
+    shift();
+    if (_stack.top().t != token::TOK_ASSIGN) err();
+    expression();
+}
+
+// Shifts identifiers, numbers and operators until ';' is read, then reduces
+// the stack. Any other token is a syntax error.
+void
+parser::expression() {
+    for (;;) {
+        shift();
+        std::cout << "POS: " << pos << '\n';
+        switch (_stack.top().t) {
+            case token::TOK_ID:
+            case token::TOK_NUM:
+            case token::TOK_OP:
+                break;  // part of the expression: keep shifting
+            case token::EXP_OPERATION:
+                std::cout << "Nested operations not supported\n";
+                err();
+                break;
+            case token::TOK_SEMI:
+                std::cout << "REDUCE\n";
+                reduce();
+                return;
+            default:
+                err();
+        }
+    }
+}
+
+// Prints the element on top of the stack as an unexpected symbol and ends
+// the process with exit(-1).
+void
+parser::err() {
+    std::cout << "unexpected symbol: ";
+    if (_stack.empty()) {
+        std::cout << "<empty stack>";
+    } else {
+        std::cout << str_token(_stack.top());
+    }
+    std::cout << '\n';
+    exit(-1);
+}
diff --git a/tst.cfran b/tst.cfran
new file mode 100644
index 0000000..225c631
--- /dev/null
+++ b/tst.cfran
@@ -0,0 +1,6 @@
+a = 2 + 2;
+j = 2 + 1;
+h = 2 + 1;
+b = 4 + 8;
+p = a + 1;
+c = 2 - 1;
\ No newline at end of file