From c2b02f0f1e0ae9aac021a185dfc8422c4a3cf164 Mon Sep 17 00:00:00 2001 From: fsan Date: Mon, 1 Nov 2021 19:12:49 -0300 Subject: [PATCH] BIG COMMIT!! Conditional jumps added, way too much stuff changed for a single commit... --- info | 28 ++++++-- lexer.cpp | 24 ++++++- main.cpp | 2 +- orga-comp.h | 48 ++++++++++++-- parser.cpp | 122 ++++++++++++++++++++++++++++++++--- scope.cpp | 50 ++++++++++++++- translator.cpp | 170 +++++++++++++++++++++++++++++++++++++------------ tst.cfran | 15 +++-- utilities.cpp | 12 ++++ 9 files changed, 399 insertions(+), 72 deletions(-) diff --git a/info b/info index 71ac61a..478d2bd 100644 --- a/info +++ b/info @@ -10,13 +10,18 @@ exp -> exp op exp (operacion) -> opexp op -> + (suma) -> plus op -> - (resta) -> minus -IR: -example: -a = 2 + 2 -=> -a = 4 -=> -MOV a, 0x0004 +The formal sintax represented above will no longer be +used. Conditional jumps will be added. +For this we need the notion of "blocks" Im still thinking +on how to best approach this. + +prgm -> stms +compound -> block, cjump; +cjump -> if opexp do block end +block -> stm; stm.. +op -> = (cmp) + +lex -> parse -> semantic -> codgen a = b + 2 => @@ -37,3 +42,12 @@ equiv representation MOV TMP, 0x0002 exp('2') ADD TMP, [b] exp('b') '+' MOV a, TMP + +para la orga small + +a = b + 2 +=> +SET R1, 0x02 +LOAD R2, [b] +ADD R1, R2 +STR [a], R1 \ No newline at end of file diff --git a/lexer.cpp b/lexer.cpp index 9e87f16..e3f8f3c 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -10,8 +10,6 @@ lexer::lex_file(std::string filename) { // ; -> semi std::ifstream file("tst.cfran", std::ifstream::in); - bool id = false; - bool num = false; std::string *value = new std::string(); char c; @@ -51,7 +49,11 @@ lexer::lex_file(std::string filename) { case '=': if(id) add_token(token::TOK_ID, *value); if(num) add_token(token::TOK_NUM, *value); - add_token(token::TOK_ASSIGN, ""); + if(cond) { + add_token(token::TOK_EQ, "="); + } else { + add_token(token::TOK_ASSIGN, ""); + } id = false; num = false; delete value; @@ -67,6 +69,7 @@ lexer::lex_file(std::string filename) { value = new std::string(); //STM END break; + case '\n': case ' ': if(id) add_token(token::TOK_ID, *value); if(num) add_token(token::TOK_NUM, *value); @@ -87,6 +90,21 @@ lexer::lex_file(std::string filename) { void lexer::add_token(token::type type, std::string value) { struct token *token = new struct token; + if(type == token::TOK_ID) { + token->tok_type = type; + token->value = value; + if(value == "if") { + token->tok_type = token::TOK_IF; + cond = true; + } + if(value == "do") { + token->tok_type = token::TOK_DO; + cond = false; + } + if(value == "end") token->tok_type = token::TOK_END; + _tokens.push_back(token); + return; + } token->tok_type = type; token->value = value; _tokens.push_back(token); diff --git a/main.cpp b/main.cpp index f05c1a0..a019b79 100644 --- a/main.cpp +++ b/main.cpp @@ -15,7 +15,7 @@ main(){ std::cout << "\n-----------------------\n"; std::cout << "EVALUATING RESULTING TREE:\n"; std::cout << str_token(*end) << '\n'; - inter.evalSTM(*end); + //inter.evalSTM(*end); // -> intepreter of parse tree. tran.translate(end); return 1; } \ No newline at end of file diff --git a/orga-comp.h b/orga-comp.h index 1e08f94..07b3399 100644 --- a/orga-comp.h +++ b/orga-comp.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -18,21 +19,33 @@ struct token { TOK_ASSIGN, TOK_DO, TOK_WHILE, + TOK_IF, TOK_END, TOK_LPAREN, TOK_RPAREN, TOK_COMMA, TOK_SEMI, + TOK_EQ, STM_ASSIGN, STM_COMPOUND, + STM_CJUMP, + STM_BLOCK, EXP_NUMBER, EXP_ID, - EXP_OPERATION + EXP_OPERATION, + EXP_COMPARE, + BASIC_BLOCK, + CJUMP, + IR_SET, + IR_ADD, + IR_STORE, + IR_CJUMP }; type tok_type; std::string value; struct token *lvalue = nullptr; struct token *rvalue = nullptr; + struct token *tvalue = nullptr; }; /* @@ -65,6 +78,9 @@ class lexer { private: std::vector _tokens; void add_token(token::type, std::string value); + bool id = false; + bool num = false; + bool cond = false; }; class parser { @@ -74,13 +90,17 @@ class parser { ~parser(); private: void reduce(); + void reduce_blck(); void reduce_num(struct token *num); void reduce_id(struct token *id); void reduce_stm(struct token *stm); void reduce_exp(struct token *exp); void statement(); //pops an statement into stack void expression(); //pops an expression into stack + void conditional(); void err(); + + struct token *parse_until_tok_end(); struct token *pop(); void push(struct token *tok); @@ -94,18 +114,26 @@ class scope { enum symbol_type{ NOT_FOUND, VALUE, + FUNCTION, }; scope(); - void add(std::string key, symbol_type type, size_t pos); - symbol_type look(std::string key); + void add(std::string key, symbol_type type); + scope* new_scope(); + symbol_type look_type(std::string key); + size_t look_pos(std::string key); + size_t look_at_pos(std::string key); + void print_scope(); // scope(scope const &father); private: struct node { std::string key; symbol_type type; + size_t pos; struct node *next = nullptr; }; struct node *head; + size_t _var_in_use = 0; + size_t _initial_position = 240; }; class translator { @@ -115,8 +143,16 @@ class translator { to be later consumed by a code generator */ public: - void translate(struct token *head); + size_t translate(struct token *head); private: - void translate_assign(struct token *stm); - void translate_exp(struct token *exp); + size_t translate_assign(struct token *stm, scope *scope); + size_t translate_exp(struct token *exp, scope *scope); + size_t translate_stm(struct token *head, scope *scope); + size_t translate_block(struct token *head, scope *scope); + size_t set_tmp(std::string value); + size_t load_tmp(std::string value, scope *scope); + struct token *_tree; + bool _r0_in_use = false; + size_t _label_counter = 0; + size_t _mem_pos = 0; }; \ No newline at end of file diff --git a/parser.cpp b/parser.cpp index f78abfb..bcc04a5 100644 --- a/parser.cpp +++ b/parser.cpp @@ -17,11 +17,18 @@ parser::parse_tokens() { break; case token::STM_COMPOUND: break; + case token::TOK_IF: + std::cout << "ENTERING COND REDUCTION"; + conditional(); // conditional follows the form of IF EXP DO STM END + break; + case token::TOK_END: + break; default: + std::cout << "ERROR PARSING\n"; err(); break; } - return parse_tokens(); + parse_tokens(); } return _stack.top(); @@ -35,10 +42,10 @@ parser::parser(std::vector tokens) { void parser::reduce() { - if(_stack.size() == 1) return; + if(_stack.size() <= 1) return; struct token *tok = pop(); -// std::cout << str_token(*tok) << " "; + std::cout << str_token(*tok) << " "; switch (tok->tok_type) { case token::TOK_NUM: // std::cout << "reduzco numero\n"; @@ -57,8 +64,10 @@ parser::reduce() { case token::TOK_SEMI: // reduce(); break; -// case token::TOK_ID: - + case token::STM_CJUMP: + std::cout << "CJUMP 44\n"; + reduce_blck(); + break; case token::STM_ASSIGN: reduce_stm(tok); default: break; @@ -71,6 +80,7 @@ parser::reduce_stm(struct token *stm) { struct token *tok = pop(); struct token *tmp; switch (tok->tok_type) { + case token::STM_CJUMP: case token::STM_ASSIGN: case token::STM_COMPOUND: tmp = new struct token; @@ -80,7 +90,9 @@ parser::reduce_stm(struct token *stm) { push(tmp); // std::cout << "CHAD STM REDUCER " << str_token(*tmp); break; - default: + default: + std::cout << "ERROR REDUCING STM\n"; + std::cout << str_token(*tok) << "\n"; err(); break; } @@ -110,7 +122,7 @@ parser::reduce_exp(struct token *exp) { struct token *stm; switch (modifier->tok_type) { case token::TOK_ASSIGN: -// std::cout << "assign found\n"; + std::cout << "assign found\n"; if(_stack.top()->tok_type != token::TOK_ID) err(); // std::cout << str_token(*_stack.top()) << "\n"; stm = new struct token; @@ -119,6 +131,22 @@ parser::reduce_exp(struct token *exp) { stm->value = pop()->value; push(stm); break; + case token::TOK_EQ: + std::cout << "CMP_EXP\n"; + if(_stack.top()->tok_type == token::TOK_NUM) { + reduce_num(pop()); + } + if(_stack.top()->tok_type == token::TOK_ID) { + reduce_id(pop()); + } + stm = new struct token; + stm->tok_type = token::EXP_COMPARE; + stm->value = modifier->value; + stm->rvalue = exp; + stm->lvalue = pop(); + push(stm); + std::cout << "\n" << str_token(*stm) << '\n'; + break; case token::TOK_OP: if(_stack.top()->tok_type == token::TOK_NUM) { reduce_num(pop()); @@ -133,8 +161,12 @@ parser::reduce_exp(struct token *exp) { stm->lvalue = pop(); push(stm); break; - default: err(); + default: + std::cout << "ERROR REDUCING EXP\n"; + err(); } + std::cout << "\nTOKENS " << pos << '\n'; + print_tokens(_tokens); } struct token * @@ -172,18 +204,72 @@ parser::expression() { std::cout << "This shouldn't be here...\n"; err(); break; + case token::TOK_EQ: case token::TOK_OP: expression(); break; case token::TOK_SEMI: reduce(); break; + case token::TOK_DO: + std::cout << "Found do\n"; + reduce_blck(); + break; default: err(); break;; } } +void +parser::reduce_blck() { + if(_stack.size() == 1) return; + struct token *tok = pop(); + std::cout << "\n--REDUCING BLOCK--\n"; + std::cout << str_token(*tok) << "\n"; + struct token *tmp; + switch (tok->tok_type) { + case token::STM_CJUMP: + case token::STM_COMPOUND: + case token::STM_ASSIGN: + reduce_stm(tok); + break; + case token::EXP_COMPARE: + assert(_stack.top()->tok_type == token::TOK_IF); + pop(); + tmp = new struct token; + tmp->tok_type = token::STM_CJUMP; + tmp->rvalue = parse_until_tok_end(); + tmp->lvalue = tok; + std::cout << str_token(*tmp->lvalue) << "\n"; + std::cout << str_token(*tmp) << "\n"; + std::cout << "CJUMP 5\n"; + push(tmp); + break; + case token::TOK_NUM: + reduce_num(tok); + break; + case token::TOK_ID: + reduce_id(tok); + break; + case token::EXP_ID: + case token::EXP_OPERATION: + case token::EXP_NUMBER: + // EL SIGUIENTE ES UN OP O UN ASSIGN + reduce_exp(tok); + break; + default: break; + } + reduce_blck(); +} + +void +parser::conditional() { + push(_tokens[pos]); + pos++; + expression(); +} + void parser::err() { std::cout @@ -193,6 +279,22 @@ parser::err() { exit(-1); } -parser::~parser(){ - free_tree(pop()); +struct token* +parser::parse_until_tok_end() { + std::vector tokens; + int depth = 0; + while (depth >= 0) { + tokens.push_back(_tokens[pos]); + if(_tokens[pos]->tok_type == token::TOK_DO) depth++; + if(_tokens[pos]->tok_type == token::TOK_END) depth--; + pos++; + } + tokens.pop_back(); + parser pars(tokens); + std::cout << "NESTED PARSING BEGIN \n"; + return pars.parse_tokens(); +} + +parser::~parser() { +// free_tree(pop()); } \ No newline at end of file diff --git a/scope.cpp b/scope.cpp index 10dc52c..d7dbb4c 100644 --- a/scope.cpp +++ b/scope.cpp @@ -1,5 +1,7 @@ #include "orga-comp.h" #include +#include +#include #include scope::scope(){ @@ -7,22 +9,66 @@ scope::scope(){ } void -scope::add(std::string key, symbol_type type, size_t pos){ +scope::add(std::string key, symbol_type type){ if (head == nullptr){ head = new node; head->key = key; head->type = type; + head->pos = _initial_position + _var_in_use; + head->next = nullptr; }else{ struct node *tmp = head; head = new node; head->key = key; head->type = type; + head->pos = _initial_position + _var_in_use; head->next = tmp; } + _var_in_use++; +} + +void +scope::print_scope(){ + std::cout << "SCOPE AT : " << this << "\n"; + struct node *tmp = head; + while(tmp != nullptr){ + std::cout << tmp->key << " @ " << tmp->pos << "\n"; + tmp = tmp->next; + } } +scope* +scope::new_scope(){ + scope *tmp = new scope(); + tmp->head = head; + tmp->_var_in_use = _var_in_use; + return tmp; +} + +size_t +scope::look_pos(std::string key){ + struct node *tmp = head; + while(tmp != nullptr){ + if(tmp->key == key) return tmp->pos; + tmp = tmp->next; + } + add(key, symbol_type::VALUE); + return head->pos; +} + +size_t +scope::look_at_pos(std::string key){ + struct node *tmp = head; + while(tmp != nullptr){ + if(tmp->key == key) return tmp->pos; + tmp = tmp->next; + } + std::cout << "\n--ERROR--\n"; + std::cout << "ID " << key << " NOT DEFINED\n"; + std::exit(-1); +} scope::symbol_type -scope::look(std::string key){ +scope::look_type(std::string key){ struct node *tmp = head; while(tmp != nullptr){ if(tmp->key == key) return tmp->type; diff --git a/translator.cpp b/translator.cpp index 0292727..5b88a42 100644 --- a/translator.cpp +++ b/translator.cpp @@ -1,50 +1,142 @@ #include "orga-comp.h" +#include #include #include #include +#include -void +size_t translator::translate(struct token *head){ - switch (head->tok_type) { - case token::STM_COMPOUND: - translate(head->lvalue); - translate(head->rvalue); - break; - case token::STM_ASSIGN: - translate_assign(head); - break; - default: exit(-1); - } + scope *scp = new scope(); + size_t size = translate_stm(head, scp); + return size; } -void -translator::translate_assign(struct token *stm){ - struct token *exp = stm->rvalue; - translate_exp(exp); - std::cout << "STORE INTO " << stm->value << '\n'; +size_t +translator::translate_block(struct token *head, scope *scope){ + size_t size = 0; + switch (head->tok_type) { + case token::STM_COMPOUND: + size += translate_assign(head->lvalue, scope); + size += translate_assign(head->rvalue, scope); + return size; + break; + case token::STM_ASSIGN: + return translate_assign(head, scope); + break; + default: exit(-1); + } } -void -translator::translate_exp(struct token *exp){ - switch (exp->tok_type) { - case token::EXP_NUMBER: - std::cout << "LOAD LITERAL 0x" << std::hex << exp->value << '\n'; - break; - case token::EXP_ID: - std::cout << "LOAD " << exp->value << '\n'; - break; - case token::EXP_OPERATION: - translate_exp(exp->rvalue); - switch (exp->lvalue->tok_type) { - case token::EXP_NUMBER: - std::cout << "ADD 0x" << std::hex << exp->lvalue->value << '\n'; - break; - case token::EXP_ID: - std::cout << "ADD [" << std::hex << exp->lvalue->value << "]\n"; - break; - default: exit(-1); - } - break; - default: exit(-1); - } +size_t +translator::translate_stm(struct token *head, scope *scp){ + size_t size = 0; + size_t pos = _mem_pos; + size_t lbl = _label_counter; + scope *tmp_scp; + switch (head->tok_type) { + case token::STM_CJUMP: + size += translate_exp(head->lvalue, scp); + pos += size; + std::cout << "JZ do" << lbl << "\n"; +// << std::hex +// << std::setw(2) +// << std::setfill('0') +// << pos++ << '\n'; + std::cout << "JMP " + << "end" + << lbl << '\n'; + std::cout << "do" << lbl << ":\n"; + _label_counter++; + tmp_scp = scp->new_scope(); + size += translate_stm(head->rvalue, tmp_scp); + std::cout << "end" << lbl << ":\n"; + break; + case token::STM_COMPOUND: + size += translate_stm(head->lvalue, scp); + size += translate_stm(head->rvalue, scp); + break; + case token::STM_ASSIGN: + size = translate_assign(head, scp); + break; + default: exit(-1); + } + _mem_pos += size; + return size; +} + +size_t +translator::translate_assign(struct token *stm, scope *scope){ + size_t size = 0; + struct token *exp = stm->rvalue; + size += translate_exp(exp, scope); + std::cout << "STR [0x" + << scope->look_pos(stm->value) + << ']' + << ", R0\n"; + size++; +// scope->print_scope(); + _r0_in_use = false; + return size; +} + +size_t +translator::translate_exp(struct token *exp, scope *scope){ + size_t size = 0; + switch (exp->tok_type) { + case token::EXP_NUMBER: + size = set_tmp(exp->value); + break; + case token::EXP_ID: + size = load_tmp(exp->value, scope); + break; + case token::EXP_OPERATION: + size += translate_exp(exp->rvalue, scope); + size += translate_exp(exp->lvalue, scope); + std::cout << "ADD R0, R1\n"; + size++; + break; + case token::EXP_COMPARE: + size += translate_exp(exp->rvalue, scope); + size += translate_exp(exp->lvalue, scope); + std::cout << "AND R0, R1\n"; + size++; + break; + default: exit(-1); + } + return size; +} + +size_t +translator::set_tmp(std::string value){ + if(!_r0_in_use){ + std::cout << "SET R0, 0x" + << std::hex + << std::setw(2) + << std::setfill('0') + << value << '\n'; + _r0_in_use = true; + }else{ + std::cout << "SET R1, 0x" + << std::hex + << std::setw(2) + << std::setfill('0') + << value << '\n'; + } + return 1; +} + +size_t +translator::load_tmp(std::string value, scope *scope){ + if(!_r0_in_use){ + std::cout << "LOAD R0, [0x" + << scope->look_at_pos(value) + << "]\n"; + _r0_in_use = true; + }else{ + std::cout << "LOAD R1, [0x" + << scope->look_at_pos(value) + << "]\n"; + } + return 1; } \ No newline at end of file diff --git a/tst.cfran b/tst.cfran index 4dffe7b..35041a3 100644 --- a/tst.cfran +++ b/tst.cfran @@ -1,4 +1,11 @@ -a = 3; -j = 2 + 1; -h = 2 + 1; -p = a + 1; \ No newline at end of file +a = 2; +b = 8; +if a = 3 do + a = 1; + c = a + 3; + if b = 8 do + a = 4; + end + b = 4; +end +j = 4; \ No newline at end of file diff --git a/utilities.cpp b/utilities.cpp index aa1022b..bc8a5bf 100644 --- a/utilities.cpp +++ b/utilities.cpp @@ -46,14 +46,26 @@ str_token(struct token tok){ return "SEMI "; case token::STM_ASSIGN: return "STM_ASSIGN( " + tok.value + ", " + str_token(*tok.rvalue) + ")"; + case token::STM_CJUMP: + return "STM_CJUMP( " + str_token(*tok.lvalue) + ", " + str_token(*tok.rvalue) + ")"; case token::STM_COMPOUND: return "STM_COMPOUND< " + str_token(*tok.lvalue) + ", " + str_token(*tok.rvalue) + ">"; case token::EXP_NUMBER: return "EXP_NUMBER(" + tok.value + ")"; case token::EXP_OPERATION: return "EXP_OPERATION( " + str_token(*tok.lvalue) + " " + tok.value + " " + str_token(*tok.rvalue) +")"; + case token::EXP_COMPARE: + return "EXP_COMPARE( " + str_token(*tok.lvalue) + " " + tok.value + " " + str_token(*tok.rvalue) +")"; case token::EXP_ID: return "EXP_ID "; + case token::TOK_END: + return "END "; + case token::TOK_IF: + return "IF "; + case token::TOK_DO: + return "DO "; + case token::TOK_EQ: + return "EQUAL "; default: return "SOMETHING"; }