Initial project import
This commit is contained in:
commit
2e14ac0b3f
|
@ -0,0 +1,42 @@
|
|||
# Makefile for lci.
#
# Object files are produced by make's built-in .c -> .o rule, which passes
# $(CPPFLAGS) (and $(CFLAGS)) to $(CC).

TARGET = lci
LIBS = -lm
OBJS = lexer.o tokenizer.o parser.o interpreter.o unicode.o main.o
SRCS = lexer.c tokenizer.c parser.c interpreter.c unicode.c main.c
# Resolved through PATH so that installation does not depend on install(1)
# living in one particular directory; override with `make INSTALL=...`.
INSTALL = install -c
CPPFLAGS = -O2

prefix = /usr/local
bindir = $(prefix)/bin
testdir = ./test

# These targets are commands, not files; declaring them phony keeps a stray
# file of the same name (e.g. "check" or "clean") from silencing them.
.PHONY: all pedantic check check-mem install docs clean distclean

all: $(TARGET)

# $(LIBS) is deliberately NOT a prerequisite: make would try to resolve
# "-lm" to a library file in its own search path and abort when it cannot.
# The linker resolves it instead.
$(TARGET): $(OBJS)
	$(CC) $(CPPFLAGS) -o $(TARGET) $(OBJS) $(LIBS)

# NOTE(review): the warning flags below appear on the link line only; objects
# already compiled by the built-in rule are not rebuilt with them -- confirm
# whether that is the intent.
pedantic: $(OBJS)
	$(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -W -Wall -ansi -pedantic -g -o $(TARGET) $(OBJS) $(LIBS)

check: all
	@cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.2-Tests/

check-mem: all
	@echo "This will take a long time! Be patient!"
	@cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.2-Tests/

install: all
	$(INSTALL) $(TARGET) $(bindir)/$(TARGET)

TAGS: $(SRCS)
	ctags $(SRCS)

docs: Doxyfile $(SRCS)
	doxygen

clean:
	-rm -f $(OBJS)
	-rm -f $(TARGET)

distclean: clean
	-rm -f tags
	-rm -rf html
|
|
@ -0,0 +1,57 @@
|
|||
lci - a LOLCODE interpreter written in C
|
||||
|
||||
LICENSE
|
||||
|
||||
Copyright (C) 2010 Justin J. Meza
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
ABOUT
|
||||
|
||||
lci is a LOLCODE interpreter written in C and is designed to be correct,
|
||||
portable, fast, and precisely documented.
|
||||
|
||||
* correct: Every effort has been made to test lci's conformance to the
|
||||
LOLCODE language specification. Unit tests come packaged with the lci
|
||||
source code.
|
||||
* portable: lci follows the widely ported ANSI C specification allowing it
|
||||
to compile on a broad range of systems.
|
||||
* fast: Much effort has gone into producing simple and efficient code
|
||||
whenever possible to the extent that the above points are not
|
||||
compromised.
|
||||
* precisely documented: lci uses Doxygen to generate literate code
|
||||
documentation, browsable here.
|
||||
|
||||
This project's homepage is at http://icanhaslolcode.org. For help, visit
|
||||
http://groups.google.com/group/lci-general. To report a bug, go to
|
||||
http://github.com/justinmeza/lci/issues.
|
||||
|
||||
Created and maintained by Justin J. Meza <justin.meza@gmail.com>.
|
||||
|
||||
INSTALLATION
|
||||
|
||||
To install lci, you should be able to
|
||||
|
||||
$ make && make check
|
||||
|
||||
and, assuming no errors are encountered (if there are, please let us know at
|
||||
http://groups.google.com/group/lci-general) go ahead and run (with administrator
|
||||
privileges)
|
||||
|
||||
# make install
|
||||
|
||||
Optionally, you may want to make documentation for lci. This requires the
|
||||
doxygen program. To do so, do
|
||||
|
||||
$ make docs
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,202 @@
|
|||
/** Structures and functions for interpreting a parse tree. The interpreter
|
||||
* traverses a parse tree in a depth-first manner, interpreting each node it
|
||||
* reaches along the way. This is the last stage of the processing of a source
|
||||
* code file.
|
||||
*
|
||||
* \file interpreter.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010 */
|
||||
|
||||
#ifndef __INTERPRETER_H__
|
||||
#define __INTERPRETER_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "parser.h"
|
||||
#include "unicode.h"
|
||||
|
||||
/** Gets the integer data associated with a ValueObject structure.
 *
 * \a value is any expression yielding a pointer to a structure with a
 * \c data member; it is parenthesized so that expressions such as \c &obj
 * expand correctly. */
#define getInteger(value) ((value)->data.i)

/** Gets the floating point data associated with a ValueObject structure.
 *
 * \a value is parenthesized for the same reason as in getInteger(). */
#define getFloat(value) ((value)->data.f)

/** Gets the string data associated with a ValueObject structure.
 *
 * \a value is parenthesized for the same reason as in getInteger(). */
#define getString(value) ((value)->data.s)
|
||||
|
||||
/** Identifies which kind of value a ValueObject currently holds. */
typedef enum {
	/** An integer value. */
	VT_INTEGER,
	/** A floating point decimal value. */
	VT_FLOAT,
	/** A true/false value. */
	VT_BOOLEAN,
	/** A character string value. */
	VT_STRING,
	/** Represents no value. */
	VT_NIL
} ValueType;
|
||||
|
||||
/** Holds the payload of a ValueObject structure. The members share storage,
 * so only one of them is meaningful at a time. */
typedef union {
	/** Integer data. */
	int i;
	/** Floating point data. */
	float f;
	/** Character string data. */
	char *s;
} ValueData;
|
||||
|
||||
/** Increments the semaphore of a ValueObject structure.
 *
 * \a value is any expression yielding a pointer to a structure with a
 * \c semaphore member; it is parenthesized so that expressions such as
 * \c &obj expand correctly. */
#define V(value) ((value)->semaphore++)

/** Decrements the semaphore of a ValueObject structure.
 *
 * \a value is parenthesized for the same reason as in V(). */
#define P(value) ((value)->semaphore--)
|
||||
|
||||
/** Stores a value.
|
||||
*
|
||||
* \see copyValueObject(ValueObject *)
|
||||
* \see deleteValueObject(ValueObject *) */
|
||||
typedef struct {
|
||||
ValueType type; /**< The type of value stored. */
|
||||
ValueData data; /**< The stored data. */
|
||||
unsigned short semaphore; /**< A semaphore for value usage. */
|
||||
} ValueObject;
|
||||
|
||||
/** Identifies how a block of code finished executing. */
typedef enum {
	/** A block of code returned after evaluating all of its statements. */
	RT_DEFAULT,
	/** A block of code within a LoopStmtNode or SwitchStmtNode returned via
	 * a break statement. */
	RT_BREAK,
	/** A block of code within a FuncDefStmtNode called by a
	 * FuncCallExprNode returned (either with or without a value). */
	RT_RETURN
} ReturnType;
|
||||
|
||||
/** Stores a return state. Returns are encountered when
|
||||
* - a block of code evaluates all of its statements,
|
||||
* - a block of code within a LoopStmt or SwitchStmt encountered a break statement, or
|
||||
* - a block of code within a FunctionDefStmt called by a FunctionCallExpr encounters a ReturnStmt. */
|
||||
typedef struct {
|
||||
ReturnType type; /**< The type of return encountered. */
|
||||
ValueObject *value; /**< The optional return value. */
|
||||
} ReturnObject;
|
||||
|
||||
/** Stores the variables in a particular scope. Scopes are arranged
|
||||
* heirarchically from global (the ancestor of all other scopes) to local (the
|
||||
* temporary scope of a BlockNode).
|
||||
*
|
||||
* \see createScopeObject(ScopeObject *)
|
||||
* \see deleteScopeObject(ScopeObject *) */
|
||||
typedef struct scopeobject {
|
||||
struct scopeobject *parent; /**< A pointer to the parent ScopeObject. */
|
||||
ValueObject *impvar; /**< A pointer to the ValueObject representing the implicit variable for this scope. */
|
||||
unsigned int numvals; /**< The number of ValueObject structures in \a values. */
|
||||
IdentifierNode **names; /**< A pointer to the IdentifierNode structures naming the values in the scope. */
|
||||
ValueObject **values; /**< A pointer to an array of ValueObject structures in the scope. */
|
||||
} ScopeObject;
|
||||
|
||||
char *createString(char *);
|
||||
ValueObject *createNilValueObject(void);
|
||||
ValueObject *createBooleanValueObject(int);
|
||||
ValueObject *createIntegerValueObject(int);
|
||||
ValueObject *createFloatValueObject(float);
|
||||
ValueObject *createStringValueObject(char *);
|
||||
ValueObject *copyValueObject(ValueObject *);
|
||||
void deleteValueObject(ValueObject *);
|
||||
ReturnObject *createReturnObject(ReturnType, ValueObject *);
|
||||
void deleteReturnObject(ReturnObject *);
|
||||
ScopeObject *createScopeObject(ScopeObject *);
|
||||
void deleteScopeObject(ScopeObject *);
|
||||
ValueObject *getScopeValue(ScopeObject *, IdentifierNode *);
|
||||
ValueObject *getLocalScopeValue(ScopeObject *, IdentifierNode *);
|
||||
ValueObject *createScopeValue(ScopeObject *, IdentifierNode *);
|
||||
ValueObject *updateScopeValue(ScopeObject *, IdentifierNode *, ValueObject *);
|
||||
unsigned int isNumString(const char *);
|
||||
unsigned int isHexString(const char *);
|
||||
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castStringExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castBooleanImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castIntegerImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castFloatImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castStringImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *interpretExprNode(ExprNode *, ScopeObject *);
|
||||
ReturnObject *interpretStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretStmtNodeList(StmtNodeList *, ScopeObject *);
|
||||
ReturnObject *interpretBlockNode(BlockNode *, ScopeObject *);
|
||||
int interpretMainNode(MainNode *);
|
||||
|
||||
ValueObject *interpretImpVarExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretCastExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretFuncCallExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretIdentifierExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretConstantExprNode(ExprNode *, ScopeObject *);
|
||||
|
||||
ValueObject *interpretNotOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretArithOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretBoolOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretEqualityOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretConcatOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretOpExprNode(ExprNode *, ScopeObject *);
|
||||
|
||||
ReturnObject *interpretCastStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretPrintStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretInputStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretAssignmentStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretDeclarationStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretIfThenElseStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretSwitchStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretBreakStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretReturnStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretLoopStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretFuncDefStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretExprStmtNode(StmtNode *, ScopeObject *);
|
||||
|
||||
ValueObject *opAddIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opDivIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMaxIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opModIntegerInteger(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opAddIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opDivIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMaxIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opModIntegerFloat(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opAddFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opDivFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMaxFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opModFloatInteger(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opAddFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opDivFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMaxFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opModFloatFloat(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opEqBooleanBoolean(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqBooleanBoolean(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opEqIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqIntegerInteger(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opEqIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqIntegerFloat(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opEqFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqFloatInteger(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opEqFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqFloatFloat(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opEqStringString(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqStringString(ValueObject *, ValueObject *);
|
||||
|
||||
ValueObject *opEqNilNil(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqNilNil(ValueObject *, ValueObject *);
|
||||
|
||||
#endif /* __INTERPRETER_H__ */
|
|
@ -0,0 +1,309 @@
|
|||
#include "lexer.h"
|
||||
|
||||
/** Creates a Lexeme structure.
|
||||
*
|
||||
* \return A pointer to a Lexeme structure with the desired properties.
|
||||
*
|
||||
* \retval NULL malloc was unable to allocate memory.
|
||||
*
|
||||
* \see deleteLexeme(Lexeme *) */
|
||||
Lexeme *createLexeme(char *image, /**< [in] An array of characters that describe the lexeme. */
|
||||
const char *fname, /**< [in] A pointer to the name of the file containing the lexeme. */
|
||||
unsigned int line) /**< [in] The line number from the source file that the lexeme occurred on. */
|
||||
{
|
||||
Lexeme *ret = malloc(sizeof(Lexeme));
|
||||
if (!ret) {
|
||||
perror("malloc");
|
||||
return NULL;
|
||||
}
|
||||
ret->image = malloc(sizeof(char) * (strlen(image) + 1));
|
||||
if (!(ret->image)) {
|
||||
free(ret);
|
||||
perror("malloc");
|
||||
return NULL;
|
||||
}
|
||||
strcpy(ret->image, image);
|
||||
/** \note fname is not copied because it would only one copy is stored
|
||||
* for all Lexeme structures that share it. */
|
||||
ret->fname = fname;
|
||||
ret->line = line;
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "Creating lexeme [%s]\n", image);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Deletes a Lexeme structure.
|
||||
*
|
||||
* \pre \a lexeme points to a Lexeme structure created by createLexeme(char *, const char *, unsigned int).
|
||||
*
|
||||
* \post The memory at \a lexeme and all of its elements will be freed.
|
||||
*
|
||||
* \see createLexeme(char *, const char *, unsigned int) */
|
||||
void deleteLexeme(Lexeme *lexeme)
|
||||
{
|
||||
if (!lexeme) return;
|
||||
free(lexeme->image);
|
||||
/** \note We do not free (*lex)->fname because it is shared between many
|
||||
* Lexeme structures and is free'd by whoever created them. */
|
||||
free(lexeme);
|
||||
}
|
||||
|
||||
/** Creates a LexemeList structure.
|
||||
*
|
||||
* \return A pointer to a LexemeList structure with the desired properties.
|
||||
*
|
||||
* \retval NULL malloc was unable to allocate memory.
|
||||
*
|
||||
* \see deleteLexemeList(LexemeList *) */
|
||||
LexemeList *createLexemeList(void)
|
||||
{
|
||||
LexemeList *p = malloc(sizeof(LexemeList));
|
||||
if (!p) {
|
||||
perror("malloc");
|
||||
return NULL;
|
||||
}
|
||||
p->num = 0;
|
||||
p->lexemes = NULL;
|
||||
return p;
|
||||
}
|
||||
|
||||
/** Adds a Lexeme structure to a LexemeList structure.
|
||||
*
|
||||
* \pre \a list was created by createLexemeList(void).
|
||||
* \pre \a lexeme was created by createLexeme(char *, const char *, unsigned int).
|
||||
*
|
||||
* \post \a lexeme will be added on to the end of \a list and the size of
|
||||
* \a list will be updated accordingly.
|
||||
*
|
||||
* \return A pointer to the added Lexeme structure (will be the same as
|
||||
* \a lexeme).
|
||||
*
|
||||
* \retval NULL realloc was unable to allocate memory. */
|
||||
Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList structure to add \a lex to. */
|
||||
Lexeme *lexeme) /**< [in] A pointer to the Lexeme structure to add to \a list. */
|
||||
{
|
||||
unsigned int newsize;
|
||||
void *mem = NULL;
|
||||
if (!list) return NULL;
|
||||
newsize = list->num + 1;
|
||||
mem = realloc(list->lexemes, sizeof(Lexeme *) * newsize);
|
||||
if (!mem) {
|
||||
perror("realloc");
|
||||
return NULL;
|
||||
}
|
||||
list->lexemes = mem;
|
||||
list->lexemes[list->num] = lexeme;
|
||||
list->num = newsize;
|
||||
return lexeme;
|
||||
}
|
||||
|
||||
/** Deletes a LexemeList structure.
|
||||
*
|
||||
* \pre \a list was created by createLexemeList(void) and contains
|
||||
* items added by addLexeme(LexemeList *, Lexeme *).
|
||||
*
|
||||
* \post The memory at \a list and any of its associated members will be
|
||||
* freed.
|
||||
*
|
||||
* \see createLexemeList(void) */
|
||||
void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeList structure to delete. */
|
||||
{
|
||||
unsigned int n;
|
||||
if (!list) return;
|
||||
for (n = 0; n < list->num; n++)
|
||||
deleteLexeme(list->lexemes[n]);
|
||||
free(list->lexemes);
|
||||
free(list);
|
||||
}
|
||||
|
||||
/** Scans through a character buffer, removing unecessary characters and
|
||||
* generating lexemes. Lexemes are separated by whitespace (but newline
|
||||
* characters are kept as their own lexeme). String literals are handled a
|
||||
* bit differently: starting at the first quotation character, characters are
|
||||
* collected until either an unescaped quotation character is read (that is, a
|
||||
* quotation character not preceeded by a colon which itself is not proceeded
|
||||
* by a colon) or a newline or carriage return character is read, whichever
|
||||
* comes first. This handles the odd case of strings such as "::" which print
|
||||
* out a single colon. Also handled are the effects of commas, ellipses, and
|
||||
* bangs (!).
|
||||
*
|
||||
* \pre \a size is the number of characters starting at the memory location
|
||||
* pointed to by \a buffer.
|
||||
*
|
||||
* \return A pointer to a LexemeList structure. */
|
||||
LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to tokenize. */
|
||||
unsigned int size, /**< [in] The number of characters in \a buffer. */
|
||||
const char *fname) /**< [in] An array of characters representing the name of the file used to read \a buffer. */
|
||||
{
|
||||
const char *start = buffer;
|
||||
LexemeList *list = NULL;
|
||||
unsigned int line = 1;
|
||||
list = createLexemeList();
|
||||
if (!list) return NULL;
|
||||
while (start < buffer + size) {
|
||||
char *temp = NULL;
|
||||
unsigned int len = 1;
|
||||
/* Comma (,) is a soft newline */
|
||||
if (*start == ',') {
|
||||
Lexeme *lex = createLexeme("\n", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
if (!addLexeme(list, lex)) {
|
||||
deleteLexeme(lex);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
start++;
|
||||
continue;
|
||||
}
|
||||
/* Bang (!) is its own lexeme */
|
||||
if (*start == '!') {
|
||||
Lexeme *lex = createLexeme("!", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
if (!addLexeme(list, lex)) {
|
||||
deleteLexeme(lex);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
start++;
|
||||
continue;
|
||||
}
|
||||
/* Skip over leading whitespace */
|
||||
while (isspace(*start)) {
|
||||
unsigned int newline = 0;
|
||||
/* Newline is its own lexeme */
|
||||
if (!strncmp(start, "\r\n", 2)) {
|
||||
newline = 1;
|
||||
start++;
|
||||
}
|
||||
else if (*start == '\r' || *start == '\n') {
|
||||
newline = 1;
|
||||
}
|
||||
if (newline) {
|
||||
Lexeme *lex = createLexeme("\n", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
if (!addLexeme(list, lex)) {
|
||||
deleteLexeme(lex);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
line++;
|
||||
}
|
||||
start++;
|
||||
continue;
|
||||
}
|
||||
/* Skip over ellipses (...) and newline */
|
||||
if ((!strncmp(start, "\xE2\x80\xA6\r\n", 5) && (start += 5))
|
||||
|| (!strncmp(start, "\xE2\x80\xA6\r", 4) && (start += 4))
|
||||
|| (!strncmp(start, "\xE2\x80\xA6\n", 4) && (start += 4))
|
||||
|| (!strncmp(start, "...\r\n", 5) && (start += 5))
|
||||
|| (!strncmp(start, "...\r", 4) && (start += 4))
|
||||
|| (!strncmp(start, "...\n", 4) && (start += 4))) {
|
||||
const char *test = start;
|
||||
/* Make sure next line is not empty */
|
||||
while (*test && isspace(*test)) {
|
||||
if (*test == '\r' || *test == '\n') {
|
||||
fprintf(stderr, "%s:%d: a line with continuation may not be followed by an empty line\n", fname, line);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
test++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
/* Skip over comments */
|
||||
if ((list->num == 0
|
||||
|| *(list->lexemes[list->num - 1]->image) == '\n')
|
||||
&& !strncmp(start, "OBTW", 4)) {
|
||||
start += 4;
|
||||
while (strncmp(start, "TLDR", 4)) {
|
||||
if ((!strncmp(start, "\r\n", 2) && (start += 2))
|
||||
|| (*start == '\r' && start++)
|
||||
|| (*start == '\n' && start++))
|
||||
line++;
|
||||
else
|
||||
start++;
|
||||
}
|
||||
start += 4;
|
||||
/* Must end in newline */
|
||||
while (*start && isspace(*start) && *start != '\r' && *start != '\n')
|
||||
start++;
|
||||
if (start == buffer || *start == ',' || *start == '\r' || *start == '\n')
|
||||
continue;
|
||||
fprintf(stderr, "%s:%d: multiple line comment may not appear on the same line as code\n", fname, line);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
if (!strncmp(start, "BTW", 3)) {
|
||||
start += 3;
|
||||
while (*start && *start != '\r' && *start != '\n')
|
||||
start++;
|
||||
continue;
|
||||
}
|
||||
/* We have removed or processed any leading characters at this
|
||||
* point */
|
||||
if (!*start) break;
|
||||
if (*start == '"') {
|
||||
/* Find the end of the string, watching for escape
|
||||
* sequences */
|
||||
while ((start[len]
|
||||
&& *(start + len) != '\r'
|
||||
&& *(start + len) != '\n'
|
||||
&& *(start + len) != '"')
|
||||
|| (*(start + len - 1) == ':'
|
||||
&& *(start + len - 2) != '"'))
|
||||
len++;
|
||||
if (*(start + len) == '"') len++;
|
||||
}
|
||||
/* Scan for the end of the token */
|
||||
while (start[len] && !isspace(start[len])
|
||||
&& *(start + len) != ','
|
||||
&& *(start + len) != '!'
|
||||
&& strncmp(start + len, "...", 3)
|
||||
&& strncmp(start + len, "\xE2\x80\xA6", 3))
|
||||
len++;
|
||||
temp = malloc(sizeof(char) * (len + 1));
|
||||
if (!temp) {
|
||||
perror("malloc");
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
strncpy(temp, start, len);
|
||||
temp[len] = '\0';
|
||||
Lexeme *lex = createLexeme(temp, fname, line);
|
||||
if (!lex) {
|
||||
free(temp);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
if (!addLexeme(list, lex)) {
|
||||
free(temp);
|
||||
deleteLexeme(lex);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
free(temp);
|
||||
start += len;
|
||||
}
|
||||
/* Create an end-of-file lexeme */
|
||||
Lexeme *lex = createLexeme("$", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
if (!addLexeme(list, lex)) {
|
||||
deleteLexeme(lex);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
return list;
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/** Structures and functions for separating a character buffer into lexemes. The
|
||||
* lexer reads through a buffer of characters (themselves typically read from
|
||||
* standard input), strips whitespace, and breaks them up into logical atoms of
|
||||
* character strings which, in turn, may be passed on to later processes (such
|
||||
* as a tokenizer).
|
||||
*
|
||||
* \file lexer.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010 */
|
||||
|
||||
#ifndef __LEXER_H__
|
||||
#define __LEXER_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
/** A lexeme: the smallest unit of contiguous characters, that is, text which
 * has been stripped of surrounding whitespace.
 *
 * \note Groups of lexemes are held through pointers to arrays of these
 *       structures rather than through links inside the structure itself.
 *
 * \see createLexeme(char *, const char *, unsigned int) */
typedef struct {
	/** An array of characters that describe the lexeme. */
	char *image;
	/** A pointer to the name of the file containing the lexeme. */
	const char *fname;
	/** The line number from the source file that the lexeme occurred on. */
	unsigned int line;
} Lexeme;
|
||||
|
||||
/** Stores a list of lexemes. This structure allows sets of lexemes to be
|
||||
* grouped together.
|
||||
*
|
||||
* \see createLexemeList(void)
|
||||
* \see addLexeme(LexemeList *, Lexeme *)
|
||||
* \see deleteLexemeList(LexemeList *) */
|
||||
typedef struct {
|
||||
unsigned int num; /**< The number of Lexeme structures stored. */
|
||||
Lexeme **lexemes; /**< A pointer to the array of Lexeme structures. */
|
||||
} LexemeList;
|
||||
|
||||
Lexeme *createLexeme(char *, const char *, unsigned int);
|
||||
void deleteLexeme(Lexeme *);
|
||||
LexemeList *createLexemeList(void);
|
||||
Lexeme *addLexeme(LexemeList *, Lexeme*);
|
||||
void deleteLexemeList(LexemeList *);
|
||||
LexemeList *scanBuffer(const char *, unsigned int, const char *);
|
||||
|
||||
#endif /* __LEXER_H__ */
|
|
@ -0,0 +1,207 @@
|
|||
/** \mainpage lci Documentation
|
||||
*
|
||||
* \section license License
|
||||
*
|
||||
* lci - a LOLCODE interpreter written in C.
|
||||
* Copyright (C) 2010 Justin J. Meza
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* \section maintainer Maintainer
|
||||
*
|
||||
* The lead maintainer for this project is Justin J. Meza (justin.meza@gmail.com).
|
||||
* For more information, check this project's webpage at http://icanhaslolcode.org .
|
||||
*
|
||||
* \section about About
|
||||
*
|
||||
* lci is a LOLCODE interpreter written in C and is designed to be correct,
|
||||
* portable, fast, and precisely documented.
|
||||
*
|
||||
* - correct: Every effort has been made to test lci's conformance to the
|
||||
* LOLCODE language specification. Unit tests come packaged with
|
||||
* the lci source code.
|
||||
* - portable: lci follows the widely ported ANSI C specification allowing it
|
||||
* to compile on a broad range of systems.
|
||||
* - fast: Much effort has gone into producing simple and efficient code
|
||||
* whenever possible to the extent that the above points are not
|
||||
* compromised.
|
||||
* - precisely documented: lci uses Doxygen to generate literate code
|
||||
* documentation, browsable here.
|
||||
*
|
||||
* \section organization Organization
|
||||
*
|
||||
* lci employs several different modules which each perform a specific task
|
||||
* during interpretation of code:
|
||||
*
|
||||
* - \b lexer (lexer.c, lexer.h)- The lexer takes an array of characters and
|
||||
* splits it up into individual \e lexemes. Lexemes are divided by
|
||||
* whitespace and other rules of the language.
|
||||
* - \b tokenizer (tokenizer.c, tokenizer.h) - The tokenizer takes the
|
||||
* output of the lexer and converts it into individual \e tokens. Tokens
|
||||
* are different from lexemes in that a single token may be made up of
|
||||
* multiple lexemes. Also, the contents of some tokens are evaluated (such
|
||||
* as integers and floats) for later use.
|
||||
* - \b parser (parser.c, parser.h) - The parser takes the output of the
|
||||
* tokenizer and analyzes it semantically to turn it into a parse tree.
|
||||
* - \b interpreter (interpreter.c, interpreter.h) - The interpreter takes
|
||||
* the output of the parser and executes it.
|
||||
*
|
||||
* Each of these modules is contained within its own C header and source code
|
||||
* files of the same name.
|
||||
*
|
||||
* To handle the conversion of Unicode code points and normative names to
|
||||
* bytes, two additional files, unicode.c and unicode.h are used.
|
||||
*
|
||||
* Finally, main.c ties all of these modules together and handles the initial
|
||||
* loading of input data for the lexer. */
|
||||
|
||||
/** \page varscope Variable Scope
|
||||
*
|
||||
* The specification states that variables are local to the scope of the main
|
||||
* block or any function they are contained within--except for temporary loop
|
||||
* variables which are local to the loop they are instantiated within. This
|
||||
* behavior, combined with the fact that variables must be declared before
|
||||
* being used, means that variables may not be shadowed in different control
|
||||
* scopes (such as loops and conditional statements) and, more importantly,
|
||||
* programmers must keep track of whether variables have been previously
|
||||
* declared within conditionally executed code (for example, under this
|
||||
* scoping if a variable is declared in a conditional block it cannot be
|
||||
* safely used in later code).
|
||||
*
|
||||
* One advantage of a flat scoping scheme is that nearly everything can be
|
||||
* stored in a single structure, making lookups faster. However, I believe
|
||||
* that this advantage is not worth the extra frustration transferred to the
|
||||
* programmer and so scoping in lci is done in a similar manner to other
|
||||
* programming languages, to wit, within
|
||||
* - the main block of code,
|
||||
* - the body of functions,
|
||||
* - the body of loop statements, and
|
||||
* - the bodies of conditional statements.
|
||||
*
|
||||
* This should alleviate any confusion which may have been caused by using a
|
||||
* completely local free-for-all scope. Also, there seems to be a general
|
||||
* consensus on the LOLCODE forums that this is the way to go and flat scoping
|
||||
* causes too many problems for the programmer. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "lexer.h"
|
||||
#include "tokenizer.h"
|
||||
#include "parser.h"
|
||||
#include "interpreter.h"
|
||||
|
||||
#define READSIZE 512
|
||||
|
||||
/** Locates the first command-line argument that is not an option.
  *
  * An argument counts as an option when it starts with '-' and has at least
  * one more character after it; a lone "-" is therefore returned as a regular
  * argument.  If the character following the '-' appears in \a vals, the
  * option is treated as taking a value and the argument after it is skipped
  * as well.
  *
  * \param argc The number of entries in \a argv.
  * \param argv The argument vector; entry 0 (the program name) is ignored.
  * \param vals A string of option characters that consume the next argument.
  *
  * \return A pointer to the first non-option argument, or \c NULL if there is
  *         none. */
char *getFileArg(int argc, char **argv, char *vals) {
	int idx = 1;
	while (idx < argc) {
		char *arg = argv[idx];
		char *opt;
		/* Not of the form "-x...": this is the argument we are after. */
		if (arg[0] != '-' || arg[1] == '\0')
			return arg;
		/* Options named in vals also swallow the argument that follows. */
		for (opt = vals; *opt != '\0'; opt++) {
			if (*opt == arg[1]) {
				idx++;
				break;
			}
		}
		idx++;
	}
	return NULL;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
long size = 0;
|
||||
long length = 0;
|
||||
char *buffer = NULL;
|
||||
LexemeList *lexemes = NULL;
|
||||
Token **tokens = NULL;
|
||||
FunctionTable *functab = NULL;
|
||||
MainNode *node = NULL;
|
||||
char *fname = NULL;
|
||||
FILE *file = NULL;
|
||||
|
||||
fname = getFileArg(argc, argv, "");
|
||||
if (fname == NULL || fname[0] == '-') {
|
||||
fname = "stdin";
|
||||
file = stdin;
|
||||
}
|
||||
else {
|
||||
file = fopen(fname, "r");
|
||||
}
|
||||
if (!file) {
|
||||
fprintf(stderr, "File does not exist.\n");
|
||||
return 1;
|
||||
}
|
||||
while (!feof(file)) {
|
||||
size += READSIZE;
|
||||
buffer = realloc(buffer, sizeof(char) * size);
|
||||
length += fread((buffer + size) - READSIZE, 1, READSIZE, file);
|
||||
}
|
||||
fclose(file);
|
||||
if (!buffer) return 1;
|
||||
buffer[length] = '\0';
|
||||
|
||||
/* Remove hash bang line if run as a standalone script */
|
||||
if (buffer[0] == '#' && buffer[1] == '!') {
|
||||
unsigned int n;
|
||||
for (n = 0; buffer[n] != '\n' && buffer[n] != '\r'; n++)
|
||||
buffer[n] = ' ';
|
||||
}
|
||||
|
||||
/* Remove UTF-8 BOM if present and add it to the output stream (we
|
||||
* assume here that if a BOM is present, the system will also expect
|
||||
* the output to include a BOM). */
|
||||
if (buffer[0] == (char)0xef
|
||||
|| buffer[1] == (char)0xbb
|
||||
|| buffer[2] == (char)0xbf) {
|
||||
buffer[0] = ' ';
|
||||
buffer[1] = ' ';
|
||||
buffer[2] = ' ';
|
||||
printf("%c%c%c", 0xef, 0xbb, 0xbf);
|
||||
}
|
||||
|
||||
/* Begin main pipeline */
|
||||
if (!(lexemes = scanBuffer(buffer, length, fname))) {
|
||||
free(buffer);
|
||||
return 1;
|
||||
}
|
||||
free(buffer);
|
||||
if (!(tokens = tokenizeLexemes(lexemes))) {
|
||||
deleteLexemeList(lexemes);
|
||||
return 1;
|
||||
}
|
||||
deleteLexemeList(lexemes);
|
||||
if (!(functab = setupFunctionTable(tokens))) {
|
||||
deleteFunctionTable(functab);
|
||||
return 1;
|
||||
}
|
||||
if (!(node = parseMainNode(tokens, functab))) {
|
||||
deleteFunctionTable(functab);
|
||||
deleteTokens(tokens);
|
||||
return 1;
|
||||
}
|
||||
deleteTokens(tokens);
|
||||
if (interpretMainNode(node)) {
|
||||
deleteMainNode(node);
|
||||
return 1;
|
||||
}
|
||||
deleteMainNode(node);
|
||||
/* End main pipeline */
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,618 @@
|
|||
/** Structures and functions for parsing tokens into a parse tree. The parser
|
||||
* reads through a series of tokens (generated by the tokenizer) and adds
|
||||
* semantic meaning to them by forming them into a parse tree which can, in
|
||||
* turn, be passed on to later processes (such as an interpreter).
|
||||
*
|
||||
* \file parser.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010 */
|
||||
|
||||
/** \page impvar The Implicit Variable
|
||||
*
|
||||
* The implicit variable in LOLCODE is denoted by the keyword \c IT and stores
|
||||
* a copy of the result of the most recently evaluated expression statement,
|
||||
* that is, an expression all by itself on a line. (See
|
||||
* http://lolcode.com/specs/1.2#conditionals for an example.) */
|
||||
|
||||
/** \page lolebnf The LOLCODE EBNF
|
||||
* Presented below is the EBNF (see http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form)
|
||||
* for LOLCODE that \c lci parses. Note that by this stage, the scanner has:
|
||||
* - already removed any whitespace between tokens,
|
||||
* - added in and truncated newline tokens at logical line breaks, and
|
||||
* - added an end-of-file (\c $) token.
|
||||
*
|
||||
* \section progebnf Program Structure
|
||||
*
|
||||
* These production rules dictate the overall form of the program.
|
||||
*
|
||||
* \par
|
||||
* MainNode ::= \c TT_HAI \a version \c TT_NEWLINE BlockNode \c $
|
||||
*
|
||||
* \par
|
||||
* BlockNode ::= StmtNode *
|
||||
*
|
||||
* \section genebnf General Structures
|
||||
*
|
||||
* These production rules specify some general types of parse structures.
|
||||
*
|
||||
* \par
|
||||
* ConstantNode ::= Boolean | Integer | Float | String
|
||||
*
|
||||
* \par
|
||||
* IdentifierNode ::= Identifier
|
||||
*
|
||||
* \par
|
||||
* TypeNode ::= \c TT_NOOB | \c TT_TROOF | \c TT_NUMBR | \c TT_NUMBAR | \c TT_YARN
|
||||
*
|
||||
* \section stmtebnf Statements
|
||||
*
|
||||
* These production rules specify the types of statements formed.
|
||||
*
|
||||
* \par
|
||||
* StmtNode ::= CastStmtNode | PrintStmtNode | InputStmtNode | AssignmentStmtNode | DeclarationStmtNode | IfThenElseStmtNode | SwitchStmtNode | BreakStmt | ReturnStmtNode | LoopStmtNode | FuncDefStmtNode | ExprStmt
|
||||
*
|
||||
* \par
|
||||
* CastStmtNode ::= IdentifierNode \c TT_ISNOWA TypeNode \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* PrintStmtNode ::= \c TT_VISIBLE ExprNodeList \c [ \c TT_BANG ] TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* InputStmtNode ::= \c TT_GIMMEH IdentifierNode TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* AssignmentStmtNode ::= IdentifierNode \c TT_R ExprNode \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* DeclarationStmtNode ::= IdentifierNode \c TT_HASA IdentifierNode [ Initialization ] \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* Initialization ::= \c TT_ITZ ExprNode
|
||||
*
|
||||
* \par
|
||||
* IfThenElseStmtNode ::= \c TT_ORLY \c TT_NEWLINE \c TT_YARLY \c TT_NEWLINE BlockNode ElseIf * [ Else ] \c TT_OIC \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* ElseIf ::= \c TT_MEBBE ExprNode \c TT_NEWLINE BlockNode
|
||||
*
|
||||
* \par
|
||||
* Else ::= \c TT_NOWAI \c TT_NEWLINE BlockNode
|
||||
*
|
||||
* \par
|
||||
* SwitchStmtNode ::= \c TT_WTF \c TT_NEWLINE Case + [ DefaultCase ] \c TT_OIC \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* Case ::= \c TT_OMG ExprNode \c TT_NEWLINE BlockNode
|
||||
*
|
||||
* \par
|
||||
* DefaultCase ::= \c TT_OMGWTF \c TT_NEWLINE BlockNode
|
||||
*
|
||||
* \par
|
||||
* BreakStmt ::= \c TT_GTFO \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* ReturnStmtNode ::= \c TT_FOUNDYR ExprNode \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* LoopStmtNode ::= \c TT_IMINYR IdentifierNode [ LoopUpdate ] [ LoopGuard ] \c TT_NEWLINE BlockNode \c TT_IMOUTTAYR IdentifierNode \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* LoopUpdate ::= LoopUpdateOp \c TT_YR IdentifierNode
|
||||
*
|
||||
* \par
|
||||
* LoopUpdateOp ::= \c TT_UPPIN | \c TT_NERFIN | UnaryFunction
|
||||
*
|
||||
* \par
|
||||
* UnaryFunction ::= The name of a previously defined unary function.
|
||||
*
|
||||
* \par
|
||||
* LoopGuard ::= \c TT_TIL ExprNode | \c TT_WILE ExprNode
|
||||
*
|
||||
* \par
|
||||
* FuncDefStmtNode ::= \c TT_HOWDUZ IdentifierNode IdentifierNode [ FunctionDefArgs ] \c TT_NEWLINE BlockNode \c TT_IFUSAYSO \c TT_NEWLINE
|
||||
*
|
||||
* \par
|
||||
* FunctionDefArgs ::= \c TT_YR IdentifierNode FunctionDefArg *
|
||||
*
|
||||
* \par
|
||||
* FunctionDefArg ::= \c TT_ANYR IdentifierNode
|
||||
*
|
||||
* \par
|
||||
* ExprStmt ::= ExprNode \c TT_NEWLINE
|
||||
*
|
||||
* \section exprebnf Expressions
|
||||
*
|
||||
* These production rules specify the types of expressions formed.
|
||||
*
|
||||
* \par
|
||||
* ExprNode ::= CastExprNode | ConstantNode | IdentifierNode | FuncCallExprNode | OpExprNode | ImplicitVariable
|
||||
*
|
||||
* \par
|
||||
* CastExprNode ::= \c TT_MAEK ExprNode \c TT_A TypeNode
|
||||
*
|
||||
* \par
|
||||
* FuncCallExprNode ::= IdentifierNode
|
||||
*
|
||||
* \par
|
||||
* OpExprNode ::= UnaryOp | BinaryOp | NaryOp
|
||||
*
|
||||
* \par
|
||||
* UnaryOp ::= UnaryOpType ExprNode
|
||||
*
|
||||
* \par
|
||||
* UnaryOpType ::= \c TT_NOT
|
||||
*
|
||||
* \par
|
||||
* BinaryOp ::= BinaryOpType ExprNode [ \c TT_AN ] ExprNode
|
||||
*
|
||||
* \par
|
||||
* BinaryOpType ::= \c TT_SUMOF | \c TT_DIFFOF | \c TT_PRODUKTOF | \c TT_QUOSHUNTOF | \c TT_MODOF | \c TT_BIGGROF | \c TT_SMALLROF | \c TT_BOTHOF | \c TT_EITHEROF | \c TT_WONOF
|
||||
*
|
||||
* \par
|
||||
* NaryOp ::= NaryOpType NaryOpArgs \c TT_MKAY
|
||||
*
|
||||
* \par
|
||||
* NaryOpType ::= \c TT_ALLOF | \c TT_ANYOF
|
||||
*
|
||||
* \par
|
||||
* NaryOpArgs ::= ExprNode NaryOpArg +
|
||||
*
|
||||
* \par
|
||||
* NaryOpArg ::= [ \c TT_AN ] ExprNode
|
||||
*
|
||||
* \par
|
||||
* ImplicitVariable ::= \c TT_IT */
|
||||
|
||||
#ifndef __PARSER_H__
|
||||
#define __PARSER_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "tokenizer.h"
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
/** Stores an identifier. An identifier is the string of characters that are
|
||||
* used to uniquely name a particular variable.
|
||||
*
|
||||
* \see createIdentifierNode(char *, const char *, unsigned int)
|
||||
* \see deleteIdentifierNode(IdentifierNode *) */
|
||||
typedef struct {
|
||||
char *image; /**< An array of characters that name the identifier. */
|
||||
const char *fname; /**< A pointer to the name of the file containing the identifier. */
|
||||
unsigned int line; /**< The line number from the source file that the identifier occurred on. */
|
||||
} IdentifierNode;
|
||||
|
||||
/** Stores a list of identifiers. This structure allows sets of identifiers
|
||||
* to be grouped together.
|
||||
*
|
||||
* \see createIdentifierNodeList(void)
|
||||
* \see addIdentifierNode(IdentifierNodeList *, IdentifierNode *)
|
||||
* \see deleteIdentifierNodeList(IdentifierNodeList *) */
|
||||
typedef struct {
|
||||
unsigned int num; /**< The number of IdentifierNode structures stored. */
|
||||
IdentifierNode **ids; /**< A pointer to the array of IdentifierNode structures. */
|
||||
} IdentifierNodeList;
|
||||
|
||||
/** Denotes the type of statement a StmtNode stores. */
|
||||
typedef enum {
|
||||
ST_CAST, /**< A CastStmtNode structure. */
|
||||
ST_PRINT, /**< A PrintStmtNode structure. */
|
||||
ST_INPUT, /**< An InputStmtNode structure. */
|
||||
ST_ASSIGNMENT, /**< An AssignmentStmtNode structure. */
|
||||
ST_DECLARATION, /**< A DeclarationStmtNode structure. */
|
||||
ST_IFTHENELSE, /**< An IfThenElseStmtNode structure. */
|
||||
ST_SWITCH, /**< A SwitchStmtNode structure. */
|
||||
ST_BREAK, /**< A break statement (no structure is needed for this type of statement). */
|
||||
ST_RETURN, /**< A ReturnStmtNode structure. */
|
||||
ST_LOOP, /**< A LoopStmtNode structure. */
|
||||
ST_FUNCDEF, /**< A FuncDefStmtNode structure. */
|
||||
ST_EXPR /**< An ExprNode structure. */
|
||||
} StmtType;
|
||||
|
||||
/** Stores a statement. A statement is a unit of code which can be executed by
|
||||
* itself and may possibly cause side-effects to occur.
|
||||
*
|
||||
* \see createStmtNode(StmtType, void *)
|
||||
* \see deleteStmtNode(StmtNode *) */
|
||||
typedef struct {
|
||||
StmtType type; /**< The type of statement stored in \a stmt. */
|
||||
void *stmt; /**< A pointer to the particular statement structure. */
|
||||
} StmtNode;
|
||||
|
||||
/** Stores a list of statements. This structure allows sets of statements to be
|
||||
* grouped together.
|
||||
*
|
||||
* \see createStmtNodeList(void)
|
||||
* \see addStmtNode(StmtNodeList *, StmtNode *)
|
||||
* \see deleteStmtNodeList(StmtNodeList *) */
|
||||
typedef struct {
|
||||
unsigned int num; /**< The number of StmtNode structures stored. */
|
||||
StmtNode **stmts; /**< A pointer to the array of StmtNode structures. */
|
||||
} StmtNodeList;
|
||||
|
||||
/** Denotes the type of expression an ExprNode stores. */
|
||||
typedef enum {
|
||||
ET_CAST, /**< A CastExprNode structure. */
|
||||
ET_CONSTANT, /**< A ConstantNode structure. */
|
||||
ET_IDENTIFIER, /**< An IdentifierNode structure. */
|
||||
ET_FUNCCALL, /**< A FuncCallExprNode structure. */
|
||||
ET_OP, /**< An OpExprNode structure. */
|
||||
ET_IMPVAR /**< An \ref impvar "implicit variable" (no structure is needed for this type of expression). */
|
||||
} ExprType;
|
||||
|
||||
/** Stores an expression. An expression is a unit of code which evaluates to
|
||||
* some value and typically does not cause side-effects to occur.
|
||||
*
|
||||
* \see createExprNode(ExprType, void *)
|
||||
* \see deleteExprNode(ExprNode *) */
|
||||
typedef struct {
|
||||
ExprType type; /**< The type of expression stored in \a expr. */
|
||||
void *expr; /**< A pointer to the particular expression structure. */
|
||||
} ExprNode;
|
||||
|
||||
/** Stores a list of expressions. This structure allows sets of expressions to
|
||||
* be grouped together.
|
||||
*
|
||||
* \see createExprNodeList(void)
|
||||
* \see addExprNode(ExprNodeList *, ExprNode *)
|
||||
* \see deleteExprNodeList(ExprNodeList *) */
|
||||
typedef struct {
|
||||
unsigned int num; /**< The number of ExprNode structures stored. */
|
||||
ExprNode **exprs; /**< A pointer to an array of ExprNode structures. */
|
||||
} ExprNodeList;
|
||||
|
||||
/** Stores a block of code. A block of code consists of a set of statements.
|
||||
*
|
||||
* \see createBlockNode(StmtNodeList *)
|
||||
* \see deleteBlockNode(BlockNode *) */
|
||||
typedef struct {
|
||||
StmtNodeList *stmts; /**< A pointer to the list of statements which comprise the block of code. */
|
||||
} BlockNode;
|
||||
|
||||
/** Stores a list of blocks of code. This structure allows sets of blocks of
|
||||
* code to be grouped together.
|
||||
*
|
||||
* \see createBlockNodeList(void)
|
||||
* \see addBlockNode(BlockNodeList *, BlockNode *)
|
||||
* \see deleteBlockNodeList(BlockNodeList *) */
|
||||
typedef struct {
|
||||
unsigned int num; /**< The number of BlockNode structures stored. */
|
||||
BlockNode **blocks; /**< A pointer to an array of BlockNode structures. */
|
||||
} BlockNodeList;
|
||||
|
||||
/** Denotes the type of a constant. */
|
||||
typedef enum {
|
||||
CT_INTEGER, /**< An integer value. */
|
||||
CT_FLOAT, /**< A floating point decimal value. */
|
||||
CT_BOOLEAN, /**< A true/false value. */
|
||||
CT_STRING, /**< A character string value. */
|
||||
CT_NIL /**< Represents no value. */
|
||||
} ConstantType;
|
||||
|
||||
/** Stores the data associated with a ConstantNode structure. */
|
||||
typedef union {
|
||||
int i; /**< Integer data. */
|
||||
float f; /**< Floating point data. */
|
||||
char *s; /**< Character string data. */
|
||||
} ConstantData;
|
||||
|
||||
/** Stores a constant value. A constant value evaluates to its contents,
|
||||
* depending on its \a type.
|
||||
*
|
||||
* \see createBooleanConstantNode(int)
|
||||
* \see createIntegerConstantNode(int)
|
||||
* \see createFloatConstantNode(float)
|
||||
* \see createStringConstantNode(char *)
|
||||
* \see deleteConstantNode(ConstantNode *) */
|
||||
typedef struct {
|
||||
ConstantType type; /**< The type of the constant. */
|
||||
ConstantData data; /**< The stored data of type \a type. */
|
||||
} ConstantNode;
|
||||
|
||||
/** Stores a function definition statement. A function definition statement
|
||||
* defines the prototype and contents of a function.
|
||||
*
|
||||
* \see createFuncDefStmtNode(IdentifierNode *, IdentifierNode *, IdentifierNodeList *, BlockNode *)
|
||||
* \see deleteFuncDefStmtNode(FuncDefStmtNode *) */
|
||||
typedef struct {
|
||||
IdentifierNode *scope; /**< A pointer to the scope to define the function in. */
|
||||
IdentifierNode *name; /**< A pointer to the name of the function. */
|
||||
IdentifierNodeList *args; /**< A pointer to a list of the names of the arguments of the function. */
|
||||
BlockNode *body; /**< A pointer to the block of code defined by the function. */
|
||||
} FuncDefStmtNode;
|
||||
|
||||
/** Stores the contents of the function table. The function table contains the
|
||||
* definitions of all declared functions. It is used for making sure function
|
||||
* calls provide a valid arity; typechecking, however, is performed at
|
||||
* runtime. */
|
||||
typedef struct {
|
||||
unsigned int num; /**< The number of declared functions. */
|
||||
FuncDefStmtNode **funcs; /**< A pointer to an array of declared functions. */
|
||||
} FunctionTable;
|
||||
|
||||
/** Stores the main block of code a program executes. This structure could be
|
||||
* accomplished using only a BlockNode instead, but its logical importance to
|
||||
* program control flow (namely, it is the first portion of code executed)
|
||||
* merits its own structure.
|
||||
*
|
||||
* \see createMainNode(BlockNode *, FunctionTable *)
|
||||
* \see deleteMainNode(MainNode *) */
|
||||
typedef struct {
|
||||
BlockNode *block; /**< A pointer to the block of code to execute first. */
|
||||
FunctionTable *functab; /**< A pointer to the function table associated with this block of code. */
|
||||
} MainNode;
|
||||
|
||||
/** Stores a variable type.
|
||||
*
|
||||
* \see createTypeNode(ConstantType)
|
||||
* \see deleteTypeNode(TypeNode *) */
|
||||
typedef struct {
|
||||
ConstantType type; /**< The type of the variable. */
|
||||
} TypeNode;
|
||||
|
||||
/** Stores a cast statement. A cast statement changes the type of a variable
|
||||
* identified by \a target to the type given by \a newtype.
|
||||
*
|
||||
* \see createCastStmtNode(IdentifierNode *, TypeNode *)
|
||||
* \see deleteCastStmtNode(CastStmtNode *) */
|
||||
typedef struct {
|
||||
IdentifierNode *target; /**< A pointer to the name of the variable whose type is to be changed to \a newtype. */
|
||||
TypeNode *newtype; /**< A pointer to the type to change \a target to. */
|
||||
} CastStmtNode;
|
||||
|
||||
/** Stores a print statement. A print statement prints its arguments to some
|
||||
* output device (by default standard output).
|
||||
*
|
||||
* \see createPrintStmtNode(ExprNodeList *, int)
|
||||
* \see deletePrintStmtNode(PrintStmtNode *) */
|
||||
typedef struct {
|
||||
ExprNodeList *args; /**< A pointer to the list of expressions to evaluate and print. */
|
||||
int nonl; /**< Denotes an ending newline should be suppressed if not \c 0 and printed if \c 0. */
|
||||
} PrintStmtNode;
|
||||
|
||||
/** Stores an input statement. An input statement accepts a line of input from
|
||||
* the user on an input device (by default standard input) and stores it in a
|
||||
* variable.
|
||||
*
|
||||
* \see createInputStmtNode(IdentifierNode *)
|
||||
* \see deleteInputStmtNode(InputStmtNode *) */
|
||||
typedef struct {
|
||||
IdentifierNode *target; /**< A pointer to the name of the variable to store the input in. */
|
||||
} InputStmtNode;
|
||||
|
||||
/** Stores an assignment statement. An assignment statement updates the value
|
||||
* of a variable, \a target, to the result of an expression, \a expr.
|
||||
*
|
||||
* \see createAssignmentStmtNode(IdentifierNode *, ExprNode *)
|
||||
* \see deleteAssignmentStmtNode(AssignmentStmtNode *) */
|
||||
typedef struct {
|
||||
IdentifierNode *target; /**< A pointer to the name of the variable to store the evaluated contents of \a expr into. */
|
||||
ExprNode *expr; /**< A pointer to the expression to evaluate and store in \a target. */
|
||||
} AssignmentStmtNode;
|
||||
|
||||
/** Stores a declaration statement. A declaration statement creates a new
|
||||
* variable named by \a target, optionally initializing it to the evaluated
|
||||
* contents of \a expr. \a scope determines which level of scope the variable
|
||||
* is to be created in.
|
||||
*
|
||||
* \see createDeclarationStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *)
|
||||
* \see deleteDeclarationStmtNode(DeclarationStmtNode *) */
|
||||
typedef struct {
|
||||
IdentifierNode *scope; /**< A pointer to the scope to create the variable in. */
|
||||
IdentifierNode *target; /**< A pointer to the name of the variable to create. */
|
||||
ExprNode *expr; /**< An optional pointer to expression to initialize \a target to. */
|
||||
} DeclarationStmtNode;
|
||||
|
||||
/** Stores an if/then/else statement. A conditional statement checks the value
|
||||
* of the \ref impvar "implicit variable" and executes \a yes if it casts to \c
|
||||
* true. If the value of the \ref impvar "implicit variable" casts to \c
|
||||
* false, each of the optional expressions in \a guards is evaluated and if it
|
||||
* casts to true, the corresponding block in \a blocks is executed. If the
|
||||
* value of the \ref impvar "implicit variable" casts to false \b and all of
|
||||
* the guards cast to false, the contents of \a no is executed.
|
||||
*
|
||||
* \see createIfThenElseStmtNode(BlockNode *, BlockNode *, ExprNodeList *, BlockNodeList *)
|
||||
* \see deleteIfThenElseStmtNode(IfThenElseStmtNode *) */
|
||||
typedef struct {
|
||||
BlockNode *yes; /**< A pointer to the block of code to execute if the \ref impvar "implicit variable" casts to true. */
|
||||
BlockNode *no; /**< A pointer to the block of code to execute if the \ref impvar "implicit variable" casts to false \b and the evaluations of all of the \a guards cast to false. */
|
||||
ExprNodeList *guards; /**< A pointer to the expressions to test if the \ref impvar "implicit variable" casts to false. */
|
||||
BlockNodeList *blocks; /**< A pointer to the respective blocks of code to execute if one of the evaluated \a guards casts to true. */
|
||||
} IfThenElseStmtNode;
|
||||
|
||||
/** Stores a switch statement. A switch statement compares the value of the
|
||||
* \ref impvar "implicit variable" to each of the \a guards and executes the
|
||||
* respective block of code in \a blocks if they match. If no matches are
|
||||
* found between the \ref impvar "implicit variable" and one of the \a guards,
|
||||
* the optional default block of code, \a def, is executed.
|
||||
*
|
||||
* \see createSwitchStmtNode(ExprNodeList *, BlockNodeList *, BlockNode *)
|
||||
* \see deleteSwitchStmtNode(SwitchStmtNode *) */
|
||||
typedef struct {
|
||||
ExprNodeList *guards; /**< A pointer to the expressions to evaluate and compare to the \ref impvar "implicit variable". */
|
||||
BlockNodeList *blocks; /**< A pointer to the respective blocks of code to execute if one of the \a guards matches the \ref impvar "implicit variable". */
|
||||
BlockNode *def; /**< A pointer to the default block of code to execute if none of the \a guards match the \ref impvar "implicit variable". */
|
||||
} SwitchStmtNode;
|
||||
|
||||
/** Stores a return statement. A return statement signals that the current
|
||||
* function is to be returned from with value \a value.
|
||||
*
|
||||
* \see createReturnStmtNode(ExprNode *)
|
||||
* \see deleteReturnStmtNode(ReturnStmtNode *) */
|
||||
typedef struct {
|
||||
ExprNode *value; /**< A pointer to the value to return. */
|
||||
} ReturnStmtNode;
|
||||
|
||||
/** Stores a loop statement. A loop statement repeatedly executes its \a body
|
||||
* while \a guard evaluates to true, executing \a update at the end of each
|
||||
* cycle.
|
||||
*
|
||||
* \see createLoopStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *, ExprNode *, BlockNode *)
|
||||
* \see deleteLoopStmtNode(LoopStmtNode *) */
|
||||
typedef struct {
|
||||
IdentifierNode *name; /**< A pointer to the name of the loop. */
|
||||
IdentifierNode *var; /**< A pointer to the name of the variable to be updated by \a update. */
|
||||
ExprNode *guard; /**< A pointer to the expression to determine if the loop will continue. */
|
||||
ExprNode *update; /**< A pointer to the expression to evaluate to update \a var. */
|
||||
BlockNode *body; /**< A pointer to the block of code to be executed with each iteration of the loop. */
|
||||
} LoopStmtNode;
|
||||
|
||||
/** Stores a cast expression. A cast expression evaluates an expression and
|
||||
* casts it to a particular type.
|
||||
*
|
||||
* \see createCastExprNode(ExprNode *, TypeNode *)
|
||||
* \see deleteCastExprNode(CastExprNode *) */
|
||||
typedef struct {
|
||||
ExprNode *target; /**< A pointer to the expression to cast. */
|
||||
TypeNode *newtype; /**< A pointer to the type to cast the copy of \a target to. */
|
||||
} CastExprNode;
|
||||
|
||||
/** Stores a function call expression. A function call expression evaluates to
|
||||
* the return value of the function defined in \a def called with the arguments
|
||||
* listed in \a args.
|
||||
*
|
||||
* \note \a args is not an ExprNodeList because its arity is known in advance
|
||||
* (because a FuncDefStmtNode for it has presumably been created) and
|
||||
* thus the benefit of an ExprNodeList (easy syntax for adding new elements)
|
||||
* would not be relevant.
|
||||
*
|
||||
* \see createFuncCallExprNode(FuncDefStmtNode *, ExprNodeList *)
|
||||
* \see deleteFuncCallExprNode(FuncCallExprNode *) */
|
||||
typedef struct {
|
||||
FuncDefStmtNode *def; /**< A pointer to the function definition to call. */
|
||||
ExprNodeList *args; /**< A pointer to a list of ExprNode structure arguments to be supplied to the function defined by \a def. */
|
||||
} FuncCallExprNode;
|
||||
|
||||
/** Denotes the type of operation an OpExprNode performs. */
|
||||
typedef enum {
|
||||
OP_ADD, /**< Addition. */
|
||||
OP_SUB, /**< Subtraction. */
|
||||
OP_MULT, /**< Multiplication. */
|
||||
OP_DIV, /**< Division. */
|
||||
OP_MOD, /**< Modulo. */
|
||||
OP_MAX, /**< Maximum. */
|
||||
OP_MIN, /**< Minimum. */
|
||||
|
||||
OP_AND, /**< Logical AND. */
|
||||
OP_OR, /**< Logical OR. */
|
||||
OP_XOR, /**< Logical XOR. */
|
||||
OP_NOT, /**< Logical NOT. */
|
||||
|
||||
OP_EQ, /**< Equality. */
|
||||
OP_NEQ, /**< Inequality. */
|
||||
|
||||
OP_CAT /**< String concatenation. */
|
||||
} OpType;
|
||||
|
||||
/** Stores an operation expression. An operation expression evaluates to
|
||||
* the result of the operation performed on its arguments.
|
||||
*
|
||||
* \see createOpExprNode(OpType, ExprNodeList *)
|
||||
* \see deleteOpExprNode(OpExprNode *) */
|
||||
typedef struct {
|
||||
OpType type; /**< The type of operation to perform on \a args. */
|
||||
ExprNodeList *args; /**< A pointer to the arguments to perform the operation on. */
|
||||
} OpExprNode;
|
||||
|
||||
MainNode *createMainNode(BlockNode *, FunctionTable *);
|
||||
void deleteMainNode(MainNode *);
|
||||
|
||||
BlockNode *createBlockNode(StmtNodeList *);
|
||||
void deleteBlockNode(BlockNode *);
|
||||
BlockNodeList *createBlockNodeList(void);
|
||||
BlockNode *addBlockNode(BlockNodeList *, BlockNode *);
|
||||
void deleteBlockNodeList(BlockNodeList *);
|
||||
|
||||
IdentifierNode *createIdentifierNode(char *, const char *, unsigned int);
|
||||
void deleteIdentifierNode(IdentifierNode *);
|
||||
|
||||
TypeNode *createTypeNode(ConstantType);
|
||||
void deleteTypeNode(TypeNode *);
|
||||
|
||||
StmtNode *createStmtNode(StmtType, void *);
|
||||
void deleteStmtNode(StmtNode *);
|
||||
StmtNodeList *createStmtNodeList(void);
|
||||
StmtNode *addStmtNode(StmtNodeList *, StmtNode *);
|
||||
void deleteStmtNodeList(StmtNodeList *);
|
||||
|
||||
CastStmtNode *createCastStmtNode(IdentifierNode *, TypeNode *);
|
||||
void deleteCastStmtNode(CastStmtNode *);
|
||||
|
||||
PrintStmtNode *createPrintStmtNode(ExprNodeList *, int);
|
||||
void deletePrintStmtNode(PrintStmtNode *);
|
||||
|
||||
InputStmtNode *createInputStmtNode(IdentifierNode *);
|
||||
void deleteInputStmtNode(InputStmtNode *);
|
||||
|
||||
AssignmentStmtNode *createAssignmentStmtNode(IdentifierNode *, ExprNode *);
|
||||
void deleteAssignmentStmtNode(AssignmentStmtNode *);
|
||||
|
||||
DeclarationStmtNode *createDeclarationStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *);
|
||||
void deleteDeclarationStmtNode(DeclarationStmtNode *);
|
||||
|
||||
IfThenElseStmtNode *createIfThenElseStmtNode(BlockNode *, BlockNode *, ExprNodeList *, BlockNodeList *);
|
||||
void deleteIfThenElseStmtNode(IfThenElseStmtNode *);
|
||||
|
||||
SwitchStmtNode *createSwitchStmtNode(ExprNodeList *, BlockNodeList *, BlockNode *);
|
||||
void deleteSwitchStmtNode(SwitchStmtNode *);
|
||||
|
||||
ReturnStmtNode *createReturnStmtNode(ExprNode *);
|
||||
void deleteReturnStmtNode(ReturnStmtNode *);
|
||||
|
||||
LoopStmtNode *createLoopStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *, ExprNode *, BlockNode *);
|
||||
void deleteLoopStmtNode(LoopStmtNode *);
|
||||
|
||||
FuncDefStmtNode *createFuncDefStmtNode(IdentifierNode *, IdentifierNode *, IdentifierNodeList *, BlockNode *);
|
||||
void deleteFuncDefStmtNode(FuncDefStmtNode *);
|
||||
|
||||
ExprNode *createExprNode(ExprType, void *);
|
||||
void deleteExprNode(ExprNode *);
|
||||
ExprNodeList *createExprNodeList(void);
|
||||
ExprNode *addExprNode(ExprNodeList *, ExprNode *);
|
||||
void deleteExprNodeList(ExprNodeList *);
|
||||
|
||||
CastExprNode *createCastExprNode(ExprNode *, TypeNode *);
|
||||
void deleteCastExprNode(CastExprNode *);
|
||||
|
||||
FuncCallExprNode *createFuncCallExprNode(FuncDefStmtNode *, ExprNodeList *);
|
||||
void deleteFuncCallExprNode(FuncCallExprNode *);
|
||||
|
||||
OpExprNode *createOpExprNode(OpType, ExprNodeList *);
|
||||
void deleteOpExprNode(OpExprNode *);
|
||||
|
||||
FunctionTable *createFunctionTable(void);
|
||||
FuncDefStmtNode *addFuncDefStmtNode(FunctionTable *, FuncDefStmtNode *);
|
||||
void deleteFunctionTable(FunctionTable *);
|
||||
FuncDefStmtNode *lookupFuncDefStmtNode(FunctionTable *, const char *);
|
||||
|
||||
int acceptToken(Token ***, TokenType);
|
||||
int peekToken(Token ***, TokenType);
|
||||
int nextToken(Token ***, TokenType);
|
||||
|
||||
void error(const char *, Token **);
|
||||
|
||||
ConstantNode *parseConstantNode(Token ***);
|
||||
TypeNode *parseTypeNode(Token ***);
|
||||
IdentifierNode *parseIdentifierNode(Token ***);
|
||||
ExprNode *parseExprNode(Token ***, FunctionTable *);
|
||||
StmtNode *parseStmtNode(Token ***, FunctionTable *);
|
||||
BlockNode *parseBlockNode(Token ***, FunctionTable *);
|
||||
MainNode *parseMainNode(Token **, FunctionTable *);
|
||||
FunctionTable *setupFunctionTable(Token **);
|
||||
|
||||
ConstantNode *createBooleanConstantNode(int);
|
||||
ConstantNode *createIntegerConstantNode(int);
|
||||
ConstantNode *createFloatConstantNode(float);
|
||||
ConstantNode *createStringConstantNode(char *);
|
||||
void deleteConstantNode(ConstantNode *);
|
||||
|
||||
IdentifierNodeList *createIdentifierNodeList(void);
|
||||
IdentifierNode *addIdentifierNode(IdentifierNodeList *, IdentifierNode *);
|
||||
void deleteIdentifierNodeList(IdentifierNodeList *);
|
||||
|
||||
#endif /* __PARSER_H__ */
|
|
@ -0,0 +1,2 @@
|
|||
HAI 1.2
|
||||
KTHXBYE
|
|
@ -0,0 +1,2 @@
|
|||
This test checks to see whether the bare minimum program--consisting only of a
|
||||
main block--is correctly interpreted and produces no output.
|
|
@ -0,0 +1,3 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem", VISIBLE "ipsum", VISIBLE "dolor", VISIBLE "sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1,4 @@
|
|||
Lorem
|
||||
ipsum
|
||||
dolor
|
||||
sit
|
|
@ -0,0 +1 @@
|
|||
This test checks that commas separate multiple statements on the same line.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem "...
|
||||
"ipsum "...
|
||||
"dolor "...
|
||||
"sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that ellipses followed by a linefeed character (\n) join
|
||||
multiple portions of a statement on separate lines.
|
|
@ -0,0 +1 @@
|
|||
HAI 1.2
VISIBLE "Lorem "...
"ipsum "...
"dolor "...
"sit"
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that ellipses followed by a carriage return character (\r) join
|
||||
multiple portions of a statement on separate lines.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem "...
|
||||
"ipsum "...
|
||||
"dolor "...
|
||||
"sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that ellipses followed by the carriage return and linefeed
|
||||
characters (\r\n) join multiple portions of a statement on separate lines.
|
|
@ -0,0 +1,5 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem ipsum "...
|
||||
|
||||
"dolor sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that ellipses followed by a linefeed (\n) character must not be
|
||||
followed by an additional linefeed character.
|
|
@ -0,0 +1 @@
|
|||
HAI 1.2
VISIBLE "Lorem ipsum "...
"dolor sit"
KTHXBYE
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that ellipses followed by a carriage return (\r) character must
|
||||
not be followed by an additional carriage return character.
|
|
@ -0,0 +1,5 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem ipsum "...
|
||||
|
||||
"dolor sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1,3 @@
|
|||
This test checks that ellipses followed by carriage return (\r) and linefeed
|
||||
(\n) characters must not be followed by additional carriage return and linefeed
|
||||
characters.
|
|
@ -0,0 +1 @@
|
|||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
This test makes sure a program begins with the HAI keyword.
|
|
@ -0,0 +1,2 @@
|
|||
HAI
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
This test makes sure a program beginning without a version generates an error.
|
|
@ -0,0 +1 @@
|
|||
HAI 1.2
|
|
@ -0,0 +1,2 @@
|
|||
This test makes sure a program ending without a KTHXBYE statement generates an
|
||||
error.
|
|
@ -0,0 +1,3 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem ipsum dolor sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit
|
|
@ -0,0 +1 @@
|
|||
This test checks to see whether line indentation is ignored.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem " "ipsum " "dolor " "sit"
|
||||
VISIBLE "Lorem " "ipsum " "dolor " "sit"
|
||||
VISIBLE "Lorem " "ipsum " "dolor " "sit"
|
||||
VISIBLE "Lorem " "ipsum " "dolor " "sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1,4 @@
|
|||
Lorem ipsum dolor sit
|
||||
Lorem ipsum dolor sit
|
||||
Lorem ipsum dolor sit
|
||||
Lorem ipsum dolor sit
|
|
@ -0,0 +1,3 @@
|
|||
This test checks that whitespace in between tokens is handled properly. It
|
||||
tests spaces only, tabs only, alternating spaces and tabs, and
|
||||
alternating tabs and spaces.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem"
|
||||
VISIBLE "ipsum"
|
||||
VISIBLE "dolor"
|
||||
VISIBLE "sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1,4 @@
|
|||
Lorem
|
||||
ipsum
|
||||
dolor
|
||||
sit
|
|
@ -0,0 +1,2 @@
|
|||
This test checks to make sure the linefeed character (\n) indicates the end of a
|
||||
statement.
|
|
@ -0,0 +1 @@
|
|||
HAI 1.2
VISIBLE "Lorem"
VISIBLE "ipsum"
VISIBLE "dolor"
VISIBLE "sit"
KTHXBYE
|
|
@ -0,0 +1,4 @@
|
|||
Lorem
|
||||
ipsum
|
||||
dolor
|
||||
sit
|
|
@ -0,0 +1,2 @@
|
|||
This test checks to make sure the carriage return character (\r) indicates the
|
||||
end of a statement.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem"
|
||||
VISIBLE "ipsum"
|
||||
VISIBLE "dolor"
|
||||
VISIBLE "sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1,4 @@
|
|||
Lorem
|
||||
ipsum
|
||||
dolor
|
||||
sit
|
|
@ -0,0 +1,2 @@
|
|||
This test checks to make sure the carriage return and linefeed characters (\r\n)
|
||||
indicate the end of a statement.
|
|
@ -0,0 +1,12 @@
|
|||
HAI 1.2
|
||||
I HAS A var ITZ 0
|
||||
IM IN YR loop
|
||||
VISIBLE var
|
||||
var R SUM OF var AN 1
|
||||
BOTH SAEM var AN 10
|
||||
O RLY?
|
||||
YA RLY
|
||||
GTFO
|
||||
OIC
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,10 @@
|
|||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
|
@ -0,0 +1 @@
|
|||
This test checks that breaking from loops works correctly.
|
|
@ -0,0 +1,10 @@
|
|||
HAI 1.2
|
||||
IM IN YR loop UPPIN YR var
|
||||
VISIBLE var
|
||||
BOTH SAEM var AN 9
|
||||
O RLY?
|
||||
YA RLY
|
||||
GTFO
|
||||
OIC
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,10 @@
|
|||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that a loop with an incremented temporary variable works
|
||||
correctly.
|
|
@ -0,0 +1,10 @@
|
|||
HAI 1.2
|
||||
IM IN YR loop NERFIN YR var
|
||||
VISIBLE var
|
||||
BOTH SAEM var AN -9
|
||||
O RLY?
|
||||
YA RLY
|
||||
GTFO
|
||||
OIC
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,10 @@
|
|||
0
|
||||
-1
|
||||
-2
|
||||
-3
|
||||
-4
|
||||
-5
|
||||
-6
|
||||
-7
|
||||
-8
|
||||
-9
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that a loop with a decremented temporary variable works
|
||||
correctly.
|
|
@ -0,0 +1,5 @@
|
|||
HAI 1.2
|
||||
IM IN YR loop UPPIN YR var TIL BOTH SAEM var AN 10
|
||||
VISIBLE var
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,10 @@
|
|||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
|
@ -0,0 +1 @@
|
|||
This test checks that loops with an "until" ending condition work correctly.
|
|
@ -0,0 +1,5 @@
|
|||
HAI 1.2
|
||||
IM IN YR loop NERFIN YR var WILE DIFFRINT var AN -10
|
||||
VISIBLE var
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,10 @@
|
|||
0
|
||||
-1
|
||||
-2
|
||||
-3
|
||||
-4
|
||||
-5
|
||||
-6
|
||||
-7
|
||||
-8
|
||||
-9
|
|
@ -0,0 +1 @@
|
|||
This test checks that loops with a "while" ending condition work correctly.
|
|
@ -0,0 +1,14 @@
|
|||
HAI 1.2
|
||||
HOW DUZ I plustwoin YR var
|
||||
FOUND YR SUM OF var AN 2
|
||||
IF U SAY SO
|
||||
|
||||
IM IN YR loop plustwoin YR var
|
||||
VISIBLE var
|
||||
BOTH SAEM var AN 10
|
||||
O RLY?
|
||||
YA RLY
|
||||
GTFO
|
||||
OIC
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,6 @@
|
|||
0
|
||||
2
|
||||
4
|
||||
6
|
||||
8
|
||||
10
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that a loop with an arbitrary unary function applied to a
|
||||
temporary variable works correctly.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "a"
|
||||
IM IN YR loop UPPIN YR var TIL BOTH SAEM var AN 10
|
||||
IM OUTTA YR loop
|
||||
VISIBLE "b"
|
||||
KTHXBYE
|
|
@ -0,0 +1,2 @@
|
|||
a
|
||||
b
|
|
@ -0,0 +1 @@
|
|||
This test makes sure an empty loop is allowed.
|
|
@ -0,0 +1,4 @@
|
|||
HAI 1.2
|
||||
IM IN YR loop TIL WIN
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,2 @@
|
|||
This test makes sure that an error occurs if an "until" stopping condition is
|
||||
used without a temporary loop variable.
|
|
@ -0,0 +1,4 @@
|
|||
HAI 1.2
|
||||
IM IN YR loop WILE FAIL
|
||||
IM OUTTA YR loop
|
||||
KTHXBYE
|
|
@ -0,0 +1,2 @@
|
|||
This test makes sure that an error occurs if a "while" stopping condition is
|
||||
used without a temporary loop variable.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem "…
|
||||
"ipsum "…
|
||||
"dolor "…
|
||||
"sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit
|
|
@ -0,0 +1,3 @@
|
|||
This test checks that the Unicode ellipsis character (U+2026) allows line
|
||||
continuation when followed by a newline character (\n). This test is encoded
|
||||
in UTF-8 format with no BOM.
|
|
@ -0,0 +1 @@
|
|||
HAI 1.2
VISIBLE "Lorem "…
"ipsum "…
"dolor "…
"sit"
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit
|
|
@ -0,0 +1,3 @@
|
|||
This test checks that the Unicode ellipsis character (U+2026) allows line
|
||||
continuation when followed by a carriage return character (\r). This test is
|
||||
encoded in UTF-8 format with no BOM.
|
|
@ -0,0 +1,6 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem "…
|
||||
"ipsum "…
|
||||
"dolor "…
|
||||
"sit"
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem ipsum dolor sit
|
|
@ -0,0 +1,3 @@
|
|||
This test checks that the Unicode ellipsis character (U+2026) allows line
|
||||
continuation when followed by a carriage return character (\r) and a newline
|
||||
character (\n). This test is encoded in UTF-8 format with no BOM.
|
|
@ -0,0 +1,3 @@
|
|||
HAI 1.2
|
||||
VISIBLE "ʇıs ɹoʃop ɯnsdı ɯǝɹo⅂"
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
ʇıs ɹoʃop ɯnsdı ɯǝɹo⅂
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that Unicode strings are handled correctly. This test is
|
||||
encoded in UTF-8 format with no BOM.
|
|
@ -0,0 +1,3 @@
|
|||
HAI 1.2
|
||||
VISIBLE "Lorem :(0024) ipsum :(00A2) dolor :(20AC) sit :(024B62)"
|
||||
KTHXBYE
|
|
@ -0,0 +1 @@
|
|||
Lorem $ ipsum ¢ dolor € sit 𤭢
|
|
@ -0,0 +1,2 @@
|
|||
This test checks that when Unicode code points are inserted into strings, they
|
||||
are translated correctly.
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue