Initial project import

This commit is contained in:
Justin J. Meza 2010-08-09 00:01:59 -07:00
commit 2e14ac0b3f
888 changed files with 50478 additions and 0 deletions

1519
Doxyfile Normal file

File diff suppressed because it is too large Load Diff

42
Makefile Normal file
View File

@ -0,0 +1,42 @@
# Build, test, install and documentation rules for lci.
#
# Object files are produced by make's built-in %.o: %.c rule, which passes
# $(CPPFLAGS) (and $(CFLAGS), if set) to $(CC).

TARGET = lci
LIBS = -lm
OBJS = lexer.o tokenizer.o parser.o interpreter.o unicode.o main.o
SRCS = lexer.c tokenizer.c parser.c interpreter.c unicode.c main.c
# Looked up on PATH so it works regardless of where install(1) lives;
# override on the command line if a specific binary is required.
INSTALL = install -c
CPPFLAGS = -O2
prefix = /usr/local
bindir = $(prefix)/bin
testdir = ./test

# These targets are commands, not files: declaring them phony keeps a stray
# file with the same name from silently disabling them.  TAGS is left out on
# purpose: ctags writes a file called "tags", which satisfies the TAGS target
# on case-insensitive file systems.
.PHONY: all pedantic check check-mem install docs clean distclean

all: $(TARGET)

$(TARGET): $(OBJS) $(LIBS)
	$(CC) $(CPPFLAGS) -o $(TARGET) $(OBJS) $(LIBS)

# NOTE(review): the warning flags below are only given to the link step; the
# objects themselves are still compiled by the built-in rule without them.
pedantic: $(OBJS) $(LIBS)
	$(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -W -Wall -ansi -pedantic -g -o $(TARGET) $(OBJS) $(LIBS)

# Runs the test suite in $(testdir) against the freshly built interpreter.
check: all
	@cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.2-Tests/

# Same as check, but passes -m to the test script.
check-mem: all
	@echo "This will take a long time! Be patient!"
	@cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.2-Tests/

install: all
	$(INSTALL) $(TARGET) $(bindir)/$(TARGET)

TAGS: $(SRCS)
	ctags $(SRCS)

docs: Doxyfile $(SRCS)
	doxygen

clean:
	-rm -f $(OBJS)
	-rm -f $(TARGET)

# Also removes the products of the TAGS and docs targets.
distclean: clean
	-rm -f tags
	-rm -rf html

57
README Normal file
View File

@ -0,0 +1,57 @@
lci - a LOLCODE interpreter written in C
LICENSE
Copyright (C) 2010 Justin J. Meza
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
ABOUT
lci is a LOLCODE interpreter written in C and is designed to be correct,
portable, fast, and precisely documented.
* correct: Every effort has been made to test lci's conformance to the
LOLCODE language specification. Unit tests come packaged with the lci
source code.
* portable: lci follows the widely ported ANSI C specification allowing it
to compile on a broad range of systems.
* fast: Much effort has gone into producing simple and efficient code
whenever possible to the extent that the above points are not
compromised.
* precisely documented: lci uses Doxygen to generate literate code
documentation, browsable here.
This project's homepage is at http://icanhaslolcode.org. For help, visit
http://groups.google.com/group/lci-general. To report a bug, go to
http://github.com/justinmeza/lci/issues.
Created and maintained by Justin J. Meza <justin.meza@gmail.com>.
INSTALLATION
To install lci, you should be able to
$ make && make check
and, assuming no errors are encountered (if there are, please let us know at
http://groups.google.com/group/lci-general) go ahead and run (with administrator
privileges)
# make install
Optionally, you may want to make documentation for lci. This requires the
doxygen program. To do so, do
$ make docs

3298
interpreter.c Normal file

File diff suppressed because it is too large Load Diff

202
interpreter.h Normal file
View File

@ -0,0 +1,202 @@
/** Structures and functions for interpreting a parse tree. The interpreter
* traverses a parse tree in a depth-first manner, interpreting each node it
* reaches along the way. This is the last stage of the processing of a source
* code file.
*
* \file interpreter.h
*
* \author Justin J. Meza
*
* \date 2010 */
#ifndef __INTERPRETER_H__
#define __INTERPRETER_H__
#include <stdio.h>
#include <ctype.h>
#include <math.h>
#include "parser.h"
#include "unicode.h"
/** Gets the integer data associated with a ValueObject structure.
 *
 * \a value is a pointer to a ValueObject; it is parenthesized so that any
 * pointer expression (for example \c &obj or \c p+1) may be passed. */
#define getInteger(value) ((value)->data.i)
/** Gets the floating point data associated with a ValueObject structure.
 *
 * \a value is a pointer to a ValueObject; any pointer expression is accepted. */
#define getFloat(value) ((value)->data.f)
/** Gets the string data associated with a ValueObject structure.
 *
 * \a value is a pointer to a ValueObject; any pointer expression is accepted. */
#define getString(value) ((value)->data.s)
/** Denotes the type of a value. This is the tag stored in the \a type member
 * of a ValueObject structure. */
typedef enum {
VT_INTEGER, /**< An integer value. */
VT_FLOAT, /**< A floating point decimal value. */
VT_BOOLEAN, /**< A true/false value. */
VT_STRING, /**< A character string value. */
VT_NIL /**< Represents no value. */
} ValueType;
/** Stores the data associated with a ValueObject structure. Being a union, the
 * members share storage, so only one of them holds meaningful data at a time.
 *
 * \note No member is declared specifically for VT_BOOLEAN or VT_NIL values.
 *       NOTE(review): booleans presumably use \a i -- confirm against
 *       createBooleanValueObject(int) in interpreter.c. */
typedef union {
int i; /**< Integer data. */
float f; /**< Floating point data. */
char *s; /**< Character string data. */
} ValueData;
/** Increments the semaphore of a ValueObject structure.
 *
 * \a value is a pointer to a ValueObject; it is parenthesized so that any
 * pointer expression may be passed. The expression evaluates to the
 * semaphore's value \e before the increment. */
#define V(value) ((value)->semaphore++)
/** Decrements the semaphore of a ValueObject structure.
 *
 * \a value is a pointer to a ValueObject; any pointer expression is accepted.
 * The expression evaluates to the semaphore's value \e before the decrement. */
#define P(value) ((value)->semaphore--)
/** Stores a value: a type tag, the data for that type, and a usage counter.
 *
 * The \a semaphore member is the counter modified by the V() and P() macros.
 *
 * \see copyValueObject(ValueObject *)
 * \see deleteValueObject(ValueObject *) */
typedef struct {
ValueType type; /**< The type of value stored. */
ValueData data; /**< The stored data. */
unsigned short semaphore; /**< A semaphore for value usage. */
} ValueObject;
/** Denotes the type of return encountered. This is the tag stored in the
 * \a type member of a ReturnObject structure. */
typedef enum {
RT_DEFAULT, /**< A block of code returned after evaluating all of its statements. */
RT_BREAK, /**< A block of code within a LoopStmtNode or SwitchStmtNode returned via a break statement. */
RT_RETURN /**< A block of code within a FuncDefStmtNode called by a FuncCallExprNode returned (either with or without a value). */
} ReturnType;
/** Stores a return state. Returns are encountered when
 * - a block of code evaluates all of its statements (RT_DEFAULT),
 * - a block of code within a LoopStmtNode or SwitchStmtNode encounters a break statement (RT_BREAK), or
 * - a block of code within a FuncDefStmtNode called by a FuncCallExprNode encounters a return statement (RT_RETURN).
 *
 * \see createReturnObject(ReturnType, ValueObject *)
 * \see deleteReturnObject(ReturnObject *) */
typedef struct {
ReturnType type; /**< The type of return encountered. */
ValueObject *value; /**< The optional return value. */
} ReturnObject;
/** Stores the variables in a particular scope. Scopes are arranged
 * hierarchically from global (the ancestor of all other scopes) to local (the
 * temporary scope of a BlockNode); each scope links to its enclosing scope
 * through \a parent.
 *
 * \a names and \a values are separate arrays.
 * NOTE(review): they are presumably parallel (entry \e i of \a names naming
 * entry \e i of \a values, both \a numvals long) -- confirm in interpreter.c.
 *
 * \see createScopeObject(ScopeObject *)
 * \see deleteScopeObject(ScopeObject *) */
typedef struct scopeobject {
struct scopeobject *parent; /**< A pointer to the parent ScopeObject. */
ValueObject *impvar; /**< A pointer to the ValueObject representing the implicit variable for this scope. */
unsigned int numvals; /**< The number of ValueObject structures in \a values. */
IdentifierNode **names; /**< A pointer to the IdentifierNode structures naming the values in the scope. */
ValueObject **values; /**< A pointer to an array of ValueObject structures in the scope. */
} ScopeObject;
/* Creation, copying and deletion of strings, values, return states and
 * scopes, followed by lookup/creation/update of named values in a scope.
 * The definitions are not part of this header (presumably interpreter.c);
 * the grouping here is by name and signature only. */
char *createString(char *);
ValueObject *createNilValueObject(void);
ValueObject *createBooleanValueObject(int);
ValueObject *createIntegerValueObject(int);
ValueObject *createFloatValueObject(float);
ValueObject *createStringValueObject(char *);
ValueObject *copyValueObject(ValueObject *);
void deleteValueObject(ValueObject *);
ReturnObject *createReturnObject(ReturnType, ValueObject *);
void deleteReturnObject(ReturnObject *);
ScopeObject *createScopeObject(ScopeObject *);
void deleteScopeObject(ScopeObject *);
/* Scope access: each takes the scope and the identifier naming the value. */
ValueObject *getScopeValue(ScopeObject *, IdentifierNode *);
ValueObject *getLocalScopeValue(ScopeObject *, IdentifierNode *);
ValueObject *createScopeValue(ScopeObject *, IdentifierNode *);
ValueObject *updateScopeValue(ScopeObject *, IdentifierNode *, ValueObject *);
/* String classification helpers used when converting strings.
 * NOTE(review): the exact accepted syntax is defined in interpreter.c. */
unsigned int isNumString(const char *);
unsigned int isHexString(const char *);
/* Casts of a value to each ValueType, in "explicit" and "implicit" variants.
 * Every cast takes the value to convert and a scope, and returns a
 * ValueObject pointer.
 * NOTE(review): how the two variants differ is not visible from this header. */
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *);
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *);
ValueObject *castStringExplicit(ValueObject *, ScopeObject *);
ValueObject *castBooleanImplicit(ValueObject *, ScopeObject *);
ValueObject *castIntegerImplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatImplicit(ValueObject *, ScopeObject *);
ValueObject *castStringImplicit(ValueObject *, ScopeObject *);
/* Generic interpreter entry points. Expression interpreters return a
 * ValueObject pointer; statement and block interpreters return a ReturnObject
 * pointer. Each takes the parse tree node and the scope to interpret it in. */
ValueObject *interpretExprNode(ExprNode *, ScopeObject *);
ReturnObject *interpretStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretStmtNodeList(StmtNodeList *, ScopeObject *);
ReturnObject *interpretBlockNode(BlockNode *, ScopeObject *);
/* Interprets a whole program; main() treats a nonzero result as failure. */
int interpretMainNode(MainNode *);
/* Interpreters for the individual kinds of expression. */
ValueObject *interpretImpVarExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretCastExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretFuncCallExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretIdentifierExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretConstantExprNode(ExprNode *, ScopeObject *);
/* Interpreters for operator expressions; these take the OpExprNode itself,
 * except interpretOpExprNode which takes the enclosing ExprNode. */
ValueObject *interpretNotOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretArithOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretBoolOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretEqualityOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretConcatOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretOpExprNode(ExprNode *, ScopeObject *);
/* Interpreters for the individual kinds of statement. */
ReturnObject *interpretCastStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretPrintStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretInputStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretAssignmentStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretDeclarationStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretIfThenElseStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretSwitchStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretBreakStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretReturnStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretLoopStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretFuncDefStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretExprStmtNode(StmtNode *, ScopeObject *);
/* Binary operations on values. Every function takes two ValueObject pointers
 * and returns a ValueObject pointer. The names appear to follow the pattern
 * op<Operation><FirstOperandType><SecondOperandType>.
 * NOTE(review): confirm operand order and result ownership in interpreter.c. */
/* Arithmetic: Integer with Integer. */
ValueObject *opAddIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opSubIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMultIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opDivIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMaxIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMinIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opModIntegerInteger(ValueObject *, ValueObject *);
/* Arithmetic: Integer with Float. */
ValueObject *opAddIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opSubIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMultIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opDivIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMaxIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMinIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opModIntegerFloat(ValueObject *, ValueObject *);
/* Arithmetic: Float with Integer. */
ValueObject *opAddFloatInteger(ValueObject *, ValueObject *);
ValueObject *opSubFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMultFloatInteger(ValueObject *, ValueObject *);
ValueObject *opDivFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMaxFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMinFloatInteger(ValueObject *, ValueObject *);
ValueObject *opModFloatInteger(ValueObject *, ValueObject *);
/* Arithmetic: Float with Float. */
ValueObject *opAddFloatFloat(ValueObject *, ValueObject *);
ValueObject *opSubFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMultFloatFloat(ValueObject *, ValueObject *);
ValueObject *opDivFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMaxFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMinFloatFloat(ValueObject *, ValueObject *);
ValueObject *opModFloatFloat(ValueObject *, ValueObject *);
/* Equality and inequality, one pair per supported operand type combination. */
ValueObject *opEqBooleanBoolean(ValueObject *, ValueObject *);
ValueObject *opNeqBooleanBoolean(ValueObject *, ValueObject *);
ValueObject *opEqIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opNeqIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opEqIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opNeqIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opEqFloatInteger(ValueObject *, ValueObject *);
ValueObject *opNeqFloatInteger(ValueObject *, ValueObject *);
ValueObject *opEqFloatFloat(ValueObject *, ValueObject *);
ValueObject *opNeqFloatFloat(ValueObject *, ValueObject *);
ValueObject *opEqStringString(ValueObject *, ValueObject *);
ValueObject *opNeqStringString(ValueObject *, ValueObject *);
ValueObject *opEqNilNil(ValueObject *, ValueObject *);
ValueObject *opNeqNilNil(ValueObject *, ValueObject *);
#endif /* __INTERPRETER_H__ */

309
lexer.c Normal file
View File

@ -0,0 +1,309 @@
#include "lexer.h"
/** Creates a Lexeme structure.
 *
 * The characters of \a image are copied into newly allocated memory owned by
 * the returned structure; \a fname is stored by pointer only.
 *
 * \return A pointer to a Lexeme structure with the desired properties.
 *
 * \retval NULL \a image was NULL or malloc was unable to allocate memory.
 *
 * \see deleteLexeme(Lexeme *) */
Lexeme *createLexeme(char *image,       /**< [in] An array of characters that describe the lexeme. */
                     const char *fname, /**< [in] A pointer to the name of the file containing the lexeme. */
                     unsigned int line) /**< [in] The line number from the source file that the lexeme occurred on. */
{
	Lexeme *ret = NULL;
	/* There is nothing to copy (and strlen would crash) without an image */
	if (!image) return NULL;
	ret = malloc(sizeof(Lexeme));
	if (!ret) {
		perror("malloc");
		return NULL;
	}
	ret->image = malloc(sizeof(char) * (strlen(image) + 1));
	if (!(ret->image)) {
		free(ret);
		perror("malloc");
		return NULL;
	}
	strcpy(ret->image, image);
	/** \note \a fname is not copied: a single copy is shared by all Lexeme
	 *        structures that refer to it, so the caller must keep it
	 *        alive for as long as the Lexeme structure is in use. */
	ret->fname = fname;
	ret->line = line;
#ifdef DEBUG
	fprintf(stderr, "Creating lexeme [%s]\n", image);
#endif
	return ret;
}
/** Releases a Lexeme structure together with its copy of the image.
 *
 * Passing NULL is allowed and does nothing.
 *
 * \pre \a lexeme is NULL or points to a Lexeme structure created by
 *      createLexeme(char *, const char *, unsigned int).
 *
 * \post The memory at \a lexeme and its \a image will be freed.
 *
 * \note The \a fname member is left alone: it is shared between many Lexeme
 *       structures and belongs to whoever supplied it.
 *
 * \see createLexeme(char *, const char *, unsigned int) */
void deleteLexeme(Lexeme *lexeme) /**< [in,out] A pointer to the Lexeme structure to delete. */
{
	if (lexeme) {
		free(lexeme->image);
		free(lexeme);
	}
}
/** Allocates an empty LexemeList structure.
 *
 * The new list holds zero lexemes and has no backing array yet.
 *
 * \return A pointer to the newly allocated, empty LexemeList structure.
 *
 * \retval NULL malloc was unable to allocate memory (a message is printed
 *              with perror).
 *
 * \see deleteLexemeList(LexemeList *) */
LexemeList *createLexemeList(void)
{
	LexemeList *list = malloc(sizeof(LexemeList));
	if (list) {
		list->lexemes = NULL;
		list->num = 0;
		return list;
	}
	perror("malloc");
	return NULL;
}
/** Appends a Lexeme structure to the end of a LexemeList structure.
 *
 * The list's array is grown by one slot with realloc; on failure the list
 * is left exactly as it was.
 *
 * \pre \a list was created by createLexemeList(void).
 * \pre \a lexeme was created by createLexeme(char *, const char *, unsigned int).
 *
 * \post \a lexeme is the last element of \a list and the size of \a list
 *       has grown by one.
 *
 * \return A pointer to the added Lexeme structure (the same pointer as
 *         \a lexeme).
 *
 * \retval NULL \a list was NULL or realloc was unable to allocate memory. */
Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList structure to add \a lexeme to. */
                  Lexeme *lexeme)   /**< [in] A pointer to the Lexeme structure to add to \a list. */
{
	Lexeme **grown = NULL;
	if (list == NULL) return NULL;
	grown = realloc(list->lexemes, sizeof(Lexeme *) * (list->num + 1));
	if (grown == NULL) {
		perror("realloc");
		return NULL;
	}
	grown[list->num] = lexeme;
	list->lexemes = grown;
	list->num++;
	return lexeme;
}
/** Releases a LexemeList structure and every Lexeme structure it holds.
 *
 * Passing NULL is allowed and does nothing.
 *
 * \pre \a list is NULL or was created by createLexemeList(void) and contains
 *      only items added by addLexeme(LexemeList *, Lexeme *).
 *
 * \post Each stored lexeme, the array holding them, and \a list itself will
 *       be freed.
 *
 * \see createLexemeList(void) */
void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeList structure to delete. */
{
	unsigned int idx = 0;
	if (list == NULL) return;
	while (idx < list->num) {
		deleteLexeme(list->lexemes[idx]);
		idx++;
	}
	free(list->lexemes);
	free(list);
}
/** Creates a Lexeme structure from \a image and appends it to \a list.
 *
 * \retval 1 The lexeme was created and added.
 * \retval 0 Creation or insertion failed; nothing was added to \a list and
 *           no memory is leaked. \a list itself is left for the caller to
 *           delete. */
static int appendNewLexeme(LexemeList *list,  /**< [in,out] The list to append to. */
                           char *image,       /**< [in] The characters of the new lexeme. */
                           const char *fname, /**< [in] The name of the file being scanned. */
                           unsigned int line) /**< [in] The line the lexeme occurred on. */
{
	Lexeme *lex = createLexeme(image, fname, line);
	if (!lex) return 0;
	if (!addLexeme(list, lex)) {
		deleteLexeme(lex);
		return 0;
	}
	return 1;
}
/** Scans through a character buffer, removing unnecessary characters and
 * generating lexemes. Lexemes are separated by whitespace (but newline
 * characters are kept as their own lexeme). String literals are handled a
 * bit differently: starting at the first quotation character, characters are
 * collected until either an unescaped quotation character is read (that is, a
 * quotation character not preceded by a colon which itself is not preceded
 * by a colon) or a newline or carriage return character is read, whichever
 * comes first. This handles the odd case of strings such as "::" which print
 * out a single colon. Also handled are the effects of commas, ellipses, and
 * bangs (!). The list always ends with an end-of-file lexeme ("$").
 *
 * \pre \a size is the number of characters starting at the memory location
 *      pointed to by \a buffer.
 * \pre \a buffer is terminated by a NUL character at \a buffer[\a size]; the
 *      lookahead comparisons rely on it and scanning stops at the first NUL.
 *
 * \return A pointer to a LexemeList structure.
 *
 * \retval NULL Memory could not be allocated or the buffer is malformed (a
 *              line continuation followed by an empty line, a multiple line
 *              comment sharing a line with code, or a multiple line comment
 *              without a closing TLDR); a message is written to stderr. */
LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to tokenize. */
                       unsigned int size,  /**< [in] The number of characters in \a buffer. */
                       const char *fname)  /**< [in] An array of characters representing the name of the file used to read \a buffer. */
{
	const char *start = buffer;
	LexemeList *list = NULL;
	unsigned int line = 1;
	list = createLexemeList();
	if (!list) return NULL;
	while (start < buffer + size) {
		char *temp = NULL;
		unsigned int len = 1;
		/* Comma (,) is a soft newline */
		if (*start == ',') {
			if (!appendNewLexeme(list, "\n", fname, line)) {
				deleteLexemeList(list);
				return NULL;
			}
			start++;
			continue;
		}
		/* Bang (!) is its own lexeme */
		if (*start == '!') {
			if (!appendNewLexeme(list, "!", fname, line)) {
				deleteLexemeList(list);
				return NULL;
			}
			start++;
			continue;
		}
		/* Skip over leading whitespace. The cast keeps bytes of
		 * multi-byte UTF-8 characters (negative when char is signed)
		 * inside the domain of isspace. */
		while (isspace((unsigned char)*start)) {
			unsigned int newline = 0;
			/* Newline is its own lexeme */
			if (!strncmp(start, "\r\n", 2)) {
				newline = 1;
				start++;
			}
			else if (*start == '\r' || *start == '\n') {
				newline = 1;
			}
			if (newline) {
				if (!appendNewLexeme(list, "\n", fname, line)) {
					deleteLexemeList(list);
					return NULL;
				}
				line++;
			}
			start++;
		}
		/* Skip over ellipses (...) and newline; each alternative
		 * advances start past what it matched */
		if ((!strncmp(start, "\xE2\x80\xA6\r\n", 5) && (start += 5))
				|| (!strncmp(start, "\xE2\x80\xA6\r", 4) && (start += 4))
				|| (!strncmp(start, "\xE2\x80\xA6\n", 4) && (start += 4))
				|| (!strncmp(start, "...\r\n", 5) && (start += 5))
				|| (!strncmp(start, "...\r", 4) && (start += 4))
				|| (!strncmp(start, "...\n", 4) && (start += 4))) {
			const char *test = start;
			/* Make sure next line is not empty */
			while (*test && isspace((unsigned char)*test)) {
				if (*test == '\r' || *test == '\n') {
					fprintf(stderr, "%s:%u: a line with continuation may not be followed by an empty line\n", fname, line);
					deleteLexemeList(list);
					return NULL;
				}
				test++;
			}
			continue;
		}
		/* Skip over comments */
		if ((list->num == 0
				|| *(list->lexemes[list->num - 1]->image) == '\n')
				&& !strncmp(start, "OBTW", 4)) {
			start += 4;
			/* Stop at the terminating NUL so that a missing TLDR
			 * cannot run the scan off the end of the buffer */
			while (*start && strncmp(start, "TLDR", 4)) {
				if (!strncmp(start, "\r\n", 2)) {
					start += 2;
					line++;
				}
				else if (*start == '\r' || *start == '\n') {
					start++;
					line++;
				}
				else
					start++;
			}
			if (!*start) {
				fprintf(stderr, "%s:%u: multiple line comment is missing its closing TLDR\n", fname, line);
				deleteLexemeList(list);
				return NULL;
			}
			start += 4;
			/* Must end in newline */
			while (*start && isspace((unsigned char)*start) && *start != '\r' && *start != '\n')
				start++;
			if (start == buffer || *start == ',' || *start == '\r' || *start == '\n')
				continue;
			fprintf(stderr, "%s:%u: multiple line comment may not appear on the same line as code\n", fname, line);
			deleteLexemeList(list);
			return NULL;
		}
		if (!strncmp(start, "BTW", 3)) {
			start += 3;
			while (*start && *start != '\r' && *start != '\n')
				start++;
			continue;
		}
		/* We have removed or processed any leading characters at this
		 * point */
		if (!*start) break;
		if (*start == '"') {
			/* Find the end of the string, watching for escape
			 * sequences. The NUL test guards both alternatives so
			 * an escape at the very end of the buffer cannot push
			 * the scan past the terminator. */
			while (start[len]
					&& ((*(start + len) != '\r'
					&& *(start + len) != '\n'
					&& *(start + len) != '"')
					|| (*(start + len - 1) == ':'
					&& *(start + len - 2) != '"')))
				len++;
			if (*(start + len) == '"') len++;
		}
		/* Scan for the end of the token */
		while (start[len] && !isspace((unsigned char)start[len])
				&& *(start + len) != ','
				&& *(start + len) != '!'
				&& strncmp(start + len, "...", 3)
				&& strncmp(start + len, "\xE2\x80\xA6", 3))
			len++;
		/* createLexeme needs a NUL-terminated image, so copy the
		 * token out of the buffer first */
		temp = malloc(sizeof(char) * (len + 1));
		if (!temp) {
			perror("malloc");
			deleteLexemeList(list);
			return NULL;
		}
		strncpy(temp, start, len);
		temp[len] = '\0';
		if (!appendNewLexeme(list, temp, fname, line)) {
			free(temp);
			deleteLexemeList(list);
			return NULL;
		}
		free(temp);
		start += len;
	}
	/* Create an end-of-file lexeme */
	if (!appendNewLexeme(list, "$", fname, line)) {
		deleteLexemeList(list);
		return NULL;
	}
	return list;
}

55
lexer.h Normal file
View File

@ -0,0 +1,55 @@
/** Structures and functions for separating a character buffer into lexemes. The
* lexer reads through a buffer of characters (themselves typically read from
* standard input), strips whitespace, and breaks them up into logical atoms of
* character strings which, in turn, may be passed on to later processes (such
* as a tokenizer).
*
* \file lexer.h
*
* \author Justin J. Meza
*
* \date 2010 */
#ifndef __LEXER_H__
#define __LEXER_H__
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Force debug tracing off: lexer.c only prints its "Creating lexeme" trace
 * when DEBUG is defined, and this line removes any earlier definition
 * (including one given on the compiler command line). */
#undef DEBUG
/** Stores a lexeme. A lexeme is the smallest unit of contiguous characters,
 * namely, it has been stripped of surrounding whitespace.
 *
 * \note This structure carries no links of its own; groups of lexemes are
 *       held as an array of pointers inside a LexemeList structure.
 *
 * \note \a image is a private copy owned by the lexeme, whereas \a fname is
 *       shared with other lexemes and is never freed by deleteLexeme.
 *
 * \see createLexeme(char *, const char *, unsigned int)
 * \see deleteLexeme(Lexeme *) */
typedef struct {
char *image; /**< An array of characters that describe the lexeme. */
const char *fname; /**< A pointer to the name of the file containing the lexeme. */
unsigned int line; /**< The line number from the source file that the lexeme occurred on. */
} Lexeme;
/** Stores a list of lexemes. This structure allows sets of lexemes to be
 * grouped together. A freshly created list has \a num equal to 0 and
 * \a lexemes equal to NULL; the array grows by one slot per added lexeme.
 *
 * \see createLexemeList(void)
 * \see addLexeme(LexemeList *, Lexeme *)
 * \see deleteLexemeList(LexemeList *) */
typedef struct {
unsigned int num; /**< The number of Lexeme structures stored. */
Lexeme **lexemes; /**< A pointer to the array of Lexeme structures. */
} LexemeList;
/* Single lexemes: (image, file name, line number) -> new Lexeme, or NULL. */
Lexeme *createLexeme(char *, const char *, unsigned int);
void deleteLexeme(Lexeme *);
/* Lists of lexemes; addLexeme returns the lexeme it added, or NULL. */
LexemeList *createLexemeList(void);
Lexeme *addLexeme(LexemeList *, Lexeme*);
void deleteLexemeList(LexemeList *);
/* The lexer proper: (buffer, number of characters, file name) -> list of
 * lexemes ending with an end-of-file lexeme, or NULL on error. */
LexemeList *scanBuffer(const char *, unsigned int, const char *);
#endif /* __LEXER_H__ */

207
main.c Normal file
View File

@ -0,0 +1,207 @@
/** \mainpage lci Documentation
*
* \section license License
*
* lci - a LOLCODE interpreter written in C.
* Copyright (C) 2010 Justin J. Meza
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* \section maintainer Maintainer
*
* The lead maintainer for this project is Justin J. Meza (justin.meza@gmail.com).
* For more information, check this project's webpage at http://icanhaslolcode.org .
*
* \section about About
*
* lci is a LOLCODE interpreter written in C and is designed to be correct,
* portable, fast, and precisely documented.
*
* - correct: Every effort has been made to test lci's conformance to the
* LOLCODE language specification. Unit tests come packaged with
* the lci source code.
* - portable: lci follows the widely ported ANSI C specification allowing it
* to compile on a broad range of systems.
* - fast: Much effort has gone into producing simple and efficient code
* whenever possible to the extent that the above points are not
* compromised.
* - precisely documented: lci uses Doxygen to generate literate code
* documentation, browsable here.
*
* \section organization Organization
*
* lci employs several different modules which each perform a specific task
* during interpretation of code:
*
* - \b lexer (lexer.c, lexer.h)- The lexer takes an array of characters and
* splits it up into individual \e lexemes. Lexemes are divided by
* whitespace and other rules of the language.
* - \b tokenizer (tokenizer.c, tokenizer.h) - The tokenizer takes the
* output of the lexer and converts it into individual \e tokens. Tokens
* are different from lexemes in that a single token may be made up of
* multiple lexemes. Also, the contents of some tokens are evaluated (such
* as integers and floats) for later use.
* - \b parser (parser.c, parser.h) - The parser takes the output of the
* tokenizer and analyzes it semantically to turn it into a parse tree.
* - \b interpreter (interpreter.c, interpreter.h) - The interpreter takes
* the output of the parser and executes it.
*
* Each of these modules is contained within its own C header and source code
* files of the same name.
*
* To handle the conversion of Unicode code points and normative names to
* bytes, two additional files, unicode.c and unicode.h are used.
*
* Finally, main.c ties all of these modules together and handles the initial
* loading of input data for the lexer. */
/** \page varscope Variable Scope
*
* The specification states that variables are local to the scope of the main
* block or any function they are contained within--except for temporary loop
* variables which are local to the loop they are instantiated within. This
* behavior, combined with the fact that variables must be declared before
* being used, means that variables may not be shadowed in different control
* scopes (such as loops and conditional statements) and, more importantly,
* programmers must keep track of whether variables have been previously
* declared within conditionally executed code (for example, under this
* scoping if a variable is declared in a conditional block it cannot be
* safely used in later code).
*
* One advantage of a flat scoping scheme is that nearly everything can be
* stored in a single structure, making lookups faster. However, I believe
* that this advantage is not worth the extra frustration transferred to the
* programmer and so scoping in lci is done in a similar manner to other
* programming languages, to wit, within
* - the main block of code,
* - the body of functions,
* - the body of loop statements, and
* - the bodies of conditional statements.
*
* This should alleviate any confusion which may have been caused by using a
* completely local free-for-all scope. Also, there seems to be a general
* consensus on the LOLCODE forums that this is the way to go and flat scoping
* causes too many problems for the programmer. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "lexer.h"
#include "tokenizer.h"
#include "parser.h"
#include "interpreter.h"
/** The number of bytes main() asks for in each read of the input file, which
 * is also the step by which its input buffer grows. */
#define READSIZE 512
/** Finds the first command line argument that is not an option.
 *
 * An argument counts as an option when it begins with '-' and has at least
 * one more character; a lone "-" is therefore returned like a file name.
 * When the character following the '-' appears in \a vals, that option is
 * taken to carry a value and the argument after it is skipped as well.
 *
 * \return A pointer to the first non-option element of \a argv.
 *
 * \retval NULL Every argument was an option or an option's value. */
char *getFileArg(int argc,    /**< [in] The number of elements in \a argv. */
                 char **argv, /**< [in] The arguments; \a argv[0] is never examined. */
                 char *vals)  /**< [in] The option characters that are followed by a value. */
{
	int idx = 1;
	while (idx < argc) {
		char *arg = argv[idx];
		char *flag = NULL;
		if (arg[0] != '-' || arg[1] == '\0')
			return arg;
		/* An option: also step over its value if it takes one */
		for (flag = vals; *flag != '\0'; flag++) {
			if (*flag == arg[1]) {
				idx++;
				break;
			}
		}
		idx++;
	}
	return NULL;
}
/** Reads a LOLCODE program and runs it through the lexer, tokenizer, parser
 * and interpreter, in that order.
 *
 * The program is read from the first non-option argument, or from standard
 * input when there is none or when it begins with '-'. Before lexing, a
 * leading hash bang line is blanked out and a leading UTF-8 byte order mark is
 * blanked out and echoed to standard output.
 *
 * \retval 0 The program was interpreted successfully.
 * \retval 1 The input could not be opened or read, memory ran out, or one of
 *           the pipeline stages reported failure. */
int main(int argc, char **argv)
{
	long length = 0;
	size_t got = 0;
	char *buffer = NULL;
	LexemeList *lexemes = NULL;
	Token **tokens = NULL;
	FunctionTable *functab = NULL;
	MainNode *node = NULL;
	char *fname = NULL;
	FILE *file = NULL;
	fname = getFileArg(argc, argv, "");
	if (fname == NULL || fname[0] == '-') {
		fname = "stdin";
		file = stdin;
	}
	else {
		file = fopen(fname, "r");
	}
	if (!file) {
		fprintf(stderr, "File does not exist.\n");
		return 1;
	}
	/* Read the whole input, READSIZE bytes at a time. A short read means
	 * end of file or an error, so the loop ends either way (testing
	 * feof alone would spin forever on a read error). */
	do {
		/* One extra byte is reserved for the terminating NUL */
		char *grown = realloc(buffer, sizeof(char) * ((size_t)length + READSIZE + 1));
		if (!grown) {
			perror("realloc");
			free(buffer);
			fclose(file);
			return 1;
		}
		buffer = grown;
		got = fread(buffer + length, 1, READSIZE, file);
		length += (long)got;
	} while (got == (size_t)READSIZE);
	if (ferror(file)) {
		perror("fread");
		free(buffer);
		fclose(file);
		return 1;
	}
	fclose(file);
	buffer[length] = '\0';
	/* Remove hash bang line if run as a standalone script; stop at the
	 * terminator in case the line is the whole file */
	if (buffer[0] == '#' && buffer[1] == '!') {
		unsigned int n;
		for (n = 0; buffer[n] && buffer[n] != '\n' && buffer[n] != '\r'; n++)
			buffer[n] = ' ';
	}
	/* Remove UTF-8 BOM if present and add it to the output stream (we
	 * assume here that if a BOM is present, the system will also expect
	 * the output to include a BOM). All three bytes must match; the
	 * terminating NUL stops the comparison early on shorter input. */
	if (buffer[0] == (char)0xef
			&& buffer[1] == (char)0xbb
			&& buffer[2] == (char)0xbf) {
		buffer[0] = ' ';
		buffer[1] = ' ';
		buffer[2] = ' ';
		printf("%c%c%c", 0xef, 0xbb, 0xbf);
	}
	/* Begin main pipeline */
	if (!(lexemes = scanBuffer(buffer, length, fname))) {
		free(buffer);
		return 1;
	}
	free(buffer);
	if (!(tokens = tokenizeLexemes(lexemes))) {
		deleteLexemeList(lexemes);
		return 1;
	}
	deleteLexemeList(lexemes);
	if (!(functab = setupFunctionTable(tokens))) {
		/* There is no table to delete here; the tokens are what
		 * still needs releasing */
		deleteTokens(tokens);
		return 1;
	}
	/* NOTE(review): functab is only deleted here when parsing fails;
	 * presumably a successfully parsed MainNode takes ownership of it --
	 * confirm against parseMainNode and deleteMainNode in parser.c. */
	if (!(node = parseMainNode(tokens, functab))) {
		deleteFunctionTable(functab);
		deleteTokens(tokens);
		return 1;
	}
	deleteTokens(tokens);
	if (interpretMainNode(node)) {
		deleteMainNode(node);
		return 1;
	}
	deleteMainNode(node);
	/* End main pipeline */
	return 0;
}

3075
parser.c Normal file

File diff suppressed because it is too large Load Diff

618
parser.h Normal file
View File

@ -0,0 +1,618 @@
/** Structures and functions for parsing tokens into a parse tree. The parser
* reads through a series of tokens (generated by the tokenizer) and adds
* semantic meaning to them by forming them into a parse tree which can, in
* turn, be passed on to later processes (such as an interpreter).
*
* \file parser.h
*
* \author Justin J. Meza
*
* \date 2010 */
/** \page impvar The Implicit Variable
*
* The implicit variable in LOLCODE is denoted by the keyword \c IT and stores
* a copy of the result of the most recently evaluated expression statement,
* that is, an expression all by itself on a line. (See
* http://lolcode.com/specs/1.2#conditionals for an example.) */
/** \page lolebnf The LOLCODE EBNF
* Presented below is the EBNF (see http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form)
* for LOLCODE that \c lci parses. Note that by this stage, the scanner has:
* - already removed any whitespace between tokens,
* - added in and truncated newline tokens at logical line breaks, and
* - added an end-of-file (\c $) token.
*
* \section progebnf Program Structure
*
* These production rules dictate the overall form of the program.
*
* \par
* MainNode ::= \c TT_HAI \a version \c TT_NEWLINE BlockNode \c $
*
* \par
* BlockNode ::= StmtNode *
*
* \section structebnf Parse Structures
*
* These production rules specify some general types of parse structures.
*
* \par
* ConstantNode ::= Boolean | Integer | Float | String
*
* \par
* IdentifierNode ::= Identifier
*
* \par
* TypeNode ::= \c TT_NOOB | \c TT_TROOF | \c TT_NUMBR | \c TT_NUMBAR | \c TT_YARN
*
* \section stmtebnf Statements
*
* These production rules specify the types of statements formed.
*
* \par
* StmtNode ::= CastStmtNode | PrintStmtNode | InputStmtNode | AssignmentStmtNode | DeclarationStmtNode | IfThenElseStmtNode | SwitchStmtNode | BreakStmt | ReturnStmtNode | LoopStmtNode | FuncDefStmtNode | ExprStmt
*
* \par
* CastStmtNode ::= IdentifierNode \c TT_ISNOWA TypeNode \c TT_NEWLINE
*
* \par
* PrintStmtNode ::= \c TT_VISIBLE ExprNodeList [ \c TT_BANG ] \c TT_NEWLINE
*
* \par
* InputStmtNode ::= \c TT_GIMMEH IdentifierNode \c TT_NEWLINE
*
* \par
* AssignmentStmtNode ::= IdentifierNode \c TT_R ExprNode \c TT_NEWLINE
*
* \par
* DeclarationStmtNode ::= IdentifierNode \c TT_HASA IdentifierNode [ Initialization ] \c TT_NEWLINE
*
* \par
* Initialization ::= \c TT_ITZ ExprNode
*
* \par
* IfThenElseStmtNode ::= \c TT_ORLY \c TT_NEWLINE \c TT_YARLY \c TT_NEWLINE BlockNode ElseIf * [ Else ] \c TT_OIC \c TT_NEWLINE
*
* \par
* ElseIf ::= \c TT_MEBBE ExprNode \c TT_NEWLINE BlockNode
*
* \par
* Else ::= \c TT_NOWAI \c TT_NEWLINE BlockNode
*
* \par
* SwitchStmtNode ::= \c TT_WTF \c TT_NEWLINE Case + [ DefaultCase ] \c TT_OIC \c TT_NEWLINE
*
* \par
* Case ::= \c TT_OMG ExprNode \c TT_NEWLINE BlockNode
*
* \par
* DefaultCase ::= \c TT_OMGWTF \c TT_NEWLINE BlockNode
*
* \par
* BreakStmt ::= \c TT_GTFO \c TT_NEWLINE
*
* \par
* ReturnStmtNode ::= \c TT_FOUNDYR ExprNode \c TT_NEWLINE
*
* \par
* LoopStmtNode ::= \c TT_IMINYR IdentifierNode [ LoopUpdate ] [ LoopGuard ] \c TT_NEWLINE BlockNode \c TT_IMOUTTAYR IdentifierNode \c TT_NEWLINE
*
* \par
* LoopUpdate ::= LoopUpdateOp \c TT_YR IdentifierNode
*
* \par
* LoopUpdateOp ::= \c TT_UPPIN | \c TT_NERFIN | UnaryFunction
*
* \par
* UnaryFunction ::= The name of a previously defined unary function.
*
* \par
* LoopGuard ::= \c TT_TIL ExprNode | \c TT_WILE ExprNode
*
* \par
* FuncDefStmtNode ::= \c TT_HOWDUZ IdentifierNode IdentifierNode [ FunctionDefArgs ] \c TT_NEWLINE BlockNode \c TT_IFUSAYSO \c TT_NEWLINE
*
* \par
* FunctionDefArgs ::= \c TT_YR IdentifierNode FunctionDefArg *
*
* \par
* FunctionDefArg ::= \c TT_ANYR IdentifierNode
*
* \par
* ExprStmt ::= ExprNode \c TT_NEWLINE
*
* \section exprebnf Expressions
*
* These production rules specify the types of expressions formed.
*
* \par
* ExprNode ::= CastExprNode | ConstantNode | IdentifierNode | FuncCallExprNode | OpExprNode | ImplicitVariable
*
* \par
* CastExprNode ::= \c TT_MAEK ExprNode \c TT_A TypeNode
*
* \par
* FuncCallExprNode ::= IdentifierNode
*
* \par
* OpExprNode ::= UnaryOp | BinaryOp | NaryOp
*
* \par
* UnaryOp ::= UnaryOpType ExprNode
*
* \par
* UnaryOpType ::= \c TT_NOT
*
* \par
* BinaryOp ::= BinaryOpType ExprNode [ \c TT_AN ] ExprNode
*
* \par
* BinaryOpType ::= \c TT_SUMOF | \c TT_DIFFOF | \c TT_PRODUKTOF | \c TT_QUOSHUNTOF | \c TT_MODOF | \c TT_BIGGROF | \c TT_SMALLROF | \c TT_BOTHOF | \c TT_EITHEROF | \c TT_WONOF
*
* \par
* NaryOp ::= NaryOpType NaryOpArgs \c TT_MKAY
*
* \par
* NaryOpType ::= \c TT_ALLOF | \c TT_ANYOF
*
* \par
* NaryOpArgs ::= ExprNode NaryOpArg +
*
* \par
* NaryOpArg ::= [ \c TT_AN ] ExprNode
*
* \par
* ImplicitVariable ::= \c TT_IT */
#ifndef __PARSER_H__
#define __PARSER_H__
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "tokenizer.h"
#undef DEBUG
/** Stores an identifier. An identifier is the string of characters that are
* used to uniquely name a particular variable.
*
* \see createIdentifierNode(char *, const char *, unsigned int)
* \see deleteIdentifierNode(IdentifierNode *) */
typedef struct {
char *image; /**< An array of characters that name the identifier. */
const char *fname; /**< A pointer to the name of the file containing the identifier. */
unsigned int line; /**< The line number from the source file that the identifier occurred on. */
} IdentifierNode;
/** Stores a list of identifiers. This structure allows sets of identifiers
* to be grouped together.
*
* \see createIdentifierNodeList(void)
* \see addIdentifierNode(IdentifierNodeList *, IdentifierNode *)
* \see deleteIdentifierNodeList(IdentifierNodeList *) */
typedef struct {
unsigned int num; /**< The number of IdentifierNode structures stored. */
IdentifierNode **ids; /**< A pointer to the array of IdentifierNode structures. */
} IdentifierNodeList;
/** Denotes the type of statement a StmtNode stores. */
typedef enum {
ST_CAST, /**< A CastStmtNode structure. */
ST_PRINT, /**< A PrintStmtNode structure. */
ST_INPUT, /**< An InputStmtNode structure. */
ST_ASSIGNMENT, /**< An AssignmentStmtNode structure. */
ST_DECLARATION, /**< A DeclarationStmtNode structure. */
ST_IFTHENELSE, /**< An IfThenElseStmtNode structure. */
ST_SWITCH, /**< A SwitchStmtNode structure. */
ST_BREAK, /**< A break statement (no structure is needed for this type of statement). */
ST_RETURN, /**< A ReturnStmtNode structure. */
ST_LOOP, /**< A LoopStmtNode structure. */
ST_FUNCDEF, /**< A FuncDefStmtNode structure. */
ST_EXPR /**< An ExprNode structure. */
} StmtType;
/** Stores a statement. A statement is a unit of code which can be executed by
* itself and may possibly cause side-effects to occur.
*
* \see createStmtNode(StmtType, void *)
* \see deleteStmtNode(StmtNode *) */
typedef struct {
StmtType type; /**< The type of statement stored in \a stmt. */
void *stmt; /**< A pointer to the particular statement structure. */
} StmtNode;
/** Stores a list of statements. This structure allows sets of statements to be
* grouped together.
*
* \see createStmtNodeList(void)
* \see addStmtNode(StmtNodeList *, StmtNode *)
* \see deleteStmtNodeList(StmtNodeList *) */
typedef struct {
unsigned int num; /**< The number of StmtNode structures stored. */
StmtNode **stmts; /**< A pointer to the array of StmtNode structures. */
} StmtNodeList;
/** Denotes the type of expression an ExprNode stores. */
typedef enum {
ET_CAST, /**< A CastExprNode structure. */
ET_CONSTANT, /**< A ConstantNode structure. */
ET_IDENTIFIER, /**< An IdentifierNode structure. */
ET_FUNCCALL, /**< A FuncCallExprNode structure. */
ET_OP, /**< An OpExprNode structure. */
ET_IMPVAR /**< An \ref impvar "implicit variable" (no structure is needed for this type of expression). */
} ExprType;
/** Stores an expression. An expression is a unit of code which evaluates to
* some value and typically does not cause side-effects to occur.
*
* \see createExprNode(ExprType, void *)
* \see deleteExprNode(ExprNode *) */
typedef struct {
ExprType type; /**< The type of expression stored in \a expr. */
void *expr; /**< A pointer to the particular expression structure. */
} ExprNode;
/** Stores a list of expressions. This structure allows sets of expressions to
* be grouped together.
*
* \see createExprNodeList(void)
* \see addExprNode(ExprNodeList *, ExprNode *)
* \see deleteExprNodeList(ExprNodeList *) */
typedef struct {
unsigned int num; /**< The number of ExprNode structures stored. */
ExprNode **exprs; /**< A pointer to an array of ExprNode structures. */
} ExprNodeList;
/** Stores a block of code. A block of code consists of a set of statements.
*
* \see createBlockNode(StmtNodeList *)
* \see deleteBlockNode(BlockNode *) */
typedef struct {
StmtNodeList *stmts; /**< A pointer to the list of statements which comprise the block of code. */
} BlockNode;
/** Stores a list of blocks of code. This structure allows sets of blocks of
* code to be grouped together.
*
* \see createBlockNodeList(void)
* \see addBlockNode(BlockNodeList *, BlockNode *)
* \see deleteBlockNodeList(BlockNodeList *) */
typedef struct {
unsigned int num; /**< The number of BlockNode structures stored. */
BlockNode **blocks; /**< A pointer to an array of BlockNode structures. */
} BlockNodeList;
/** Denotes the type of a constant. */
typedef enum {
CT_INTEGER, /**< An integer value. */
CT_FLOAT, /**< A floating point decimal value. */
CT_BOOLEAN, /**< A true/false value. */
CT_STRING, /**< A character string value. */
CT_NIL /**< Represents no value. */
} ConstantType;
/** Stores the data associated with a ConstantNode structure. */
typedef union {
int i; /**< Integer data. */
float f; /**< Floating point data. */
char *s; /**< Character string data. */
} ConstantData;
/** Stores a constant value. A constant value evaluates to its contents,
* depending on its \a type.
*
* \see createBooleanConstantNode(int)
* \see createIntegerConstantNode(int)
* \see createFloatConstantNode(float)
* \see createStringConstantNode(char *)
* \see deleteConstantNode(ConstantNode *) */
typedef struct {
ConstantType type; /**< The type of the constant. */
ConstantData data; /**< The stored data of type \a type. */
} ConstantNode;
/** Stores a function definition statement. A function definition statement
* defines the prototype and contents of a function.
*
* \see createFuncDefStmtNode(IdentifierNode *, IdentifierNode *, IdentifierNodeList *, BlockNode *)
* \see deleteFuncDefStmtNode(FuncDefStmtNode *) */
typedef struct {
IdentifierNode *scope; /**< A pointer to the scope to define the function in. */
IdentifierNode *name; /**< A pointer to the name of the function. */
IdentifierNodeList *args; /**< A pointer to a list of the names of the arguments of the function. */
BlockNode *body; /**< A pointer to the block of code defined by the function. */
} FuncDefStmtNode;
/** Stores the contents of the function table. The function table contains the
* definitions of all declared functions. It is used for making sure function
* calls provide a valid arity; typechecking, however, is performed at
* runtime. */
typedef struct {
unsigned int num; /**< The number of declared functions. */
FuncDefStmtNode **funcs; /**< A pointer to an array of declared functions. */
} FunctionTable;
/** Stores the main block of code a program executes. This structure could be
* accomplished using only a BlockNode instead, but its logical importance to
* program control flow (namely, it is the first portion of code executed)
* merits its own structure.
*
* \see createMainNode(BlockNode *, FunctionTable *)
* \see deleteMainNode(MainNode *) */
typedef struct {
BlockNode *block; /**< A pointer to the block of code to execute first. */
FunctionTable *functab; /**< A pointer to the function table associated with this block of code. */
} MainNode;
/** Stores a variable type.
*
* \see createTypeNode(ConstantType)
* \see deleteTypeNode(TypeNode *) */
typedef struct {
ConstantType type; /**< The type of the variable. */
} TypeNode;
/** Stores a cast statement. A cast statement changes the type of a variable
* identified by \a target to the type given by \a newtype.
*
* \see createCastStmtNode(IdentifierNode *, TypeNode *)
* \see deleteCastStmtNode(CastStmtNode *) */
typedef struct {
IdentifierNode *target; /**< A pointer to the name of the variable whose type is to be changed to \a newtype. */
TypeNode *newtype; /**< A pointer to the type to change \a target to. */
} CastStmtNode;
/** Stores a print statement. A print statement prints its arguments to some
* output device (by default standard output).
*
* \see createPrintStmtNode(ExprNodeList *, int)
* \see deletePrintStmtNode(PrintStmtNode *) */
typedef struct {
ExprNodeList *args; /**< A pointer to the list of expressions to evaluate and print. */
int nonl; /**< Denotes an ending newline should be suppressed if not \c 0 and printed if \c 0. */
} PrintStmtNode;
/** Stores an input statement. An input statement accepts a line of input from
* the user on an input device (by default standard input) and stores it in a
* variable.
*
* \see createInputStmtNode(IdentifierNode *)
* \see deleteInputStmtNode(InputStmtNode *) */
typedef struct {
IdentifierNode *target; /**< A pointer to the name of the variable to store the input in. */
} InputStmtNode;
/** Stores an assignment statement. An assignment statement updates the value
* of a variable, \a target, to the result of an expression, \a expr.
*
* \see createAssignmentStmtNode(IdentifierNode *, ExprNode *)
* \see deleteAssignmentStmtNode(AssignmentStmtNode *) */
typedef struct {
IdentifierNode *target; /**< A pointer to the name of the variable to store the evaluated contents of \a expr into. */
ExprNode *expr; /**< A pointer to the expression to evaluate and store in \a target. */
} AssignmentStmtNode;
/** Stores a declaration statement. A declaration statement creates a new
* variable named by \a target, optionally initializing it to the evaluated
* contents of \a expr. \a scope determines which level of scope the variable
* is to be created in.
*
* \see createDeclarationStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *)
* \see deleteDeclarationStmtNode(DeclarationStmtNode *) */
typedef struct {
IdentifierNode *scope; /**< A pointer to the scope to create the variable in. */
IdentifierNode *target; /**< A pointer to the name of the variable to create. */
ExprNode *expr; /**< An optional pointer to expression to initialize \a target to. */
} DeclarationStmtNode;
/** Stores an if/then/else statement. A conditional statement checks the value
* of the \ref impvar "implicit variable" and executes \a yes if it casts to \c
* true. If the value of the \ref impvar "implicit variable" casts to \c
* false, each of the optional expressions in \a guards is evaluated and if it
* casts to true, the corresponding block in \a blocks is executed. If the
* value of the \ref impvar "implicit variable" casts to false \b and all of
* the guards cast to false, the contents of \a no is executed.
*
* \see createIfThenElseStmtNode(BlockNode *, BlockNode *, ExprNodeList *, BlockNodeList *)
* \see deleteIfThenElseStmtNode(IfThenElseStmtNode *) */
typedef struct {
BlockNode *yes; /**< A pointer to the block of code to execute if the \ref impvar "implicit variable" casts to true. */
BlockNode *no; /**< A pointer to the block of code to execute if the \ref impvar "implicit variable" casts to false \b and the evaluations of all of the \a guards cast to false. */
ExprNodeList *guards; /**< A pointer to the expressions to test if the \ref impvar "implicit variable" casts to false. */
BlockNodeList *blocks; /**< A pointer to the respective blocks of code to execute if one of the evaluated \a guards casts to true. */
} IfThenElseStmtNode;
/** Stores a switch statement. A switch statement compares the value of the
* \ref impvar "implicit variable" to each of the \a guards and executes the
* respective block of code in \a blocks if they match. If no matches are
* found between the \ref impvar "implicit variable" and one of the \a guards,
* the optional default block of code, \a def, is executed.
*
* \see createSwitchStmtNode(ExprNodeList *, BlockNodeList *, BlockNode *)
* \see deleteSwitchStmtNode(SwitchStmtNode *) */
typedef struct {
ExprNodeList *guards; /**< A pointer to the expressions to evaluate and compare to the \ref impvar "implicit variable". */
BlockNodeList *blocks; /**< A pointer to the respective blocks of code to execute if one of the \a guards matches the \ref impvar "implicit variable". */
BlockNode *def; /**< A pointer to the default block of code to execute if none of the \a guards match the \ref impvar "implicit variable". */
} SwitchStmtNode;
/** Stores a return statement. A return statement signals that the current
* function is to be returned from with value \a value.
*
* \see createReturnStmtNode(ExprNode *)
* \see deleteReturnStmtNode(ReturnStmtNode *) */
typedef struct {
ExprNode *value; /**< A pointer to the value to return. */
} ReturnStmtNode;
/** Stores a loop statement. A loop statement repeatedly executes its \a body
* while \a guard evaluates to true, executing \a update at the end of each
* cycle.
*
* \see createLoopStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *, ExprNode *, BlockNode *)
* \see deleteLoopStmtNode(LoopStmtNode *) */
typedef struct {
IdentifierNode *name; /**< A pointer to the name of the loop. */
IdentifierNode *var; /**< A pointer to the name of the variable to be updated by \a update. */
ExprNode *guard; /**< A pointer to the expression to determine if the loop will continue. */
ExprNode *update; /**< A pointer to the expression to evaluate to update \a var. */
BlockNode *body; /**< A pointer to the block of code to be executed with each iteration of the loop. */
} LoopStmtNode;
/** Stores a cast expression. A cast expression evaluates an expression and
* casts it to a particular type.
*
* \see createCastExprNode(ExprNode *, TypeNode *)
* \see deleteCastExprNode(CastExprNode *) */
typedef struct {
ExprNode *target; /**< A pointer to the expression to cast. */
TypeNode *newtype; /**< A pointer to the type to cast the copy of \a target to. */
} CastExprNode;
/** Stores a function call expression. A function call expression evaluates to
* the return value of the function defined in \a def called with the arguments
* listed in \a args.
*
* \note \a args is stored as an ExprNodeList even though its arity is known in
* advance (because a FuncDefStmtNode for it has presumably been created), so
* the main benefit of an ExprNodeList (easy syntax for adding new elements)
* is less relevant here.
*
* \see createFuncCallExprNode(FuncDefStmtNode *, ExprNodeList *)
* \see deleteFuncCallExprNode(FuncCallExprNode *) */
typedef struct {
FuncDefStmtNode *def; /**< A pointer to the function definition to call. */
ExprNodeList *args; /**< A pointer to a list of ExprNode structure arguments to be supplied to the function defined by \a def. */
} FuncCallExprNode;
/** Denotes the type of operation an OpExprNode performs. */
typedef enum {
OP_ADD, /**< Addition. */
OP_SUB, /**< Subtraction. */
OP_MULT, /**< Multiplication. */
OP_DIV, /**< Division. */
OP_MOD, /**< Modulo. */
OP_MAX, /**< Maximum. */
OP_MIN, /**< Minimum. */
OP_AND, /**< Logical AND. */
OP_OR, /**< Logical OR. */
OP_XOR, /**< Logical XOR. */
OP_NOT, /**< Logical NOT. */
OP_EQ, /**< Equality. */
OP_NEQ, /**< Inequality. */
OP_CAT /**< String concatenation. */
} OpType;
/** Stores an operation expression. An operation expression evaluates to
* the result of the operation performed on its arguments.
*
* \see createOpExprNode(OpType, ExprNodeList *)
* \see deleteOpExprNode(OpExprNode *) */
typedef struct {
OpType type; /**< The type of operation to perform on \a args. */
ExprNodeList *args; /**< A pointer to the arguments to perform the operation on. */
} OpExprNode;
/* Creation and deletion routines for the program and block structures. */
MainNode *createMainNode(BlockNode *, FunctionTable *);
void deleteMainNode(MainNode *);
BlockNode *createBlockNode(StmtNodeList *);
void deleteBlockNode(BlockNode *);
BlockNodeList *createBlockNodeList(void);
BlockNode *addBlockNode(BlockNodeList *, BlockNode *);
void deleteBlockNodeList(BlockNodeList *);
/* Creation and deletion routines for identifiers and types. The second and
 * third arguments of createIdentifierNode presumably supply the fname and
 * line members of IdentifierNode -- confirm against parser.c. */
IdentifierNode *createIdentifierNode(char *, const char *, unsigned int);
void deleteIdentifierNode(IdentifierNode *);
TypeNode *createTypeNode(ConstantType);
void deleteTypeNode(TypeNode *);
/* Creation and deletion routines for generic statements and the list that
 * groups them. */
StmtNode *createStmtNode(StmtType, void *);
void deleteStmtNode(StmtNode *);
StmtNodeList *createStmtNodeList(void);
StmtNode *addStmtNode(StmtNodeList *, StmtNode *);
void deleteStmtNodeList(StmtNodeList *);
/* Creation and deletion routines for each particular statement structure. */
CastStmtNode *createCastStmtNode(IdentifierNode *, TypeNode *);
void deleteCastStmtNode(CastStmtNode *);
PrintStmtNode *createPrintStmtNode(ExprNodeList *, int);
void deletePrintStmtNode(PrintStmtNode *);
InputStmtNode *createInputStmtNode(IdentifierNode *);
void deleteInputStmtNode(InputStmtNode *);
AssignmentStmtNode *createAssignmentStmtNode(IdentifierNode *, ExprNode *);
void deleteAssignmentStmtNode(AssignmentStmtNode *);
DeclarationStmtNode *createDeclarationStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *);
void deleteDeclarationStmtNode(DeclarationStmtNode *);
IfThenElseStmtNode *createIfThenElseStmtNode(BlockNode *, BlockNode *, ExprNodeList *, BlockNodeList *);
void deleteIfThenElseStmtNode(IfThenElseStmtNode *);
SwitchStmtNode *createSwitchStmtNode(ExprNodeList *, BlockNodeList *, BlockNode *);
void deleteSwitchStmtNode(SwitchStmtNode *);
ReturnStmtNode *createReturnStmtNode(ExprNode *);
void deleteReturnStmtNode(ReturnStmtNode *);
LoopStmtNode *createLoopStmtNode(IdentifierNode *, IdentifierNode *, ExprNode *, ExprNode *, BlockNode *);
void deleteLoopStmtNode(LoopStmtNode *);
FuncDefStmtNode *createFuncDefStmtNode(IdentifierNode *, IdentifierNode *, IdentifierNodeList *, BlockNode *);
void deleteFuncDefStmtNode(FuncDefStmtNode *);
/* Creation and deletion routines for generic expressions, the list that
 * groups them, and each particular expression structure. */
ExprNode *createExprNode(ExprType, void *);
void deleteExprNode(ExprNode *);
ExprNodeList *createExprNodeList(void);
ExprNode *addExprNode(ExprNodeList *, ExprNode *);
void deleteExprNodeList(ExprNodeList *);
CastExprNode *createCastExprNode(ExprNode *, TypeNode *);
void deleteCastExprNode(CastExprNode *);
FuncCallExprNode *createFuncCallExprNode(FuncDefStmtNode *, ExprNodeList *);
void deleteFuncCallExprNode(FuncCallExprNode *);
OpExprNode *createOpExprNode(OpType, ExprNodeList *);
void deleteOpExprNode(OpExprNode *);
/* Function table management. lookupFuncDefStmtNode presumably searches the
 * table by function name -- confirm against parser.c. */
FunctionTable *createFunctionTable(void);
FuncDefStmtNode *addFuncDefStmtNode(FunctionTable *, FuncDefStmtNode *);
void deleteFunctionTable(FunctionTable *);
FuncDefStmtNode *lookupFuncDefStmtNode(FunctionTable *, const char *);
/* Token stream helpers and error reporting used while parsing; their exact
 * semantics are not visible from this header (see parser.c). */
int acceptToken(Token ***, TokenType);
int peekToken(Token ***, TokenType);
int nextToken(Token ***, TokenType);
void error(const char *, Token **);
/* Parsing routines. Those taking a Token *** receive a pointer to the token
 * position; parseMainNode and setupFunctionTable take the token array
 * itself. */
ConstantNode *parseConstantNode(Token ***);
TypeNode *parseTypeNode(Token ***);
IdentifierNode *parseIdentifierNode(Token ***);
ExprNode *parseExprNode(Token ***, FunctionTable *);
StmtNode *parseStmtNode(Token ***, FunctionTable *);
BlockNode *parseBlockNode(Token ***, FunctionTable *);
MainNode *parseMainNode(Token **, FunctionTable *);
FunctionTable *setupFunctionTable(Token **);
/* Creation and deletion routines for constants, one creator per stored type. */
ConstantNode *createBooleanConstantNode(int);
ConstantNode *createIntegerConstantNode(int);
ConstantNode *createFloatConstantNode(float);
ConstantNode *createStringConstantNode(char *);
void deleteConstantNode(ConstantNode *);
/* Creation and deletion routines for lists of identifiers. */
IdentifierNodeList *createIdentifierNodeList(void);
IdentifierNode *addIdentifierNode(IdentifierNodeList *, IdentifierNode *);
void deleteIdentifierNodeList(IdentifierNodeList *);
#endif /* __PARSER_H__ */

View File

@ -0,0 +1,2 @@
HAI 1.2
KTHXBYE

View File

@ -0,0 +1,2 @@
This test checks to see whether the bare minimum program--consisting only of a
main block--is correctly interpreted and produces no output.

View File

@ -0,0 +1,3 @@
HAI 1.2
VISIBLE "Lorem", VISIBLE "ipsum", VISIBLE "dolor", VISIBLE "sit"
KTHXBYE

View File

@ -0,0 +1,4 @@
Lorem
ipsum
dolor
sit

View File

@ -0,0 +1 @@
This test checks that commas separate multiple statements on the same line.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "Lorem "...
"ipsum "...
"dolor "...
"sit"
KTHXBYE

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit

View File

@ -0,0 +1,2 @@
This test checks that ellipses followed by a linefeed character (\n) join
multiple portions of a statement on separate lines.

View File

@ -0,0 +1 @@
HAI 1.2 VISIBLE "Lorem "... "ipsum "... "dolor "... "sit" KTHXBYE

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit

View File

@ -0,0 +1,2 @@
This test checks that ellipses followed by a carriage return character (\r) join
multiple portions of a statement on separate lines.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "Lorem "...
"ipsum "...
"dolor "...
"sit"
KTHXBYE

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit

View File

@ -0,0 +1,2 @@
This test checks that ellipses followed by the carriage return and linefeed
characters (\r\n) join multiple portions of a statement on separate lines.

View File

@ -0,0 +1,5 @@
HAI 1.2
VISIBLE "Lorem ipsum "...
"dolor sit"
KTHXBYE

View File

@ -0,0 +1,2 @@
This test checks that ellipses followed by a linefeed (\n) character must not be
followed by an additional linefeed character.

View File

@ -0,0 +1 @@
HAI 1.2 VISIBLE "Lorem ipsum "... "dolor sit" KTHXBYE

View File

@ -0,0 +1,2 @@
This test checks that ellipses followed by a carriage return (\r) character must
not be followed by an additional linefeed character.

View File

@ -0,0 +1,5 @@
HAI 1.2
VISIBLE "Lorem ipsum "...
"dolor sit"
KTHXBYE

View File

@ -0,0 +1,3 @@
This test checks that ellipses followed by carriage return (\r) and linefeed
(\n) characters must not be followed by additional carriage return and linefeed
characters.

View File

@ -0,0 +1 @@
KTHXBYE

View File

@ -0,0 +1 @@
This test makes sure a program begins with the HAI keyword.

View File

@ -0,0 +1,2 @@
HAI
KTHXBYE

View File

@ -0,0 +1 @@
This test makes sure a program beginning without a version generates an error.

View File

@ -0,0 +1 @@
HAI 1.2

View File

@ -0,0 +1,2 @@
This test makes sure a program ending without a KTHXBYE statement generates an
error.

View File

@ -0,0 +1,3 @@
HAI 1.2
VISIBLE "Lorem ipsum dolor sit"
KTHXBYE

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit

View File

@ -0,0 +1 @@
This test checks to see whether line indentation is ignored.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "Lorem " "ipsum " "dolor " "sit"
VISIBLE "Lorem " "ipsum " "dolor " "sit"
VISIBLE "Lorem " "ipsum " "dolor " "sit"
VISIBLE "Lorem " "ipsum " "dolor " "sit"
KTHXBYE

View File

@ -0,0 +1,4 @@
Lorem ipsum dolor sit
Lorem ipsum dolor sit
Lorem ipsum dolor sit
Lorem ipsum dolor sit

View File

@ -0,0 +1,3 @@
This test checks that whitespace in between tokens is handled properly. It
tests whitespace only, tabs only, alternating whitespace and tabs, and
alternating tabs and whitespace.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "Lorem"
VISIBLE "ipsum"
VISIBLE "dolor"
VISIBLE "sit"
KTHXBYE

View File

@ -0,0 +1,4 @@
Lorem
ipsum
dolor
sit

View File

@ -0,0 +1,2 @@
This test checks to make sure the linefeed character (\n) indicates the end of a
statement.

View File

@ -0,0 +1 @@
HAI 1.2 VISIBLE "Lorem" VISIBLE "ipsum" VISIBLE "dolor" VISIBLE "sit" KTHXBYE

View File

@ -0,0 +1,4 @@
Lorem
ipsum
dolor
sit

View File

@ -0,0 +1,2 @@
This test checks to make sure the carriage return character (\r) indicates the
end of a statement.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "Lorem"
VISIBLE "ipsum"
VISIBLE "dolor"
VISIBLE "sit"
KTHXBYE

View File

@ -0,0 +1,4 @@
Lorem
ipsum
dolor
sit

View File

@ -0,0 +1,2 @@
This test checks to make sure the carriage return and linefeed characters (\r\n)
indicate the end of a statement.

View File

@ -0,0 +1,12 @@
HAI 1.2
I HAS A var ITZ 0
IM IN YR loop
VISIBLE var
var R SUM OF var AN 1
BOTH SAEM var AN 10
O RLY?
YA RLY
GTFO
OIC
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,10 @@
0
1
2
3
4
5
6
7
8
9

View File

@ -0,0 +1 @@
This test checks that breaking from loops works correctly.

View File

@ -0,0 +1,10 @@
HAI 1.2
IM IN YR loop UPPIN YR var
VISIBLE var
BOTH SAEM var AN 9
O RLY?
YA RLY
GTFO
OIC
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,10 @@
0
1
2
3
4
5
6
7
8
9

View File

@ -0,0 +1,2 @@
This test checks that a loop with an incremented temporary variable works
correctly.

View File

@ -0,0 +1,10 @@
HAI 1.2
IM IN YR loop NERFIN YR var
VISIBLE var
BOTH SAEM var AN -9
O RLY?
YA RLY
GTFO
OIC
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,10 @@
0
-1
-2
-3
-4
-5
-6
-7
-8
-9

View File

@ -0,0 +1,2 @@
This test checks that a loop with a decremented temporary variable works
correctly.

View File

@ -0,0 +1,5 @@
HAI 1.2
IM IN YR loop UPPIN YR var TIL BOTH SAEM var AN 10
VISIBLE var
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,10 @@
0
1
2
3
4
5
6
7
8
9

View File

@ -0,0 +1 @@
This test checks that loops with an "until" ending condition work correctly.

View File

@ -0,0 +1,5 @@
HAI 1.2
IM IN YR loop NERFIN YR var WILE DIFFRINT var AN -10
VISIBLE var
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,10 @@
0
-1
-2
-3
-4
-5
-6
-7
-8
-9

View File

@ -0,0 +1 @@
This test checks that loops with a "while" ending condition work correctly.

View File

@ -0,0 +1,14 @@
HAI 1.2
HOW DUZ I plustwoin YR var
FOUND YR SUM OF var AN 2
IF U SAY SO
IM IN YR loop plustwoin YR var
VISIBLE var
BOTH SAEM var AN 10
O RLY?
YA RLY
GTFO
OIC
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,6 @@
0
2
4
6
8
10

View File

@ -0,0 +1,2 @@
This test checks that a loop with an arbitrary unary function applied to a
temporary variable works correctly.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "a"
IM IN YR loop UPPIN YR var TIL BOTH SAEM var AN 10
IM OUTTA YR loop
VISIBLE "b"
KTHXBYE

View File

@ -0,0 +1,2 @@
a
b

View File

@ -0,0 +1 @@
This test makes sure an empty loop is allowed.

View File

@ -0,0 +1,4 @@
HAI 1.2
IM IN YR loop TIL WIN
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,2 @@
This test makes sure that an error occurs if an "until" stopping condition is
used without a temporary loop variable.

View File

@ -0,0 +1,4 @@
HAI 1.2
IM IN YR loop WILE FAIL
IM OUTTA YR loop
KTHXBYE

View File

@ -0,0 +1,2 @@
This test makes sure that an error occurs if a "while" stopping condition is
used without a temporary loop variable.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "Lorem "…
"ipsum "…
"dolor "…
"sit"
KTHXBYE

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit

View File

@ -0,0 +1,3 @@
This test checks that the Unicode ellipsis character (U+2026) allows line
continuation when followed by a newline character (\n). This test is encoded
in UTF-8 format with no BOM.

View File

@ -0,0 +1 @@
HAI 1.2 VISIBLE "Lorem "… "ipsum "… "dolor "… "sit" KTHXBYE

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit

View File

@ -0,0 +1,3 @@
This test checks that the Unicode ellipsis character (U+2026) allows line
continuation when followed by a carriage return character (\r). This test is
encoded in UTF-8 format with no BOM.

View File

@ -0,0 +1,6 @@
HAI 1.2
VISIBLE "Lorem "…
"ipsum "…
"dolor "…
"sit"
KTHXBYE

View File

@ -0,0 +1 @@
Lorem ipsum dolor sit

View File

@ -0,0 +1,3 @@
This test checks that the Unicode ellipsis character (U+2026) allows line
continuation when followed by a carriage return character (\r) and a newline
character (\n). This test is encoded in UTF-8 format with no BOM.

View File

@ -0,0 +1,3 @@
HAI 1.2
VISIBLE "ʇıs ɹoʃop ɯnsdı ɯǝɹo⅂"
KTHXBYE

View File

@ -0,0 +1 @@
ʇıs ɹoʃop ɯnsdı ɯǝɹo⅂

View File

@ -0,0 +1,2 @@
This test checks that Unicode strings are handled correctly. This test is
encoded in UTF-8 format with no BOM.

View File

@ -0,0 +1,3 @@
HAI 1.2
VISIBLE "Lorem :(0024) ipsum :(00A2) dolor :(20AC) sit :(024B62)"
KTHXBYE

View File

@ -0,0 +1 @@
Lorem $ ipsum ¢ dolor € sit 𤭢

View File

@ -0,0 +1,2 @@
This test checks that when Unicode code points are inserted into strings, they
are translated correctly.

Some files were not shown because too many files have changed in this diff Show More