Added basic array functionality and cleaned up documentation.

This commit is contained in:
Justin J. Meza 2011-06-14 23:54:12 -07:00
parent 977e3dfde5
commit a6ef5811e8
17 changed files with 5218 additions and 4813 deletions

1606
Doxyfile

File diff suppressed because it is too large Load Diff

View File

@ -13,21 +13,24 @@ testdir = ./test
all: $(TARGET) all: $(TARGET)
$(TARGET): $(OBJS) $(LIBS) $(TARGET): $(OBJS)
$(CC) $(CPPFLAGS) -o $(TARGET) $(OBJS) $(LIBS) $(CC) $(CPPFLAGS) -o $(TARGET) $(OBJS) $(LIBS)
pedantic: $(OBJS) $(LIBS) pedantic: $(OBJS) $(LIBS)
$(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -Wall -ansi -pedantic -g -o $(TARGET) $(SRCS) $(HDRS) $(LIBS) $(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -Wall -ansi -pedantic -o $(TARGET) $(SRCS) $(HDRS) $(LIBS)
lint: all lint: all
$(LINT) $(SRCS) $(LINT) $(SRCS)
debug: $(OBJS) $(LIBS)
$(CC) -g -o $(TARGET) $(SRCS) $(LIBS)
check: all check: all
@cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.3-Tests/ @cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.3-Tests
check-mem: all check-mem: all
@echo "This will take a long time! Be patient!" @echo "This will take a long time! Be patient!"
@cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.3-Tests/ @cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.3-Tests
install: all install: all
$(INSTALL) $(TARGET) $(bindir)/$(TARGET) $(INSTALL) $(TARGET) $(bindir)/$(TARGET)

10
README
View File

@ -2,7 +2,7 @@
LICENSE LICENSE
Copyright (C) 2010 Justin J. Meza Copyright (C) 2010-2011 Justin J. Meza
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -22,15 +22,15 @@ ABOUT
lci is a LOLCODE interpreter written in C and is designed to be correct, lci is a LOLCODE interpreter written in C and is designed to be correct,
portable, fast, and precisely documented. portable, fast, and precisely documented.
* correct: Every effort has been made to test lci's conformance to the - correct: Every effort has been made to test lci's conformance to the
LOLCODE language specification. Unit tests come packaged with the lci LOLCODE language specification. Unit tests come packaged with the lci
source code. source code.
* portable: lci follows the widely ported ANSI C specification allowing it - portable: lci follows the widely ported ANSI C specification allowing it
to compile on a broad range of systems. to compile on a broad range of systems.
* fast: Much effort has gone into producing simple and efficient code - fast: Much effort has gone into producing simple and efficient code
whenever possible to the extent that the above points are not whenever possible to the extent that the above points are not
compromised. compromised.
* precisely documented: lci uses Doxygen to generate literate code - precisely documented: lci uses Doxygen to generate literate code
documentation, browsable here. documentation, browsable here.
This project's homepage is at http://icanhaslolcode.org. For help, visit This project's homepage is at http://icanhaslolcode.org. For help, visit

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,15 @@
/** Structures and functions for interpreting a parse tree. The interpreter /**
* traverses a parse tree in a depth-first manner, interpreting each node it * Structures and functions for interpreting a parse tree. The interpreter
* reaches along the way. This is the last stage of the processing of a source * traverses a parse tree in a depth-first manner, interpreting each node it
* code file. * reaches along the way. This is the last stage of the processing of a source
* * code file.
* \file interpreter.h *
* * \file interpreter.h
* \author Justin J. Meza *
* * \author Justin J. Meza
* \date 2010 */ *
* \date 2010-2011
*/
#ifndef __INTERPRETER_H__ #ifndef __INTERPRETER_H__
#define __INTERPRETER_H__ #define __INTERPRETER_H__
@ -19,127 +21,221 @@
#include "parser.h" #include "parser.h"
#include "unicode.h" #include "unicode.h"
/** Gets the integer data associated with a ValueObject structure. */ /**
* Retrieves a value's integer data.
*/
#define getInteger(value) (value->data.i) #define getInteger(value) (value->data.i)
/** Gets the floating point data associated with a ValueObject structure. */
/**
* Retrieves a value's decimal data.
*/
#define getFloat(value) (value->data.f) #define getFloat(value) (value->data.f)
/** Gets the string data associated with a ValueObject structure. */
/**
* Retrieves a value's string data.
*/
#define getString(value) (value->data.s) #define getString(value) (value->data.s)
/** Gets the function definition associated with a ValueObject structure. */
/**
* Retrieves a value's function data.
*/
#define getFunction(value) (value->data.fn) #define getFunction(value) (value->data.fn)
/** Denotes the type of a value. */ /**
* Retrieves a value's array data.
*/
#define getArray(value) (value->data.a)
/**
* Represents a value type.
*/
typedef enum { typedef enum {
VT_INTEGER, /**< An integer value. */ VT_INTEGER, /**< An integer value. */
VT_FLOAT, /**< A floating point decimal value. */ VT_FLOAT, /**< A decimal value. */
VT_BOOLEAN, /**< A true/false value. */ VT_BOOLEAN, /**< A boolean value. */
VT_STRING, /**< A character string value. */ VT_STRING, /**< A string value. */
VT_NIL, /**< Represents no value. */ VT_NIL, /**< Represents no value. */
VT_FUNC /**< A function. */ VT_FUNC, /**< A function. */
VT_ARRAY /**< An array. */
} ValueType; } ValueType;
/** Stores the data associated with a ValueObject structure. */ /**
* Stores value data.
*/
typedef union { typedef union {
int i; /**< Integer data. */ int i; /**< Integer data. */
float f; /**< Floating point data. */ float f; /**< Decimal data. */
char *s; /**< Character string data. */ char *s; /**< String data. */
FuncDefStmtNode *fn; /**< Function definition. */ FuncDefStmtNode *fn; /**< Function data. */
struct scopeobject *a; /**< Array data. */
} ValueData; } ValueData;
/** Increments the semaphore of a ValueObject structure. */ /**
* Increments a value's semaphore.
*/
#define V(value) (value->semaphore++) #define V(value) (value->semaphore++)
/** Decrements the semaphore of a ValueObject structure. */ /**
* Decrements a value's semaphore.
*/
#define P(value) (value->semaphore--) #define P(value) (value->semaphore--)
/** Stores a value. /**
* * Stores a value.
* \see copyValueObject(ValueObject *) */
* \see deleteValueObject(ValueObject *) */
typedef struct { typedef struct {
ValueType type; /**< The type of value stored. */ ValueType type; /**< The type of value stored. */
ValueData data; /**< The stored data. */ ValueData data; /**< The value data. */
unsigned short semaphore; /**< A semaphore for value usage. */ unsigned short semaphore; /**< A semaphore for value usage. */
} ValueObject; } ValueObject;
/** Denotes the type of return encountered. */ /**
* Represents the return type.
*/
typedef enum { typedef enum {
RT_DEFAULT, /**< A block of code returned after evaluating all of its statements. */ RT_DEFAULT, /**< Code block completed successfully. */
RT_BREAK, /**< A block of code within a LoopStmtNode or SwitchStmtNode returned via a break statement. */ RT_BREAK, /**< Broke out of a loop or switch statement. */
RT_RETURN /**< A block of code within a FuncDefStmtNode called by a FuncCallExprNode returned (either with or without a value). */ RT_RETURN /**< Returned from a function. */
} ReturnType; } ReturnType;
/** Stores a return state. Returns are encountered when /**
* - a block of code evaluates all of its statements, * Stores return state.
* - a block of code within a LoopStmt or SwitchStmt encountered a break statement, or */
* - a block of code within a FunctionDefStmt called by a FunctionCallExpr encounters a ReturnStmt. */
typedef struct { typedef struct {
ReturnType type; /**< The type of return encountered. */ ReturnType type; /**< The type of return encountered. */
ValueObject *value; /**< The optional return value. */ ValueObject *value; /**< The optional return value. */
} ReturnObject; } ReturnObject;
/** Stores the variables in a particular scope. Scopes are arranged /**
* heirarchically from global (the ancestor of all other scopes) to local (the * Stores a set of variables hierarchically.
* temporary scope of a BlockNode). */
*
* \see createScopeObject(ScopeObject *)
* \see deleteScopeObject(ScopeObject *) */
typedef struct scopeobject { typedef struct scopeobject {
struct scopeobject *parent; /**< A pointer to the parent ScopeObject. */ struct scopeobject *parent; /**< The parent scope. */
ValueObject *impvar; /**< A pointer to the ValueObject representing the implicit variable for this scope. */ ValueObject *impvar; /**< The \ref impvar "implicit variable". */
unsigned int numvals; /**< The number of ValueObject structures in \a values. */ unsigned int numvals; /**< The number of values in the scope. */
char **names; /**< A pointer to the array of character strings naming the values in the scope. */ char **names; /**< The names of the values. */
ValueObject **values; /**< A pointer to an array of ValueObject structures in the scope. */ ValueObject **values; /**< The values in the scope. */
} ScopeObject; } ScopeObject;
char *createString(char *); /**
* \name Utilities
*
* Functions for performing helper tasks.
*/
/**@{*/
void printInterpreterError(const char *, IdentifierNode *, ScopeObject *);
char *copyString(char *);
unsigned int isDecString(const char *);
unsigned int isHexString(const char *);
char *resolveIdentifierName(IdentifierNode *, ScopeObject *);
/**@}*/
/**
* \name Value object modifiers
*
* Functions for creating, copying, and deleting value objects.
*/
/**@{*/
ValueObject *createNilValueObject(void); ValueObject *createNilValueObject(void);
ValueObject *createBooleanValueObject(int); ValueObject *createBooleanValueObject(int);
ValueObject *createIntegerValueObject(int); ValueObject *createIntegerValueObject(int);
ValueObject *createFloatValueObject(float); ValueObject *createFloatValueObject(float);
ValueObject *createStringValueObject(char *); ValueObject *createStringValueObject(char *);
ValueObject *createFunctionValueObject(FuncDefStmtNode *); ValueObject *createFunctionValueObject(FuncDefStmtNode *);
ValueObject *createArrayValueObject(ScopeObject *);
ValueObject *copyValueObject(ValueObject *); ValueObject *copyValueObject(ValueObject *);
void deleteValueObject(ValueObject *); void deleteValueObject(ValueObject *);
ReturnObject *createReturnObject(ReturnType, ValueObject *); /**@}*/
void deleteReturnObject(ReturnObject *);
char *resolveIdentifierName(IdentifierNode *, ScopeObject *); /**
* \name Scope object modifiers
*
* Functions for manipulating scope objects and their data.
*/
/**@{*/
ScopeObject *createScopeObject(ScopeObject *); ScopeObject *createScopeObject(ScopeObject *);
void deleteScopeObject(ScopeObject *); void deleteScopeObject(ScopeObject *);
ValueObject *getScopeValue(ScopeObject *, IdentifierNode *); ValueObject *createScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
ValueObject *getLocalScopeValue(ScopeObject *, IdentifierNode *); ValueObject *updateScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *, ValueObject *);
ValueObject *createScopeValue(ScopeObject *, IdentifierNode *); ValueObject *getScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
ValueObject *updateScopeValue(ScopeObject *, IdentifierNode *, ValueObject *); ValueObject *getScopeValueArray(ScopeObject *, ScopeObject *, IdentifierNode *);
void deleteScopeValue(ScopeObject *, IdentifierNode *); ValueObject *getScopeValueLocal(ScopeObject *, ScopeObject *, IdentifierNode *);
unsigned int isNumString(const char *); ScopeObject *getScopeObject(ScopeObject *, ScopeObject *, IdentifierNode *);
unsigned int isHexString(const char *); void deleteScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *); /**@}*/
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *); /**
ValueObject *castStringExplicit(ValueObject *, ScopeObject *); * \name Return object modifiers
*
* Functions for creating and deleting return objects.
*/
/**@{*/
ReturnObject *createReturnObject(ReturnType, ValueObject *);
void deleteReturnObject(ReturnObject *);
/**@}*/
/**
* \name Casts
*
* Functions for performing casts between different types of values.
*/
/**@{*/
ValueObject *castBooleanImplicit(ValueObject *, ScopeObject *); ValueObject *castBooleanImplicit(ValueObject *, ScopeObject *);
ValueObject *castIntegerImplicit(ValueObject *, ScopeObject *); ValueObject *castIntegerImplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatImplicit(ValueObject *, ScopeObject *); ValueObject *castFloatImplicit(ValueObject *, ScopeObject *);
ValueObject *castStringImplicit(ValueObject *, ScopeObject *); ValueObject *castStringImplicit(ValueObject *, ScopeObject *);
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *);
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *);
ValueObject *castStringExplicit(ValueObject *, ScopeObject *);
/**@}*/
/**
* \name Node interpreters
*
* Functions for interpreting basic parse tree nodes.
*/
/**@{*/
ValueObject *interpretExprNode(ExprNode *, ScopeObject *); ValueObject *interpretExprNode(ExprNode *, ScopeObject *);
ReturnObject *interpretStmtNode(StmtNode *, ScopeObject *); ReturnObject *interpretStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretStmtNodeList(StmtNodeList *, ScopeObject *); ReturnObject *interpretStmtNodeList(StmtNodeList *, ScopeObject *);
ReturnObject *interpretBlockNode(BlockNode *, ScopeObject *); ReturnObject *interpretBlockNode(BlockNode *, ScopeObject *);
int interpretMainNode(MainNode *); int interpretMainNode(MainNode *);
/**@}*/
/**
* \name Expression interpreters
*
* Functions for interpreting expression parse tree nodes.
*/
/**@{*/
ValueObject *interpretImpVarExprNode(ExprNode *, ScopeObject *); ValueObject *interpretImpVarExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretCastExprNode(ExprNode *, ScopeObject *); ValueObject *interpretCastExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretFuncCallExprNode(ExprNode *, ScopeObject *); ValueObject *interpretFuncCallExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretIdentifierExprNode(ExprNode *, ScopeObject *); ValueObject *interpretIdentifierExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretConstantExprNode(ExprNode *, ScopeObject *); ValueObject *interpretConstantExprNode(ExprNode *, ScopeObject *);
/**@}*/
/**
* \name Operation interpreters
*
* Functions for interpreting operation parse tree nodes.
*/
/**@{*/
ValueObject *interpretNotOpExprNode(OpExprNode *, ScopeObject *); ValueObject *interpretNotOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretArithOpExprNode(OpExprNode *, ScopeObject *); ValueObject *interpretArithOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretBoolOpExprNode(OpExprNode *, ScopeObject *); ValueObject *interpretBoolOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretEqualityOpExprNode(OpExprNode *, ScopeObject *); ValueObject *interpretEqualityOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretConcatOpExprNode(OpExprNode *, ScopeObject *); ValueObject *interpretConcatOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretOpExprNode(ExprNode *, ScopeObject *); ValueObject *interpretOpExprNode(ExprNode *, ScopeObject *);
/**@}*/
/**
* \name Statement interpreters
*
* Functions for interpreting statement parse tree nodes.
*/
/**@{*/
ReturnObject *interpretCastStmtNode(StmtNode *, ScopeObject *); ReturnObject *interpretCastStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretPrintStmtNode(StmtNode *, ScopeObject *); ReturnObject *interpretPrintStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretInputStmtNode(StmtNode *, ScopeObject *); ReturnObject *interpretInputStmtNode(StmtNode *, ScopeObject *);
@ -153,7 +249,14 @@ ReturnObject *interpretLoopStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretDeallocationStmtNode(StmtNode *, ScopeObject *); ReturnObject *interpretDeallocationStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretFuncDefStmtNode(StmtNode *, ScopeObject *); ReturnObject *interpretFuncDefStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretExprStmtNode(StmtNode *, ScopeObject *); ReturnObject *interpretExprStmtNode(StmtNode *, ScopeObject *);
/**@}*/
/**
* \name Arithmetic operations (integer-integer)
*
* Functions for performing integer-integer operations on values.
*/
/**@{*/
ValueObject *opAddIntegerInteger(ValueObject *, ValueObject *); ValueObject *opAddIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opSubIntegerInteger(ValueObject *, ValueObject *); ValueObject *opSubIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMultIntegerInteger(ValueObject *, ValueObject *); ValueObject *opMultIntegerInteger(ValueObject *, ValueObject *);
@ -161,7 +264,14 @@ ValueObject *opDivIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMaxIntegerInteger(ValueObject *, ValueObject *); ValueObject *opMaxIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMinIntegerInteger(ValueObject *, ValueObject *); ValueObject *opMinIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opModIntegerInteger(ValueObject *, ValueObject *); ValueObject *opModIntegerInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Arithmetic operations (integer-float)
*
* Functions for performing integer-float operations on values.
*/
/**@{*/
ValueObject *opAddIntegerFloat(ValueObject *, ValueObject *); ValueObject *opAddIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opSubIntegerFloat(ValueObject *, ValueObject *); ValueObject *opSubIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMultIntegerFloat(ValueObject *, ValueObject *); ValueObject *opMultIntegerFloat(ValueObject *, ValueObject *);
@ -169,7 +279,14 @@ ValueObject *opDivIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMaxIntegerFloat(ValueObject *, ValueObject *); ValueObject *opMaxIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMinIntegerFloat(ValueObject *, ValueObject *); ValueObject *opMinIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opModIntegerFloat(ValueObject *, ValueObject *); ValueObject *opModIntegerFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Arithmetic operations (float-integer)
*
* Functions for performing float-integer operations on values.
*/
/**@{*/
ValueObject *opAddFloatInteger(ValueObject *, ValueObject *); ValueObject *opAddFloatInteger(ValueObject *, ValueObject *);
ValueObject *opSubFloatInteger(ValueObject *, ValueObject *); ValueObject *opSubFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMultFloatInteger(ValueObject *, ValueObject *); ValueObject *opMultFloatInteger(ValueObject *, ValueObject *);
@ -177,7 +294,14 @@ ValueObject *opDivFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMaxFloatInteger(ValueObject *, ValueObject *); ValueObject *opMaxFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMinFloatInteger(ValueObject *, ValueObject *); ValueObject *opMinFloatInteger(ValueObject *, ValueObject *);
ValueObject *opModFloatInteger(ValueObject *, ValueObject *); ValueObject *opModFloatInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Arithmetic operations (float-float)
*
* Functions for performing float-float operations on values.
*/
/**@{*/
ValueObject *opAddFloatFloat(ValueObject *, ValueObject *); ValueObject *opAddFloatFloat(ValueObject *, ValueObject *);
ValueObject *opSubFloatFloat(ValueObject *, ValueObject *); ValueObject *opSubFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMultFloatFloat(ValueObject *, ValueObject *); ValueObject *opMultFloatFloat(ValueObject *, ValueObject *);
@ -185,26 +309,76 @@ ValueObject *opDivFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMaxFloatFloat(ValueObject *, ValueObject *); ValueObject *opMaxFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMinFloatFloat(ValueObject *, ValueObject *); ValueObject *opMinFloatFloat(ValueObject *, ValueObject *);
ValueObject *opModFloatFloat(ValueObject *, ValueObject *); ValueObject *opModFloatFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (boolean-boolean)
*
* Functions for performing boolean-boolean operations on values.
*/
/**@{*/
ValueObject *opEqBooleanBoolean(ValueObject *, ValueObject *); ValueObject *opEqBooleanBoolean(ValueObject *, ValueObject *);
ValueObject *opNeqBooleanBoolean(ValueObject *, ValueObject *); ValueObject *opNeqBooleanBoolean(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (integer-integer)
*
* Functions for performing integer-integer operations on values.
*/
/**@{*/
ValueObject *opEqIntegerInteger(ValueObject *, ValueObject *); ValueObject *opEqIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opNeqIntegerInteger(ValueObject *, ValueObject *); ValueObject *opNeqIntegerInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (integer-float)
*
* Functions for performing integer-float operations on values.
*/
/**@{*/
ValueObject *opEqIntegerFloat(ValueObject *, ValueObject *); ValueObject *opEqIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opNeqIntegerFloat(ValueObject *, ValueObject *); ValueObject *opNeqIntegerFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (float-integer)
*
* Functions for performing float-integer operations on values.
*/
/**@{*/
ValueObject *opEqFloatInteger(ValueObject *, ValueObject *); ValueObject *opEqFloatInteger(ValueObject *, ValueObject *);
ValueObject *opNeqFloatInteger(ValueObject *, ValueObject *); ValueObject *opNeqFloatInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (float-float)
*
* Functions for performing float-float operations on values.
*/
/**@{*/
ValueObject *opEqFloatFloat(ValueObject *, ValueObject *); ValueObject *opEqFloatFloat(ValueObject *, ValueObject *);
ValueObject *opNeqFloatFloat(ValueObject *, ValueObject *); ValueObject *opNeqFloatFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (string-string)
*
* Functions for performing string-string operations on values.
*/
/**@{*/
ValueObject *opEqStringString(ValueObject *, ValueObject *); ValueObject *opEqStringString(ValueObject *, ValueObject *);
ValueObject *opNeqStringString(ValueObject *, ValueObject *); ValueObject *opNeqStringString(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (nil-nil)
*
* Functions for performing nil-nil operations on values.
*/
/**@{*/
ValueObject *opEqNilNil(ValueObject *, ValueObject *); ValueObject *opEqNilNil(ValueObject *, ValueObject *);
ValueObject *opNeqNilNil(ValueObject *, ValueObject *); ValueObject *opNeqNilNil(ValueObject *, ValueObject *);
/**@}*/
#endif /* __INTERPRETER_H__ */ #endif /* __INTERPRETER_H__ */

BIN
lci.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

185
lexer.c
View File

@ -1,15 +1,19 @@
#include "lexer.h" #include "lexer.h"
/** Creates a Lexeme structure. /**
* * Creates a lexeme.
* \return A pointer to a Lexeme structure with the desired properties. *
* * \param [in] image The string that identifies the lexeme.
* \retval NULL malloc was unable to allocate memory. *
* * \param [in] fname The name of the file containing the lexeme.
* \see deleteLexeme(Lexeme *) */ *
Lexeme *createLexeme(char *image, /**< [in] An array of characters that describe the lexeme. */ * \param [in] line The line number the lexeme occurred on.
const char *fname, /**< [in] A pointer to the name of the file containing the lexeme. */ *
unsigned int line) /**< [in] The line number from the source file that the lexeme occurred on. */ * \return A new lexeme with the desired properties.
*
* \retval NULL Memory allocation failed.
*/
Lexeme *createLexeme(char *image, const char *fname, unsigned int line)
{ {
Lexeme *ret = malloc(sizeof(Lexeme)); Lexeme *ret = malloc(sizeof(Lexeme));
if (!ret) { if (!ret) {
@ -23,8 +27,11 @@ Lexeme *createLexeme(char *image, /**< [in] An array of characters that de
return NULL; return NULL;
} }
strcpy(ret->image, image); strcpy(ret->image, image);
/** \note fname is not copied because it would only one copy is stored /**
* for all Lexeme structures that share it. */ * \note \a fname is not copied because only one copy is stored for
* all lexemes from the same file. This is simply to avoid large
* numbers of lexemes storing duplicate file name strings.
*/
ret->fname = fname; ret->fname = fname;
ret->line = line; ret->line = line;
#ifdef DEBUG #ifdef DEBUG
@ -33,29 +40,29 @@ Lexeme *createLexeme(char *image, /**< [in] An array of characters that de
return ret; return ret;
} }
/** Deletes a Lexeme structure. /**
* * Deletes a lexeme.
* \pre \a lexeme points to a Lexeme structure created by createLexeme(char *, const char *, unsigned int). *
* * \param [in,out] lexeme The lexeme to delete.
* \post The memory at \a lexeme and all of its elements will be freed. */
*
* \see createLexeme(char *, const char *, unsigned int) */
void deleteLexeme(Lexeme *lexeme) void deleteLexeme(Lexeme *lexeme)
{ {
if (!lexeme) return; if (!lexeme) return;
free(lexeme->image); free(lexeme->image);
/** \note We do not free (*lex)->fname because it is shared between many /**
* Lexeme structures and is free'd by whoever created them. */ * \note We do not free the file name because it is shared between many
* lexemes and is freed by whomever created the file name string.
*/
free(lexeme); free(lexeme);
} }
/** Creates a LexemeList structure. /**
* * Creates a list of lexemes.
* \return A pointer to a LexemeList structure with the desired properties. *
* * \return An empty lexeme list.
* \retval NULL malloc was unable to allocate memory. *
* * \retval NULL Memory allocation failed.
* \see deleteLexemeList(LexemeList *) */ */
LexemeList *createLexemeList(void) LexemeList *createLexemeList(void)
{ {
LexemeList *p = malloc(sizeof(LexemeList)); LexemeList *p = malloc(sizeof(LexemeList));
@ -68,20 +75,21 @@ LexemeList *createLexemeList(void)
return p; return p;
} }
/** Adds a Lexeme structure to a LexemeList structure. /**
* * Adds a lexeme to a list of lexemes.
* \pre \a list was created by createLexemeList(void). *
* \pre \a lexeme was created by createLexeme(char *, const char *, unsigned int). * \param [in,out] list The list of lexemes to add \a lexeme to.
* *
* \post \a lexeme will be added on to the end of \a list and the size of * \param [in] lexeme The lexeme to add to \a list.
* \a list will be updated accordingly. *
* * \post \a lexeme will be added to the end of \a list and the size of \a list
* \return A pointer to the added Lexeme structure (will be the same as * will be updated.
* \a lexeme). *
* * \return A pointer to the added lexeme (will be the same as \a lexeme).
* \retval NULL realloc was unable to allocate memory. */ *
Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList structure to add \a lex to. */ * \retval NULL Memory allocation failed.
Lexeme *lexeme) /**< [in] A pointer to the Lexeme structure to add to \a list. */ */
Lexeme *addLexeme(LexemeList *list, Lexeme *lexeme)
{ {
unsigned int newsize; unsigned int newsize;
void *mem = NULL; void *mem = NULL;
@ -98,16 +106,14 @@ Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList st
return lexeme; return lexeme;
} }
/** Deletes a LexemeList structure. /**
* * Deletes a list of lexemes.
* \pre \a list was created by createLexemeList(void) and contains *
* items added by addLexeme(LexemeList *, Lexeme *). * \param [in,out] list The lexeme list to delete.
* *
* \post The memory at \a list and any of its associated members will be * \post The memory at \a list and all of its members will be freed.
* freed. */
* void deleteLexemeList(LexemeList *list)
* \see createLexemeList(void) */
void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeList structure to delete. */
{ {
unsigned int n; unsigned int n;
if (!list) return; if (!list) return;
@ -117,37 +123,39 @@ void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeLis
free(list); free(list);
} }
/** Scans through a character buffer, removing unecessary characters and /**
* generating lexemes. Lexemes are separated by whitespace (but newline * Scans a buffer, removing unnecessary characters and grouping characters into
* characters are kept as their own lexeme). String literals are handled a * lexemes. Lexemes are strings of characters separated by whitespace (although
* bit differently: starting at the first quotation character, characters are * newline characters are considered separate lexemes). String literals are
* collected until either an unescaped quotation character is read (that is, a * handled a bit differently: Starting at the first quotation character,
* quotation character not preceeded by a colon which itself is not proceeded * characters are collected until either a non-escaped quotation character is
* by a colon) or a newline or carriage return character is read, whichever * read (i.e., a quotation character not preceded by a colon which itself is not
* comes first. This handles the odd case of strings such as "::" which print * preceded by a colon) or a newline or carriage return character is read,
* out a single colon. Also handled are the effects of commas, ellipses, and * whichever comes first. This handles the odd (but possible) case of strings
* bangs (!). * such as "::" which print out a single colon. Also handled are the effects of
* * commas, ellipses, bangs (!), and array accesses ('Z).
* \pre \a size is the number of characters starting at the memory location *
* pointed to by \a buffer. * \param [in] buffer The characters to turn into lexemes.
* *
* \return A pointer to a LexemeList structure. */ * \param [in] size The number of characters in \a buffer.
LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to tokenize. */ *
unsigned int size, /**< [in] The number of characters in \a buffer. */ * \param [in] fname The name of the file \a buffer was read from.
const char *fname) /**< [in] An array of characters representing the name of the file used to read \a buffer. */ *
* \return A list of lexemes created from the contents of \a buffer.
*/
LexemeList *scanBuffer(const char *buffer, unsigned int size, const char *fname)
{ {
const char *start = buffer; const char *start = buffer;
LexemeList *list = NULL; LexemeList *list = NULL;
unsigned int line = 1; unsigned int line = 1;
Lexeme *lex = NULL;
list = createLexemeList(); list = createLexemeList();
if (!list) return NULL; if (!list) return NULL;
while (start < buffer + size) { while (start < buffer + size) {
char *temp = NULL; char *temp = NULL;
size_t len = 1; unsigned int len = 1;
/* Comma (,) is a soft newline */ /* Comma (,) is a soft newline */
if (*start == ',') { if (*start == ',') {
lex = createLexeme("\n", fname, line); Lexeme *lex = createLexeme("\n", fname, line);
if (!lex) { if (!lex) {
deleteLexemeList(list); deleteLexemeList(list);
return NULL; return NULL;
@ -162,7 +170,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
} }
/* Bang (!) is its own lexeme */ /* Bang (!) is its own lexeme */
if (*start == '!') { if (*start == '!') {
lex = createLexeme("!", fname, line); Lexeme *lex = createLexeme("!", fname, line);
if (!lex) { if (!lex) {
deleteLexemeList(list); deleteLexemeList(list);
return NULL; return NULL;
@ -175,6 +183,21 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
start++; start++;
continue; continue;
} }
/* Apostrophe Z ('Z) is its own lexeme */
if (!strncmp(start, "'Z", 2)) {
Lexeme *lex = createLexeme("'Z", fname, line);
if (!lex) {
deleteLexemeList(list);
return NULL;
}
if (!addLexeme(list, lex)) {
deleteLexeme(lex);
deleteLexemeList(list);
return NULL;
}
start += 2;
continue;
}
/* Skip over leading whitespace */ /* Skip over leading whitespace */
while (isspace(*start)) { while (isspace(*start)) {
unsigned int newline = 0; unsigned int newline = 0;
@ -187,7 +210,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
newline = 1; newline = 1;
} }
if (newline) { if (newline) {
lex = createLexeme("\n", fname, line); Lexeme *lex = createLexeme("\n", fname, line);
if (!lex) { if (!lex) {
deleteLexemeList(list); deleteLexemeList(list);
return NULL; return NULL;
@ -213,7 +236,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
/* Make sure next line is not empty */ /* Make sure next line is not empty */
while (*test && isspace(*test)) { while (*test && isspace(*test)) {
if (*test == '\r' || *test == '\n') { if (*test == '\r' || *test == '\n') {
fprintf(stderr, "%s:%u: a line with continuation may not be followed by an empty line\n", fname, line); fprintf(stderr, "%s:%d: a line with continuation may not be followed by an empty line\n", fname, line);
deleteLexemeList(list); deleteLexemeList(list);
return NULL; return NULL;
} }
@ -240,7 +263,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
start++; start++;
if (start == buffer || *start == ',' || *start == '\r' || *start == '\n') if (start == buffer || *start == ',' || *start == '\r' || *start == '\n')
continue; continue;
fprintf(stderr, "%s:%u: multiple line comment may not appear on the same line as code\n", fname, line); fprintf(stderr, "%s:%d: multiple line comment may not appear on the same line as code\n", fname, line);
deleteLexemeList(list); deleteLexemeList(list);
return NULL; return NULL;
} }
@ -269,9 +292,10 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
if (start[len] && !isspace(start[len]) if (start[len] && !isspace(start[len])
&& *(start + len) != ',' && *(start + len) != ','
&& *(start + len) != '!' && *(start + len) != '!'
&& strncmp(start + len, "'Z", 2)
&& strncmp(start + len, "...", 3) && strncmp(start + len, "...", 3)
&& strncmp(start + len, "\xE2\x80\xA6", 3)) { && strncmp(start + len, "\xE2\x80\xA6", 3)) {
fprintf(stderr, "%s:%u: expected token delimiter after string literal\n", fname, line); fprintf(stderr, "%s:%d: expected token delimiter after string literal\n", fname, line);
deleteLexemeList(list); deleteLexemeList(list);
return NULL; return NULL;
} }
@ -281,6 +305,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
while (start[len] && !isspace(start[len]) while (start[len] && !isspace(start[len])
&& *(start + len) != ',' && *(start + len) != ','
&& *(start + len) != '!' && *(start + len) != '!'
&& strncmp(start + len, "'Z", 2)
&& strncmp(start + len, "...", 3) && strncmp(start + len, "...", 3)
&& strncmp(start + len, "\xE2\x80\xA6", 3)) && strncmp(start + len, "\xE2\x80\xA6", 3))
len++; len++;
@ -293,7 +318,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
} }
strncpy(temp, start, len); strncpy(temp, start, len);
temp[len] = '\0'; temp[len] = '\0';
lex = createLexeme(temp, fname, line); Lexeme *lex = createLexeme(temp, fname, line);
if (!lex) { if (!lex) {
free(temp); free(temp);
deleteLexemeList(list); deleteLexemeList(list);
@ -309,7 +334,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
start += len; start += len;
} }
/* Create an end-of-file lexeme */ /* Create an end-of-file lexeme */
lex = createLexeme("$", fname, line); Lexeme *lex = createLexeme("$", fname, line);
if (!lex) { if (!lex) {
deleteLexemeList(list); deleteLexemeList(list);
return NULL; return NULL;

70
lexer.h
View File

@ -1,14 +1,16 @@
/** Structures and functions for separating a character buffer into lexemes. The /**
* lexer reads through a buffer of characters (themselves typically read from * Structures and functions for separating a character buffer into lexemes --
* standard input), strips whitespace, and breaks them up into logical atoms of * groups of characters. The lexer reads through a buffer of characters
* character strings which, in turn, may be passed on to later processes (such * (themselves typically read from standard input), strips whitespace, and
* as a tokenizer). * breaks them up into logical atoms of character strings which, in turn, may be
* * passed on to later processes (such as a tokenizer).
* \file lexer.h *
* * \file lexer.h
* \author Justin J. Meza *
* * \author Justin J. Meza
* \date 2010 */ *
* \date 2010-2011
*/
#ifndef __LEXER_H__ #ifndef __LEXER_H__
#define __LEXER_H__ #define __LEXER_H__
@ -20,36 +22,44 @@
#undef DEBUG #undef DEBUG
/** Stores a lexeme. A lexeme is the smallest unit of contiguous characters, /**
* namely, it has been stripped of surrounding whitespace. * Stores a lexeme. A lexeme is a group of contiguous characters, stripped of
* * surrounding whitespace or other lexemes.
* \note This structure does not have any list structure to hold groups of it. */
* Instead, pointers to arrays of these structures are employed to allow
* for easier tokenizing.
*
* \see createLexeme(char *, unsigned int) */
typedef struct { typedef struct {
char *image; /**< An array of characters that describe the lexeme. */ char *image; /**< The string that identifies the lexeme. */
const char *fname; /**< A pointer to the name of the file containing the lexeme. */ const char *fname; /**< The name of the file containing the lexeme. */
unsigned int line; /**< The line number from the source file that the lexeme occurred on. */ unsigned int line; /**< The line number the lexeme occurred on. */
} Lexeme; } Lexeme;
/** Stores a list of lexemes. This structure allows sets of lexemes to be /**
* grouped together. * Stores a list of lexemes.
* */
* \see createLexemeList(void)
* \see addLexeme(LexemeList *, Lexeme *)
* \see deleteLexemeList(LexemeList *) */
typedef struct { typedef struct {
unsigned int num; /**< The number of Lexeme structures stored. */ unsigned int num; /**< The number of lexemes stored. */
Lexeme **lexemes; /**< A pointer to the array of Lexeme structures. */ Lexeme **lexemes; /**< The array of stored lexemes. */
} LexemeList; } LexemeList;
/**
* \name Lexeme modifiers
*
* Functions for performing helper tasks.
*/
/**@{*/
Lexeme *createLexeme(char *, const char *, unsigned int); Lexeme *createLexeme(char *, const char *, unsigned int);
void deleteLexeme(Lexeme *); void deleteLexeme(Lexeme *);
LexemeList *createLexemeList(void); LexemeList *createLexemeList(void);
Lexeme *addLexeme(LexemeList *, Lexeme*); Lexeme *addLexeme(LexemeList *, Lexeme*);
void deleteLexemeList(LexemeList *); void deleteLexemeList(LexemeList *);
/**@}*/
/**
* \name Buffer lexer
*
* Generates lexemes from a character buffer.
*/
/**@{*/
LexemeList *scanBuffer(const char *, unsigned int, const char *); LexemeList *scanBuffer(const char *, unsigned int, const char *);
/**@}*/
#endif /* __LEXER_H__ */ #endif /* __LEXER_H__ */

2
main.c
View File

@ -3,7 +3,7 @@
* \section license License * \section license License
* *
* lci - a LOLCODE interpreter written in C. * lci - a LOLCODE interpreter written in C.
* Copyright (C) 2010 Justin J. Meza * Copyright (C) 2010-2011 Justin J. Meza
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by

2244
parser.c

File diff suppressed because it is too large Load Diff

975
parser.h

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,6 @@ done
# Remove options, leave arguments # Remove options, leave arguments
shift $((OPTIND - 1)) shift $((OPTIND - 1))
find $2 -name *.lol | sort | xargs -n 1 ./testFile.sh$OPTS $1 && echo "Passed all tests!" && exit 0 find $2 -name *.lol | sort -t'/' -n -k1 -k2 -k3 -k4 -k5 -k6 -k7 -k8 -k9 -k10 | xargs -n 1 ./testFile.sh$OPTS $1 && echo "Passed all tests!" && exit 0
exit 1 exit 1

View File

@ -61,7 +61,7 @@ then
test ! $QUIET && printf "Found output file ($OUTFILE)!\n" test ! $QUIET && printf "Found output file ($OUTFILE)!\n"
fi fi
# Run the test # Run the test
TMPFILE=$(mktemp) && test ! $QUIET && printf "Using temporary output file ($TMPFILE)...\n" TMPFILE=$(mktemp /tmp/temp.XXXX) && test ! $QUIET && printf "Using temporary output file ($TMPFILE)...\n"
eval "$MEMCHK $PROGRAM $TESTFILE $IN > $TMPFILE" eval "$MEMCHK $PROGRAM $TESTFILE $IN > $TMPFILE"
RESULT=$? RESULT=$?
# Check that program exited normally # Check that program exited normally

View File

@ -12,6 +12,7 @@ static const char *keywords[] = {
"NUMBAR", /* TT_NUMBAR */ "NUMBAR", /* TT_NUMBAR */
"TROOF", /* TT_TROOF */ "TROOF", /* TT_TROOF */
"YARN", /* TT_YARN */ "YARN", /* TT_YARN */
"BUKKIT", /* TT_BUKKIT */
"", /* TT_EOF */ "", /* TT_EOF */
"", /* TT_NEWLINE */ "", /* TT_NEWLINE */
"HAI", /* TT_HAI */ "HAI", /* TT_HAI */
@ -67,23 +68,27 @@ static const char *keywords[] = {
"IF U SAY SO", /* TT_IFUSAYSO */ "IF U SAY SO", /* TT_IFUSAYSO */
"FOUND YR", /* TT_FOUNDYR */ "FOUND YR", /* TT_FOUNDYR */
"SRS", /* TT_SRS */ "SRS", /* TT_SRS */
"'Z", /* TT_APOSTROPHEZ */
"BUKKIT", /* TT_BUKKIT */
"" /* TT_ENDOFTOKENS */ "" /* TT_ENDOFTOKENS */
}; };
/** Checks if a string of characters follows the format for an integer. /**
* Specifically, it checks if the string of characters matches the regular * Checks if a string follows the format for an integer. Specifically, it
* expression: [-]?[1-9][0-9]* | 0 * checks if the string matches the regular expression: (-?[1-9][0-9]*|0).
* *
* \retval 0 The string of characters is not an integer. * \param [in] image The string to check.
* \retval 1 The string of characters is an integer. *
* * \retval 0 \a image does not match the pattern for an integer.
* \see isFloat(const char *) *
* \see isString(const char *) * \retval 1 \a image matches the pattern for an integer.
* \see isIdentifier(const char *) */ */
int isInteger(const char *image) /**< [in] The string of characters to compare. */ int isInteger(const char *image)
{ {
const char *cur = image; const char *cur = image;
if (*cur == '-' || (isdigit(*cur) && *cur != '0') || (*cur == '0' && *(cur + 1) == '\0')) { if (*cur == '-'
|| (isdigit(*cur) && *cur != '0')
|| (*cur == '0' && *(cur + 1) == '\0')) {
cur++; cur++;
while (isdigit(*cur)) cur++; while (isdigit(*cur)) cur++;
if (*cur == '\0') return 1; if (*cur == '\0') return 1;
@ -91,17 +96,17 @@ int isInteger(const char *image) /**< [in] The string of characters to compare.
return 0; return 0;
} }
/** Checks if a string of characters follows the format for a floating /**
* point decimal. Specifically, it checks if the string of characters matches * Checks if a string follows the format for a decimal. Specifically, it checks
* the regular expression: [-]?[0-9].[0-9]* * if the string matches the regular expression: (-?[0-9].[0-9]*).
* *
* \retval 0 The string of characters is not a floating point decimal. * \param [in] image The string to check.
* \retval 1 The string of characters is a floating point decimal. *
* * \retval 0 \a image does not match the pattern for a decimal.
* \see isInteger(const char *) *
* \see isString(const char *) * \retval 1 \a image matches the pattern for a decimal.
* \see isIdentifier(const char *) */ */
int isFloat(const char *image) /**< [in] The string of characters to compare. */ int isFloat(const char *image)
{ {
const char *cur = image; const char *cur = image;
if (*cur == '-' || isdigit(*cur)) { if (*cur == '-' || isdigit(*cur)) {
@ -116,33 +121,33 @@ int isFloat(const char *image) /**< [in] The string of characters to compare. */
return 0; return 0;
} }
/** Checks if a string of characters follows the format for a string. /**
* Specifically, it checks if the string of characters begins and ends with a * Checks if a string follows the format for a string literal. Specifically, it
* quote character. * checks if the string matches the regular expression: (".*").
* *
* \retval 0 The string of characters is not a string. * \param [in] image The string to check.
* \retval 1 The string of characters is a string. *
* * \retval 0 \a image does not match the pattern for a string.
* \see isInteger(const char *) *
* \see isFloat(const char *) * \retval 1 \a image matches the pattern for a string.
* \see isIdentifier(const char *) */ */
int isString(const char *image) /**< [in] The string of characters to compare. */ int isString(const char *image)
{ {
size_t len = strlen(image); size_t len = strlen(image);
return (len >= 2 && image[0] == '"' && image[len - 1] == '"'); return (len >= 2 && image[0] == '"' && image[len - 1] == '"');
} }
/** Checks if a string of characters follows the format for an identifier. /**
* Specifically, it checks if the string of characters matches the regular * Checks if a string follows the format for an identifier. Specifically, it
* expression: [a-zA-Z][a-zA-Z0-9_]* * checks if the string matches the regular expression: ([a-zA-Z][a-zA-Z0-9_]*).
* *
* \retval 0 The string of characters is not an identifier. * \param [in] image The string to check.
* \retval 1 The string of characters is an identifier. *
* * \retval 0 \a image does not match the pattern for an identifier.
* \see isInteger(const char *) *
* \see isFloat(const char *) * \retval 1 \a image matches the pattern for an identifier.
* \see isString(const char *) */ */
int isIdentifier(const char *image) /**< [in] The string of characters to compare. */ int isIdentifier(const char *image)
{ {
const char *cur = image; const char *cur = image;
/* First character must be alphabetic */ /* First character must be alphabetic */
@ -155,17 +160,25 @@ int isIdentifier(const char *image) /**< [in] The string of characters to compar
return 1; return 1;
} }
/** Creates a Token structure. /**
* * Creates a token.
* \return A pointer to a Token structure with the desired properties. *
* * \param [in] type The type of token to create.
* \retval NULL malloc was unable to allocate memory. *
* * \param [in] image The string that represents the token.
* \see deleteToken(Token *) */ *
Token *createToken(TokenType type, /**< [in] The type of token to create. */ * \param [in] fname The name of the file containing the token.
const char *image, /**< [in] The characters from the source file that represent the token. */ *
const char *fname, /**< [in] A pointer to the name of the file containing the token. */ * \param [in] line The number of the line containing the token.
unsigned int line) /**< [in] The line number from the source file that the token occurred on. */ *
* \return A pointer to a new token with the desired properties.
*
* \retval NULL Memory allocation failed.
*/
Token *createToken(TokenType type,
const char *image,
const char *fname,
unsigned int line)
{ {
Token *ret = malloc(sizeof(Token)); Token *ret = malloc(sizeof(Token));
if (!ret) { if (!ret) {
@ -180,20 +193,22 @@ Token *createToken(TokenType type, /**< [in] The type of token to create. */
return NULL; return NULL;
} }
strcpy(ret->image, image); strcpy(ret->image, image);
/** \note fname is not copied because it would only one copy is stored /**
* for all Token structures that share it. */ * \note fname is not copied because only one copy is stored for all
* Token structures that share it.
*/
ret->fname = fname; ret->fname = fname;
ret->line = line; ret->line = line;
return ret; return ret;
} }
/** Deletes a Token structure. /**
* * Deletes a token.
* \pre \a token points to a Token structure created by createToken(TokenType, const char *, const char *, unsigned int). *
* * \param [in,out] token The token to delete.
* \post The memory at \a token and all of its elements will be freed. *
* * \post The memory at \a token and all of its members will be freed.
* \see createToken(TokenType, const char *, const char *, unsigned int) */ */
void deleteToken(Token *token) void deleteToken(Token *token)
{ {
if (!token) return; if (!token) return;
@ -201,22 +216,25 @@ void deleteToken(Token *token)
free(token); free(token);
} }
/** Adds a Token to an array of Token structures. /**
* * Adds a token to a list.
* \note \a list may be NULL in which case a new list is created. *
* * \param [in,out] list The list of tokens to add \a token to.
* \pre \a num is the number of elements in \a list. *
* * \param [in,out] num The number of tokens in \a list.
* \post \a token will be added on to the end of \a list and the value at \a num *
* will be updated accordingly. * \param [in] token The token to add to \a list.
* *
* \retval 0 realloc was unable to allocate memory. * \post \a token will be added to the end of \a list and the size of \a list
* \retval 1 \a node was added to \a list. * will be updated.
* *
* \see deleteTokens(Token **) */ * \retval 0 Memory allocation failed.
int addToken(Token ***list, /**< [in,out] A pointer to a pointer to an array of Token structures to add the new Token onto. */ *
unsigned int *num, /**< [in,out] A pointer to the number of elements in \a list. */ * \retval 1 \a token was added to \a list.
Token *token) /**< [in] A pointer to the Token structure to add to \a list. */ */
int addToken(Token ***list,
unsigned int *num,
Token *token)
{ {
unsigned int newsize = *num + 1; unsigned int newsize = *num + 1;
void *mem = realloc(*list, sizeof(Token *) * newsize); void *mem = realloc(*list, sizeof(Token *) * newsize);
@ -233,14 +251,14 @@ int addToken(Token ***list, /**< [in,out] A pointer to a pointer to an array
return 1; return 1;
} }
/** Deletes an array of Token structures. /**
* * Deletes a list of tokens.
* \pre \a list was created by and contains items added by addToken(Token ***, unsigned int *, Token *). *
* * \param [in,out] list The list of tokens to delete.
* \post The memory at \a list and all of its elements will be freed. *
* * \post The memory at \a list and all of its members will be freed.
* \see addToken(Token ***, unsigned int *, Token *) */ */
void deleteTokens(Token **list) /**< [in,out] A pointer to an array of Token structures to be deleted. */ void deleteTokens(Token **list)
{ {
Token **tok = list; Token **tok = list;
while (*tok) { while (*tok) {
@ -250,20 +268,28 @@ void deleteTokens(Token **list) /**< [in,out] A pointer to an array of Token str
free(list); free(list);
} }
/** Tries to match a sequence of lexemes. Scans through \a lexemes starting at /**
* \a start and tries to match space-delimited lexemes from \a match. * Matches lexemes against a string. Traverses \a lexemes starting at \a start
* * and compares lexeme images to space-delimited substrings from \a match.
* \pre \a lexemes was created by scanBuffer(const char *, unsigned int, const char *). *
* * \param [in] lexemes The list of lexemes to match from.
* \return The number of lexemes matched. */ *
unsigned int acceptLexemes(LexemeList *lexemes, /**< [in] A pointer to a LexemeList structure to match lexemes from. */ * \param [in] start The index within \a lexemes to start matching at.
unsigned int start, /**< [in] The position within \a lexemes to start matching at. */ *
const char *match) /**< [in] A pointer to a character array describing the sequence of lexemes to match. */ * \param [in] match A string of space-delimited substrings to match.
*
* \return The number of lexemes matched.
*/
unsigned int acceptLexemes(LexemeList *lexemes,
unsigned int start,
const char *match)
{ {
unsigned int offset = 0; unsigned int offset = 0;
unsigned int n; unsigned int n;
unsigned int i; unsigned int i;
for (n = 0, i = 0; match[n] || lexemes->lexemes[start + offset]->image[i]; n++) { for (n = 0, i = 0;
match[n] || lexemes->lexemes[start + offset]->image[i];
n++) {
if (match[n] == ' ') { if (match[n] == ' ') {
offset++; offset++;
i = 0; i = 0;
@ -276,49 +302,59 @@ unsigned int acceptLexemes(LexemeList *lexemes, /**< [in] A pointer to a LexemeL
return offset + 1; return offset + 1;
} }
/** Checks if a sequence of lexemes is a keyword. \a lexemes is searched /**
* starting at \a start for keywords. If one is found, the appropriate Token * Checks if the next lexemes in a list comprise a keyword and, if so, generates
* structure is created and returned and the value of \a start is incremented * a new token representing that keyword. Specifically, \a lexemes is searched,
* by the number of lexemes matched minus one. * starting at \a start for keywords. If one is found, an appropriate token is
* * created and returned and \a start is incremented by the number of lexemes
* \pre \a lexemes was created by scanBuffer(const char *, unsigned int, const char *). * matched minus one.
* *
* \post If a keyword is not found, \a start will be unmodified. Otherwise, * \param [in] lexemes A list of lexemes to search for keywords in.
* \a start will be incremented by the number of lexemes matched minus *
* one. * \param [in,out] start The position within \a lexemes to begin searching for
* * keywords.
* \return A pointer to a newly created keyword Token structure. *
* * \post If a keyword is not found, \a start will not be modified. Otherwise,
* \retval NULL No keywords were matched or there was an error allocating * \a start will be incremented by the number of lexemes matched minus one.
* memory. */ *
Token *isKeyword(LexemeList *lexemes, /**< [in] A pointer to a LexemeList structure to search for keywords in. */ * \return A pointer to the token containing the matched keyword.
unsigned int *start) /**< [in,out] A pointer to the position within \a lexemes to start checking at. */ *
* \retval NULL No keywords were found or there was an error allocating memory.
*/
Token *isKeyword(LexemeList *lexemes,
unsigned int *start)
{ {
Token *token = NULL; Token *token = NULL;
TokenType type; TokenType type;
const char *fname = lexemes->lexemes[*start]->fname; const char *fname = lexemes->lexemes[*start]->fname;
unsigned int line = lexemes->lexemes[*start]->line; unsigned int line = lexemes->lexemes[*start]->line;
/* For each keyword, */
for (type = 0; type != TT_ENDOFTOKENS; type++) { for (type = 0; type != TT_ENDOFTOKENS; type++) {
unsigned int num = acceptLexemes(lexemes, *start, keywords[type]); /* Check if the start of lexemes match */
unsigned int num = acceptLexemes(lexemes,
*start, keywords[type]);
if (!num) continue; if (!num) continue;
/* If so, create a new token for the keyword */
token = createToken(type, keywords[type], fname, line); token = createToken(type, keywords[type], fname, line);
/* And advance the start */
*start += (num - 1); *start += (num - 1);
break; break;
} }
return token; return token;
} }
/** Converts a list of lexemes into tokens. Additionally parses the literal /**
* values of integers, floating point decimals, and strings. * Converts a list of lexemes into tokens. Also parses integers, floats, and
* * strings into tokens with semantic meaning.
* \pre \a list was created by scanBuffer(const char *, unsigned int, const char *). *
* * \param [in] list A list of lexemes to tokenize.
* \return A pointer to an array of Token structures representing the tokenized *
* form of the input lexeme stream. * \return A list of tokens generated from \a list.
* *
* \retval NULL An unrecognized token was encountered or memory allocation * \retval NULL An unrecognized token was encountered or memory allocation
* failed. */ * failed.
Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList structure to tokenize. */ */
Token **tokenizeLexemes(LexemeList *list)
{ {
void *mem = NULL; void *mem = NULL;
Token **ret = NULL; Token **ret = NULL;
@ -368,8 +404,8 @@ Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList st
continue; continue;
} }
/* Newline */ /* Newline */
/* Note that the spec is unclear as to whether a command *must* follow /* Note that the spec is unclear as to whether a command *must*
* a comma. For now, we let commas end a line. */ * follow a comma. For now, we let commas end a line. */
else if (!strcmp(image, "\n")) { else if (!strcmp(image, "\n")) {
/* Note that we ignore any initial newlines */ /* Note that we ignore any initial newlines */
if (retsize < 1) { if (retsize < 1) {
@ -392,8 +428,8 @@ Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList st
else if ((token = isKeyword(list, &n))) { else if ((token = isKeyword(list, &n))) {
} }
/* Identifier */ /* Identifier */
/* This must be placed after keyword parsing because most /* This must be placed after keyword parsing or else most
* keywords look like identifiers. */ * keywords would be tokenized as identifiers. */
else if (isIdentifier(image)) { else if (isIdentifier(image)) {
token = createToken(TT_IDENTIFIER, image, fname, line); token = createToken(TT_IDENTIFIER, image, fname, line);
} }

View File

@ -1,14 +1,16 @@
/** Structures and functions for grouping lexemes into tokens. The tokenizer /**
* reads through an array of lexemes (generated by the lexer) and groups them * Structures and functions for grouping lexemes into tokens. The tokenizer
* into tokens based on their structure. In addition, some lexemes with * reads through an array of lexemes (generated by the lexer) and groups them
* semantic meaning (such as integers, floats, strings, and booleans) will have * into tokens based on their structure. In addition, some lexemes with
* their values extracted and stored. * semantic meaning (such as integers, floats, strings, and booleans) will have
* * their values extracted and stored.
* \file tokenizer.h *
* * \file tokenizer.h
* \author Justin J. Meza *
* * \author Justin J. Meza
* \date 2010 */ *
* \date 2010-2011
*/
#ifndef __TOKENIZER_H__ #ifndef __TOKENIZER_H__
#define __TOKENIZER_H__ #define __TOKENIZER_H__
@ -21,107 +23,138 @@
#undef DEBUG #undef DEBUG
/** Denotes the type of token present. All of the token type names are /**
* self-explainatory and correspond to either the semantic type of token data * Represents a token type. All of the token type names correspond to either
* (in the case of TT_INTEGER, TT_FLOAT, TT_STRING, or TT_IDENTIFIER) or the * the semantic type of token data or the lexemes which make up the particular
* lexemes which make up the particular token. * token.
* *
* \note Remember to update the keywords array with the token image. */ * \note Remember to update the keywords array (in the tokens C file) with the
* token image.
*/
typedef enum { typedef enum {
TT_INTEGER, TT_INTEGER, /**< Integer literal. */
TT_FLOAT, TT_FLOAT, /**< Decimal literal. */
TT_STRING, TT_STRING, /**< String literal. */
TT_IDENTIFIER, TT_IDENTIFIER, /**< Identifier literal. */
TT_BOOLEAN, TT_BOOLEAN, /**< Boolean literal. */
TT_IT, TT_IT, /**< \ref impvar "Implicit variable". */
TT_NOOB, TT_NOOB, /**< Nil keyword. */
TT_NUMBR, TT_NUMBR, /**< Integer keyword. */
TT_NUMBAR, TT_NUMBAR, /**< Decimal keyword. */
TT_TROOF, TT_TROOF, /**< Boolean keyword. */
TT_YARN, TT_YARN, /**< String keyword. */
TT_EOF, TT_BUKKIT, /**< Array. */
TT_NEWLINE, TT_EOF, /**< End of file. */
TT_HAI, TT_NEWLINE, /**< Newline. */
TT_KTHXBYE, TT_HAI, /**< Beginning of main block. */
TT_HASA, TT_KTHXBYE, /**< End of main block. */
TT_ITZA, TT_HASA, /**< Variable declaration. */
TT_ITZ, TT_ITZA, /**< Variable type initialization. */
TT_RNOOB, TT_ITZ, /**< Variable value initialization. */
TT_R, TT_RNOOB, /**< Deallocation. */
TT_ANYR, TT_R, /**< Assignment. */
TT_AN, TT_ANYR, /**< User-defined function argument separator. */
TT_SUMOF, TT_AN, /**< Built-in function argument separator. */
TT_DIFFOF, TT_SUMOF, /**< Addition. */
TT_PRODUKTOF, TT_DIFFOF, /**< Subtraction. */
TT_QUOSHUNTOF, TT_PRODUKTOF, /**< Multiplication. */
TT_MODOF, TT_QUOSHUNTOF, /**< Division. */
TT_BIGGROF, TT_MODOF, /**< Modulo. */
TT_SMALLROF, TT_BIGGROF, /**< Greater than. */
TT_BOTHOF, TT_SMALLROF, /**< Less than. */
TT_EITHEROF, TT_BOTHOF, /**< Logical AND. */
TT_WONOF, TT_EITHEROF, /**< Logical OR. */
TT_NOT, TT_WONOF, /**< Logical XOR. */
TT_MKAY, TT_NOT, /**< Logical NOT. */
TT_ALLOF, TT_MKAY, /**< Infinite arity argument delimiter. */
TT_ANYOF, TT_ALLOF, /**< Infinite arity logical AND. */
TT_BOTHSAEM, TT_ANYOF, /**< Infinite arity logical OR. */
TT_DIFFRINT, TT_BOTHSAEM, /**< Equality. */
TT_MAEK, TT_DIFFRINT, /**< Inequality. */
TT_A, TT_MAEK, /**< Cast. */
TT_ISNOWA, TT_A, /**< Cast target separator. */
TT_VISIBLE, TT_ISNOWA, /**< In-place cast. */
TT_SMOOSH, TT_VISIBLE, /**< Print. */
TT_BANG, TT_SMOOSH, /**< String concatenation. */
TT_GIMMEH, TT_BANG, /**< Exclamation point (!) */
TT_ORLY, TT_GIMMEH, /**< Input. */
TT_YARLY, TT_ORLY, /**< Conditional. */
TT_MEBBE, TT_YARLY, /**< True branch. */
TT_NOWAI, TT_MEBBE, /**< Else branch. */
TT_OIC, TT_NOWAI, /**< False branch. */
TT_WTF, TT_OIC, /**< Conditional and switch delimiter. */
TT_OMG, TT_WTF, /**< Switch. */
TT_OMGWTF, TT_OMG, /**< Case. */
TT_GTFO, TT_OMGWTF, /**< Default case. */
TT_IMINYR, TT_GTFO, /**< Break or return without value. */
TT_UPPIN, TT_IMINYR, /**< Loop beginning. */
TT_NERFIN, TT_UPPIN, /**< Auto increment loop variable. */
TT_YR, TT_NERFIN, /**< Auto decrement loop variable. */
TT_TIL, TT_YR, /**< Function name delimiter. */
TT_WILE, TT_TIL, /**< Do until. */
TT_IMOUTTAYR, TT_WILE, /**< Do while. */
TT_HOWIZ, TT_IMOUTTAYR, /**< Loop ending. */
TT_IZ, TT_HOWIZ, /**< Function definition beginning. */
TT_IFUSAYSO, TT_IZ, /**< Function scope delimiter. */
TT_FOUNDYR, TT_IFUSAYSO, /**< Function definition end. */
TT_SRS, TT_FOUNDYR, /**< Return with value. */
TT_ENDOFTOKENS TT_SRS, /**< Indirect variable access. */
TT_APOSTROPHEZ, /**< Array slot access ('Z). */
TT_ENDOFTOKENS /**< The end of this enum -- don't move it! */
} TokenType; } TokenType;
/** Stores the data associated with a Token structure. */ /**
* Stores token data with semantic meaning.
*/
typedef union { typedef union {
int i; /**< Integer data. */ int i; /**< Integer data. */
float f; /**< Floating point data. */ float f; /**< Decimal data. */
} TokenData; } TokenData;
/** Stores a token and any value parsed by the tokenizer. */ /**
* Stores a token type and any parsed values.
*/
typedef struct { typedef struct {
TokenType type; /**< The type of token. */ TokenType type; /**< The type of token. */
TokenData data; /**< The stored data of type \a type. */ TokenData data; /**< The stored data of type \a type. */
char *image; /**< The array of characters from the lexer which correspond to the token. */ char *image; /**< The characters that comprise the token. */
const char *fname; /**< A pointer to the name of the file containing the token. */ const char *fname; /**< The name of the file containing the token. */
unsigned int line; /**< The line number from the source file that the token occurred on. */ unsigned int line; /**< The line number the token was on. */
} Token; } Token;
/**
* \name Utilities
*
* Functions for performing helper tasks.
*/
/**@{*/
int isInteger(const char *); int isInteger(const char *);
int isFloat(const char *); int isFloat(const char *);
int isString(const char *); int isString(const char *);
int isIdentifier(const char *); int isIdentifier(const char *);
Token *isKeyword(LexemeList *, unsigned int *);
/**@}*/
/**
* \name Token modifiers
*
* Functions for creating and deleting tokens.
*/
/**@{*/
Token *createToken(TokenType, const char *, const char *, unsigned int); Token *createToken(TokenType, const char *, const char *, unsigned int);
void deleteToken(Token *); void deleteToken(Token *);
int addToken(Token ***, unsigned int *, Token*); int addToken(Token ***, unsigned int *, Token*);
void deleteTokens(Token **); void deleteTokens(Token **);
unsigned int acceptLexemes(LexemeList *, unsigned int, const char *); unsigned int acceptLexemes(LexemeList *, unsigned int, const char *);
Token *isKeyword(LexemeList *, unsigned int *); /**@}*/
/**
* \name Lexeme tokenizer
*
* Generates tokens from lexemes.
*/
/**@{*/
Token **tokenizeLexemes(LexemeList *); Token **tokenizeLexemes(LexemeList *);
/**@}*/
#endif /* __TOKENIZER_H__ */ #endif /* __TOKENIZER_H__ */

View File

@ -36860,15 +36860,22 @@ static const long codepoints[] = {
#define NUM_UNICODE 18426 #define NUM_UNICODE 18426
/** Performs a binary search on an array of strings. /**
* * Performs a binary search on an array of strings.
* \return The index of the matching entry, if found. *
* * \param [in] strings The array of strings to search.
* \retval -1 The entry does not exist in the array. */ * \param [in] start The index to start searching at.
int binarySearch(const char **strings, /**< [in] A pointer to an array of character strings to search through. */ * \param [in] end The index to end searching at.
int start, /**< [in] The start of the range to search through. */ * \param [in] find The string to search for.
int end, /**< [in] The end of the range to search through. */ *
const char *find) /**< [in] The entry to search for. */ * \return The index of the matching string, if found.
*
* \retval -1 The string was not found in the array.
*/
int binarySearch(const char **strings,
int start,
int end,
const char *find)
{ {
int midpoint; int midpoint;
int cmp; int cmp;
@ -36884,12 +36891,16 @@ int binarySearch(const char **strings, /**< [in] A pointer to an array of charac
return -1; return -1;
} }
/** Converts a Unicode normative name to a Unicode code point. /**
* * Converts a Unicode normative name to a Unicode code point.
* \return The Unicode code point corresponding to the given Unicode name. *
* * \param [in] name The Unicode normative name to convert.
* \retval -1 An invalid Unicode normative name was supplied. */ *
long convertNormativeNameToCodePoint(const char *name) /**< [in] A pointer to a string of characters representing the Unicode normative name desired. */ * \return The Unicode code point corresponding to \a name.
*
* \retval -1 An invalid Unicode normative name was supplied.
*/
long convertNormativeNameToCodePoint(const char *name)
{ {
int index = binarySearch(names, 0, NUM_UNICODE - 1, name); int index = binarySearch(names, 0, NUM_UNICODE - 1, name);
if (index < 0) { if (index < 0) {
@ -36900,14 +36911,18 @@ long convertNormativeNameToCodePoint(const char *name) /**< [in] A pointer to a
return codepoints[index]; return codepoints[index];
} }
/** Converts the bits in a long integer representing a Unicode code point to a /**
* series of one or more bytes representing a UTF-8 character. * Converts a Unicode code point to a UTF-8 character.
* *
* \return The number of characters in the converted multi-byte character. * \param [in] codepoint The Unicode code point to convert to UTF-8.
* * \param [out] out A pointer to the location to store the UTF-8 character.
* \retval 0 An invalid Unicode code point was supplied. */ *
size_t convertCodePointToUTF8(unsigned long codepoint, /**< [in] The Unicode code point to convert to UTF-8. */ * \return The length of the converted multi-byte UTF-8 character.
char *out) /**< [out] A pointer to the location to store the resulting UTF-8 bytes. */ *
* \retval 0 An invalid Unicode code point was supplied.
*/
size_t convertCodePointToUTF8(unsigned long codepoint,
char *out)
{ {
/* Out of range */ /* Out of range */
if (codepoint > 0x10FFFF) { if (codepoint > 0x10FFFF) {

View File

@ -1,11 +1,13 @@
/** Data and functions for converting from Unicode normative names to /**
* code points. * Data and functions for converting from Unicode normative names to Unicode
* * code points.
* \file unicode.h *
* * \file unicode.h
* \author Justin J. Meza *
* * \author Justin J. Meza
* \date 2010 */ *
* \date 2010-2011
*/
#ifndef __UNICODE_H__ #ifndef __UNICODE_H__
#define __UNICODE_H__ #define __UNICODE_H__