Added basic array functionality and cleaned up documentation.

This commit is contained in:
Justin J. Meza 2011-06-14 23:54:12 -07:00
parent 977e3dfde5
commit a6ef5811e8
17 changed files with 5218 additions and 4813 deletions

1606
Doxyfile

File diff suppressed because it is too large Load Diff

View File

@ -13,21 +13,24 @@ testdir = ./test
all: $(TARGET)
$(TARGET): $(OBJS) $(LIBS)
$(TARGET): $(OBJS)
$(CC) $(CPPFLAGS) -o $(TARGET) $(OBJS) $(LIBS)
pedantic: $(OBJS) $(LIBS)
$(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -Wall -ansi -pedantic -g -o $(TARGET) $(SRCS) $(HDRS) $(LIBS)
$(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -Wall -ansi -pedantic -o $(TARGET) $(SRCS) $(HDRS) $(LIBS)
lint: all
$(LINT) $(SRCS)
debug: $(OBJS) $(LIBS)
$(CC) -g -o $(TARGET) $(SRCS) $(LIBS)
check: all
@cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.3-Tests/
@cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.3-Tests
check-mem: all
@echo "This will take a long time! Be patient!"
@cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.3-Tests/
@cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.3-Tests
install: all
$(INSTALL) $(TARGET) $(bindir)/$(TARGET)

10
README
View File

@ -2,7 +2,7 @@
LICENSE
Copyright (C) 2010 Justin J. Meza
Copyright (C) 2010-2011 Justin J. Meza
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -22,15 +22,15 @@ ABOUT
lci is a LOLCODE interpreter written in C and is designed to be correct,
portable, fast, and precisely documented.
* correct: Every effort has been made to test lci's conformance to the
- correct: Every effort has been made to test lci's conformance to the
LOLCODE language specification. Unit tests come packaged with the lci
source code.
* portable: lci follows the widely ported ANSI C specification allowing it
- portable: lci follows the widely ported ANSI C specification allowing it
to compile on a broad range of systems.
* fast: Much effort has gone into producing simple and efficient code
- fast: Much effort has gone into producing simple and efficient code
whenever possible to the extent that the above points are not
compromised.
* precisely documented: lci uses Doxygen to generate literate code
- precisely documented: lci uses Doxygen to generate literate code
documentation, browsable here.
This project's homepage is at http://icanhaslolcode.org. For help, visit

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,15 @@
/** Structures and functions for interpreting a parse tree. The interpreter
* traverses a parse tree in a depth-first manner, interpreting each node it
* reaches along the way. This is the last stage of the processing of a source
* code file.
*
* \file interpreter.h
*
* \author Justin J. Meza
*
* \date 2010 */
/**
* Structures and functions for interpreting a parse tree. The interpreter
* traverses a parse tree in a depth-first manner, interpreting each node it
* reaches along the way. This is the last stage of the processing of a source
* code file.
*
* \file interpreter.h
*
* \author Justin J. Meza
*
* \date 2010-2011
*/
#ifndef __INTERPRETER_H__
#define __INTERPRETER_H__
@ -19,127 +21,221 @@
#include "parser.h"
#include "unicode.h"
/** Gets the integer data associated with a ValueObject structure. */
/**
* Retrieves a value's integer data.
*/
#define getInteger(value) (value->data.i)
/** Gets the floating point data associated with a ValueObject structure. */
/**
* Retrieves a value's decimal data.
*/
#define getFloat(value) (value->data.f)
/** Gets the string data associated with a ValueObject structure. */
/**
* Retrieves a value's string data.
*/
#define getString(value) (value->data.s)
/** Gets the function definition associated with a ValueObject structure. */
/**
* Retrieves a value's function data.
*/
#define getFunction(value) (value->data.fn)
/** Denotes the type of a value. */
/**
* Retrieves a value's array data.
*/
#define getArray(value) (value->data.a)
/**
* Represents a value type.
*/
typedef enum {
VT_INTEGER, /**< An integer value. */
VT_FLOAT, /**< A floating point decimal value. */
VT_BOOLEAN, /**< A true/false value. */
VT_STRING, /**< A character string value. */
VT_FLOAT, /**< A decimal value. */
VT_BOOLEAN, /**< A boolean value. */
VT_STRING, /**< A string value. */
VT_NIL, /**< Represents no value. */
VT_FUNC /**< A function. */
VT_FUNC, /**< A function. */
VT_ARRAY /**< An array. */
} ValueType;
/** Stores the data associated with a ValueObject structure. */
/**
* Stores value data.
*/
typedef union {
int i; /**< Integer data. */
float f; /**< Floating point data. */
char *s; /**< Character string data. */
FuncDefStmtNode *fn; /**< Function definition. */
int i; /**< Integer data. */
float f; /**< Decimal data. */
char *s; /**< String data. */
FuncDefStmtNode *fn; /**< Function data. */
struct scopeobject *a; /**< Array data. */
} ValueData;
/** Increments the semaphore of a ValueObject structure. */
/**
* Increments a value's semaphore.
*/
#define V(value) (value->semaphore++)
/** Decrements the semaphore of a ValueObject structure. */
/**
* Decrements a value's semaphore.
*/
#define P(value) (value->semaphore--)
/** Stores a value.
*
* \see copyValueObject(ValueObject *)
* \see deleteValueObject(ValueObject *) */
/**
* Stores a value.
*/
typedef struct {
ValueType type; /**< The type of value stored. */
ValueData data; /**< The stored data. */
ValueData data; /**< The value data. */
unsigned short semaphore; /**< A semaphore for value usage. */
} ValueObject;
/** Denotes the type of return encountered. */
/**
* Represents the return type.
*/
typedef enum {
RT_DEFAULT, /**< A block of code returned after evaluating all of its statements. */
RT_BREAK, /**< A block of code within a LoopStmtNode or SwitchStmtNode returned via a break statement. */
RT_RETURN /**< A block of code within a FuncDefStmtNode called by a FuncCallExprNode returned (either with or without a value). */
RT_DEFAULT, /**< Code block completed successfully. */
RT_BREAK, /**< Broke out of a loop or switch statement. */
RT_RETURN /**< Returned from a function. */
} ReturnType;
/** Stores a return state. Returns are encountered when
* - a block of code evaluates all of its statements,
* - a block of code within a LoopStmt or SwitchStmt encountered a break statement, or
* - a block of code within a FunctionDefStmt called by a FunctionCallExpr encounters a ReturnStmt. */
/**
* Stores return state.
*/
typedef struct {
ReturnType type; /**< The type of return encountered. */
ValueObject *value; /**< The optional return value. */
} ReturnObject;
/** Stores the variables in a particular scope. Scopes are arranged
* heirarchically from global (the ancestor of all other scopes) to local (the
* temporary scope of a BlockNode).
*
* \see createScopeObject(ScopeObject *)
* \see deleteScopeObject(ScopeObject *) */
/**
* Stores a set of variables hierarchically.
*/
typedef struct scopeobject {
struct scopeobject *parent; /**< A pointer to the parent ScopeObject. */
ValueObject *impvar; /**< A pointer to the ValueObject representing the implicit variable for this scope. */
unsigned int numvals; /**< The number of ValueObject structures in \a values. */
char **names; /**< A pointer to the array of character strings naming the values in the scope. */
ValueObject **values; /**< A pointer to an array of ValueObject structures in the scope. */
struct scopeobject *parent; /**< The parent scope. */
ValueObject *impvar; /**< The \ref impvar "implicit variable". */
unsigned int numvals; /**< The number of values in the scope. */
char **names; /**< The names of the values. */
ValueObject **values; /**< The values in the scope. */
} ScopeObject;
char *createString(char *);
/**
* \name Utilities
*
* Functions for performing helper tasks.
*/
/**@{*/
void printInterpreterError(const char *, IdentifierNode *, ScopeObject *);
char *copyString(char *);
unsigned int isDecString(const char *);
unsigned int isHexString(const char *);
char *resolveIdentifierName(IdentifierNode *, ScopeObject *);
/**@}*/
/**
* \name Value object modifiers
*
* Functions for creating, copying, and deleting value objects.
*/
/**@{*/
ValueObject *createNilValueObject(void);
ValueObject *createBooleanValueObject(int);
ValueObject *createIntegerValueObject(int);
ValueObject *createFloatValueObject(float);
ValueObject *createStringValueObject(char *);
ValueObject *createFunctionValueObject(FuncDefStmtNode *);
ValueObject *createArrayValueObject(ScopeObject *);
ValueObject *copyValueObject(ValueObject *);
void deleteValueObject(ValueObject *);
ReturnObject *createReturnObject(ReturnType, ValueObject *);
void deleteReturnObject(ReturnObject *);
char *resolveIdentifierName(IdentifierNode *, ScopeObject *);
/**@}*/
/**
* \name Scope object modifiers
*
* Functions for manipulating scope objects and their data.
*/
/**@{*/
ScopeObject *createScopeObject(ScopeObject *);
void deleteScopeObject(ScopeObject *);
ValueObject *getScopeValue(ScopeObject *, IdentifierNode *);
ValueObject *getLocalScopeValue(ScopeObject *, IdentifierNode *);
ValueObject *createScopeValue(ScopeObject *, IdentifierNode *);
ValueObject *updateScopeValue(ScopeObject *, IdentifierNode *, ValueObject *);
void deleteScopeValue(ScopeObject *, IdentifierNode *);
unsigned int isNumString(const char *);
unsigned int isHexString(const char *);
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *);
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *);
ValueObject *castStringExplicit(ValueObject *, ScopeObject *);
ValueObject *createScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
ValueObject *updateScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *, ValueObject *);
ValueObject *getScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
ValueObject *getScopeValueArray(ScopeObject *, ScopeObject *, IdentifierNode *);
ValueObject *getScopeValueLocal(ScopeObject *, ScopeObject *, IdentifierNode *);
ScopeObject *getScopeObject(ScopeObject *, ScopeObject *, IdentifierNode *);
void deleteScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
/**@}*/
/**
* \name Return object modifiers
*
* Functions for creating and deleting return objects.
*/
/**@{*/
ReturnObject *createReturnObject(ReturnType, ValueObject *);
void deleteReturnObject(ReturnObject *);
/**@}*/
/**
* \name Casts
*
* Functions for performing casts between different types of values.
*/
/**@{*/
ValueObject *castBooleanImplicit(ValueObject *, ScopeObject *);
ValueObject *castIntegerImplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatImplicit(ValueObject *, ScopeObject *);
ValueObject *castStringImplicit(ValueObject *, ScopeObject *);
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *);
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *);
ValueObject *castStringExplicit(ValueObject *, ScopeObject *);
/**@}*/
/**
* \name Node interpreters
*
* Functions for interpreting basic parse tree nodes.
*/
/**@{*/
ValueObject *interpretExprNode(ExprNode *, ScopeObject *);
ReturnObject *interpretStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretStmtNodeList(StmtNodeList *, ScopeObject *);
ReturnObject *interpretBlockNode(BlockNode *, ScopeObject *);
int interpretMainNode(MainNode *);
/**@}*/
/**
* \name Expression interpreters
*
* Functions for interpreting expression parse tree nodes.
*/
/**@{*/
ValueObject *interpretImpVarExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretCastExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretFuncCallExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretIdentifierExprNode(ExprNode *, ScopeObject *);
ValueObject *interpretConstantExprNode(ExprNode *, ScopeObject *);
/**@}*/
/**
* \name Operation interpreters
*
* Functions for interpreting operation parse tree nodes.
*/
/**@{*/
ValueObject *interpretNotOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretArithOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretBoolOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretEqualityOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretConcatOpExprNode(OpExprNode *, ScopeObject *);
ValueObject *interpretOpExprNode(ExprNode *, ScopeObject *);
/**@}*/
/**
* \name Statement interpreters
*
* Functions for interpreting statement parse tree nodes.
*/
/**@{*/
ReturnObject *interpretCastStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretPrintStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretInputStmtNode(StmtNode *, ScopeObject *);
@ -153,7 +249,14 @@ ReturnObject *interpretLoopStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretDeallocationStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretFuncDefStmtNode(StmtNode *, ScopeObject *);
ReturnObject *interpretExprStmtNode(StmtNode *, ScopeObject *);
/**@}*/
/**
* \name Arithmetic operations (integer-integer)
*
* Functions for performing integer-integer operations on values.
*/
/**@{*/
ValueObject *opAddIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opSubIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMultIntegerInteger(ValueObject *, ValueObject *);
@ -161,7 +264,14 @@ ValueObject *opDivIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMaxIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opMinIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opModIntegerInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Arithmetic operations (integer-float)
*
* Functions for performing integer-float operations on values.
*/
/**@{*/
ValueObject *opAddIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opSubIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMultIntegerFloat(ValueObject *, ValueObject *);
@ -169,7 +279,14 @@ ValueObject *opDivIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMaxIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opMinIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opModIntegerFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Arithmetic operations (float-integer)
*
* Functions for performing float-integer operations on values.
*/
/**@{*/
ValueObject *opAddFloatInteger(ValueObject *, ValueObject *);
ValueObject *opSubFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMultFloatInteger(ValueObject *, ValueObject *);
@ -177,7 +294,14 @@ ValueObject *opDivFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMaxFloatInteger(ValueObject *, ValueObject *);
ValueObject *opMinFloatInteger(ValueObject *, ValueObject *);
ValueObject *opModFloatInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Arithmetic operations (float-float)
*
* Functions for performing float-float operations on values.
*/
/**@{*/
ValueObject *opAddFloatFloat(ValueObject *, ValueObject *);
ValueObject *opSubFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMultFloatFloat(ValueObject *, ValueObject *);
@ -185,26 +309,76 @@ ValueObject *opDivFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMaxFloatFloat(ValueObject *, ValueObject *);
ValueObject *opMinFloatFloat(ValueObject *, ValueObject *);
ValueObject *opModFloatFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (boolean-boolean)
*
* Functions for performing boolean-boolean operations on values.
*/
/**@{*/
ValueObject *opEqBooleanBoolean(ValueObject *, ValueObject *);
ValueObject *opNeqBooleanBoolean(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (integer-integer)
*
* Functions for performing integer-integer operations on values.
*/
/**@{*/
ValueObject *opEqIntegerInteger(ValueObject *, ValueObject *);
ValueObject *opNeqIntegerInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (integer-float)
*
* Functions for performing integer-float operations on values.
*/
/**@{*/
ValueObject *opEqIntegerFloat(ValueObject *, ValueObject *);
ValueObject *opNeqIntegerFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (float-integer)
*
* Functions for performing float-integer operations on values.
*/
/**@{*/
ValueObject *opEqFloatInteger(ValueObject *, ValueObject *);
ValueObject *opNeqFloatInteger(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (float-float)
*
* Functions for performing float-float operations on values.
*/
/**@{*/
ValueObject *opEqFloatFloat(ValueObject *, ValueObject *);
ValueObject *opNeqFloatFloat(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (string-string)
*
* Functions for performing string-string operations on values.
*/
/**@{*/
ValueObject *opEqStringString(ValueObject *, ValueObject *);
ValueObject *opNeqStringString(ValueObject *, ValueObject *);
/**@}*/
/**
* \name Equality operations (nil-nil)
*
* Functions for performing nil-nil operations on values.
*/
/**@{*/
ValueObject *opEqNilNil(ValueObject *, ValueObject *);
ValueObject *opNeqNilNil(ValueObject *, ValueObject *);
/**@}*/
#endif /* __INTERPRETER_H__ */

BIN
lci.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

185
lexer.c
View File

@ -1,15 +1,19 @@
#include "lexer.h"
/** Creates a Lexeme structure.
*
* \return A pointer to a Lexeme structure with the desired properties.
*
* \retval NULL malloc was unable to allocate memory.
*
* \see deleteLexeme(Lexeme *) */
Lexeme *createLexeme(char *image, /**< [in] An array of characters that describe the lexeme. */
const char *fname, /**< [in] A pointer to the name of the file containing the lexeme. */
unsigned int line) /**< [in] The line number from the source file that the lexeme occurred on. */
/**
* Creates a lexeme.
*
* \param [in] image The string that identifies the lexeme.
*
* \param [in] fname The name of the file containing the lexeme.
*
* \param [in] line The line number the lexeme occurred on.
*
* \return A new lexeme with the desired properties.
*
* \retval NULL Memory allocation failed.
*/
Lexeme *createLexeme(char *image, const char *fname, unsigned int line)
{
Lexeme *ret = malloc(sizeof(Lexeme));
if (!ret) {
@ -23,8 +27,11 @@ Lexeme *createLexeme(char *image, /**< [in] An array of characters that de
return NULL;
}
strcpy(ret->image, image);
/** \note fname is not copied because it would only one copy is stored
* for all Lexeme structures that share it. */
/**
* \note \a fname is not copied because only one copy is stored for
* all lexemes from the same file. This is simply to avoid large
* numbers of lexemes storing duplicate file name strings.
*/
ret->fname = fname;
ret->line = line;
#ifdef DEBUG
@ -33,29 +40,29 @@ Lexeme *createLexeme(char *image, /**< [in] An array of characters that de
return ret;
}
/** Deletes a Lexeme structure.
*
* \pre \a lexeme points to a Lexeme structure created by createLexeme(char *, const char *, unsigned int).
*
* \post The memory at \a lexeme and all of its elements will be freed.
*
* \see createLexeme(char *, const char *, unsigned int) */
/**
* Deletes a lexeme.
*
* \param [in,out] lexeme The lexeme to delete.
*/
void deleteLexeme(Lexeme *lexeme)
{
if (!lexeme) return;
free(lexeme->image);
/** \note We do not free (*lex)->fname because it is shared between many
* Lexeme structures and is free'd by whoever created them. */
/**
* \note We do not free the file name because it is shared between many
* lexemes and is freed by whoever created the file name string.
*/
free(lexeme);
}
/** Creates a LexemeList structure.
*
* \return A pointer to a LexemeList structure with the desired properties.
*
* \retval NULL malloc was unable to allocate memory.
*
* \see deleteLexemeList(LexemeList *) */
/**
* Creates a list of lexemes.
*
* \return An empty lexeme list.
*
* \retval NULL Memory allocation failed.
*/
LexemeList *createLexemeList(void)
{
LexemeList *p = malloc(sizeof(LexemeList));
@ -68,20 +75,21 @@ LexemeList *createLexemeList(void)
return p;
}
/** Adds a Lexeme structure to a LexemeList structure.
*
* \pre \a list was created by createLexemeList(void).
* \pre \a lexeme was created by createLexeme(char *, const char *, unsigned int).
*
* \post \a lexeme will be added on to the end of \a list and the size of
* \a list will be updated accordingly.
*
* \return A pointer to the added Lexeme structure (will be the same as
* \a lexeme).
*
* \retval NULL realloc was unable to allocate memory. */
Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList structure to add \a lex to. */
Lexeme *lexeme) /**< [in] A pointer to the Lexeme structure to add to \a list. */
/**
* Adds a lexeme to a list of lexemes.
*
* \param [in,out] list The list of lexemes to add \a lexeme to.
*
* \param [in] lexeme The lexeme to add to \a list.
*
* \post \a lexeme will be added to the end of \a list and the size of \a list
* will be updated.
*
* \return A pointer to the added lexeme (will be the same as \a lexeme).
*
* \retval NULL Memory allocation failed.
*/
Lexeme *addLexeme(LexemeList *list, Lexeme *lexeme)
{
unsigned int newsize;
void *mem = NULL;
@ -98,16 +106,14 @@ Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList st
return lexeme;
}
/** Deletes a LexemeList structure.
*
* \pre \a list was created by createLexemeList(void) and contains
* items added by addLexeme(LexemeList *, Lexeme *).
*
* \post The memory at \a list and any of its associated members will be
* freed.
*
* \see createLexemeList(void) */
void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeList structure to delete. */
/**
* Deletes a list of lexemes.
*
* \param [in,out] list The lexeme list to delete.
*
* \post The memory at \a list and all of its members will be freed.
*/
void deleteLexemeList(LexemeList *list)
{
unsigned int n;
if (!list) return;
@ -117,37 +123,39 @@ void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeLis
free(list);
}
/** Scans through a character buffer, removing unecessary characters and
* generating lexemes. Lexemes are separated by whitespace (but newline
* characters are kept as their own lexeme). String literals are handled a
* bit differently: starting at the first quotation character, characters are
* collected until either an unescaped quotation character is read (that is, a
* quotation character not preceeded by a colon which itself is not proceeded
* by a colon) or a newline or carriage return character is read, whichever
* comes first. This handles the odd case of strings such as "::" which print
* out a single colon. Also handled are the effects of commas, ellipses, and
* bangs (!).
*
* \pre \a size is the number of characters starting at the memory location
* pointed to by \a buffer.
*
* \return A pointer to a LexemeList structure. */
LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to tokenize. */
unsigned int size, /**< [in] The number of characters in \a buffer. */
const char *fname) /**< [in] An array of characters representing the name of the file used to read \a buffer. */
/**
* Scans a buffer, removing unnecessary characters and grouping characters into
* lexemes. Lexemes are strings of characters separated by whitespace (although
* newline characters are considered separate lexemes). String literals are
* handled a bit differently: Starting at the first quotation character,
* characters are collected until either a non-escaped quotation character is
* read (i.e., a quotation character not preceded by a colon which itself is not
* preceded by a colon) or a newline or carriage return character is read,
* whichever comes first. This handles the odd (but possible) case of strings
* such as "::" which print out a single colon. Also handled are the effects of
* commas, ellipses, bangs (!), and array accesses ('Z).
*
* \param [in] buffer The characters to turn into lexemes.
*
* \param [in] size The number of characters in \a buffer.
*
* \param [in] fname The name of the file \a buffer was read from.
*
* \return A list of lexemes created from the contents of \a buffer.
*/
LexemeList *scanBuffer(const char *buffer, unsigned int size, const char *fname)
{
const char *start = buffer;
LexemeList *list = NULL;
unsigned int line = 1;
Lexeme *lex = NULL;
list = createLexemeList();
if (!list) return NULL;
while (start < buffer + size) {
char *temp = NULL;
size_t len = 1;
unsigned int len = 1;
/* Comma (,) is a soft newline */
if (*start == ',') {
lex = createLexeme("\n", fname, line);
Lexeme *lex = createLexeme("\n", fname, line);
if (!lex) {
deleteLexemeList(list);
return NULL;
@ -162,7 +170,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
}
/* Bang (!) is its own lexeme */
if (*start == '!') {
lex = createLexeme("!", fname, line);
Lexeme *lex = createLexeme("!", fname, line);
if (!lex) {
deleteLexemeList(list);
return NULL;
@ -175,6 +183,21 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
start++;
continue;
}
/* Apostrophe Z ('Z) is its own lexeme */
if (!strncmp(start, "'Z", 2)) {
Lexeme *lex = createLexeme("'Z", fname, line);
if (!lex) {
deleteLexemeList(list);
return NULL;
}
if (!addLexeme(list, lex)) {
deleteLexeme(lex);
deleteLexemeList(list);
return NULL;
}
start += 2;
continue;
}
/* Skip over leading whitespace */
while (isspace(*start)) {
unsigned int newline = 0;
@ -187,7 +210,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
newline = 1;
}
if (newline) {
lex = createLexeme("\n", fname, line);
Lexeme *lex = createLexeme("\n", fname, line);
if (!lex) {
deleteLexemeList(list);
return NULL;
@ -213,7 +236,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
/* Make sure next line is not empty */
while (*test && isspace(*test)) {
if (*test == '\r' || *test == '\n') {
fprintf(stderr, "%s:%u: a line with continuation may not be followed by an empty line\n", fname, line);
fprintf(stderr, "%s:%d: a line with continuation may not be followed by an empty line\n", fname, line);
deleteLexemeList(list);
return NULL;
}
@ -240,7 +263,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
start++;
if (start == buffer || *start == ',' || *start == '\r' || *start == '\n')
continue;
fprintf(stderr, "%s:%u: multiple line comment may not appear on the same line as code\n", fname, line);
fprintf(stderr, "%s:%d: multiple line comment may not appear on the same line as code\n", fname, line);
deleteLexemeList(list);
return NULL;
}
@ -269,9 +292,10 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
if (start[len] && !isspace(start[len])
&& *(start + len) != ','
&& *(start + len) != '!'
&& strncmp(start + len, "'Z", 2)
&& strncmp(start + len, "...", 3)
&& strncmp(start + len, "\xE2\x80\xA6", 3)) {
fprintf(stderr, "%s:%u: expected token delimiter after string literal\n", fname, line);
fprintf(stderr, "%s:%d: expected token delimiter after string literal\n", fname, line);
deleteLexemeList(list);
return NULL;
}
@ -281,6 +305,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
while (start[len] && !isspace(start[len])
&& *(start + len) != ','
&& *(start + len) != '!'
&& strncmp(start + len, "'Z", 2)
&& strncmp(start + len, "...", 3)
&& strncmp(start + len, "\xE2\x80\xA6", 3))
len++;
@ -293,7 +318,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
}
strncpy(temp, start, len);
temp[len] = '\0';
lex = createLexeme(temp, fname, line);
Lexeme *lex = createLexeme(temp, fname, line);
if (!lex) {
free(temp);
deleteLexemeList(list);
@ -309,7 +334,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
start += len;
}
/* Create an end-of-file lexeme */
lex = createLexeme("$", fname, line);
Lexeme *lex = createLexeme("$", fname, line);
if (!lex) {
deleteLexemeList(list);
return NULL;

70
lexer.h
View File

@ -1,14 +1,16 @@
/** Structures and functions for separating a character buffer into lexemes. The
* lexer reads through a buffer of characters (themselves typically read from
* standard input), strips whitespace, and breaks them up into logical atoms of
* character strings which, in turn, may be passed on to later processes (such
* as a tokenizer).
*
* \file lexer.h
*
* \author Justin J. Meza
*
* \date 2010 */
/**
* Structures and functions for separating a character buffer into lexemes --
* groups of characters. The lexer reads through a buffer of characters
* (themselves typically read from standard input), strips whitespace, and
* breaks them up into logical atoms of character strings which, in turn, may be
* passed on to later processes (such as a tokenizer).
*
* \file lexer.h
*
* \author Justin J. Meza
*
* \date 2010-2011
*/
#ifndef __LEXER_H__
#define __LEXER_H__
@ -20,36 +22,44 @@
#undef DEBUG
/** Stores a lexeme. A lexeme is the smallest unit of contiguous characters,
* namely, it has been stripped of surrounding whitespace.
*
* \note This structure does not have any list structure to hold groups of it.
* Instead, pointers to arrays of these structures are employed to allow
* for easier tokenizing.
*
* \see createLexeme(char *, unsigned int) */
/**
* Stores a lexeme. A lexeme is a group of contiguous characters, stripped of
* surrounding whitespace or other lexemes.
*/
typedef struct {
char *image; /**< An array of characters that describe the lexeme. */
const char *fname; /**< A pointer to the name of the file containing the lexeme. */
unsigned int line; /**< The line number from the source file that the lexeme occurred on. */
char *image; /**< The string that identifies the lexeme. */
const char *fname; /**< The name of the file containing the lexeme. */
unsigned int line; /**< The line number the lexeme occurred on. */
} Lexeme;
/** Stores a list of lexemes. This structure allows sets of lexemes to be
* grouped together.
*
* \see createLexemeList(void)
* \see addLexeme(LexemeList *, Lexeme *)
* \see deleteLexemeList(LexemeList *) */
/**
* Stores a list of lexemes.
*/
typedef struct {
unsigned int num; /**< The number of Lexeme structures stored. */
Lexeme **lexemes; /**< A pointer to the array of Lexeme structures. */
unsigned int num; /**< The number of lexemes stored. */
Lexeme **lexemes; /**< The array of stored lexemes. */
} LexemeList;
/**
* \name Lexeme modifiers
*
* Functions for creating, adding, and deleting lexemes and lexeme lists.
*/
/**@{*/
Lexeme *createLexeme(char *, const char *, unsigned int);
void deleteLexeme(Lexeme *);
LexemeList *createLexemeList(void);
Lexeme *addLexeme(LexemeList *, Lexeme*);
void deleteLexemeList(LexemeList *);
/**@}*/
/**
* \name Buffer lexer
*
* Generates lexemes from a character buffer.
*/
/**@{*/
LexemeList *scanBuffer(const char *, unsigned int, const char *);
/**@}*/
#endif /* __LEXER_H__ */

2
main.c
View File

@ -3,7 +3,7 @@
* \section license License
*
* lci - a LOLCODE interpreter written in C.
* Copyright (C) 2010 Justin J. Meza
* Copyright (C) 2010-2011 Justin J. Meza
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

2244
parser.c

File diff suppressed because it is too large Load Diff

975
parser.h

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,6 @@ done
# Remove options, leave arguments
shift $((OPTIND - 1))
find $2 -name *.lol | sort | xargs -n 1 ./testFile.sh$OPTS $1 && echo "Passed all tests!" && exit 0
find $2 -name *.lol | sort -t'/' -n -k1 -k2 -k3 -k4 -k5 -k6 -k7 -k8 -k9 -k10 | xargs -n 1 ./testFile.sh$OPTS $1 && echo "Passed all tests!" && exit 0
exit 1

View File

@ -61,7 +61,7 @@ then
test ! $QUIET && printf "Found output file ($OUTFILE)!\n"
fi
# Run the test
TMPFILE=$(mktemp) && test ! $QUIET && printf "Using temporary output file ($TMPFILE)...\n"
TMPFILE=$(mktemp /tmp/temp.XXXX) && test ! $QUIET && printf "Using temporary output file ($TMPFILE)...\n"
eval "$MEMCHK $PROGRAM $TESTFILE $IN > $TMPFILE"
RESULT=$?
# Check that program exited normally

View File

@ -12,6 +12,7 @@ static const char *keywords[] = {
"NUMBAR", /* TT_NUMBAR */
"TROOF", /* TT_TROOF */
"YARN", /* TT_YARN */
"BUKKIT", /* TT_BUKKIT */
"", /* TT_EOF */
"", /* TT_NEWLINE */
"HAI", /* TT_HAI */
@ -67,23 +68,27 @@ static const char *keywords[] = {
"IF U SAY SO", /* TT_IFUSAYSO */
"FOUND YR", /* TT_FOUNDYR */
"SRS", /* TT_SRS */
"'Z", /* TT_APOSTROPHEZ */
"BUKKIT", /* TT_BUKKIT */
"" /* TT_ENDOFTOKENS */
};
/** Checks if a string of characters follows the format for an integer.
* Specifically, it checks if the string of characters matches the regular
* expression: [-]?[1-9][0-9]* | 0
*
* \retval 0 The string of characters is not an integer.
* \retval 1 The string of characters is an integer.
*
* \see isFloat(const char *)
* \see isString(const char *)
* \see isIdentifier(const char *) */
int isInteger(const char *image) /**< [in] The string of characters to compare. */
/**
* Checks if a string follows the format for an integer. Specifically, it
* checks if the string matches the regular expression: (-?[1-9][0-9]*|0).
*
* \param [in] image The string to check.
*
* \retval 0 \a image does not match the pattern for an integer.
*
* \retval 1 \a image matches the pattern for an integer.
*/
int isInteger(const char *image)
{
const char *cur = image;
if (*cur == '-' || (isdigit(*cur) && *cur != '0') || (*cur == '0' && *(cur + 1) == '\0')) {
if (*cur == '-'
|| (isdigit(*cur) && *cur != '0')
|| (*cur == '0' && *(cur + 1) == '\0')) {
cur++;
while (isdigit(*cur)) cur++;
if (*cur == '\0') return 1;
@ -91,17 +96,17 @@ int isInteger(const char *image) /**< [in] The string of characters to compare.
return 0;
}
/** Checks if a string of characters follows the format for a floating
* point decimal. Specifically, it checks if the string of characters matches
* the regular expression: [-]?[0-9].[0-9]*
*
* \retval 0 The string of characters is not a floating point decimal.
* \retval 1 The string of characters is a floating point decimal.
*
* \see isInteger(const char *)
* \see isString(const char *)
* \see isIdentifier(const char *) */
int isFloat(const char *image) /**< [in] The string of characters to compare. */
/**
* Checks if a string follows the format for a decimal. Specifically, it checks
* if the string matches the regular expression: (-?[0-9].[0-9]*).
*
* \param [in] image The string to check.
*
* \retval 0 \a image does not match the pattern for a decimal.
*
* \retval 1 \a image matches the pattern for a decimal.
*/
int isFloat(const char *image)
{
const char *cur = image;
if (*cur == '-' || isdigit(*cur)) {
@ -116,33 +121,33 @@ int isFloat(const char *image) /**< [in] The string of characters to compare. */
return 0;
}
/** Checks if a string of characters follows the format for a string.
* Specifically, it checks if the string of characters begins and ends with a
* quote character.
*
* \retval 0 The string of characters is not a string.
* \retval 1 The string of characters is a string.
*
* \see isInteger(const char *)
* \see isFloat(const char *)
* \see isIdentifier(const char *) */
int isString(const char *image) /**< [in] The string of characters to compare. */
/**
* Checks if a string follows the format for a string literal. Specifically, it
* checks if the string matches the regular expression: (".*").
*
* \param [in] image The string to check.
*
* \retval 0 \a image does not match the pattern for a string.
*
* \retval 1 \a image matches the pattern for a string.
*/
int isString(const char *image)
{
	/* A string literal needs at least an opening and a closing quote */
	const size_t length = strlen(image);
	if (length < 2) return 0;
	/* The first and the last characters must both be double quotes */
	if (image[0] != '"') return 0;
	return image[length - 1] == '"';
}
/** Checks if a string of characters follows the format for an identifier.
* Specifically, it checks if the string of characters matches the regular
* expression: [a-zA-Z][a-zA-Z0-9_]*
*
* \retval 0 The string of characters is not an identifier.
* \retval 1 The string of characters is an identifier.
*
* \see isInteger(const char *)
* \see isFloat(const char *)
* \see isString(const char *) */
int isIdentifier(const char *image) /**< [in] The string of characters to compare. */
/**
* Checks if a string follows the format for an identifier. Specifically, it
* checks if the string matches the regular expression: ([a-zA-Z][a-zA-Z0-9_]*).
*
* \param [in] image The string to check.
*
* \retval 0 \a image does not match the pattern for an identifier.
*
* \retval 1 \a image matches the pattern for an identifier.
*/
int isIdentifier(const char *image)
{
const char *cur = image;
/* First character must be alphabetic */
@ -155,17 +160,25 @@ int isIdentifier(const char *image) /**< [in] The string of characters to compar
return 1;
}
/** Creates a Token structure.
*
* \return A pointer to a Token structure with the desired properties.
*
* \retval NULL malloc was unable to allocate memory.
*
* \see deleteToken(Token *) */
Token *createToken(TokenType type, /**< [in] The type of token to create. */
const char *image, /**< [in] The characters from the source file that represent the token. */
const char *fname, /**< [in] A pointer to the name of the file containing the token. */
unsigned int line) /**< [in] The line number from the source file that the token occurred on. */
/**
* Creates a token.
*
* \param [in] type The type of token to create.
*
* \param [in] image The string that represents the token.
*
* \param [in] fname The name of the file containing the token.
*
* \param [in] line The number of the line containing the token.
*
* \return A pointer to a new token with the desired properties.
*
* \retval NULL Memory allocation failed.
*/
Token *createToken(TokenType type,
const char *image,
const char *fname,
unsigned int line)
{
Token *ret = malloc(sizeof(Token));
if (!ret) {
@ -180,20 +193,22 @@ Token *createToken(TokenType type, /**< [in] The type of token to create. */
return NULL;
}
strcpy(ret->image, image);
/** \note fname is not copied because it would only one copy is stored
* for all Token structures that share it. */
/**
* \note fname is not copied because only one copy is stored for all
* Token structures that share it.
*/
ret->fname = fname;
ret->line = line;
return ret;
}
/** Deletes a Token structure.
*
* \pre \a token points to a Token structure created by createToken(TokenType, const char *, const char *, unsigned int).
*
* \post The memory at \a token and all of its elements will be freed.
*
* \see createToken(TokenType, const char *, const char *, unsigned int) */
/**
* Deletes a token.
*
* \param [in,out] token The token to delete.
*
* \post The memory at \a token and all of its members will be freed.
*/
void deleteToken(Token *token)
{
if (!token) return;
@ -201,22 +216,25 @@ void deleteToken(Token *token)
free(token);
}
/** Adds a Token to an array of Token structures.
*
* \note \a list may be NULL in which case a new list is created.
*
* \pre \a num is the number of elements in \a list.
*
* \post \a token will be added on to the end of \a list and the value at \a num
* will be updated accordingly.
*
* \retval 0 realloc was unable to allocate memory.
* \retval 1 \a node was added to \a list.
*
* \see deleteTokens(Token **) */
int addToken(Token ***list, /**< [in,out] A pointer to a pointer to an array of Token structures to add the new Token onto. */
unsigned int *num, /**< [in,out] A pointer to the number of elements in \a list. */
Token *token) /**< [in] A pointer to the Token structure to add to \a list. */
/**
* Adds a token to a list.
*
* \param [in,out] list The list of tokens to add \a token to.
*
* \param [in,out] num The number of tokens in \a list.
*
* \param [in] token The token to add to \a list.
*
* \post \a token will be added to the end of \a list and the size of \a list
* will be updated.
*
* \retval 0 Memory allocation failed.
*
* \retval 1 \a token was added to \a list.
*/
int addToken(Token ***list,
unsigned int *num,
Token *token)
{
unsigned int newsize = *num + 1;
void *mem = realloc(*list, sizeof(Token *) * newsize);
@ -233,14 +251,14 @@ int addToken(Token ***list, /**< [in,out] A pointer to a pointer to an array
return 1;
}
/** Deletes an array of Token structures.
*
* \pre \a list was created by and contains items added by addToken(Token ***, unsigned int *, Token *).
*
* \post The memory at \a list and all of its elements will be freed.
*
* \see addToken(Token ***, unsigned int *, Token *) */
void deleteTokens(Token **list) /**< [in,out] A pointer to an array of Token structures to be deleted. */
/**
* Deletes a list of tokens.
*
* \param [in,out] list The list of tokens to delete.
*
* \post The memory at \a list and all of its members will be freed.
*/
void deleteTokens(Token **list)
{
Token **tok = list;
while (*tok) {
@ -250,20 +268,28 @@ void deleteTokens(Token **list) /**< [in,out] A pointer to an array of Token str
free(list);
}
/** Tries to match a sequence of lexemes. Scans through \a lexemes starting at
* \a start and tries to match space-delimited lexemes from \a match.
*
* \pre \a lexemes was created by scanBuffer(const char *, unsigned int, const char *).
*
* \return The number of lexemes matched. */
unsigned int acceptLexemes(LexemeList *lexemes, /**< [in] A pointer to a LexemeList structure to match lexemes from. */
unsigned int start, /**< [in] The position within \a lexemes to start matching at. */
const char *match) /**< [in] A pointer to a character array describing the sequence of lexemes to match. */
/**
* Matches lexemes against a string. Traverses \a lexemes starting at \a start
* and compares lexeme images to space-delimited substrings from \a match.
*
* \param [in] lexemes The list of lexemes to match from.
*
* \param [in] start The index within \a lexemes to start matching at.
*
* \param [in] match A string of space-delimited substrings to match.
*
* \return The number of lexemes matched.
*/
unsigned int acceptLexemes(LexemeList *lexemes,
unsigned int start,
const char *match)
{
unsigned int offset = 0;
unsigned int n;
unsigned int i;
for (n = 0, i = 0; match[n] || lexemes->lexemes[start + offset]->image[i]; n++) {
for (n = 0, i = 0;
match[n] || lexemes->lexemes[start + offset]->image[i];
n++) {
if (match[n] == ' ') {
offset++;
i = 0;
@ -276,49 +302,59 @@ unsigned int acceptLexemes(LexemeList *lexemes, /**< [in] A pointer to a LexemeL
return offset + 1;
}
/** Checks if a sequence of lexemes is a keyword. \a lexemes is searched
* starting at \a start for keywords. If one is found, the appropriate Token
* structure is created and returned and the value of \a start is incremented
* by the number of lexemes matched minus one.
*
* \pre \a lexemes was created by scanBuffer(const char *, unsigned int, const char *).
*
* \post If a keyword is not found, \a start will be unmodified. Otherwise,
* \a start will be incremented by the number of lexemes matched minus
* one.
*
* \return A pointer to a newly created keyword Token structure.
*
* \retval NULL No keywords were matched or there was an error allocating
* memory. */
Token *isKeyword(LexemeList *lexemes, /**< [in] A pointer to a LexemeList structure to search for keywords in. */
unsigned int *start) /**< [in,out] A pointer to the position within \a lexemes to start checking at. */
/**
* Checks if the next lexemes in a list comprise a keyword and, if so, generates
* a new token representing that keyword. Specifically, \a lexemes is searched,
* starting at \a start for keywords. If one is found, an appropriate token is
* created and returned and \a start is incremented by the number of lexemes
* matched minus one.
*
* \param [in] lexemes A list of lexemes to search for keywords in.
*
* \param [in,out] start The position within \a lexemes to begin searching for
* keywords.
*
* \post If a keyword is not found, \a start will not be modified. Otherwise,
* \a start will be incremented by the number of lexemes matched minus one.
*
* \return A pointer to the token containing the matched keyword.
*
* \retval NULL No keywords were found or there was an error allocating memory.
*/
Token *isKeyword(LexemeList *lexemes,
unsigned int *start)
{
Token *token = NULL;
TokenType type;
const char *fname = lexemes->lexemes[*start]->fname;
unsigned int line = lexemes->lexemes[*start]->line;
/* For each keyword, */
for (type = 0; type != TT_ENDOFTOKENS; type++) {
unsigned int num = acceptLexemes(lexemes, *start, keywords[type]);
/* Check if the start of lexemes match */
unsigned int num = acceptLexemes(lexemes,
*start, keywords[type]);
if (!num) continue;
/* If so, create a new token for the keyword */
token = createToken(type, keywords[type], fname, line);
/* And advance the start */
*start += (num - 1);
break;
}
return token;
}
/** Converts a list of lexemes into tokens. Additionally parses the literal
* values of integers, floating point decimals, and strings.
*
* \pre \a list was created by scanBuffer(const char *, unsigned int, const char *).
*
* \return A pointer to an array of Token structures representing the tokenized
* form of the input lexeme stream.
*
* \retval NULL An unrecognized token was encountered or memory allocation
* failed. */
Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList structure to tokenize. */
/**
* Converts a list of lexemes into tokens. Also parses integers, floats, and
* strings into tokens with semantic meaning.
*
* \param [in] list A list of lexemes to tokenize.
*
* \return A list of tokens generated from \a list.
*
* \retval NULL An unrecognized token was encountered or memory allocation
* failed.
*/
Token **tokenizeLexemes(LexemeList *list)
{
void *mem = NULL;
Token **ret = NULL;
@ -368,8 +404,8 @@ Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList st
continue;
}
/* Newline */
/* Note that the spec is unclear as to whether a command *must* follow
* a comma. For now, we let commas end a line. */
/* Note that the spec is unclear as to whether a command *must*
* follow a comma. For now, we let commas end a line. */
else if (!strcmp(image, "\n")) {
/* Note that we ignore any initial newlines */
if (retsize < 1) {
@ -392,8 +428,8 @@ Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList st
else if ((token = isKeyword(list, &n))) {
}
/* Identifier */
/* This must be placed after keyword parsing because most
* keywords look like identifiers. */
/* This must be placed after keyword parsing or else most
* keywords would be tokenized as identifiers. */
else if (isIdentifier(image)) {
token = createToken(TT_IDENTIFIER, image, fname, line);
}

View File

@ -1,14 +1,16 @@
/** Structures and functions for grouping lexemes into tokens. The tokenizer
* reads through an array of lexemes (generated by the lexer) and groups them
* into tokens based on their structure. In addition, some lexemes with
* semantic meaning (such as integers, floats, strings, and booleans) will have
* their values extracted and stored.
*
* \file tokenizer.h
*
* \author Justin J. Meza
*
* \date 2010 */
/**
* Structures and functions for grouping lexemes into tokens. The tokenizer
* reads through an array of lexemes (generated by the lexer) and groups them
* into tokens based on their structure. In addition, some lexemes with
* semantic meaning (such as integers, floats, strings, and booleans) will have
* their values extracted and stored.
*
* \file tokenizer.h
*
* \author Justin J. Meza
*
* \date 2010-2011
*/
#ifndef __TOKENIZER_H__
#define __TOKENIZER_H__
@ -21,107 +23,138 @@
#undef DEBUG
/** Denotes the type of token present. All of the token type names are
* self-explainatory and correspond to either the semantic type of token data
* (in the case of TT_INTEGER, TT_FLOAT, TT_STRING, or TT_IDENTIFIER) or the
* lexemes which make up the particular token.
*
* \note Remember to update the keywords array with the token image. */
/**
* Represents a token type. All of the token type names correspond to either
* the semantic type of token data or the lexemes which make up the particular
* token.
*
* \note Remember to update the keywords array (in the tokens C file) with the
* token image.
*/
typedef enum {
TT_INTEGER,
TT_FLOAT,
TT_STRING,
TT_IDENTIFIER,
TT_BOOLEAN,
TT_IT,
TT_NOOB,
TT_NUMBR,
TT_NUMBAR,
TT_TROOF,
TT_YARN,
TT_EOF,
TT_NEWLINE,
TT_HAI,
TT_KTHXBYE,
TT_HASA,
TT_ITZA,
TT_ITZ,
TT_RNOOB,
TT_R,
TT_ANYR,
TT_AN,
TT_SUMOF,
TT_DIFFOF,
TT_PRODUKTOF,
TT_QUOSHUNTOF,
TT_MODOF,
TT_BIGGROF,
TT_SMALLROF,
TT_BOTHOF,
TT_EITHEROF,
TT_WONOF,
TT_NOT,
TT_MKAY,
TT_ALLOF,
TT_ANYOF,
TT_BOTHSAEM,
TT_DIFFRINT,
TT_MAEK,
TT_A,
TT_ISNOWA,
TT_VISIBLE,
TT_SMOOSH,
TT_BANG,
TT_GIMMEH,
TT_ORLY,
TT_YARLY,
TT_MEBBE,
TT_NOWAI,
TT_OIC,
TT_WTF,
TT_OMG,
TT_OMGWTF,
TT_GTFO,
TT_IMINYR,
TT_UPPIN,
TT_NERFIN,
TT_YR,
TT_TIL,
TT_WILE,
TT_IMOUTTAYR,
TT_HOWIZ,
TT_IZ,
TT_IFUSAYSO,
TT_FOUNDYR,
TT_SRS,
TT_ENDOFTOKENS
TT_INTEGER, /**< Integer literal. */
TT_FLOAT, /**< Decimal literal. */
TT_STRING, /**< String literal. */
TT_IDENTIFIER, /**< Identifier literal. */
TT_BOOLEAN, /**< Boolean literal. */
TT_IT, /**< \ref impvar "Implicit variable". */
TT_NOOB, /**< Nil keyword. */
TT_NUMBR, /**< Integer keyword. */
TT_NUMBAR, /**< Decimal keyword. */
TT_TROOF, /**< Boolean keyword. */
TT_YARN, /**< String keyword. */
TT_BUKKIT, /**< Array. */
TT_EOF, /**< End of file. */
TT_NEWLINE, /**< Newline. */
TT_HAI, /**< Beginning of main block. */
TT_KTHXBYE, /**< End of main block. */
TT_HASA, /**< Variable declaration. */
TT_ITZA, /**< Variable type initialization. */
TT_ITZ, /**< Variable value initialization. */
TT_RNOOB, /**< Deallocation. */
TT_R, /**< Assignment. */
TT_ANYR, /**< User-defined function argument separator. */
TT_AN, /**< Built-in function argument separator. */
TT_SUMOF, /**< Addition. */
TT_DIFFOF, /**< Subtraction. */
TT_PRODUKTOF, /**< Multiplication. */
TT_QUOSHUNTOF, /**< Division. */
TT_MODOF, /**< Modulo. */
TT_BIGGROF, /**< Greater than. */
TT_SMALLROF, /**< Less than. */
TT_BOTHOF, /**< Logical AND. */
TT_EITHEROF, /**< Logical OR. */
TT_WONOF, /**< Logical XOR. */
TT_NOT, /**< Logical NOT. */
TT_MKAY, /**< Infinite arity argument delimiter. */
TT_ALLOF, /**< Infinite arity logical AND. */
TT_ANYOF, /**< Infinite arity logical OR. */
TT_BOTHSAEM, /**< Equality. */
TT_DIFFRINT, /**< Inequality. */
TT_MAEK, /**< Cast. */
TT_A, /**< Cast target separator. */
TT_ISNOWA, /**< In-place cast. */
TT_VISIBLE, /**< Print. */
TT_SMOOSH, /**< String concatenation. */
TT_BANG, /**< Exclamation point (!) */
TT_GIMMEH, /**< Input. */
TT_ORLY, /**< Conditional. */
TT_YARLY, /**< True branch. */
TT_MEBBE, /**< Else branch. */
TT_NOWAI, /**< False branch. */
TT_OIC, /**< Conditional and switch delimiter. */
TT_WTF, /**< Switch. */
TT_OMG, /**< Case. */
TT_OMGWTF, /**< Default case. */
TT_GTFO, /**< Break or return without value. */
TT_IMINYR, /**< Loop beginning. */
TT_UPPIN, /**< Auto increment loop variable. */
TT_NERFIN, /**< Auto decrement loop variable. */
TT_YR, /**< Function name delimiter. */
TT_TIL, /**< Do until. */
TT_WILE, /**< Do while. */
TT_IMOUTTAYR, /**< Loop ending. */
TT_HOWIZ, /**< Function definition beginning. */
TT_IZ, /**< Function scope delimiter. */
TT_IFUSAYSO, /**< Function definition end. */
TT_FOUNDYR, /**< Return with value. */
TT_SRS, /**< Indirect variable access. */
TT_APOSTROPHEZ, /**< Array slot access ('Z). */
TT_ENDOFTOKENS /**< The end of this enum -- don't move it! */
} TokenType;
/** Stores the data associated with a Token structure. */
/**
* Stores token data with semantic meaning.
*/
typedef union {
int i; /**< Integer data. */
float f; /**< Floating point data. */
float f; /**< Decimal data. */
} TokenData;
/** Stores a token and any value parsed by the tokenizer. */
/**
* Stores a token type and any parsed values.
*/
typedef struct {
TokenType type; /**< The type of token. */
TokenData data; /**< The stored data of type \a type. */
char *image; /**< The array of characters from the lexer which correspond to the token. */
const char *fname; /**< A pointer to the name of the file containing the token. */
unsigned int line; /**< The line number from the source file that the token occurred on. */
char *image; /**< The characters that comprise the token. */
const char *fname; /**< The name of the file containing the token. */
unsigned int line; /**< The line number the token was on. */
} Token;
/**
* \name Utilities
*
* Functions for performing helper tasks.
*/
/**@{*/
int isInteger(const char *);
int isFloat(const char *);
int isString(const char *);
int isIdentifier(const char *);
Token *isKeyword(LexemeList *, unsigned int *);
/**@}*/
/**
* \name Token modifiers
*
* Functions for creating and deleting tokens.
*/
/**@{*/
Token *createToken(TokenType, const char *, const char *, unsigned int);
void deleteToken(Token *);
int addToken(Token ***, unsigned int *, Token*);
void deleteTokens(Token **);
unsigned int acceptLexemes(LexemeList *, unsigned int, const char *);
Token *isKeyword(LexemeList *, unsigned int *);
/**@}*/
/**
* \name Lexeme tokenizer
*
* Generates tokens from lexemes.
*/
/**@{*/
Token **tokenizeLexemes(LexemeList *);
/**@}*/
#endif /* __TOKENIZER_H__ */

View File

@ -36860,15 +36860,22 @@ static const long codepoints[] = {
#define NUM_UNICODE 18426
/** Performs a binary search on an array of strings.
*
* \return The index of the matching entry, if found.
*
* \retval -1 The entry does not exist in the array. */
int binarySearch(const char **strings, /**< [in] A pointer to an array of character strings to search through. */
int start, /**< [in] The start of the range to search through. */
int end, /**< [in] The end of the range to search through. */
const char *find) /**< [in] The entry to search for. */
/**
* Performs a binary search on an array of strings.
*
* \param [in] strings The array of strings to search.
* \param [in] start The index to start searching at.
* \param [in] end The index to end searching at.
* \param [in] find The string to search for.
*
* \return The index of the matching string, if found.
*
* \retval -1 The string was not found in the array.
*/
int binarySearch(const char **strings,
int start,
int end,
const char *find)
{
int midpoint;
int cmp;
@ -36884,12 +36891,16 @@ int binarySearch(const char **strings, /**< [in] A pointer to an array of charac
return -1;
}
/** Converts a Unicode normative name to a Unicode code point.
*
* \return The Unicode code point corresponding to the given Unicode name.
*
* \retval -1 An invalid Unicode normative name was supplied. */
long convertNormativeNameToCodePoint(const char *name) /**< [in] A pointer to a string of characters representing the Unicode normative name desired. */
/**
* Converts a Unicode normative name to a Unicode code point.
*
* \param [in] name The Unicode normative name to convert.
*
* \return The Unicode code point corresponding to \a name.
*
* \retval -1 An invalid Unicode normative name was supplied.
*/
long convertNormativeNameToCodePoint(const char *name)
{
int index = binarySearch(names, 0, NUM_UNICODE - 1, name);
if (index < 0) {
@ -36900,14 +36911,18 @@ long convertNormativeNameToCodePoint(const char *name) /**< [in] A pointer to a
return codepoints[index];
}
/** Converts the bits in a long integer representing a Unicode code point to a
* series of one or more bytes representing a UTF-8 character.
*
* \return The number of characters in the converted multi-byte character.
*
* \retval 0 An invalid Unicode code point was supplied. */
size_t convertCodePointToUTF8(unsigned long codepoint, /**< [in] The Unicode code point to convert to UTF-8. */
char *out) /**< [out] A pointer to the location to store the resulting UTF-8 bytes. */
/**
* Converts a Unicode code point to a UTF-8 character.
*
* \param [in] codepoint The Unicode code point to convert to UTF-8.
* \param [out] out A pointer to the location to store the UTF-8 character.
*
* \return The length of the converted multi-byte UTF-8 character.
*
* \retval 0 An invalid Unicode code point was supplied.
*/
size_t convertCodePointToUTF8(unsigned long codepoint,
char *out)
{
/* Out of range */
if (codepoint > 0x10FFFF) {

View File

@ -1,11 +1,13 @@
/** Data and functions for converting from Unicode normative names to
* code points.
*
* \file unicode.h
*
* \author Justin J. Meza
*
* \date 2010 */
/**
* Data and functions for converting from Unicode normative names to Unicode
* code points.
*
* \file unicode.h
*
* \author Justin J. Meza
*
* \date 2010-2011
*/
#ifndef __UNICODE_H__
#define __UNICODE_H__