Added basic array functionality and cleaned up documentation.
This commit is contained in:
parent
977e3dfde5
commit
a6ef5811e8
11
Makefile
11
Makefile
|
@ -13,21 +13,24 @@ testdir = ./test
|
|||
|
||||
all: $(TARGET)
|
||||
|
||||
$(TARGET): $(OBJS) $(LIBS)
|
||||
$(TARGET): $(OBJS)
|
||||
$(CC) $(CPPFLAGS) -o $(TARGET) $(OBJS) $(LIBS)
|
||||
|
||||
pedantic: $(OBJS) $(LIBS)
|
||||
$(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -Wall -ansi -pedantic -g -o $(TARGET) $(SRCS) $(HDRS) $(LIBS)
|
||||
$(CC) -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wundef -Wall -ansi -pedantic -o $(TARGET) $(SRCS) $(HDRS) $(LIBS)
|
||||
|
||||
lint: all
|
||||
$(LINT) $(SRCS)
|
||||
|
||||
debug: $(OBJS) $(LIBS)
|
||||
$(CC) -g -o $(TARGET) $(SRCS) $(LIBS)
|
||||
|
||||
check: all
|
||||
@cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.3-Tests/
|
||||
@cd $(testdir) && ./testDir.sh -q ../$(TARGET) 1.3-Tests
|
||||
|
||||
check-mem: all
|
||||
@echo "This will take a long time! Be patient!"
|
||||
@cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.3-Tests/
|
||||
@cd $(testdir) && ./testDir.sh -q -m ../$(TARGET) 1.3-Tests
|
||||
|
||||
install: all
|
||||
$(INSTALL) $(TARGET) $(bindir)/$(TARGET)
|
||||
|
|
10
README
10
README
|
@ -2,7 +2,7 @@
|
|||
|
||||
LICENSE
|
||||
|
||||
Copyright (C) 2010 Justin J. Meza
|
||||
Copyright (C) 2010-2011 Justin J. Meza
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,15 +22,15 @@ ABOUT
|
|||
lci is a LOLCODE interpreter written in C and is designed to be correct,
|
||||
portable, fast, and precisely documented.
|
||||
|
||||
* correct: Every effort has been made to test lci's conformance to the
|
||||
- correct: Every effort has been made to test lci's conformance to the
|
||||
LOLCODE language specification. Unit tests come packaged with the lci
|
||||
source code.
|
||||
* portable: lci follows the widely ported ANSI C specification allowing it
|
||||
- portable: lci follows the widely ported ANSI C specification allowing it
|
||||
to compile on a broad range of systems.
|
||||
* fast: Much effort has gone into producing simple and efficient code
|
||||
- fast: Much effort has gone into producing simple and efficient code
|
||||
whenever possible to the extent that the above points are not
|
||||
compromised.
|
||||
* precisely documented: lci uses Doxygen to generate literate code
|
||||
- precisely documented: lci uses Doxygen to generate literate code
|
||||
documentation, browsable here.
|
||||
|
||||
This project's homepage is at http://icanhaslolcode.org. For help, visit
|
||||
|
|
4026
interpreter.c
4026
interpreter.c
File diff suppressed because it is too large
Load Diff
304
interpreter.h
304
interpreter.h
|
@ -1,13 +1,15 @@
|
|||
/** Structures and functions for interpreting a parse tree. The interpreter
|
||||
* traverses a parse tree in a depth-first manner, interpreting each node it
|
||||
* reaches along the way. This is the last stage of the processing of a source
|
||||
* code file.
|
||||
*
|
||||
* \file interpreter.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010 */
|
||||
/**
|
||||
* Structures and functions for interpreting a parse tree. The interpreter
|
||||
* traverses a parse tree in a depth-first manner, interpreting each node it
|
||||
* reaches along the way. This is the last stage of the processing of a source
|
||||
* code file.
|
||||
*
|
||||
* \file interpreter.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010-2011
|
||||
*/
|
||||
|
||||
#ifndef __INTERPRETER_H__
|
||||
#define __INTERPRETER_H__
|
||||
|
@ -19,127 +21,221 @@
|
|||
#include "parser.h"
|
||||
#include "unicode.h"
|
||||
|
||||
/** Gets the integer data associated with a ValueObject structure. */
|
||||
/**
|
||||
* Retrieves a value's integer data.
|
||||
*/
|
||||
#define getInteger(value) (value->data.i)
|
||||
/** Gets the floating point data associated with a ValueObject structure. */
|
||||
|
||||
/**
|
||||
* Retrieves a value's decimal data.
|
||||
*/
|
||||
#define getFloat(value) (value->data.f)
|
||||
/** Gets the string data associated with a ValueObject structure. */
|
||||
|
||||
/**
|
||||
* Retrieves a value's string data.
|
||||
*/
|
||||
#define getString(value) (value->data.s)
|
||||
/** Gets the function definition associated with a ValueObject structure. */
|
||||
|
||||
/**
|
||||
* Retrieves a value's function data.
|
||||
*/
|
||||
#define getFunction(value) (value->data.fn)
|
||||
|
||||
/** Denotes the type of a value. */
|
||||
/**
|
||||
* Retrieves a value's array data.
|
||||
*/
|
||||
#define getArray(value) (value->data.a)
|
||||
|
||||
/**
|
||||
* Represents a value type.
|
||||
*/
|
||||
typedef enum {
|
||||
VT_INTEGER, /**< An integer value. */
|
||||
VT_FLOAT, /**< A floating point decimal value. */
|
||||
VT_BOOLEAN, /**< A true/false value. */
|
||||
VT_STRING, /**< A character string value. */
|
||||
VT_FLOAT, /**< A decimal value. */
|
||||
VT_BOOLEAN, /**< A boolean value. */
|
||||
VT_STRING, /**< A string value. */
|
||||
VT_NIL, /**< Represents no value. */
|
||||
VT_FUNC /**< A function. */
|
||||
VT_FUNC, /**< A function. */
|
||||
VT_ARRAY /**< An array. */
|
||||
} ValueType;
|
||||
|
||||
/** Stores the data associated with a ValueObject structure. */
|
||||
/**
|
||||
* Stores value data.
|
||||
*/
|
||||
typedef union {
|
||||
int i; /**< Integer data. */
|
||||
float f; /**< Floating point data. */
|
||||
char *s; /**< Character string data. */
|
||||
FuncDefStmtNode *fn; /**< Function definition. */
|
||||
int i; /**< Integer data. */
|
||||
float f; /**< Decimal data. */
|
||||
char *s; /**< String data. */
|
||||
FuncDefStmtNode *fn; /**< Function data. */
|
||||
struct scopeobject *a; /**< Array data. */
|
||||
} ValueData;
|
||||
|
||||
/** Increments the semaphore of a ValueObject structure. */
|
||||
/**
|
||||
* Increments a value's semaphore.
|
||||
*/
|
||||
#define V(value) (value->semaphore++)
|
||||
|
||||
/** Decrements the semaphore of a ValueObject structure. */
|
||||
/**
|
||||
* Decrements a value's semaphore.
|
||||
*/
|
||||
#define P(value) (value->semaphore--)
|
||||
|
||||
/** Stores a value.
|
||||
*
|
||||
* \see copyValueObject(ValueObject *)
|
||||
* \see deleteValueObject(ValueObject *) */
|
||||
/**
|
||||
* Stores a value.
|
||||
*/
|
||||
typedef struct {
|
||||
ValueType type; /**< The type of value stored. */
|
||||
ValueData data; /**< The stored data. */
|
||||
ValueData data; /**< The value data. */
|
||||
unsigned short semaphore; /**< A semaphore for value usage. */
|
||||
} ValueObject;
|
||||
|
||||
/** Denotes the type of return encountered. */
|
||||
/**
|
||||
* Represents the return type.
|
||||
*/
|
||||
typedef enum {
|
||||
RT_DEFAULT, /**< A block of code returned after evaluating all of its statements. */
|
||||
RT_BREAK, /**< A block of code within a LoopStmtNode or SwitchStmtNode returned via a break statement. */
|
||||
RT_RETURN /**< A block of code within a FuncDefStmtNode called by a FuncCallExprNode returned (either with or without a value). */
|
||||
RT_DEFAULT, /**< Code block completed successfully. */
|
||||
RT_BREAK, /**< Broke out of a loop or switch statement. */
|
||||
RT_RETURN /**< Returned from a function. */
|
||||
} ReturnType;
|
||||
|
||||
/** Stores a return state. Returns are encountered when
|
||||
* - a block of code evaluates all of its statements,
|
||||
* - a block of code within a LoopStmt or SwitchStmt encountered a break statement, or
|
||||
* - a block of code within a FunctionDefStmt called by a FunctionCallExpr encounters a ReturnStmt. */
|
||||
/**
|
||||
* Stores return state.
|
||||
*/
|
||||
typedef struct {
|
||||
ReturnType type; /**< The type of return encountered. */
|
||||
ValueObject *value; /**< The optional return value. */
|
||||
} ReturnObject;
|
||||
|
||||
/** Stores the variables in a particular scope. Scopes are arranged
|
||||
* heirarchically from global (the ancestor of all other scopes) to local (the
|
||||
* temporary scope of a BlockNode).
|
||||
*
|
||||
* \see createScopeObject(ScopeObject *)
|
||||
* \see deleteScopeObject(ScopeObject *) */
|
||||
/**
|
||||
* Stores a set of variables hierarchically.
|
||||
*/
|
||||
typedef struct scopeobject {
|
||||
struct scopeobject *parent; /**< A pointer to the parent ScopeObject. */
|
||||
ValueObject *impvar; /**< A pointer to the ValueObject representing the implicit variable for this scope. */
|
||||
unsigned int numvals; /**< The number of ValueObject structures in \a values. */
|
||||
char **names; /**< A pointer to the array of character strings naming the values in the scope. */
|
||||
ValueObject **values; /**< A pointer to an array of ValueObject structures in the scope. */
|
||||
struct scopeobject *parent; /**< The parent scope. */
|
||||
ValueObject *impvar; /**< The \ref impvar "implicit variable". */
|
||||
unsigned int numvals; /**< The number of values in the scope. */
|
||||
char **names; /**< The names of the values. */
|
||||
ValueObject **values; /**< The values in the scope. */
|
||||
} ScopeObject;
|
||||
|
||||
char *createString(char *);
|
||||
/**
|
||||
* \name Utilities
|
||||
*
|
||||
* Functions for performing helper tasks.
|
||||
*/
|
||||
/**@{*/
|
||||
void printInterpreterError(const char *, IdentifierNode *, ScopeObject *);
|
||||
char *copyString(char *);
|
||||
unsigned int isDecString(const char *);
|
||||
unsigned int isHexString(const char *);
|
||||
char *resolveIdentifierName(IdentifierNode *, ScopeObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Value object modifiers
|
||||
*
|
||||
* Functions for creating, copying, and deleting value objects.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *createNilValueObject(void);
|
||||
ValueObject *createBooleanValueObject(int);
|
||||
ValueObject *createIntegerValueObject(int);
|
||||
ValueObject *createFloatValueObject(float);
|
||||
ValueObject *createStringValueObject(char *);
|
||||
ValueObject *createFunctionValueObject(FuncDefStmtNode *);
|
||||
ValueObject *createArrayValueObject(ScopeObject *);
|
||||
ValueObject *copyValueObject(ValueObject *);
|
||||
void deleteValueObject(ValueObject *);
|
||||
ReturnObject *createReturnObject(ReturnType, ValueObject *);
|
||||
void deleteReturnObject(ReturnObject *);
|
||||
char *resolveIdentifierName(IdentifierNode *, ScopeObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Scope object modifiers
|
||||
*
|
||||
* Functions for manipulating scope objects and their data.
|
||||
*/
|
||||
/**@{*/
|
||||
ScopeObject *createScopeObject(ScopeObject *);
|
||||
void deleteScopeObject(ScopeObject *);
|
||||
ValueObject *getScopeValue(ScopeObject *, IdentifierNode *);
|
||||
ValueObject *getLocalScopeValue(ScopeObject *, IdentifierNode *);
|
||||
ValueObject *createScopeValue(ScopeObject *, IdentifierNode *);
|
||||
ValueObject *updateScopeValue(ScopeObject *, IdentifierNode *, ValueObject *);
|
||||
void deleteScopeValue(ScopeObject *, IdentifierNode *);
|
||||
unsigned int isNumString(const char *);
|
||||
unsigned int isHexString(const char *);
|
||||
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castStringExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *createScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
|
||||
ValueObject *updateScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *, ValueObject *);
|
||||
ValueObject *getScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
|
||||
ValueObject *getScopeValueArray(ScopeObject *, ScopeObject *, IdentifierNode *);
|
||||
ValueObject *getScopeValueLocal(ScopeObject *, ScopeObject *, IdentifierNode *);
|
||||
ScopeObject *getScopeObject(ScopeObject *, ScopeObject *, IdentifierNode *);
|
||||
void deleteScopeValue(ScopeObject *, ScopeObject *, IdentifierNode *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Return object modifiers
|
||||
*
|
||||
* Functions for creating and deleting return objects.
|
||||
*/
|
||||
/**@{*/
|
||||
ReturnObject *createReturnObject(ReturnType, ValueObject *);
|
||||
void deleteReturnObject(ReturnObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Casts
|
||||
*
|
||||
* Functions for performing casts between different types of values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *castBooleanImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castIntegerImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castFloatImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castStringImplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castBooleanExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castIntegerExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castFloatExplicit(ValueObject *, ScopeObject *);
|
||||
ValueObject *castStringExplicit(ValueObject *, ScopeObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Node interpreters
|
||||
*
|
||||
* Functions for interpreting basic parse tree nodes.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *interpretExprNode(ExprNode *, ScopeObject *);
|
||||
ReturnObject *interpretStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretStmtNodeList(StmtNodeList *, ScopeObject *);
|
||||
ReturnObject *interpretBlockNode(BlockNode *, ScopeObject *);
|
||||
int interpretMainNode(MainNode *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Expression interpreters
|
||||
*
|
||||
* Functions for interpreting expression parse tree nodes.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *interpretImpVarExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretCastExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretFuncCallExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretIdentifierExprNode(ExprNode *, ScopeObject *);
|
||||
ValueObject *interpretConstantExprNode(ExprNode *, ScopeObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Operation interpreters
|
||||
*
|
||||
* Functions for interpreting operation parse tree nodes.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *interpretNotOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretArithOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretBoolOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretEqualityOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretConcatOpExprNode(OpExprNode *, ScopeObject *);
|
||||
ValueObject *interpretOpExprNode(ExprNode *, ScopeObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Statement interpreters
|
||||
*
|
||||
* Functions for interpreting statement parse tree nodes.
|
||||
*/
|
||||
/**@{*/
|
||||
ReturnObject *interpretCastStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretPrintStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretInputStmtNode(StmtNode *, ScopeObject *);
|
||||
|
@ -153,7 +249,14 @@ ReturnObject *interpretLoopStmtNode(StmtNode *, ScopeObject *);
|
|||
ReturnObject *interpretDeallocationStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretFuncDefStmtNode(StmtNode *, ScopeObject *);
|
||||
ReturnObject *interpretExprStmtNode(StmtNode *, ScopeObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Arithmetic operations (integer-integer)
|
||||
*
|
||||
* Functions for performing integer-integer operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opAddIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultIntegerInteger(ValueObject *, ValueObject *);
|
||||
|
@ -161,7 +264,14 @@ ValueObject *opDivIntegerInteger(ValueObject *, ValueObject *);
|
|||
ValueObject *opMaxIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opModIntegerInteger(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Arithmetic operations (integer-float)
|
||||
*
|
||||
* Functions for performing integer-float operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opAddIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultIntegerFloat(ValueObject *, ValueObject *);
|
||||
|
@ -169,7 +279,14 @@ ValueObject *opDivIntegerFloat(ValueObject *, ValueObject *);
|
|||
ValueObject *opMaxIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opModIntegerFloat(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Arithmetic operations (float-integer)
|
||||
*
|
||||
* Functions for performing float-integer operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opAddFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultFloatInteger(ValueObject *, ValueObject *);
|
||||
|
@ -177,7 +294,14 @@ ValueObject *opDivFloatInteger(ValueObject *, ValueObject *);
|
|||
ValueObject *opMaxFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opModFloatInteger(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Arithmetic operations (float-float)
|
||||
*
|
||||
* Functions for performing float-float operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opAddFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opSubFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMultFloatFloat(ValueObject *, ValueObject *);
|
||||
|
@ -185,26 +309,76 @@ ValueObject *opDivFloatFloat(ValueObject *, ValueObject *);
|
|||
ValueObject *opMaxFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opMinFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opModFloatFloat(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Equality operations (boolean-boolean)
|
||||
*
|
||||
* Functions for performing boolean-boolean operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opEqBooleanBoolean(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqBooleanBoolean(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Equality operations (integer-integer)
|
||||
*
|
||||
* Functions for performing integer-integer operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opEqIntegerInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqIntegerInteger(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Equality operations (integer-float)
|
||||
*
|
||||
* Functions for performing integer-float operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opEqIntegerFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqIntegerFloat(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Equality operations (float-integer)
|
||||
*
|
||||
* Functions for performing float-integer operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opEqFloatInteger(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqFloatInteger(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Equality operations (float-float)
|
||||
*
|
||||
* Functions for performing float-float operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opEqFloatFloat(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqFloatFloat(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Equality operations (string-string)
|
||||
*
|
||||
* Functions for performing string-string operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opEqStringString(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqStringString(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Equality operations (nil-nil)
|
||||
*
|
||||
* Functions for performing nil-nil operations on values.
|
||||
*/
|
||||
/**@{*/
|
||||
ValueObject *opEqNilNil(ValueObject *, ValueObject *);
|
||||
ValueObject *opNeqNilNil(ValueObject *, ValueObject *);
|
||||
/**@}*/
|
||||
|
||||
#endif /* __INTERPRETER_H__ */
|
||||
|
|
185
lexer.c
185
lexer.c
|
@ -1,15 +1,19 @@
|
|||
#include "lexer.h"
|
||||
|
||||
/** Creates a Lexeme structure.
|
||||
*
|
||||
* \return A pointer to a Lexeme structure with the desired properties.
|
||||
*
|
||||
* \retval NULL malloc was unable to allocate memory.
|
||||
*
|
||||
* \see deleteLexeme(Lexeme *) */
|
||||
Lexeme *createLexeme(char *image, /**< [in] An array of characters that describe the lexeme. */
|
||||
const char *fname, /**< [in] A pointer to the name of the file containing the lexeme. */
|
||||
unsigned int line) /**< [in] The line number from the source file that the lexeme occurred on. */
|
||||
/**
|
||||
* Creates a lexeme.
|
||||
*
|
||||
* \param [in] image The string that identifies the lexeme.
|
||||
*
|
||||
* \param [in] fname The name of the file containing the lexeme.
|
||||
*
|
||||
* \param [in] line The line number the lexeme occurred on.
|
||||
*
|
||||
* \return A new lexeme with the desired properties.
|
||||
*
|
||||
* \retval NULL Memory allocation failed.
|
||||
*/
|
||||
Lexeme *createLexeme(char *image, const char *fname, unsigned int line)
|
||||
{
|
||||
Lexeme *ret = malloc(sizeof(Lexeme));
|
||||
if (!ret) {
|
||||
|
@ -23,8 +27,11 @@ Lexeme *createLexeme(char *image, /**< [in] An array of characters that de
|
|||
return NULL;
|
||||
}
|
||||
strcpy(ret->image, image);
|
||||
/** \note fname is not copied because it would only one copy is stored
|
||||
* for all Lexeme structures that share it. */
|
||||
/**
|
||||
* \note \a fname is not copied because only one copy is stored for
|
||||
* all lexemes from the same file. This is simply to avoid large
|
||||
* numbers of lexemes storing duplicate file name strings.
|
||||
*/
|
||||
ret->fname = fname;
|
||||
ret->line = line;
|
||||
#ifdef DEBUG
|
||||
|
@ -33,29 +40,29 @@ Lexeme *createLexeme(char *image, /**< [in] An array of characters that de
|
|||
return ret;
|
||||
}
|
||||
|
||||
/** Deletes a Lexeme structure.
|
||||
*
|
||||
* \pre \a lexeme points to a Lexeme structure created by createLexeme(char *, const char *, unsigned int).
|
||||
*
|
||||
* \post The memory at \a lexeme and all of its elements will be freed.
|
||||
*
|
||||
* \see createLexeme(char *, const char *, unsigned int) */
|
||||
/**
|
||||
* Deletes a lexeme.
|
||||
*
|
||||
* \param [in,out] lexeme The lexeme to delete.
|
||||
*/
|
||||
void deleteLexeme(Lexeme *lexeme)
|
||||
{
|
||||
if (!lexeme) return;
|
||||
free(lexeme->image);
|
||||
/** \note We do not free (*lex)->fname because it is shared between many
|
||||
* Lexeme structures and is free'd by whoever created them. */
|
||||
/**
|
||||
* \note We do not free the file name because it is shared between many
|
||||
* lexemes and is freed by whoever created the file name string.
|
||||
*/
|
||||
free(lexeme);
|
||||
}
|
||||
|
||||
/** Creates a LexemeList structure.
|
||||
*
|
||||
* \return A pointer to a LexemeList structure with the desired properties.
|
||||
*
|
||||
* \retval NULL malloc was unable to allocate memory.
|
||||
*
|
||||
* \see deleteLexemeList(LexemeList *) */
|
||||
/**
|
||||
* Creates a list of lexemes.
|
||||
*
|
||||
* \return An empty lexeme list.
|
||||
*
|
||||
* \retval NULL Memory allocation failed.
|
||||
*/
|
||||
LexemeList *createLexemeList(void)
|
||||
{
|
||||
LexemeList *p = malloc(sizeof(LexemeList));
|
||||
|
@ -68,20 +75,21 @@ LexemeList *createLexemeList(void)
|
|||
return p;
|
||||
}
|
||||
|
||||
/** Adds a Lexeme structure to a LexemeList structure.
|
||||
*
|
||||
* \pre \a list was created by createLexemeList(void).
|
||||
* \pre \a lexeme was created by createLexeme(char *, const char *, unsigned int).
|
||||
*
|
||||
* \post \a lexeme will be added on to the end of \a list and the size of
|
||||
* \a list will be updated accordingly.
|
||||
*
|
||||
* \return A pointer to the added Lexeme structure (will be the same as
|
||||
* \a lexeme).
|
||||
*
|
||||
* \retval NULL realloc was unable to allocate memory. */
|
||||
Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList structure to add \a lex to. */
|
||||
Lexeme *lexeme) /**< [in] A pointer to the Lexeme structure to add to \a list. */
|
||||
/**
|
||||
* Adds a lexeme to a list of lexemes.
|
||||
*
|
||||
* \param [in,out] list The list of lexemes to add \a lexeme to.
|
||||
*
|
||||
* \param [in] lexeme The lexeme to add to \a list.
|
||||
*
|
||||
* \post \a lexeme will be added to the end of \a list and the size of \a list
|
||||
* will be updated.
|
||||
*
|
||||
* \return A pointer to the added lexeme (will be the same as \a lexeme).
|
||||
*
|
||||
* \retval NULL Memory allocation failed.
|
||||
*/
|
||||
Lexeme *addLexeme(LexemeList *list, Lexeme *lexeme)
|
||||
{
|
||||
unsigned int newsize;
|
||||
void *mem = NULL;
|
||||
|
@ -98,16 +106,14 @@ Lexeme *addLexeme(LexemeList *list, /**< [in,out] A pointer to the LexemeList st
|
|||
return lexeme;
|
||||
}
|
||||
|
||||
/** Deletes a LexemeList structure.
|
||||
*
|
||||
* \pre \a list was created by createLexemeList(void) and contains
|
||||
* items added by addLexeme(LexemeList *, Lexeme *).
|
||||
*
|
||||
* \post The memory at \a list and any of its associated members will be
|
||||
* freed.
|
||||
*
|
||||
* \see createLexemeList(void) */
|
||||
void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeList structure to delete. */
|
||||
/**
|
||||
* Deletes a list of lexemes.
|
||||
*
|
||||
* \param [in,out] list The lexeme list to delete.
|
||||
*
|
||||
* \post The memory at \a list and all of its members will be freed.
|
||||
*/
|
||||
void deleteLexemeList(LexemeList *list)
|
||||
{
|
||||
unsigned int n;
|
||||
if (!list) return;
|
||||
|
@ -117,37 +123,39 @@ void deleteLexemeList(LexemeList *list) /**< [in,out] A pointer to the LexemeLis
|
|||
free(list);
|
||||
}
|
||||
|
||||
/** Scans through a character buffer, removing unecessary characters and
|
||||
* generating lexemes. Lexemes are separated by whitespace (but newline
|
||||
* characters are kept as their own lexeme). String literals are handled a
|
||||
* bit differently: starting at the first quotation character, characters are
|
||||
* collected until either an unescaped quotation character is read (that is, a
|
||||
* quotation character not preceeded by a colon which itself is not proceeded
|
||||
* by a colon) or a newline or carriage return character is read, whichever
|
||||
* comes first. This handles the odd case of strings such as "::" which print
|
||||
* out a single colon. Also handled are the effects of commas, ellipses, and
|
||||
* bangs (!).
|
||||
*
|
||||
* \pre \a size is the number of characters starting at the memory location
|
||||
* pointed to by \a buffer.
|
||||
*
|
||||
* \return A pointer to a LexemeList structure. */
|
||||
LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to tokenize. */
|
||||
unsigned int size, /**< [in] The number of characters in \a buffer. */
|
||||
const char *fname) /**< [in] An array of characters representing the name of the file used to read \a buffer. */
|
||||
/**
|
||||
* Scans a buffer, removing unnecessary characters and grouping characters into
|
||||
* lexemes. Lexemes are strings of characters separated by whitespace (although
|
||||
* newline characters are considered separate lexemes). String literals are
|
||||
* handled a bit differently: Starting at the first quotation character,
|
||||
* characters are collected until either a non-escaped quotation character is
|
||||
* read (i.e., a quotation character not preceded by a colon which itself is not
|
||||
* preceded by a colon) or a newline or carriage return character is read,
|
||||
* whichever comes first. This handles the odd (but possible) case of strings
|
||||
* such as "::" which print out a single colon. Also handled are the effects of
|
||||
* commas, ellipses, bangs (!), and array accesses ('Z).
|
||||
*
|
||||
* \param [in] buffer The characters to turn into lexemes.
|
||||
*
|
||||
* \param [in] size The number of characters in \a buffer.
|
||||
*
|
||||
* \param [in] fname The name of the file \a buffer was read from.
|
||||
*
|
||||
* \return A list of lexemes created from the contents of \a buffer.
|
||||
*/
|
||||
LexemeList *scanBuffer(const char *buffer, unsigned int size, const char *fname)
|
||||
{
|
||||
const char *start = buffer;
|
||||
LexemeList *list = NULL;
|
||||
unsigned int line = 1;
|
||||
Lexeme *lex = NULL;
|
||||
list = createLexemeList();
|
||||
if (!list) return NULL;
|
||||
while (start < buffer + size) {
|
||||
char *temp = NULL;
|
||||
size_t len = 1;
|
||||
unsigned int len = 1;
|
||||
/* Comma (,) is a soft newline */
|
||||
if (*start == ',') {
|
||||
lex = createLexeme("\n", fname, line);
|
||||
Lexeme *lex = createLexeme("\n", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
|
@ -162,7 +170,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
}
|
||||
/* Bang (!) is its own lexeme */
|
||||
if (*start == '!') {
|
||||
lex = createLexeme("!", fname, line);
|
||||
Lexeme *lex = createLexeme("!", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
|
@ -175,6 +183,21 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
start++;
|
||||
continue;
|
||||
}
|
||||
/* Apostrophe Z ('Z) is its own lexeme */
|
||||
if (!strncmp(start, "'Z", 2)) {
|
||||
Lexeme *lex = createLexeme("'Z", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
if (!addLexeme(list, lex)) {
|
||||
deleteLexeme(lex);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
start += 2;
|
||||
continue;
|
||||
}
|
||||
/* Skip over leading whitespace */
|
||||
while (isspace(*start)) {
|
||||
unsigned int newline = 0;
|
||||
|
@ -187,7 +210,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
newline = 1;
|
||||
}
|
||||
if (newline) {
|
||||
lex = createLexeme("\n", fname, line);
|
||||
Lexeme *lex = createLexeme("\n", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
|
@ -213,7 +236,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
/* Make sure next line is not empty */
|
||||
while (*test && isspace(*test)) {
|
||||
if (*test == '\r' || *test == '\n') {
|
||||
fprintf(stderr, "%s:%u: a line with continuation may not be followed by an empty line\n", fname, line);
|
||||
fprintf(stderr, "%s:%d: a line with continuation may not be followed by an empty line\n", fname, line);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -240,7 +263,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
start++;
|
||||
if (start == buffer || *start == ',' || *start == '\r' || *start == '\n')
|
||||
continue;
|
||||
fprintf(stderr, "%s:%u: multiple line comment may not appear on the same line as code\n", fname, line);
|
||||
fprintf(stderr, "%s:%d: multiple line comment may not appear on the same line as code\n", fname, line);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -269,9 +292,10 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
if (start[len] && !isspace(start[len])
|
||||
&& *(start + len) != ','
|
||||
&& *(start + len) != '!'
|
||||
&& strncmp(start + len, "'Z", 2)
|
||||
&& strncmp(start + len, "...", 3)
|
||||
&& strncmp(start + len, "\xE2\x80\xA6", 3)) {
|
||||
fprintf(stderr, "%s:%u: expected token delimiter after string literal\n", fname, line);
|
||||
fprintf(stderr, "%s:%d: expected token delimiter after string literal\n", fname, line);
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -281,6 +305,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
while (start[len] && !isspace(start[len])
|
||||
&& *(start + len) != ','
|
||||
&& *(start + len) != '!'
|
||||
&& strncmp(start + len, "'Z", 2)
|
||||
&& strncmp(start + len, "...", 3)
|
||||
&& strncmp(start + len, "\xE2\x80\xA6", 3))
|
||||
len++;
|
||||
|
@ -293,7 +318,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
}
|
||||
strncpy(temp, start, len);
|
||||
temp[len] = '\0';
|
||||
lex = createLexeme(temp, fname, line);
|
||||
Lexeme *lex = createLexeme(temp, fname, line);
|
||||
if (!lex) {
|
||||
free(temp);
|
||||
deleteLexemeList(list);
|
||||
|
@ -309,7 +334,7 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
|
|||
start += len;
|
||||
}
|
||||
/* Create an end-of-file lexeme */
|
||||
lex = createLexeme("$", fname, line);
|
||||
Lexeme *lex = createLexeme("$", fname, line);
|
||||
if (!lex) {
|
||||
deleteLexemeList(list);
|
||||
return NULL;
|
||||
|
|
70
lexer.h
70
lexer.h
|
@ -1,14 +1,16 @@
|
|||
/** Structures and functions for separating a character buffer into lexemes. The
|
||||
* lexer reads through a buffer of characters (themselves typically read from
|
||||
* standard input), strips whitespace, and breaks them up into logical atoms of
|
||||
* character strings which, in turn, may be passed on to later processes (such
|
||||
* as a tokenizer).
|
||||
*
|
||||
* \file lexer.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010 */
|
||||
/**
|
||||
* Structures and functions for separating a character buffer into lexemes --
|
||||
* groups of characters. The lexer reads through a buffer of characters
|
||||
* (themselves typically read from standard input), strips whitespace, and
|
||||
* breaks them up into logical atoms of character strings which, in turn, may be
|
||||
* passed on to later processes (such as a tokenizer).
|
||||
*
|
||||
* \file lexer.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010-2011
|
||||
*/
|
||||
|
||||
#ifndef __LEXER_H__
|
||||
#define __LEXER_H__
|
||||
|
@ -20,36 +22,44 @@
|
|||
|
||||
#undef DEBUG
|
||||
|
||||
/** Stores a lexeme. A lexeme is the smallest unit of contiguous characters,
|
||||
* namely, it has been stripped of surrounding whitespace.
|
||||
*
|
||||
* \note This structure does not have any list structure to hold groups of it.
|
||||
* Instead, pointers to arrays of these structures are employed to allow
|
||||
* for easier tokenizing.
|
||||
*
|
||||
* \see createLexeme(char *, unsigned int) */
|
||||
/**
|
||||
* Stores a lexeme. A lexeme is a group of contiguous characters, stripped of
|
||||
* surrounding whitespace or other lexemes.
|
||||
*/
|
||||
typedef struct {
|
||||
char *image; /**< An array of characters that describe the lexeme. */
|
||||
const char *fname; /**< A pointer to the name of the file containing the lexeme. */
|
||||
unsigned int line; /**< The line number from the source file that the lexeme occurred on. */
|
||||
char *image; /**< The string that identifies the lexeme. */
|
||||
const char *fname; /**< The name of the file containing the lexeme. */
|
||||
unsigned int line; /**< The line number the lexeme occurred on. */
|
||||
} Lexeme;
|
||||
|
||||
/** Stores a list of lexemes. This structure allows sets of lexemes to be
|
||||
* grouped together.
|
||||
*
|
||||
* \see createLexemeList(void)
|
||||
* \see addLexeme(LexemeList *, Lexeme *)
|
||||
* \see deleteLexemeList(LexemeList *) */
|
||||
/**
|
||||
* Stores a list of lexemes.
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned int num; /**< The number of Lexeme structures stored. */
|
||||
Lexeme **lexemes; /**< A pointer to the array of Lexeme structures. */
|
||||
unsigned int num; /**< The number of lexemes stored. */
|
||||
Lexeme **lexemes; /**< The array of stored lexemes. */
|
||||
} LexemeList;
|
||||
|
||||
/**
|
||||
* \name Lexeme modifiers
|
||||
*
|
||||
* Functions for performing helper tasks.
|
||||
*/
|
||||
/**@{*/
|
||||
Lexeme *createLexeme(char *, const char *, unsigned int);
|
||||
void deleteLexeme(Lexeme *);
|
||||
LexemeList *createLexemeList(void);
|
||||
Lexeme *addLexeme(LexemeList *, Lexeme*);
|
||||
void deleteLexemeList(LexemeList *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Buffer lexer
|
||||
*
|
||||
* Generates lexemes from a character buffer.
|
||||
*/
|
||||
/**@{*/
|
||||
LexemeList *scanBuffer(const char *, unsigned int, const char *);
|
||||
/**@}*/
|
||||
|
||||
#endif /* __LEXER_H__ */
|
||||
|
|
2
main.c
2
main.c
|
@ -3,7 +3,7 @@
|
|||
* \section license License
|
||||
*
|
||||
* lci - a LOLCODE interpreter written in C.
|
||||
* Copyright (C) 2010 Justin J. Meza
|
||||
* Copyright (C) 2010-2011 Justin J. Meza
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -20,6 +20,6 @@ done
|
|||
# Remove options, leave arguments
|
||||
shift $((OPTIND - 1))
|
||||
|
||||
find $2 -name *.lol | sort | xargs -n 1 ./testFile.sh$OPTS $1 && echo "Passed all tests!" && exit 0
|
||||
find $2 -name *.lol | sort -t'/' -n -k1 -k2 -k3 -k4 -k5 -k6 -k7 -k8 -k9 -k10 | xargs -n 1 ./testFile.sh$OPTS $1 && echo "Passed all tests!" && exit 0
|
||||
|
||||
exit 1
|
||||
|
|
|
@ -61,7 +61,7 @@ then
|
|||
test ! $QUIET && printf "Found output file ($OUTFILE)!\n"
|
||||
fi
|
||||
# Run the test
|
||||
TMPFILE=$(mktemp) && test ! $QUIET && printf "Using temporary output file ($TMPFILE)...\n"
|
||||
TMPFILE=$(mktemp /tmp/temp.XXXX) && test ! $QUIET && printf "Using temporary output file ($TMPFILE)...\n"
|
||||
eval "$MEMCHK $PROGRAM $TESTFILE $IN > $TMPFILE"
|
||||
RESULT=$?
|
||||
# Check that program exited normally
|
||||
|
|
300
tokenizer.c
300
tokenizer.c
|
@ -12,6 +12,7 @@ static const char *keywords[] = {
|
|||
"NUMBAR", /* TT_NUMBAR */
|
||||
"TROOF", /* TT_TROOF */
|
||||
"YARN", /* TT_YARN */
|
||||
"BUKKIT", /* TT_BUKKIT */
|
||||
"", /* TT_EOF */
|
||||
"", /* TT_NEWLINE */
|
||||
"HAI", /* TT_HAI */
|
||||
|
@ -67,23 +68,27 @@ static const char *keywords[] = {
|
|||
"IF U SAY SO", /* TT_IFUSAYSO */
|
||||
"FOUND YR", /* TT_FOUNDYR */
|
||||
"SRS", /* TT_SRS */
|
||||
"'Z", /* TT_APOSTROPHEZ */
|
||||
"BUKKIT", /* TT_BUKKIT */
|
||||
"" /* TT_ENDOFTOKENS */
|
||||
};
|
||||
|
||||
/** Checks if a string of characters follows the format for an integer.
|
||||
* Specifically, it checks if the string of characters matches the regular
|
||||
* expression: [-]?[1-9][0-9]* | 0
|
||||
*
|
||||
* \retval 0 The string of characters is not an integer.
|
||||
* \retval 1 The string of characters is an integer.
|
||||
*
|
||||
* \see isFloat(const char *)
|
||||
* \see isString(const char *)
|
||||
* \see isIdentifier(const char *) */
|
||||
int isInteger(const char *image) /**< [in] The string of characters to compare. */
|
||||
/**
|
||||
* Checks if a string follows the format for an integer. Specifically, it
|
||||
* checks if the string matches the regular expression: (-?[1-9][0-9]*|0).
|
||||
*
|
||||
* \param [in] image The string to check.
|
||||
*
|
||||
* \retval 0 \a image does not match the pattern for an integer.
|
||||
*
|
||||
* \retval 1 \a image matches the pattern for an integer.
|
||||
*/
|
||||
int isInteger(const char *image)
|
||||
{
|
||||
const char *cur = image;
|
||||
if (*cur == '-' || (isdigit(*cur) && *cur != '0') || (*cur == '0' && *(cur + 1) == '\0')) {
|
||||
if (*cur == '-'
|
||||
|| (isdigit(*cur) && *cur != '0')
|
||||
|| (*cur == '0' && *(cur + 1) == '\0')) {
|
||||
cur++;
|
||||
while (isdigit(*cur)) cur++;
|
||||
if (*cur == '\0') return 1;
|
||||
|
@ -91,17 +96,17 @@ int isInteger(const char *image) /**< [in] The string of characters to compare.
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** Checks if a string of characters follows the format for a floating
|
||||
* point decimal. Specifically, it checks if the string of characters matches
|
||||
* the regular expression: [-]?[0-9].[0-9]*
|
||||
*
|
||||
* \retval 0 The string of characters is not a floating point decimal.
|
||||
* \retval 1 The string of characters is a floating point decimal.
|
||||
*
|
||||
* \see isInteger(const char *)
|
||||
* \see isString(const char *)
|
||||
* \see isIdentifier(const char *) */
|
||||
int isFloat(const char *image) /**< [in] The string of characters to compare. */
|
||||
/**
|
||||
* Checks if a string follows the format for a decimal. Specifically, it checks
|
||||
* if the string matches the regular expression: (-?[0-9].[0-9]*).
|
||||
*
|
||||
* \param [in] image The string to check.
|
||||
*
|
||||
* \retval 0 \a image does not match the pattern for a decimal.
|
||||
*
|
||||
* \retval 1 \a image matches the pattern for a decimal.
|
||||
*/
|
||||
int isFloat(const char *image)
|
||||
{
|
||||
const char *cur = image;
|
||||
if (*cur == '-' || isdigit(*cur)) {
|
||||
|
@ -116,33 +121,33 @@ int isFloat(const char *image) /**< [in] The string of characters to compare. */
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** Checks if a string of characters follows the format for a string.
|
||||
* Specifically, it checks if the string of characters begins and ends with a
|
||||
* quote character.
|
||||
*
|
||||
* \retval 0 The string of characters is not a string.
|
||||
* \retval 1 The string of characters is a string.
|
||||
*
|
||||
* \see isInteger(const char *)
|
||||
* \see isFloat(const char *)
|
||||
* \see isIdentifier(const char *) */
|
||||
int isString(const char *image) /**< [in] The string of characters to compare. */
|
||||
/**
|
||||
* Checks if a string follows the format for a string literal. Specifically, it
|
||||
* checks if the string matches the regular expression: (".*").
|
||||
*
|
||||
* \param [in] image The string to check.
|
||||
*
|
||||
* \retval 0 \a image does not match the pattern for a string.
|
||||
*
|
||||
* \retval 1 \a image matches the pattern for a string.
|
||||
*/
|
||||
int isString(const char *image)
|
||||
{
|
||||
size_t len = strlen(image);
|
||||
return (len >= 2 && image[0] == '"' && image[len - 1] == '"');
|
||||
}
|
||||
|
||||
/** Checks if a string of characters follows the format for an identifier.
|
||||
* Specifically, it checks if the string of characters matches the regular
|
||||
* expression: [a-zA-Z][a-zA-Z0-9_]*
|
||||
*
|
||||
* \retval 0 The string of characters is not an identifier.
|
||||
* \retval 1 The string of characters is an identifier.
|
||||
*
|
||||
* \see isInteger(const char *)
|
||||
* \see isFloat(const char *)
|
||||
* \see isString(const char *) */
|
||||
int isIdentifier(const char *image) /**< [in] The string of characters to compare. */
|
||||
/**
|
||||
* Checks if a string follows the format for an identifier. Specifically, it
|
||||
* checks if the string matches the regular expression: ([a-zA-Z][a-zA-Z0-9_]*).
|
||||
*
|
||||
* \param [in] image The string to check.
|
||||
*
|
||||
* \retval 0 \a image does not match the pattern for an identifier.
|
||||
*
|
||||
* \retval 1 \a image matches the pattern for an identifier.
|
||||
*/
|
||||
int isIdentifier(const char *image)
|
||||
{
|
||||
const char *cur = image;
|
||||
/* First character must be alphabetic */
|
||||
|
@ -155,17 +160,25 @@ int isIdentifier(const char *image) /**< [in] The string of characters to compar
|
|||
return 1;
|
||||
}
|
||||
|
||||
/** Creates a Token structure.
|
||||
*
|
||||
* \return A pointer to a Token structure with the desired properties.
|
||||
*
|
||||
* \retval NULL malloc was unable to allocate memory.
|
||||
*
|
||||
* \see deleteToken(Token *) */
|
||||
Token *createToken(TokenType type, /**< [in] The type of token to create. */
|
||||
const char *image, /**< [in] The characters from the source file that represent the token. */
|
||||
const char *fname, /**< [in] A pointer to the name of the file containing the token. */
|
||||
unsigned int line) /**< [in] The line number from the source file that the token occurred on. */
|
||||
/**
|
||||
* Creates a token.
|
||||
*
|
||||
* \param [in] type The type of token to create.
|
||||
*
|
||||
* \param [in] image The string that represents the token.
|
||||
*
|
||||
* \param [in] fname The name of the file containing the token.
|
||||
*
|
||||
* \param [in] line The number of the line containing the token.
|
||||
*
|
||||
* \return A pointer to a new token with the desired properties.
|
||||
*
|
||||
* \retval NULL Memory allocation failed.
|
||||
*/
|
||||
Token *createToken(TokenType type,
|
||||
const char *image,
|
||||
const char *fname,
|
||||
unsigned int line)
|
||||
{
|
||||
Token *ret = malloc(sizeof(Token));
|
||||
if (!ret) {
|
||||
|
@ -180,20 +193,22 @@ Token *createToken(TokenType type, /**< [in] The type of token to create. */
|
|||
return NULL;
|
||||
}
|
||||
strcpy(ret->image, image);
|
||||
/** \note fname is not copied because it would only one copy is stored
|
||||
* for all Token structures that share it. */
|
||||
/**
|
||||
* \note fname is not copied because only one copy is stored for all
|
||||
* Token structures that share it.
|
||||
*/
|
||||
ret->fname = fname;
|
||||
ret->line = line;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Deletes a Token structure.
|
||||
*
|
||||
* \pre \a token points to a Token structure created by createToken(TokenType, const char *, const char *, unsigned int).
|
||||
*
|
||||
* \post The memory at \a token and all of its elements will be freed.
|
||||
*
|
||||
* \see createToken(TokenType, const char *, const char *, unsigned int) */
|
||||
/**
|
||||
* Deletes a token.
|
||||
*
|
||||
* \param [in,out] token The token to delete.
|
||||
*
|
||||
* \post The memory at \a token and all of its members will be freed.
|
||||
*/
|
||||
void deleteToken(Token *token)
|
||||
{
|
||||
if (!token) return;
|
||||
|
@ -201,22 +216,25 @@ void deleteToken(Token *token)
|
|||
free(token);
|
||||
}
|
||||
|
||||
/** Adds a Token to an array of Token structures.
|
||||
*
|
||||
* \note \a list may be NULL in which case a new list is created.
|
||||
*
|
||||
* \pre \a num is the number of elements in \a list.
|
||||
*
|
||||
* \post \a token will be added on to the end of \a list and the value at \a num
|
||||
* will be updated accordingly.
|
||||
*
|
||||
* \retval 0 realloc was unable to allocate memory.
|
||||
* \retval 1 \a node was added to \a list.
|
||||
*
|
||||
* \see deleteTokens(Token **) */
|
||||
int addToken(Token ***list, /**< [in,out] A pointer to a pointer to an array of Token structures to add the new Token onto. */
|
||||
unsigned int *num, /**< [in,out] A pointer to the number of elements in \a list. */
|
||||
Token *token) /**< [in] A pointer to the Token structure to add to \a list. */
|
||||
/**
|
||||
* Adds a token to a list.
|
||||
*
|
||||
* \param [in,out] list The list of tokens to add \a token to.
|
||||
*
|
||||
* \param [in,out] num The number of tokens in \a list.
|
||||
*
|
||||
* \param [in] token The token to add to \a list.
|
||||
*
|
||||
* \post \a token will be added to the end of \a list and the size of \a list
|
||||
* will be updated.
|
||||
*
|
||||
* \retval 0 Memory allocation failed.
|
||||
*
|
||||
* \retval 1 \a token was added to \a list.
|
||||
*/
|
||||
int addToken(Token ***list,
|
||||
unsigned int *num,
|
||||
Token *token)
|
||||
{
|
||||
unsigned int newsize = *num + 1;
|
||||
void *mem = realloc(*list, sizeof(Token *) * newsize);
|
||||
|
@ -233,14 +251,14 @@ int addToken(Token ***list, /**< [in,out] A pointer to a pointer to an array
|
|||
return 1;
|
||||
}
|
||||
|
||||
/** Deletes an array of Token structures.
|
||||
*
|
||||
* \pre \a list was created by and contains items added by addToken(Token ***, unsigned int *, Token *).
|
||||
*
|
||||
* \post The memory at \a list and all of its elements will be freed.
|
||||
*
|
||||
* \see addToken(Token ***, unsigned int *, Token *) */
|
||||
void deleteTokens(Token **list) /**< [in,out] A pointer to an array of Token structures to be deleted. */
|
||||
/**
|
||||
* Deletes a list of tokens.
|
||||
*
|
||||
* \param [in,out] list The list of tokens to delete.
|
||||
*
|
||||
* \post The memory at \a list and all of its members will be freed.
|
||||
*/
|
||||
void deleteTokens(Token **list)
|
||||
{
|
||||
Token **tok = list;
|
||||
while (*tok) {
|
||||
|
@ -250,20 +268,28 @@ void deleteTokens(Token **list) /**< [in,out] A pointer to an array of Token str
|
|||
free(list);
|
||||
}
|
||||
|
||||
/** Tries to match a sequence of lexemes. Scans through \a lexemes starting at
|
||||
* \a start and tries to match space-delimited lexemes from \a match.
|
||||
*
|
||||
* \pre \a lexemes was created by scanBuffer(const char *, unsigned int, const char *).
|
||||
*
|
||||
* \return The number of lexemes matched. */
|
||||
unsigned int acceptLexemes(LexemeList *lexemes, /**< [in] A pointer to a LexemeList structure to match lexemes from. */
|
||||
unsigned int start, /**< [in] The position within \a lexemes to start matching at. */
|
||||
const char *match) /**< [in] A pointer to a character array describing the sequence of lexemes to match. */
|
||||
/**
|
||||
* Matches lexemes against a string. Traverses \a lexemes starting at \a start
|
||||
* and compares lexeme images to space-delimited substrings from \a match.
|
||||
*
|
||||
* \param [in] lexemes The list of lexemes to match from.
|
||||
*
|
||||
* \param [in] start The index within \a lexemes to start matching at.
|
||||
*
|
||||
* \param [in] match A string of space-delimited substrings to match.
|
||||
*
|
||||
* \return The number of lexemes matched.
|
||||
*/
|
||||
unsigned int acceptLexemes(LexemeList *lexemes,
|
||||
unsigned int start,
|
||||
const char *match)
|
||||
{
|
||||
unsigned int offset = 0;
|
||||
unsigned int n;
|
||||
unsigned int i;
|
||||
for (n = 0, i = 0; match[n] || lexemes->lexemes[start + offset]->image[i]; n++) {
|
||||
for (n = 0, i = 0;
|
||||
match[n] || lexemes->lexemes[start + offset]->image[i];
|
||||
n++) {
|
||||
if (match[n] == ' ') {
|
||||
offset++;
|
||||
i = 0;
|
||||
|
@ -276,49 +302,59 @@ unsigned int acceptLexemes(LexemeList *lexemes, /**< [in] A pointer to a LexemeL
|
|||
return offset + 1;
|
||||
}
|
||||
|
||||
/** Checks if a sequence of lexemes is a keyword. \a lexemes is searched
|
||||
* starting at \a start for keywords. If one is found, the appropriate Token
|
||||
* structure is created and returned and the value of \a start is incremented
|
||||
* by the number of lexemes matched minus one.
|
||||
*
|
||||
* \pre \a lexemes was created by scanBuffer(const char *, unsigned int, const char *).
|
||||
*
|
||||
* \post If a keyword is not found, \a start will be unmodified. Otherwise,
|
||||
* \a start will be incremented by the number of lexemes matched minus
|
||||
* one.
|
||||
*
|
||||
* \return A pointer to a newly created keyword Token structure.
|
||||
*
|
||||
* \retval NULL No keywords were matched or there was an error allocating
|
||||
* memory. */
|
||||
Token *isKeyword(LexemeList *lexemes, /**< [in] A pointer to a LexemeList structure to search for keywords in. */
|
||||
unsigned int *start) /**< [in,out] A pointer to the position within \a lexemes to start checking at. */
|
||||
/**
|
||||
* Checks if the next lexemes in a list comprise a keyword and, if so, generates
|
||||
* a new token representing that keyword. Specifically, \a lexemes is searched,
|
||||
* starting at \a start for keywords. If one is found, an appropriate token is
|
||||
* created and returned and \a start is incremented by the number of lexemes
|
||||
* matched minus one.
|
||||
*
|
||||
* \param [in] lexemes A list of lexemes to search for keywords in.
|
||||
*
|
||||
* \param [in,out] start The position within \a lexemes to begin searching for
|
||||
* keywords.
|
||||
*
|
||||
* \post If a keyword is not found, \a start will not be modified. Otherwise,
|
||||
* \a start will be incremented by the number of lexemes matched minus one.
|
||||
*
|
||||
* \return A pointer to the token containing the matched keyword.
|
||||
*
|
||||
* \retval NULL No keywords were found or there was an error allocating memory.
|
||||
*/
|
||||
Token *isKeyword(LexemeList *lexemes,
|
||||
unsigned int *start)
|
||||
{
|
||||
Token *token = NULL;
|
||||
TokenType type;
|
||||
const char *fname = lexemes->lexemes[*start]->fname;
|
||||
unsigned int line = lexemes->lexemes[*start]->line;
|
||||
/* For each keyword, */
|
||||
for (type = 0; type != TT_ENDOFTOKENS; type++) {
|
||||
unsigned int num = acceptLexemes(lexemes, *start, keywords[type]);
|
||||
/* Check if the start of lexemes match */
|
||||
unsigned int num = acceptLexemes(lexemes,
|
||||
*start, keywords[type]);
|
||||
if (!num) continue;
|
||||
/* If so, create a new token for the keyword */
|
||||
token = createToken(type, keywords[type], fname, line);
|
||||
/* And advance the start */
|
||||
*start += (num - 1);
|
||||
break;
|
||||
}
|
||||
return token;
|
||||
}
|
||||
|
||||
/** Converts a list of lexemes into tokens. Additionally parses the literal
|
||||
* values of integers, floating point decimals, and strings.
|
||||
*
|
||||
* \pre \a list was created by scanBuffer(const char *, unsigned int, const char *).
|
||||
*
|
||||
* \return A pointer to an array of Token structures representing the tokenized
|
||||
* form of the input lexeme stream.
|
||||
*
|
||||
* \retval NULL An unrecognized token was encountered or memory allocation
|
||||
* failed. */
|
||||
Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList structure to tokenize. */
|
||||
/**
|
||||
* Converts a list of lexemes into tokens. Also parses integers, floats, and
|
||||
* strings into tokens with semantic meaning.
|
||||
*
|
||||
* \param [in] list A list of lexemes to tokenize.
|
||||
*
|
||||
* \return A list of tokens generated from \a list.
|
||||
*
|
||||
* \retval NULL An unrecognized token was encountered or memory allocation
|
||||
* failed.
|
||||
*/
|
||||
Token **tokenizeLexemes(LexemeList *list)
|
||||
{
|
||||
void *mem = NULL;
|
||||
Token **ret = NULL;
|
||||
|
@ -368,8 +404,8 @@ Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList st
|
|||
continue;
|
||||
}
|
||||
/* Newline */
|
||||
/* Note that the spec is unclear as to whether a command *must* follow
|
||||
* a comma. For now, we let commas end a line. */
|
||||
/* Note that the spec is unclear as to whether a command *must*
|
||||
* follow a comma. For now, we let commas end a line. */
|
||||
else if (!strcmp(image, "\n")) {
|
||||
/* Note that we ignore any initial newlines */
|
||||
if (retsize < 1) {
|
||||
|
@ -392,8 +428,8 @@ Token **tokenizeLexemes(LexemeList *list) /**< [in] A pointer to a LexemeList st
|
|||
else if ((token = isKeyword(list, &n))) {
|
||||
}
|
||||
/* Identifier */
|
||||
/* This must be placed after keyword parsing because most
|
||||
* keywords look like identifiers. */
|
||||
/* This must be placed after keyword parsing or else most
|
||||
* keywords would be tokenized as identifiers. */
|
||||
else if (isIdentifier(image)) {
|
||||
token = createToken(TT_IDENTIFIER, image, fname, line);
|
||||
}
|
||||
|
|
215
tokenizer.h
215
tokenizer.h
|
@ -1,14 +1,16 @@
|
|||
/** Structures and functions for grouping lexemes into tokens. The tokenizer
|
||||
* reads through an array of lexemes (generated by the lexer) and groups them
|
||||
* into tokens based on their structure. In addition, some lexemes with
|
||||
* semantic meaning (such as integers, floats, strings, and booleans) will have
|
||||
* their values extracted and stored.
|
||||
*
|
||||
* \file tokenizer.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010 */
|
||||
/**
|
||||
* Structures and functions for grouping lexemes into tokens. The tokenizer
|
||||
* reads through an array of lexemes (generated by the lexer) and groups them
|
||||
* into tokens based on their structure. In addition, some lexemes with
|
||||
* semantic meaning (such as integers, floats, strings, and booleans) will have
|
||||
* their values extracted and stored.
|
||||
*
|
||||
* \file tokenizer.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010-2011
|
||||
*/
|
||||
|
||||
#ifndef __TOKENIZER_H__
|
||||
#define __TOKENIZER_H__
|
||||
|
@ -21,107 +23,138 @@
|
|||
|
||||
#undef DEBUG
|
||||
|
||||
/** Denotes the type of token present. All of the token type names are
|
||||
* self-explainatory and correspond to either the semantic type of token data
|
||||
* (in the case of TT_INTEGER, TT_FLOAT, TT_STRING, or TT_IDENTIFIER) or the
|
||||
* lexemes which make up the particular token.
|
||||
*
|
||||
* \note Remember to update the keywords array with the token image. */
|
||||
/**
|
||||
* Represents a token type. All of the token type names correspond to either
|
||||
* the semantic type of token data or the lexemes which make up the particular
|
||||
* token.
|
||||
*
|
||||
* \note Remember to update the keywords array (in tokenizer.c) with the
|
||||
* token image.
|
||||
*/
|
||||
typedef enum {
|
||||
TT_INTEGER,
|
||||
TT_FLOAT,
|
||||
TT_STRING,
|
||||
TT_IDENTIFIER,
|
||||
TT_BOOLEAN,
|
||||
TT_IT,
|
||||
TT_NOOB,
|
||||
TT_NUMBR,
|
||||
TT_NUMBAR,
|
||||
TT_TROOF,
|
||||
TT_YARN,
|
||||
TT_EOF,
|
||||
TT_NEWLINE,
|
||||
TT_HAI,
|
||||
TT_KTHXBYE,
|
||||
TT_HASA,
|
||||
TT_ITZA,
|
||||
TT_ITZ,
|
||||
TT_RNOOB,
|
||||
TT_R,
|
||||
TT_ANYR,
|
||||
TT_AN,
|
||||
TT_SUMOF,
|
||||
TT_DIFFOF,
|
||||
TT_PRODUKTOF,
|
||||
TT_QUOSHUNTOF,
|
||||
TT_MODOF,
|
||||
TT_BIGGROF,
|
||||
TT_SMALLROF,
|
||||
TT_BOTHOF,
|
||||
TT_EITHEROF,
|
||||
TT_WONOF,
|
||||
TT_NOT,
|
||||
TT_MKAY,
|
||||
TT_ALLOF,
|
||||
TT_ANYOF,
|
||||
TT_BOTHSAEM,
|
||||
TT_DIFFRINT,
|
||||
TT_MAEK,
|
||||
TT_A,
|
||||
TT_ISNOWA,
|
||||
TT_VISIBLE,
|
||||
TT_SMOOSH,
|
||||
TT_BANG,
|
||||
TT_GIMMEH,
|
||||
TT_ORLY,
|
||||
TT_YARLY,
|
||||
TT_MEBBE,
|
||||
TT_NOWAI,
|
||||
TT_OIC,
|
||||
TT_WTF,
|
||||
TT_OMG,
|
||||
TT_OMGWTF,
|
||||
TT_GTFO,
|
||||
TT_IMINYR,
|
||||
TT_UPPIN,
|
||||
TT_NERFIN,
|
||||
TT_YR,
|
||||
TT_TIL,
|
||||
TT_WILE,
|
||||
TT_IMOUTTAYR,
|
||||
TT_HOWIZ,
|
||||
TT_IZ,
|
||||
TT_IFUSAYSO,
|
||||
TT_FOUNDYR,
|
||||
TT_SRS,
|
||||
TT_ENDOFTOKENS
|
||||
TT_INTEGER, /**< Integer literal. */
|
||||
TT_FLOAT, /**< Decimal literal. */
|
||||
TT_STRING, /**< String literal. */
|
||||
TT_IDENTIFIER, /**< Identifier literal. */
|
||||
TT_BOOLEAN, /**< Boolean literal. */
|
||||
TT_IT, /**< \ref impvar "Implicit variable". */
|
||||
TT_NOOB, /**< Nil keyword. */
|
||||
TT_NUMBR, /**< Integer keyword. */
|
||||
TT_NUMBAR, /**< Decimal keyword. */
|
||||
TT_TROOF, /**< Boolean keyword. */
|
||||
TT_YARN, /**< String keyword. */
|
||||
TT_BUKKIT, /**< Array. */
|
||||
TT_EOF, /**< End of file. */
|
||||
TT_NEWLINE, /**< Newline. */
|
||||
TT_HAI, /**< Beginning of main block. */
|
||||
TT_KTHXBYE, /**< End of main block. */
|
||||
TT_HASA, /**< Variable declaration. */
|
||||
TT_ITZA, /**< Variable type initialization. */
|
||||
TT_ITZ, /**< Variable value initialization. */
|
||||
TT_RNOOB, /**< Deallocation. */
|
||||
TT_R, /**< Assignment. */
|
||||
TT_ANYR, /**< User-defined function argument separator. */
|
||||
TT_AN, /**< Built-in function argument separator. */
|
||||
TT_SUMOF, /**< Addition. */
|
||||
TT_DIFFOF, /**< Subtraction. */
|
||||
TT_PRODUKTOF, /**< Multiplication. */
|
||||
TT_QUOSHUNTOF, /**< Division. */
|
||||
TT_MODOF, /**< Modulo. */
|
||||
TT_BIGGROF, /**< Maximum. */
|
||||
TT_SMALLROF, /**< Minimum. */
|
||||
TT_BOTHOF, /**< Logical AND. */
|
||||
TT_EITHEROF, /**< Logical OR. */
|
||||
TT_WONOF, /**< Logical XOR. */
|
||||
TT_NOT, /**< Logical NOT. */
|
||||
TT_MKAY, /**< Infinite arity argument delimiter. */
|
||||
TT_ALLOF, /**< Infinite arity logical AND. */
|
||||
TT_ANYOF, /**< Infinite arity logical OR. */
|
||||
TT_BOTHSAEM, /**< Equality. */
|
||||
TT_DIFFRINT, /**< Inequality. */
|
||||
TT_MAEK, /**< Cast. */
|
||||
TT_A, /**< Cast target separator. */
|
||||
TT_ISNOWA, /**< In-place cast. */
|
||||
TT_VISIBLE, /**< Print. */
|
||||
TT_SMOOSH, /**< String concatenation. */
|
||||
TT_BANG, /**< Exclamation point (!) */
|
||||
TT_GIMMEH, /**< Input. */
|
||||
TT_ORLY, /**< Conditional. */
|
||||
TT_YARLY, /**< True branch. */
|
||||
TT_MEBBE, /**< Else-if branch. */
|
||||
TT_NOWAI, /**< False branch. */
|
||||
TT_OIC, /**< Conditional and switch delimiter. */
|
||||
TT_WTF, /**< Switch. */
|
||||
TT_OMG, /**< Case. */
|
||||
TT_OMGWTF, /**< Default case. */
|
||||
TT_GTFO, /**< Break or return without value. */
|
||||
TT_IMINYR, /**< Loop beginning. */
|
||||
TT_UPPIN, /**< Auto increment loop variable. */
|
||||
TT_NERFIN, /**< Auto decrement loop variable. */
|
||||
TT_YR, /**< Function name delimiter. */
|
||||
TT_TIL, /**< Do until. */
|
||||
TT_WILE, /**< Do while. */
|
||||
TT_IMOUTTAYR, /**< Loop ending. */
|
||||
TT_HOWIZ, /**< Function definition beginning. */
|
||||
TT_IZ, /**< Function scope delimiter. */
|
||||
TT_IFUSAYSO, /**< Function definition end. */
|
||||
TT_FOUNDYR, /**< Return with value. */
|
||||
TT_SRS, /**< Indirect variable access. */
|
||||
TT_APOSTROPHEZ, /**< Array slot access ('Z). */
|
||||
TT_ENDOFTOKENS /**< The end of this enum -- don't move it! */
|
||||
} TokenType;
|
||||
|
||||
/** Stores the data associated with a Token structure. */
|
||||
/**
|
||||
* Stores token data with semantic meaning.
|
||||
*/
|
||||
typedef union {
|
||||
int i; /**< Integer data. */
|
||||
float f; /**< Floating point data. */
|
||||
float f; /**< Decimal data. */
|
||||
} TokenData;
|
||||
|
||||
/** Stores a token and any value parsed by the tokenizer. */
|
||||
/**
|
||||
* Stores a token type and any parsed values.
|
||||
*/
|
||||
typedef struct {
|
||||
TokenType type; /**< The type of token. */
|
||||
TokenData data; /**< The stored data of type \a type. */
|
||||
char *image; /**< The array of characters from the lexer which correspond to the token. */
|
||||
const char *fname; /**< A pointer to the name of the file containing the token. */
|
||||
unsigned int line; /**< The line number from the source file that the token occurred on. */
|
||||
char *image; /**< The characters that comprise the token. */
|
||||
const char *fname; /**< The name of the file containing the token. */
|
||||
unsigned int line; /**< The line number the token was on. */
|
||||
} Token;
|
||||
|
||||
/**
|
||||
* \name Utilities
|
||||
*
|
||||
* Functions for performing helper tasks.
|
||||
*/
|
||||
/**@{*/
|
||||
int isInteger(const char *);
|
||||
int isFloat(const char *);
|
||||
int isString(const char *);
|
||||
int isIdentifier(const char *);
|
||||
Token *isKeyword(LexemeList *, unsigned int *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Token modifiers
|
||||
*
|
||||
* Functions for creating and deleting tokens.
|
||||
*/
|
||||
/**@{*/
|
||||
Token *createToken(TokenType, const char *, const char *, unsigned int);
|
||||
void deleteToken(Token *);
|
||||
int addToken(Token ***, unsigned int *, Token*);
|
||||
void deleteTokens(Token **);
|
||||
unsigned int acceptLexemes(LexemeList *, unsigned int, const char *);
|
||||
Token *isKeyword(LexemeList *, unsigned int *);
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* \name Lexeme tokenizer
|
||||
*
|
||||
* Generates tokens from lexemes.
|
||||
*/
|
||||
/**@{*/
|
||||
Token **tokenizeLexemes(LexemeList *);
|
||||
/**@}*/
|
||||
|
||||
#endif /* __TOKENIZER_H__ */
|
||||
|
|
61
unicode.c
61
unicode.c
|
@ -36860,15 +36860,22 @@ static const long codepoints[] = {
|
|||
|
||||
#define NUM_UNICODE 18426
|
||||
|
||||
/** Performs a binary search on an array of strings.
|
||||
*
|
||||
* \return The index of the matching entry, if found.
|
||||
*
|
||||
* \retval -1 The entry does not exist in the array. */
|
||||
int binarySearch(const char **strings, /**< [in] A pointer to an array of character strings to search through. */
|
||||
int start, /**< [in] The start of the range to search through. */
|
||||
int end, /**< [in] The end of the range to search through. */
|
||||
const char *find) /**< [in] The entry to search for. */
|
||||
/**
|
||||
* Performs a binary search on an array of strings.
|
||||
*
|
||||
* \param [in] strings The array of strings to search.
|
||||
* \param [in] start The index to start searching at.
|
||||
* \param [in] end The index to end searching at.
|
||||
* \param [in] find The string to search for.
|
||||
*
|
||||
* \return The index of the matching string, if found.
|
||||
*
|
||||
* \retval -1 The string was not found in the array.
|
||||
*/
|
||||
int binarySearch(const char **strings,
|
||||
int start,
|
||||
int end,
|
||||
const char *find)
|
||||
{
|
||||
int midpoint;
|
||||
int cmp;
|
||||
|
@ -36884,12 +36891,16 @@ int binarySearch(const char **strings, /**< [in] A pointer to an array of charac
|
|||
return -1;
|
||||
}
|
||||
|
||||
/** Converts a Unicode normative name to a Unicode code point.
|
||||
*
|
||||
* \return The Unicode code point corresponding to the given Unicode name.
|
||||
*
|
||||
* \retval -1 An invalid Unicode normative name was supplied. */
|
||||
long convertNormativeNameToCodePoint(const char *name) /**< [in] A pointer to a string of characters representing the Unicode normative name desired. */
|
||||
/**
|
||||
* Converts a Unicode normative name to a Unicode code point.
|
||||
*
|
||||
* \param [in] name The Unicode normative name to convert.
|
||||
*
|
||||
* \return The Unicode code point corresponding to \a name.
|
||||
*
|
||||
* \retval -1 An invalid Unicode normative name was supplied.
|
||||
*/
|
||||
long convertNormativeNameToCodePoint(const char *name)
|
||||
{
|
||||
int index = binarySearch(names, 0, NUM_UNICODE - 1, name);
|
||||
if (index < 0) {
|
||||
|
@ -36900,14 +36911,18 @@ long convertNormativeNameToCodePoint(const char *name) /**< [in] A pointer to a
|
|||
return codepoints[index];
|
||||
}
|
||||
|
||||
/** Converts the bits in a long integer representing a Unicode code point to a
|
||||
* series of one or more bytes representing a UTF-8 character.
|
||||
*
|
||||
* \return The number of characters in the converted multi-byte character.
|
||||
*
|
||||
* \retval 0 An invalid Unicode code point was supplied. */
|
||||
size_t convertCodePointToUTF8(unsigned long codepoint, /**< [in] The Unicode code point to convert to UTF-8. */
|
||||
char *out) /**< [out] A pointer to the location to store the resulting UTF-8 bytes. */
|
||||
/**
|
||||
* Converts a Unicode code point to a UTF-8 character.
|
||||
*
|
||||
* \param [in] codepoint The Unicode code point to convert to UTF-8.
|
||||
* \param [out] out A pointer to the location to store the UTF-8 character.
|
||||
*
|
||||
* \return The length of the converted multi-byte UTF-8 character.
|
||||
*
|
||||
* \retval 0 An invalid Unicode code point was supplied.
|
||||
*/
|
||||
size_t convertCodePointToUTF8(unsigned long codepoint,
|
||||
char *out)
|
||||
{
|
||||
/* Out of range */
|
||||
if (codepoint > 0x10FFFF) {
|
||||
|
|
18
unicode.h
18
unicode.h
|
@ -1,11 +1,13 @@
|
|||
/** Data and functions for converting from Unicode normative names to
|
||||
* code points.
|
||||
*
|
||||
* \file unicode.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010 */
|
||||
/**
|
||||
* Data and functions for converting from Unicode normative names to Unicode
|
||||
* code points.
|
||||
*
|
||||
* \file unicode.h
|
||||
*
|
||||
* \author Justin J. Meza
|
||||
*
|
||||
* \date 2010-2011
|
||||
*/
|
||||
|
||||
#ifndef __UNICODE_H__
|
||||
#define __UNICODE_H__
|
||||
|
|
Loading…
Reference in New Issue