lci/tokenizer.h

161 lines
5.1 KiB
C

/**
* Structures and functions for grouping lexemes into tokens. The tokenizer
* reads through an array of lexemes (generated by the lexer) and groups them
* into tokens based on their structure. In addition, some lexemes with
* semantic meaning (such as integers, floats, strings, and booleans) will have
* their values extracted and stored.
*
* \file tokenizer.h
*
* \author Justin J. Meza
*
* \date 2010-2011
*/
#ifndef __TOKENIZER_H__
#define __TOKENIZER_H__
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "lexer.h"
#undef DEBUG
/**
 * Enumerates every kind of token the tokenizer can produce. Each enumerator
 * is named after either the semantic type of the data the token carries or
 * the lexemes that make up the token.
 *
 * \note Whenever a token type is added here, its token image must also be
 * added to the keywords array (in the tokens C file). Keep
 * \c TT_ENDOFTOKENS as the final enumerator.
 */
typedef enum {
	/* Literals and identifiers. */
	TT_INTEGER,     /**< An integer literal. */
	TT_FLOAT,       /**< A decimal literal. */
	TT_STRING,      /**< A string literal. */
	TT_IDENTIFIER,  /**< An identifier. */
	TT_BOOLEAN,     /**< A boolean literal. */
	TT_IT,          /**< The \ref impvar "implicit variable". */

	/* Type keywords. */
	TT_NOOB,        /**< The nil keyword. */
	TT_NUMBR,       /**< The integer keyword. */
	TT_NUMBAR,      /**< The decimal keyword. */
	TT_TROOF,       /**< The boolean keyword. */
	TT_YARN,        /**< The string keyword. */
	TT_BUKKIT,      /**< The array keyword. */

	/* Program structure. */
	TT_EOF,         /**< End of file. */
	TT_NEWLINE,     /**< Newline. */
	TT_HAI,         /**< Start of the main block. */
	TT_KTHXBYE,     /**< End of the main block. */

	/* Declaration and assignment. */
	TT_HASA,        /**< Declaration of a variable. */
	TT_ITZA,        /**< Initialization of a variable by type. */
	TT_ITZ,         /**< Initialization of a variable by value. */
	TT_RNOOB,       /**< Deallocation. */
	TT_R,           /**< Assignment. */
	TT_ANYR,        /**< Separator for user-defined function arguments. */
	TT_AN,          /**< Separator for built-in function arguments. */

	/* Arithmetic and comparison. */
	TT_SUMOF,       /**< Addition. */
	TT_DIFFOF,      /**< Subtraction. */
	TT_PRODUKTOF,   /**< Multiplication. */
	TT_QUOSHUNTOF,  /**< Division. */
	TT_MODOF,       /**< Modulo. */
	TT_BIGGROF,     /**< Greater than. */
	TT_SMALLROF,    /**< Less than. */

	/* Logic. */
	TT_BOTHOF,      /**< Logical AND. */
	TT_EITHEROF,    /**< Logical OR. */
	TT_WONOF,       /**< Logical XOR. */
	TT_NOT,         /**< Logical NOT. */
	TT_MKAY,        /**< Delimiter ending an infinite arity argument list. */
	TT_ALLOF,       /**< Logical AND of infinite arity. */
	TT_ANYOF,       /**< Logical OR of infinite arity. */
	TT_BOTHSAEM,    /**< Equality. */
	TT_DIFFRINT,    /**< Inequality. */

	/* Casting. */
	TT_MAEK,        /**< Cast. */
	TT_A,           /**< Separator before the cast target. */
	TT_ISNOWA,      /**< Cast performed in place. */

	/* Input and output. */
	TT_VISIBLE,     /**< Print. */
	TT_SMOOSH,      /**< Concatenation of strings. */
	TT_BANG,        /**< Exclamation point (!) */
	TT_GIMMEH,      /**< Input. */

	/* Conditionals and switches. */
	TT_ORLY,        /**< Conditional. */
	TT_YARLY,       /**< Branch taken when true. */
	TT_MEBBE,       /**< Else branch. */
	TT_NOWAI,       /**< Branch taken when false. */
	TT_OIC,         /**< Delimiter closing a conditional or switch. */
	TT_WTF,         /**< Switch. */
	TT_OMG,         /**< Case. */
	TT_OMGWTF,      /**< Default case. */
	TT_GTFO,        /**< Break, or return without a value. */

	/* Loops. */
	TT_IMINYR,      /**< Start of a loop. */
	TT_UPPIN,       /**< Automatic increment of the loop variable. */
	TT_NERFIN,      /**< Automatic decrement of the loop variable. */
	TT_YR,          /**< Delimiter for a function name. */
	TT_TIL,         /**< Do until. */
	TT_WILE,        /**< Do while. */
	TT_IMOUTTAYR,   /**< End of a loop. */

	/* Functions. */
	TT_HOWIZ,       /**< Start of a function definition. */
	TT_IZ,          /**< Delimiter for a function scope. */
	TT_IFUSAYSO,    /**< End of a function definition. */
	TT_FOUNDYR,     /**< Return with a value. */

	/* Variable and array access. */
	TT_SRS,         /**< Indirect access to a variable. */
	TT_APOSTROPHEZ, /**< Access to an array slot ('Z). */

	TT_ENDOFTOKENS  /**< Marks the end of this enum -- don't move it! */
} TokenType;
/**
 * Holds the semantically meaningful value carried by a token. Being a union,
 * only one of the members holds a valid value at any given time.
 */
typedef union {
	int i;   /**< Value of an integer token. */
	float f; /**< Value of a decimal token. */
} TokenData;
/**
 * Describes a single token: its type, any value parsed out of it, and where
 * it came from.
 */
typedef struct {
	TokenType type;    /**< Which kind of token this is. */
	TokenData data;    /**< The value stored for a token of type \a type. */
	char *image;       /**< The characters making up the token. */
	const char *fname; /**< Name of the file that contains the token. */
	unsigned int line; /**< Line number on which the token appeared. */
} Token;
/**
 * \name Utilities
 *
 * Functions for performing helper tasks.
 *
 * \note Only the prototypes are visible in this header. The parameter names
 * and per-function descriptions below are inferred from the function names
 * and types; confirm them against the definitions.
 */
/**@{*/
/** Presumably reports whether \a image is an integer literal (TODO confirm). */
int isInteger(const char *image);
/** Presumably reports whether \a image is a decimal literal (TODO confirm). */
int isFloat(const char *image);
/** Presumably reports whether \a image is a string literal (TODO confirm). */
int isString(const char *image);
/** Presumably reports whether \a image is an identifier (TODO confirm). */
int isIdentifier(const char *image);
/**
 * Presumably checks whether the lexemes in \a lexemes, beginning at the
 * position referred to by \a start, form a keyword, and returns a token for
 * it (TODO confirm, including the failure value and whether \a start is
 * updated).
 */
Token *isKeyword(LexemeList *lexemes, unsigned int *start);
/**@}*/
/**
* \name Token modifiers
*
* Functions for creating and deleting tokens.
*/
/**@{*/
Token *createToken(TokenType, const char *, const char *, unsigned int);
void deleteToken(Token *);
int addToken(Token ***, unsigned int *, Token*);
void deleteTokens(Token **);
unsigned int acceptLexemes(LexemeList *, unsigned int, const char *);
/**@}*/
/**
 * \name Lexeme tokenizer
 *
 * Generates tokens from lexemes.
 */
/**@{*/
/**
 * Generates tokens from the lexemes in \a list.
 *
 * \note The definition is not visible in this header. The return value is
 * presumably an array of pointers to the generated tokens; confirm how the
 * array is terminated, what is returned on failure, and who must free it.
 */
Token **tokenizeLexemes(LexemeList *list);
/**@}*/
#endif /* __TOKENIZER_H__ */