lci/main.c

/** \mainpage lci Documentation
*
* \section license License
*
* lci - a LOLCODE interpreter written in C.
* Copyright (C) 2010 Justin J. Meza
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* \section maintainer Maintainer
*
* The lead maintainer for this project is Justin J. Meza (justin.meza@gmail.com).
* For more information, check this project's webpage at http://icanhaslolcode.org .
*
* \section about About
*
* lci is a LOLCODE interpreter written in C and is designed to be correct,
* portable, fast, and precisely documented.
*
* - correct: Every effort has been made to test lci's conformance to the
* LOLCODE language specification. Unit tests come packaged with
* the lci source code.
* - portable: lci follows the widely ported ANSI C specification, allowing it
* to compile on a broad range of systems.
* - fast: Much effort has gone into producing simple and efficient code
* whenever possible, to the extent that the above points are not
* compromised.
* - precisely documented: lci uses Doxygen to generate literate code
* documentation, browsable here.
*
* \section organization Organization
*
* lci employs several different modules which each perform a specific task
* during interpretation of code:
*
* - \b lexer (lexer.c, lexer.h) - The lexer takes an array of characters and
* splits it up into individual \e lexemes. Lexemes are divided by
* whitespace and other rules of the language.
* - \b tokenizer (tokenizer.c, tokenizer.h) - The tokenizer takes the
* output of the lexer and converts it into individual \e tokens. Tokens
* are different from lexemes in that a single token may be made up of
* multiple lexemes. Also, the contents of some tokens are evaluated (such
* as integers and floats) for later use.
* - \b parser (parser.c, parser.h) - The parser takes the output of the
* tokenizer and analyzes it syntactically, turning it into a parse tree.
* - \b interpreter (interpreter.c, interpreter.h) - The interpreter takes
* the output of the parser and executes it.
*
* Each of these modules is contained within its own C header and source code
* files of the same name.
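*
* As a rough sketch (with all error handling and cleanup omitted), main.c
* chains these stages together using the functions provided by the modules
* above:
*
* \code
* LexemeList *lexemes = scanBuffer(buffer, length, fname);
* Token **tokens = tokenizeLexemes(lexemes);
* FunctionTable *functab = setupFunctionTable(tokens);
* MainNode *node = parseMainNode(tokens, functab);
* interpretMainNode(node);
* \endcode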
*
* To handle the conversion of Unicode code points and normative names to
* bytes, two additional files, unicode.c and unicode.h, are used.
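*
* The interface for that conversion lives in unicode.h; purely as an
* illustration of the kind of mapping involved (the helper below is
* hypothetical and not lci's actual API), a code point below U+0800 maps to
* UTF-8 bytes as follows:
*
* \code
* // Hypothetical example: encode cp (assumed < 0x800) into buf,
* // returning the number of bytes written.
* int encodeSmallCodePoint(unsigned int cp, unsigned char *buf)
* {
*     if (cp < 0x80) {
*         buf[0] = (unsigned char)cp;                   // 1 byte: 0xxxxxxx
*         return 1;
*     }
*     buf[0] = (unsigned char)(0xC0 | (cp >> 6));       // 2 bytes: 110xxxxx
*     buf[1] = (unsigned char)(0x80 | (cp & 0x3F));     //          10xxxxxx
*     return 2;
* }
* \endcode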
*
* Finally, main.c ties all of these modules together and handles the initial
* loading of input data for the lexer. */
/** \page varscope Variable Scope
*
* The specification states that variables are local to the scope of the main
* block or of whatever function they are contained within, except for
* temporary loop variables, which are local to the loop that instantiates
* them. This behavior, combined with the fact that variables must be declared
* before being used, means that variables may not be shadowed in different
* control scopes (such as loops and conditional statements) and, more
* importantly, that programmers must keep track of whether variables have
* already been declared within conditionally executed code. For example,
* under this scoping, a variable declared in a conditional block cannot be
* safely used in later code.
*
* One advantage of a flat scoping scheme is that nearly everything can be
* stored in a single structure, making lookups faster. However, I believe
* that this advantage is not worth the extra frustration transferred to the
* programmer and so scoping in lci is done in a similar manner to other
* programming languages, to wit, within
* - the main block of code,
* - the body of functions,
* - the body of loop statements, and
* - the bodies of conditional statements.
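*
* For illustration only, the adopted rule behaves much like block scope in C;
* the snippet below is plain C (not LOLCODE) and is meant purely as an
* analogy:
*
* \code
* int main(void)
* {
*     int outer = 1;         // visible through the rest of main
*     if (outer) {
*         int inner = 2;     // visible only inside this conditional body
*         outer += inner;
*     }
*     // inner is out of scope here; outer is still visible
*     return outer;
* }
* \endcode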
*
* This should alleviate any confusion that may have been caused by a single,
* function-wide, free-for-all scope. There also seems to be a general
* consensus on the LOLCODE forums that this is the way to go and that flat
* scoping causes too many problems for the programmer. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "lexer.h"
#include "tokenizer.h"
#include "parser.h"
#include "interpreter.h"
#define READSIZE 512
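/**
 * Returns the first command-line argument that is neither an option flag nor
 * the value of an option whose flag character appears in \a vals, or NULL if
 * no such argument (that is, no input file name) was supplied.
 */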
char *getFileArg(int argc, char **argv, char *vals) {
	int n;
	for (n = 1; n < argc; n++) {
		if (argv[n][0] == '-' && argv[n][1]) {
			int c;
			for (c = 0; vals[c] != '\0'; c++) {
				if (argv[n][1] == vals[c]) {
					n++;
					break;
				}
			}
			continue;
		}
		else return argv[n];
	}
	return NULL;
}
int main(int argc, char **argv)
{
	long size = 0;
	long length = 0;
	char *buffer = NULL;
	LexemeList *lexemes = NULL;
	Token **tokens = NULL;
	FunctionTable *functab = NULL;
	MainNode *node = NULL;
	char *fname = NULL;
	FILE *file = NULL;
	fname = getFileArg(argc, argv, "");
	if (fname == NULL || fname[0] == '-') {
		fname = "stdin";
		file = stdin;
	}
	else {
		file = fopen(fname, "r");
	}
	if (!file) {
		fprintf(stderr, "File does not exist.\n");
		return 1;
	}
	/* Read the entire input into buffer, READSIZE bytes at a time */
	while (!feof(file)) {
		char *tmp;
		size += READSIZE;
		tmp = realloc(buffer, sizeof(char) * size);
		if (!tmp) {
			free(buffer);
			return 1;
		}
		buffer = tmp;
		length += fread((buffer + size) - READSIZE, 1, READSIZE, file);
	}
	fclose(file);
	if (!buffer) return 1;
	buffer[length] = '\0';
	/* Remove the hash bang line, if present, so the interpreter may be
	 * run as a standalone script */
	if (buffer[0] == '#' && buffer[1] == '!') {
		long n;
		for (n = 0; n < length && buffer[n] != '\n' && buffer[n] != '\r'; n++)
			buffer[n] = ' ';
	}
	/* Remove the UTF-8 BOM if present and add it to the output stream (we
	 * assume here that if a BOM is present, the system will also expect
	 * the output to include a BOM). */
	if (length >= 3 && buffer[0] == (char)0xef
			&& buffer[1] == (char)0xbb
			&& buffer[2] == (char)0xbf) {
		buffer[0] = ' ';
		buffer[1] = ' ';
		buffer[2] = ' ';
		printf("%c%c%c", 0xef, 0xbb, 0xbf);
	}
	/* Begin main pipeline */
	if (!(lexemes = scanBuffer(buffer, length, fname))) {
		free(buffer);
		return 1;
	}
	free(buffer);
	if (!(tokens = tokenizeLexemes(lexemes))) {
		deleteLexemeList(lexemes);
		return 1;
	}
	deleteLexemeList(lexemes);
	if (!(functab = setupFunctionTable(tokens))) {
		deleteFunctionTable(functab);
		return 1;
	}
	if (!(node = parseMainNode(tokens, functab))) {
		deleteFunctionTable(functab);
		deleteTokens(tokens);
		return 1;
	}
	deleteTokens(tokens);
	if (interpretMainNode(node)) {
		deleteMainNode(node);
		return 1;
	}
	deleteMainNode(node);
	/* End main pipeline */
	return 0;
}