lci/main.c

/**
 * \mainpage lci Documentation
 *
 * \section license License
 *
 * lci - a LOLCODE interpreter written in C.
 * Copyright (C) 2010-2012 Justin J. Meza
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * \section maintainer Maintainer
 *
 *   - The lead maintainer for this project is Justin J. Meza
 *   (justin.meza@gmail.com).
 *
 *   - For more information, check this project's webpage at
 *   http://lolcode.org .
 *
 * \section about About
 *
 * lci is a LOLCODE interpreter written in C and is designed to be correct,
 * portable, fast, and precisely documented.
 *
 *   - correct: Every effort has been made to test lci's conformance to the
 *   LOLCODE language specification. Unit tests come packaged with the lci
 *   source code.
 *
 *   - portable: lci follows the widely ported ANSI C specification allowing it
 *   to compile on a broad range of systems.
 *
 *   - fast: Much effort has gone into producing simple and efficient code
 *   whenever possible to the extent that the above points are not compromised.
 *
 *   - precisely documented: lci uses Doxygen to generate literate code
 *   documentation, browsable here.
 *
 * \section organization Organization
 *
 * lci employs several different modules which each perform a specific task
 * during interpretation of code:
 *
 *   - \b lexer (lexer.c, lexer.h) - The lexer takes an array of characters and
 *   splits it up into individual \e lexemes.  Lexemes are divided by whitespace
 *   and other rules of the language.
 *
 *   - \b tokenizer (tokenizer.c, tokenizer.h) - The tokenizer takes the output
 *   of the lexer and converts it into individual \e tokens.  Tokens are
 *   different from lexemes in that a single token may be made up of multiple
 *   lexemes.  Also, the contents of some tokens are evaluated (such as integers
 *   and floats) for later use.
 *
 *   - \b parser (parser.c, parser.h) - The parser takes the output of the
 *   tokenizer and analyzes it semantically to turn it into a parse tree.
 *
 *   - \b interpreter (interpreter.c, interpreter.h) - The interpreter takes the
 *   output of the parser and executes it.
 *
 * Each of these modules is contained within its own C header and source code
 * files of the same name.
 *
 * To handle the conversion of Unicode code points and normative names to bytes,
 * two additional files, unicode.c and unicode.h, are used.
 * 
 * Finally, main.c ties all of these modules together and handles the initial
 * loading of input data for the lexer.
 */

/**
 * \page varscope Variable Scope
 *
 * The specification states that variables are local to the scope of the main
 * block or any function they are contained within--except for temporary loop
 * variables which are local to the loop they are instantiated within.  This
 * behavior, combined with the fact that variables must be declared before being
 * used, means that variables may not be shadowed in different control scopes
 * (such as loops and conditional statements) and, more importantly, programmers
 * must keep track of whether variables have been previously declared within
 * conditionally executed code (for example, under this scoping if a variable is
 * declared in a conditional block it cannot be safely used in later code).
 *
 * One advantage of a flat scoping scheme is that nearly everything can be
 * stored in a single structure, making lookups faster.  However, I believe that
 * this advantage is not worth the extra frustration transferred to the
 * programmer and so scoping in lci is done in a similar manner to other
 * programming languages, to wit, within
 *
 *   - the main block of code,
 *   - the body of functions,
 *   - the body of loop statements, and
 *   - the bodies of conditional statements.
 *
 * This should alleviate any confusion which may have been caused by using a
 * completely local free-for-all scope.  Also, there seems to be a general
 * consensus on the LOLCODE forums that this is the way to go and flat scoping
 * causes too many problems for the programmer.
 */

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <readline/readline.h>
#include <readline/history.h>

#include "lexer.h"
#include "tokenizer.h"
#include "parser.h"
#include "interpreter.h"
#include "error.h"

/** Number of bytes requested from the input file per fread() call in main(). */
#define READSIZE 512

/** Name this program was invoked as (argv[0]); set at the start of main(). */
static char *program_name;

/** Short option string handed to getopt_long(): -h, -v and -i, none taking an argument. */
static char *shortopt = "hvi";

/**
 * Long option table handed to getopt_long().  Each entry maps a long name to
 * the same character as its short form; the all-zero entry ends the table.
 */
static struct option longopt[] = {
	{ "help", no_argument, NULL, (int)'h' },
	{ "version", no_argument, NULL, (int)'v' },
	{ "interactive", no_argument, NULL, (int)'i' },
	{ 0, 0, 0, 0 }
};

/**
 * Prints the usage line and the list of supported command-line options to
 * stderr, using the name the program was invoked as.
 */
static void help(void)
{
	fprintf(stderr,
			"Usage: %s [FILE] ... \n"
			"Interpret FILE(s) as LOLCODE. Let FILE be '-' for stdin.\n"
			"  -h, --help\t\toutput this help\n"
			"  -v, --version\t\tprogram version\n"
			"  -i, --interactive\tinteractive prompt\n",
			program_name);
}

/**
 * Prints the program name followed by a revision string to stderr.
 *
 * \param [in] revision The revision string to print after the program name.
 */
static void version(char *revision)
{
	fprintf(stderr,
			"%s %s\n",
			program_name,
			revision);
}

/**
 * Runs one buffer of source text through every stage of the interpreter:
 * scanBuffer(), tokenizeLexemes(), parseMainNode() and
 * interpretMainNodeScope().  The output of each stage is released as soon as
 * the next stage has consumed it.
 *
 * \param [in] buffer The source text.  Ownership passes to this function: the
 *                    buffer is always free()d, whether or not scanning works.
 *
 * \param [in] length The number of characters in \a buffer.
 *
 * \param [in] fname  The name reported to the lexer as the origin of the text.
 *
 * \param [in] scope  The scope handed to interpretMainNodeScope().  May be
 *                    NULL, which is what main() passes for file input.
 *
 * \note \a scope used to be ignored (NULL was always passed on), so the
 *       scope the interactive prompt creates to keep state between lines was
 *       never seen by the interpreter.  NOTE(review): confirm that
 *       interpretMainNodeScope() leaves ownership of \a scope with the caller.
 *
 * \retval 0 Every stage succeeded.
 *
 * \retval 1 A stage failed.
 */
int pipeline(char *buffer, unsigned int length, const char *fname, ScopeObject *scope)
{
	LexemeList *lexemes = NULL;
	Token **tokens = NULL;
	MainNode *node = NULL;
	int status = 0;

	/* The raw text is no longer needed once it has been scanned. */
	lexemes = scanBuffer(buffer, length, fname);
	free(buffer);
	if (!lexemes) return 1;

	tokens = tokenizeLexemes(lexemes);
	deleteLexemeList(lexemes);
	if (!tokens) return 1;

	node = parseMainNode(tokens);
	deleteTokens(tokens);
	if (!node) return 1;

	status = interpretMainNodeScope(node, scope) ? 1 : 0;
	deleteMainNode(node);
	return status;
}

/**
 * Reads everything remaining on \a file into a newly allocated buffer and
 * NUL-terminates it.  The stream is left open.  A diagnostic is printed to
 * stderr when NULL is returned.
 *
 * \param [in] file    The stream to read from.
 *
 * \param [in] name    The name used for \a file in diagnostics.
 *
 * \param [out] length Receives the number of bytes read, not counting the
 *                     terminating NUL.
 *
 * \return The buffer, which the caller must free(), or NULL if memory ran
 *         out, the input does not fit in an unsigned int, or reading failed.
 */
static char *read_stream(FILE *file, const char *name, unsigned int *length)
{
	char *buffer = NULL;
	unsigned int used = 0;
	size_t got = 0;

	do {
		char *grown = NULL;
		if (used > UINT_MAX - READSIZE - 1) {
			fprintf(stderr, "%s: %s: input too large\n", program_name, name);
			free(buffer);
			return NULL;
		}
		/* One byte beyond the chunk is reserved for the terminating NUL. */
		grown = realloc(buffer, (size_t)used + READSIZE + 1);
		if (!grown) {
			fprintf(stderr, "%s: %s: out of memory\n", program_name, name);
			free(buffer);
			return NULL;
		}
		buffer = grown;
		got = fread(buffer + used, 1, READSIZE, file);
		used += (unsigned int)got;
		/* A short read means end of file or an error; both end the loop. */
	} while (got == READSIZE);

	/*
	 * No dedicated read-error code is visible in this file, so the failure
	 * is reported directly.  Without this check a stream that fails without
	 * reaching end of file (a directory, for example) would never finish.
	 */
	if (ferror(file)) {
		fprintf(stderr, "%s: %s: error reading input\n", program_name, name);
		free(buffer);
		return NULL;
	}

	buffer[used] = '\0';
	*length = used;
	return buffer;
}

/**
 * Blanks out, in place, the parts of a source buffer that are not LOLCODE: a
 * leading hash bang line and a leading UTF-8 byte order mark.
 *
 * \param [in,out] buffer The source text.
 *
 * \param [in] length     The number of bytes in \a buffer.
 */
static void blank_prologue(char *buffer, unsigned int length)
{
	/* Remove hash bang line if run as a standalone script */
	if (length >= 2 && buffer[0] == '#' && buffer[1] == '!') {
		unsigned int n;
		/* Stop at the end of the data even if the line has no newline. */
		for (n = 0; n < length
				&& buffer[n] != '\n'
				&& buffer[n] != '\r'; n++)
			buffer[n] = ' ';
	}

	/*
	 * Remove UTF-8 BOM if present and add it to the output stream
	 * (we assume here that if a BOM is present, the system will
	 * also expect the output to include a BOM).  All three bytes must
	 * match; testing them with "or" mangled ordinary files.
	 */
	if (length >= 3
			&& buffer[0] == (char)0xef
			&& buffer[1] == (char)0xbb
			&& buffer[2] == (char)0xbf) {
		buffer[0] = ' ';
		buffer[1] = ' ';
		buffer[2] = ' ';
		printf("%c%c%c", 0xef, 0xbb, 0xbf);
	}
}

/**
 * Runs the interactive prompt: reads lines with readline(), wraps each one in
 * a HAI/KTHXBYE program and passes it to pipeline() together with one
 * ScopeObject created for the whole session.  The line "KTHXBYE" or end of
 * input ends the session; the line "HALP" prints the version and usage text.
 *
 * \param [in] revision The revision string shown by HALP.
 *
 * \retval EXIT_SUCCESS The session ended normally.
 *
 * \retval 1 The session scope or a code buffer could not be allocated.
 */
static int run_interactive(char *revision)
{
	static const char pre[] = "HAI 1.4\n";
	static const char post[] = "\n\nKTHXBYE\n";
	const size_t prelen = sizeof pre - 1;
	const size_t postlen = sizeof post - 1;
	char *line = NULL;
	/* Save state between lines using a ScopeObject structure */
	ScopeObject *scope = createScopeObject(NULL);
	if (!scope) return 1;

	while ((line = readline("lci> "))) {
		size_t linelen = strlen(line);
		char *code = NULL;
		add_history(line);
		/* Intercept KTHXBYE to quit */
		if (!strcmp(line, "KTHXBYE")) {
			free(line);
			break;
		}
		/* Intercept HALP to display help message */
		if (!strcmp(line, "HALP")) {
			version(revision);
			help();
			free(line);
			continue;
		}
		/* Create staged code file */
		code = malloc(prelen + linelen + postlen + 1);
		if (!code) {
			fprintf(stderr, "%s: out of memory\n", program_name);
			free(line);
			deleteScopeObject(scope);
			return 1;
		}
		memcpy(code, pre, prelen);
		memcpy(code + prelen, line, linelen);
		/* The + 1 copies the NUL that ends post. */
		memcpy(code + prelen + linelen, post, postlen + 1);
		/* readline() hands over a malloc()ed line that the caller releases. */
		free(line);
		/*
		 * pipeline() releases code.  Its result is ignored so that a bad
		 * line does not end the session.
		 */
		(void)pipeline(code, (unsigned int)(prelen + linelen + postlen),
				"interactive", scope);
	}

	deleteScopeObject(scope);
	return EXIT_SUCCESS;
}

/**
 * Program entry point.  Handles the command-line options and then interprets
 * each FILE argument in turn, where "-" stands for stdin.
 *
 * Every FILE is interpreted, in order, stopping at the first failure.  This
 * used to return straight after the first FILE.
 *
 * \param [in] argc The number of command-line arguments.
 *
 * \param [in] argv The command-line arguments.
 *
 * \retval 0 All files were interpreted (or there were none).
 *
 * \retval 1 A file could not be opened, read or closed, or its
 *           interpretation failed.
 */
int main(int argc, char **argv)
{
	int ch;

	char *revision = "v0.11.2";
	program_name = argv[0];

	while ((ch = getopt_long(argc, argv, shortopt, longopt, NULL)) != -1) {
		switch (ch) {
			case 'h':
				help();
				exit(EXIT_SUCCESS);
			case 'v':
				version(revision);
				exit(EXIT_SUCCESS);
			case 'i':
				return run_interactive(revision);
			default:
				help();
				exit(EXIT_FAILURE);
		}
	}

	for (; optind < argc; optind++) {
		unsigned int length = 0;
		char *buffer = NULL;
		const char *fname = NULL;
		FILE *file = NULL;

		if (!strcmp(argv[optind], "-")) {
			file = stdin;
			fname = "stdin";
		}
		else {
			file = fopen(argv[optind], "r");
			fname = argv[optind];
		}

		if (!file) {
			error(MN_ERROR_OPENING_FILE, argv[optind]);
			return 1;
		}

		buffer = read_stream(file, fname, &length);

		if (fclose(file) != 0) {
			error(MN_ERROR_CLOSING_FILE, argv[optind]);
			free(buffer);
			return 1;
		}
		/* read_stream() has already reported why it failed. */
		if (!buffer) return 1;

		blank_prologue(buffer, length);

		/* pipeline() takes ownership of buffer and releases it. */
		if (pipeline(buffer, length, fname, NULL)) return 1;
	}

	return 0;
}
Fixed input EOF bug and updated main documentation. 2011-06-29 20:25:56 +00:00			`/**`
			`* \mainpage lci Documentation`
			`*`
			`* \section license License`
			`*`
			`* lci - a LOLCODE interpreter written in C.`
minor bookkeeping changes 2012-12-13 05:02:17 +00:00			`* Copyright (C) 2010-2012 Justin J. Meza`
Fixed input EOF bug and updated main documentation. 2011-06-29 20:25:56 +00:00			`*`
			`* This program is free software: you can redistribute it and/or modify it under`
			`* the terms of the GNU General Public License as published by the Free Software`
			`* Foundation, either version 3 of the License, or (at your option) any later`
			`* version.`
			`*`
			`* This program is distributed in the hope that it will be useful, but WITHOUT`
			`* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS`
			`* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more`
			`* details.`
			`*`
			`* You should have received a copy of the GNU General Public License along with`
			`* this program. If not, see <http://www.gnu.org/licenses/>.`
			`*`
			`* \section maintainer Maintainer`
			`*`
			`* - The lead maintainer for this project is Justin J. Meza`
			`* (justin.meza@gmail.com).`
			`*`
			`* - For more information, check this project's webpage at`
Update project homepage a few places. There were two links to http://icanhaslolcode.org/ , which is now a parked domain. Now those appropriately go to http://lolcode.org/ . 2013-11-19 18:51:48 +00:00			`* http://lolcode.org .`
Fixed input EOF bug and updated main documentation. 2011-06-29 20:25:56 +00:00			`*`
			`* \section about About`
			`*`
			`* lci is a LOLCODE interpreter written in C and is designed to be correct,`
			`* portable, fast, and precisely documented.`
			`*`
			`* - correct: Every effort has been made to test lci's conformance to the`
			`* LOLCODE language specification. Unit tests come packaged with the lci`
			`* source code.`
			`*`
			`* - portable: lci follows the widely ported ANSI C specification allowing it`
			`* to compile on a broad range of systems.`
			`*`
			`* - fast: Much effort has gone into producing simple and efficient code`
			`* whenever possible to the extent that the above points are not compromized.`
			`*`
			`* - precisely documented: lci uses Doxygen to generate literate code`
			`* documentation, browsable here.`
			`*`
			`* \section organization Organization`
			`*`
			`* lci employs several different modules which each perform a specific task`
			`* during interpretation of code:`
			`*`
			`* - \b lexer (lexer.c, lexer.h)- The lexer takes an array of characters and`
			`* splits it up into individual \e lexemes. Lexemes are divided by whitespace`
			`* and other rules of the language.`
			`*`
			`* - \b tokenizer (tokenizer.c, tokenizer.h) - The tokenizer takes the output`
			`* of the lexer and converts it into individual \e tokens. Tokens are`
			`* different from lexemes in that a single token may be made up of multiple`
			`* lexemes. Also, the contents of some tokens are evaluated (such as integers`
			`* and floats) for later use.`
			`*`
			`* - \b parser (parser.c, parser.h) - The parser takes the output of the`
			`* tokenizer and analyzes it semantically to turn it into a parse tree.`
			`*`
			`* - \b interpreter (interpreter.c, interpreter.h) - The interpreter takes the`
			`* output of the parser and executes it.`
			`*`
			`* Each of these modules is contained within its own C header and source code`
			`* files of the same name.`
			`*`
			`* To handle the conversion of Unicode code points and normative names to bytes,`
			`* two additional files, unicode.c and unicode.h are used.`
			`*`
			`* Finally, main.c ties all of these modules together and handles the initial`
			`* loading of input data for the lexer.`
			`*/`

			`/**`
			`* \page varscope Variable Scope`
			`*`
			`* The specification states that variables are local to the scope of the main`
			`* block or any function they are contained within--except for temporary loop`
			`* variables which are local to the loop they are instantiated within. This`
			`* behavior, combined with the fact that variables must be declared before being`
			`* used, means that variables may not be shadowed in different control scopes`
			`* (such as loops and conditional statements) and, more importantly, programmers`
			`* must keep track of whether variables have been previously declared within`
			`* conditionally executed code (for example, under this scoping if a variable is`
			`* declared in a conditional block it cannot be safely used in later code).`
			`*`
			`* One advantage of a flat scoping scheme is that nearly everything can be`
			`* stored in a single structure, making lookups faster. However, I believe that`
			`* this advantage is not worth the extra frustration transferred to the`
			`* programmer and so scoping in lci is done in a similar manner to other`
			`* programming languages, to wit, within`
			`*`
			`* - the main block of code,`
			`* - the body of functions,`
			`* - the body of loop statements, and`
			`* - the bodies of conditional statements.`
			`*`
			`* This should alleviate any confusion which may have been caused by using a`
			`* completely local free-for-all scope. Also, there seems to be a general`
			`* consensus on the LOLCODE forums that this is the way to go and flat scoping`
			`* causes too many problems for the programmer.`
			`*/`
Initial project import 2010-08-09 07:01:59 +00:00
			`#include <stdio.h>`
			`#include <stdlib.h>`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`#include <getopt.h>`
added experimental interactive mode 2013-03-10 21:23:53 +00:00			`#include <readline/readline.h>`
			`#include <readline/history.h>`
Initial project import 2010-08-09 07:01:59 +00:00
			`#include "lexer.h"`
			`#include "tokenizer.h"`
			`#include "parser.h"`
			`#include "interpreter.h"`
started adding new error infrastructure 2012-03-24 15:58:04 +00:00			`#include "error.h"`
Initial project import 2010-08-09 07:01:59 +00:00
			`#define READSIZE 512`

Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`static char *program_name;`

added experimental interactive mode 2013-03-10 21:23:53 +00:00			`static char *shortopt = "hvi";`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`static struct option longopt[] = {`
Merged SRS and de-linted code 2010-12-21 08:20:31 +00:00			`{ "help", no_argument, NULL, (int)'h' },`
			`{ "version", no_argument, NULL, (int)'v' },`
added experimental interactive mode 2013-03-10 21:23:53 +00:00			`{ "interactive", no_argument, NULL, (int)'i' },`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`{ 0, 0, 0, 0 }`
			`};`

			`static void help(void) {`
			`fprintf(stderr, "\`
			`Usage: %s [FILE] ... \n\`
			`Interpret FILE(s) as LOLCODE. Let FILE be '-' for stdin.\n\`
			`-h, --help\t\toutput this help\n\`
added experimental interactive mode 2013-03-10 21:23:53 +00:00			`-v, --version\t\tprogram version\n\`
			`-i, --interactive\tinteractive prompt\n", program_name);`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`}`

			`static void version (char *revision) {`
			`fprintf(stderr, "%s %s\n", program_name, revision);`
Initial project import 2010-08-09 07:01:59 +00:00			`}`

added experimental interactive mode 2013-03-10 21:23:53 +00:00			`int pipeline(char buffer, unsigned int length, const char fname, ScopeObject *scope)`
			`{`
			`LexemeList *lexemes = NULL;`
			`Token **tokens = NULL;`
			`MainNode *node = NULL;`
			`if (!(lexemes = scanBuffer(buffer, length, fname))) {`
			`free(buffer);`
			`return 1;`
			`}`
			`free(buffer);`
			`if (!(tokens = tokenizeLexemes(lexemes))) {`
			`deleteLexemeList(lexemes);`
			`return 1;`
			`}`
			`deleteLexemeList(lexemes);`
			`if (!(node = parseMainNode(tokens))) {`
			`deleteTokens(tokens);`
			`return 1;`
			`}`
			`deleteTokens(tokens);`
			`if (interpretMainNodeScope(node, NULL)) {`
			`deleteMainNode(node);`
			`return 1;`
			`}`
			`deleteMainNode(node);`
			`return 0;`
			`}`

Initial project import 2010-08-09 07:01:59 +00:00			`int main(int argc, char **argv)`
			`{`
Merged SRS and de-linted code 2010-12-21 08:20:31 +00:00			`unsigned int size = 0;`
			`unsigned int length = 0;`
Initial project import 2010-08-09 07:01:59 +00:00			`char *buffer = NULL;`
			`char *fname = NULL;`
			`FILE *file = NULL;`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`int ch;`
Initial project import 2010-08-09 07:01:59 +00:00
bumping version for new release 2014-12-12 23:09:31 +00:00			`char *revision = "v0.11.2";`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`program_name = argv[0];`

			`while ((ch = getopt_long(argc, argv, shortopt, longopt, NULL)) != -1) {`
			`switch (ch) {`
			`default:`
			`help();`
			`exit(EXIT_FAILURE);`
			`case 'h':`
			`help();`
			`exit(EXIT_SUCCESS);`
			`case 'v':`
			`version(revision);`
			`exit(EXIT_SUCCESS);`
added experimental interactive mode 2013-03-10 21:23:53 +00:00			`case 'i':`
			`{`
			`char *line = NULL;`
			`/* Save state between lines using a ScopeObject structure */`
			`ScopeObject *scope = createScopeObject(NULL);`
			`if (!scope) return 1;`
removed more Clang warnings 2013-12-30 17:55:39 +00:00			`while ((line = readline("lci> "))) {`
added experimental interactive mode 2013-03-10 21:23:53 +00:00			`char *pre = "HAI 1.4\n";`
			`char *post = "\n\nKTHXBYE\n";`
			`char *code = NULL;`
			`size = strlen(line);`
			`buffer = realloc(buffer, sizeof(char) * (length + size + 1));`
			`strncpy(buffer + length, line, size);`
			`buffer[length + size] = '\n';`
			`length += size + 1;`
			`add_history(line);`
			`/* Intercept KTHXBYE to quit */`
			`if (!strcmp(line, "KTHXBYE")) {`
			`break;`
			`}`
			`/* Intercept HALP to display help message */`
			`else if (!strcmp(line, "HALP")) {`
			`version(revision);`
			`help();`
			`continue;`
			`}`
			`/* Create staged code file */`
			`code = malloc(sizeof(char) * (strlen(pre) + size + strlen(post) + 1));`
			`strcpy(code, pre);`
			`strncpy(code + strlen(pre), line, size);`
			`strcpy(code + strlen(pre) + size, post);`
			`code[strlen(pre) + size + strlen(post)] = '\0';`
			`pipeline(code, strlen(code), "interactive", scope);`
			`}`
			`free(buffer);`
			`deleteScopeObject(scope);`
			`exit(EXIT_SUCCESS);`
			`}`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`}`
Initial project import 2010-08-09 07:01:59 +00:00			`}`

Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`for (; optind < argc; optind++) {`
			`size = length = 0;`
			`buffer = fname = NULL;`
			`file = NULL;`

started adding new error infrastructure 2012-03-24 15:58:04 +00:00			`if (!strncmp(argv[optind],"-\0",2)) {`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`file = stdin;`
			`fname = "stdin";`
			`}`
			`else {`
			`file = fopen(argv[optind], "r");`
			`fname = argv[optind];`
			`}`

			`if (!file) {`
new error infrastructure added 2012-12-13 04:53:54 +00:00			`error(MN_ERROR_OPENING_FILE, argv[optind]);`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`return 1;`
			`}`

			`while (!feof(file)) {`
			`size += READSIZE;`
			`buffer = realloc(buffer, sizeof(char) * size);`
Fixed input EOF bug and updated main documentation. 2011-06-29 20:25:56 +00:00			`length += fread((buffer + size) - READSIZE,`
			`1,`
			`READSIZE,`
			`file);`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`}`

Merged SRS and de-linted code 2010-12-21 08:20:31 +00:00			`if (fclose(file) != 0) {`
new error infrastructure added 2012-12-13 04:53:54 +00:00			`error(MN_ERROR_CLOSING_FILE, argv[optind]);`
Merged SRS and de-linted code 2010-12-21 08:20:31 +00:00			`if (buffer) free(buffer);`
			`return 1;`
			`}`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`if (!buffer) return 1;`
			`buffer[length] = '\0';`

			`/* Remove hash bang line if run as a standalone script */`
			`if (buffer[0] == '#' && buffer[1] == '!') {`
			`unsigned int n;`
			`for (n = 0; buffer[n] != '\n' && buffer[n] != '\r'; n++)`
			`buffer[n] = ' ';`
			`}`

Fixed input EOF bug and updated main documentation. 2011-06-29 20:25:56 +00:00			`/*`
			`* Remove UTF-8 BOM if present and add it to the output stream`
			`* (we assume here that if a BOM is present, the system will`
			`* also expect the output to include a BOM).`
			`*/`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00			`if (buffer[0] == (char)0xef`
			`\|\| buffer[1] == (char)0xbb`
			`\|\| buffer[2] == (char)0xbf) {`
			`buffer[0] = ' ';`
			`buffer[1] = ' ';`
			`buffer[2] = ' ';`
			`printf("%c%c%c", 0xef, 0xbb, 0xbf);`
			`}`

added experimental interactive mode 2013-03-10 21:23:53 +00:00			`return pipeline(buffer, length, fname, NULL);`
Fixed string literal parsing 2010-08-27 17:50:13 +00:00
Initial project import 2010-08-09 07:01:59 +00:00			`}`

			`return 0;`
			`}`