Fixed string literal parsing

2010-08-27 10:50:13 -07:00 · 2010-08-27 10:50:13 -07:00 · 3d68f2b31b
parent 2e14ac0b3f
commit 3d68f2b31b
2 changed files with 136 additions and 83 deletions
--- a/lexer.c
+++ b/lexer.c
@@ -259,18 +259,31 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
 					&& *(start + len) != '\r'
 					&& *(start + len) != '\n'
 					&& *(start + len) != '"')
-					|| (*(start + len - 1) == ':'
-					&& *(start + len - 2) != '"'))
+					|| (*(start + len) == '"'
+					&& *(start + len - 1) == ':'
+					&& *(start + len - 2) != ':'))
 				len++;
 			if (*(start + len) == '"') len++;
+			/* Make sure this is the end of the token */
+			if (start[len] && !isspace(start[len])
+					&& *(start + len) != ','
+					&& *(start + len) != '!'
+					&& strncmp(start + len, "...", 3)
+					&& strncmp(start + len, "\xE2\x80\xA6", 3)) {
+				fprintf(stderr, "%s:%d: expected token delimiter after string literal\n", fname, line);
+				deleteLexemeList(list);
+				return NULL;
+			}
+		}
+		else {
+			/* Scan for the end of the token */
+			while (start[len] && !isspace(start[len])
+					&& *(start + len) != ','
+					&& *(start + len) != '!'
+					&& strncmp(start + len, "...", 3)
+					&& strncmp(start + len, "\xE2\x80\xA6", 3))
+				len++;
 		}
-		/* Scan for the end of the token */
-		while (start[len] && !isspace(start[len])
-				&& *(start + len) != ','
-				&& *(start + len) != '!'
-				&& strncmp(start + len, "...", 3)
-				&& strncmp(start + len, "\xE2\x80\xA6", 3))
-			len++;
 		temp = malloc(sizeof(char) * (len + 1));
 		if (!temp) {
 			perror("malloc");
--- a/main.c
+++ b/main.c
@@ -97,6 +97,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <getopt.h>

 #include "lexer.h"
 #include "tokenizer.h"
@@ -105,22 +106,25 @@

 #define READSIZE 512

-char *getFileArg(int argc, char **argv, char *vals) {
-	int n;
-	for (n = 1; n < argc; n++) {
-		if (argv[n][0] == '-' && argv[n][1]) {
-			int c;
-			for (c = 0; vals[c] != '\0'; c++) {
-				if (argv[n][1] == vals[c]) {
-					n++;
-					break;
-				}
-			}
-			continue;
-		}
-		else return argv[n];
-	}
-	return NULL;
+static char *program_name;
+
+static char *shortopt = "hv";
+static struct option longopt[] = {
+	{ "help", no_argument, NULL, 'h' },
+	{ "version", no_argument, NULL, 'v' },
+	{ 0, 0, 0, 0 }
+};
+
+static void help(void) {
+	fprintf(stderr, "\
+Usage: %s [FILE] ... \n\
+Interpret FILE(s) as LOLCODE. Let FILE be '-' for stdin.\n\
+  -h, --help\t\toutput this help\n\
+  -v, --version\t\tprogram version\n", program_name);
+}
+
+static void version (char *revision) {
+	fprintf(stderr, "%s %s\n", program_name, revision);
 }

 int main(int argc, char **argv)
@@ -134,74 +138,110 @@ int main(int argc, char **argv)
 	MainNode *node = NULL;
 	char *fname = NULL;
 	FILE *file = NULL;
+	int ch;

-	fname = getFileArg(argc, argv, "");
-	if (fname == NULL || fname[0] == '-') {
-		fname = "stdin";
-		file = stdin;
-	}
-	else {
-		file = fopen(fname, "r");
-	}
-	if (!file) {
-		fprintf(stderr, "File does not exist.\n");
-		return 1;
-	}
-	while (!feof(file)) {
-		size += READSIZE;
-		buffer = realloc(buffer, sizeof(char) * size);
-		length += fread((buffer + size) - READSIZE, 1, READSIZE, file);
-	}
-	fclose(file);
-	if (!buffer) return 1;
-	buffer[length] = '\0';
+	char *revision = "v0.9.1";
+	program_name = argv[0];

-	/* Remove hash bang line if run as a standalone script */
-	if (buffer[0] == '#' && buffer[1] == '!') {
-		unsigned int n;
-		for (n = 0; buffer[n] != '\n' && buffer[n] != '\r'; n++)
-			buffer[n] = ' ';
+	while ((ch = getopt_long(argc, argv, shortopt, longopt, NULL)) != -1) {
+		switch (ch) {
+			default:
+				fprintf (stderr, "Incorrect option '%c'\n", ch);
+				help();
+				exit(EXIT_FAILURE);
+
+			case 'h':
+				help();
+				exit(EXIT_SUCCESS);
+				break;
+
+			case 'v':
+				version(revision);
+				exit(EXIT_SUCCESS);
+		}
 	}

-	/* Remove UTF-8 BOM if present and add it to the output stream (we
-	 * assume here that if a BOM is present, the system will also expect
-	 * the output to include a BOM). */
-	if (buffer[0] == (char)0xef
-			|| buffer[1] == (char)0xbb
-			|| buffer[2] == (char)0xbf) {
-		buffer[0] = ' ';
-		buffer[1] = ' ';
-		buffer[2] = ' ';
-		printf("%c%c%c", 0xef, 0xbb, 0xbf);
-	}
+	for (; optind < argc; optind++) {
+		size = length = 0;
+		buffer = fname = NULL;
+		lexemes = NULL;
+		tokens = NULL;
+		functab = NULL;
+		node = NULL;
+		file = NULL;

-	/* Begin main pipeline */
-	if (!(lexemes = scanBuffer(buffer, length, fname))) {
+		if (!strncmp (argv[optind],"-\0",2)) {
+			file = stdin;
+			fname = "stdin";
+		}
+		else {
+			file = fopen(argv[optind], "r");
+			fname = argv[optind];
+		}
+
+		if (!file) {
+			fprintf(stderr, "File does not exist.\n");
+			return 1;
+		}
+
+		while (!feof(file)) {
+			size += READSIZE;
+			buffer = realloc(buffer, sizeof(char) * size);
+			length += fread((buffer + size) - READSIZE, 1, READSIZE, file);
+		}
+
+		fclose(file);
+		if (!buffer) return 1;
+		buffer[length] = '\0';
+
+		/* Remove hash bang line if run as a standalone script */
+		if (buffer[0] == '#' && buffer[1] == '!') {
+			unsigned int n;
+			for (n = 0; buffer[n] != '\n' && buffer[n] != '\r'; n++)
+				buffer[n] = ' ';
+		}
+
+		/* Remove UTF-8 BOM if present and add it to the output stream (we
+		 * assume here that if a BOM is present, the system will also expect
+		 * the output to include a BOM). */
+		if (buffer[0] == (char)0xef
+				|| buffer[1] == (char)0xbb
+				|| buffer[2] == (char)0xbf) {
+			buffer[0] = ' ';
+			buffer[1] = ' ';
+			buffer[2] = ' ';
+			printf("%c%c%c", 0xef, 0xbb, 0xbf);
+		}
+
+		/* Begin main pipeline */
+		if (!(lexemes = scanBuffer(buffer, length, fname))) {
+			free(buffer);
+			return 1;
+		}
 		free(buffer);
-		return 1;
-	}
-	free(buffer);
-	if (!(tokens = tokenizeLexemes(lexemes))) {
+		if (!(tokens = tokenizeLexemes(lexemes))) {
+			deleteLexemeList(lexemes);
+			return 1;
+		}
 		deleteLexemeList(lexemes);
-		return 1;
-	}
-	deleteLexemeList(lexemes);
-	if (!(functab = setupFunctionTable(tokens))) {
-		deleteFunctionTable(functab);
-		return 1;
-	}
-	if (!(node = parseMainNode(tokens, functab))) {
-		deleteFunctionTable(functab);
+		if (!(functab = setupFunctionTable(tokens))) {
+			deleteFunctionTable(functab);
+			return 1;
+		}
+		if (!(node = parseMainNode(tokens, functab))) {
+			deleteFunctionTable(functab);
+			deleteTokens(tokens);
+			return 1;
+		}
 		deleteTokens(tokens);
-		return 1;
-	}
-	deleteTokens(tokens);
-	if (interpretMainNode(node)) {
+		if (interpretMainNode(node)) {
+			deleteMainNode(node);
+			return 1;
+		}
 		deleteMainNode(node);
-		return 1;
+		/* End main pipeline */
+
 	}
-	deleteMainNode(node);
-	/* End main pipeline */

 	return 0;
 }