Fixed string literal parsing

This commit is contained in:
Justin J. Meza 2010-08-27 10:50:13 -07:00
parent 2e14ac0b3f
commit 3d68f2b31b
2 changed files with 136 additions and 83 deletions

31
lexer.c
View File

@ -259,18 +259,31 @@ LexemeList *scanBuffer(const char *buffer, /**< [in] An array of characters to t
&& *(start + len) != '\r'
&& *(start + len) != '\n'
&& *(start + len) != '"')
|| (*(start + len - 1) == ':'
&& *(start + len - 2) != '"'))
|| (*(start + len) == '"'
&& *(start + len - 1) == ':'
&& *(start + len - 2) != ':'))
len++;
if (*(start + len) == '"') len++;
/* Make sure this is the end of the token */
if (start[len] && !isspace(start[len])
&& *(start + len) != ','
&& *(start + len) != '!'
&& strncmp(start + len, "...", 3)
&& strncmp(start + len, "\xE2\x80\xA6", 3)) {
fprintf(stderr, "%s:%d: expected token delimiter after string literal\n", fname, line);
deleteLexemeList(list);
return NULL;
}
}
else {
/* Scan for the end of the token */
while (start[len] && !isspace(start[len])
&& *(start + len) != ','
&& *(start + len) != '!'
&& strncmp(start + len, "...", 3)
&& strncmp(start + len, "\xE2\x80\xA6", 3))
len++;
}
/* Scan for the end of the token */
while (start[len] && !isspace(start[len])
&& *(start + len) != ','
&& *(start + len) != '!'
&& strncmp(start + len, "...", 3)
&& strncmp(start + len, "\xE2\x80\xA6", 3))
len++;
temp = malloc(sizeof(char) * (len + 1));
if (!temp) {
perror("malloc");

188
main.c
View File

@ -97,6 +97,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#include "lexer.h"
#include "tokenizer.h"
@ -105,22 +106,25 @@
#define READSIZE 512
char *getFileArg(int argc, char **argv, char *vals) {
int n;
for (n = 1; n < argc; n++) {
if (argv[n][0] == '-' && argv[n][1]) {
int c;
for (c = 0; vals[c] != '\0'; c++) {
if (argv[n][1] == vals[c]) {
n++;
break;
}
}
continue;
}
else return argv[n];
}
return NULL;
/* Name used in the usage and version messages; main() assigns argv[0]. */
static char *program_name;

/* Short option letters handed to getopt_long(): -h and -v, no arguments. */
static char *shortopt = "hv";

/*
 * Long option table handed to getopt_long().  Each long option takes no
 * argument and maps to the value of its short-option letter.  The
 * all-zero entry terminates the table.
 */
static struct option longopt[] = {
	{ "help",    no_argument, NULL, 'h' },
	{ "version", no_argument, NULL, 'v' },
	{ NULL,      0,           NULL, 0   }
};
/*
 * Write the usage summary to stderr.  The program name shown in the
 * "Usage:" line is taken from the file-scope program_name variable.
 */
static void help(void)
{
	fprintf(stderr,
	        "Usage: %s [FILE] ... \n"
	        "Interpret FILE(s) as LOLCODE. Let FILE be '-' for stdin.\n"
	        "-h, --help\t\toutput this help\n"
	        "-v, --version\t\tprogram version\n",
	        program_name);
}
/*
 * Write "<program_name> <revision>" followed by a newline to stderr.
 *
 * revision: version string to print after the program name.
 */
static void version(char *revision)
{
	fprintf(stderr,
	        "%s %s\n",
	        program_name,
	        revision);
}
int main(int argc, char **argv)
@ -134,74 +138,110 @@ int main(int argc, char **argv)
MainNode *node = NULL;
char *fname = NULL;
FILE *file = NULL;
int ch;
fname = getFileArg(argc, argv, "");
if (fname == NULL || fname[0] == '-') {
fname = "stdin";
file = stdin;
}
else {
file = fopen(fname, "r");
}
if (!file) {
fprintf(stderr, "File does not exist.\n");
return 1;
}
while (!feof(file)) {
size += READSIZE;
buffer = realloc(buffer, sizeof(char) * size);
length += fread((buffer + size) - READSIZE, 1, READSIZE, file);
}
fclose(file);
if (!buffer) return 1;
buffer[length] = '\0';
char *revision = "v0.9.1";
program_name = argv[0];
/* Remove hash bang line if run as a standalone script */
if (buffer[0] == '#' && buffer[1] == '!') {
unsigned int n;
for (n = 0; buffer[n] != '\n' && buffer[n] != '\r'; n++)
buffer[n] = ' ';
while ((ch = getopt_long(argc, argv, shortopt, longopt, NULL)) != -1) {
switch (ch) {
default:
fprintf (stderr, "Incorrect option '%c'\n", ch);
help();
exit(EXIT_FAILURE);
case 'h':
help();
exit(EXIT_SUCCESS);
break;
case 'v':
version(revision);
exit(EXIT_SUCCESS);
}
}
/* Remove UTF-8 BOM if present and add it to the output stream (we
* assume here that if a BOM is present, the system will also expect
* the output to include a BOM). */
if (buffer[0] == (char)0xef
|| buffer[1] == (char)0xbb
|| buffer[2] == (char)0xbf) {
buffer[0] = ' ';
buffer[1] = ' ';
buffer[2] = ' ';
printf("%c%c%c", 0xef, 0xbb, 0xbf);
}
for (; optind < argc; optind++) {
size = length = 0;
buffer = fname = NULL;
lexemes = NULL;
tokens = NULL;
functab = NULL;
node = NULL;
file = NULL;
/* Begin main pipeline */
if (!(lexemes = scanBuffer(buffer, length, fname))) {
if (!strncmp (argv[optind],"-\0",2)) {
file = stdin;
fname = "stdin";
}
else {
file = fopen(argv[optind], "r");
fname = argv[optind];
}
if (!file) {
fprintf(stderr, "File does not exist.\n");
return 1;
}
while (!feof(file)) {
size += READSIZE;
buffer = realloc(buffer, sizeof(char) * size);
length += fread((buffer + size) - READSIZE, 1, READSIZE, file);
}
fclose(file);
if (!buffer) return 1;
buffer[length] = '\0';
/* Remove hash bang line if run as a standalone script */
if (buffer[0] == '#' && buffer[1] == '!') {
unsigned int n;
for (n = 0; buffer[n] != '\n' && buffer[n] != '\r'; n++)
buffer[n] = ' ';
}
/* Remove UTF-8 BOM if present and add it to the output stream (we
* assume here that if a BOM is present, the system will also expect
* the output to include a BOM). */
if (buffer[0] == (char)0xef
|| buffer[1] == (char)0xbb
|| buffer[2] == (char)0xbf) {
buffer[0] = ' ';
buffer[1] = ' ';
buffer[2] = ' ';
printf("%c%c%c", 0xef, 0xbb, 0xbf);
}
/* Begin main pipeline */
if (!(lexemes = scanBuffer(buffer, length, fname))) {
free(buffer);
return 1;
}
free(buffer);
return 1;
}
free(buffer);
if (!(tokens = tokenizeLexemes(lexemes))) {
if (!(tokens = tokenizeLexemes(lexemes))) {
deleteLexemeList(lexemes);
return 1;
}
deleteLexemeList(lexemes);
return 1;
}
deleteLexemeList(lexemes);
if (!(functab = setupFunctionTable(tokens))) {
deleteFunctionTable(functab);
return 1;
}
if (!(node = parseMainNode(tokens, functab))) {
deleteFunctionTable(functab);
if (!(functab = setupFunctionTable(tokens))) {
deleteFunctionTable(functab);
return 1;
}
if (!(node = parseMainNode(tokens, functab))) {
deleteFunctionTable(functab);
deleteTokens(tokens);
return 1;
}
deleteTokens(tokens);
return 1;
}
deleteTokens(tokens);
if (interpretMainNode(node)) {
if (interpretMainNode(node)) {
deleteMainNode(node);
return 1;
}
deleteMainNode(node);
return 1;
/* End main pipeline */
}
deleteMainNode(node);
/* End main pipeline */
return 0;
}