(#6) Start refactoring header logic

REGRESSION: Headers and functions are currently in a BROKEN state, but at least the code base is a little nicer right? :)
2021-02-06 16:18:03 +11:00 · 2021-02-06 16:18:03 +11:00 · 8fbbc8e192
parent 14910e7967
commit 8fbbc8e192
5 changed files with 154 additions and 98 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 a.out
 shmd
 *.swp
+tags
--- a/examples/index.shmd
+++ b/examples/index.shmd
@ -0,0 +1,12 @@
+# Index
+
+Files in examples/
+
+$(
+    for f in examples/*; do
+        echo "* [$(basename $f)]($f)"
+    done;
+)
+
+Pretty cool :)
+
--- a/examples/tutorial.md
+++ b/examples/tutorial.md
@ -0,0 +1,12 @@
+/*
+ * title "SHMD Tutorial"
+ * author "Dylan Lom"
+ * func1() 'echo "hello!"'
+ */
+
+# $(echo "$title")
+
+Embed shell commands with `\$(echo "hi!")`
+
+You can write functions: $(func1 "$(whoami)")
+
--- a/shmd.c
+++ b/shmd.c
@ -2,119 +2,134 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>

 #include "shmd.h"

-const char* argv0;
-/*
- * Not a great name, but this is used for variables that we want avaiable to the
- * shell when we start, ie. the header stuff.
- */
-char* sh_prefix;
+const char *argv0;
+char *sh_prefix;

-struct str_list header_split(char* s) {
-    struct str_list l = STR_LIST_INIT();
-    size_t word_size = 100;
-    char* word = STR_EALLOC(word_size);
+char *header_process_extract_field(FILE *fp)
+{
+    char c;
+    size_t dest_size = 100;
+    char *dest = STR_EALLOC(dest_size);

-    /* Keep track of whether we're inside of a quoted or escaped string */
-    int in_dquote = 0, in_squote = 0, in_escape = 0;
-
-    for ( ; *s != '\0'; s++) {
-        if (*s == ' ' && !in_squote && !in_dquote) {
-            str_list_add(&l, word);
-            word = STR_EALLOC(word_size); /* Allocate memory for next word */
-            continue;
+    bool is_escaped = false,
+         is_dquoted = false,
+         is_squoted = false;
+    while (
+        (c = fgetc(fp)) != '\0' && !(
+            !is_dquoted
+            && !is_squoted
+            && !is_escaped
+            && (c == ' ' || c == '\n')
+        )
+    ) {
+        if (!is_escaped) {
+            if (c == '"' && !is_squoted) {
+                is_dquoted = !is_dquoted;
+                continue;
+            } else if (c == '\'' && !is_dquoted) {
+                is_squoted = !is_squoted;
+                continue;
+            } else if (c == '\\') {
+                is_escaped = true;
+                continue;
+            }
        }
-        else if (*s == '\\' && !in_escape) { in_escape = 1; continue; }
-        else if (*s == '"' && !in_escape && !in_squote) { in_dquote ^= 1; continue; }
-        else if (*s == '\'' && !in_escape && !in_dquote) { in_squote ^= 1; continue; }
-
-        word_size = str_pushc(word, *s, word_size, 100);
-        /* It's just simpler to reset this after every iteration */
-        in_escape = 0;
+        dest_size = str_pushc(dest, c, dest_size, 50);
    }
-
-    if (strlen(word) > 0) str_list_add(&l, word);
-    return l;
+    return dest;
 }

-#define HTML_SPRINTF(l_min, fmt, ...) if (l.size < l_min) return ""; \
-    html = STR_EALLOC(sizeof(fmt) + vals_size + 1); \
-    sprintf(html, fmt, __VA_ARGS__)
-char* header_list_process(struct str_list l) {
-    if (l.size < 1) return "";
-    char** vals = l.values;
-    /* work out the total length of all strings */
-    size_t vals_size = 0;
-    for (int i = 0; i < l.size; i++)
-        vals_size += strlen(vals[i]);
-
-    char* html;
-    /*
-     * TODO: This is gross... I did it like this to make the if ... else
-     * statement readable... need a better abstraction though
-     */
-    /* Known HEAD tags */
-    if (strcmp(vals[0], "charset") == 0) {
-        HTML_SPRINTF(2, "<meta charset=\"%s\">", vals[1]);
-    } else if (strcmp(vals[0], "title") == 0) {
-        HTML_SPRINTF(2, "<title>%s</title>", vals[1]);
-    } else if (strcmp(vals[0], "link") == 0) {
-        HTML_SPRINTF(3, "<link rel=\"%s\" href=\"%s\">", vals[1], vals[2]);
-    } else {
-        HTML_SPRINTF(2, "<meta name=\"%s\" content=\"%s\">", vals[0], vals[1]);
+size_t header_field_count(enum header_field_type type)
+{
+    size_t count = 0;
+    switch (type) {
+    case E_HEADER_FIELD_CHARSET:
+        count = 1;
+        break;
+    case E_HEADER_FIELD_TITLE:
+        count = 1;
+        break;
+    case E_HEADER_FIELD_LINK:
+        count = 2;
+        break;
+    case E_HEADER_FIELD_META:
+        count = 1;
+        break;
    }
-
-    /* Add to sh_prefix so that values are available in the shell environment */
-    char* tmp = sh_prefix;
-    size_t val0_length = strlen(vals[0]) - 1;
-    if (vals[0][val0_length-1] == '(' && vals[0][val0_length] == ')') {
-        /* Function */
-        sh_prefix = str_concat(5, sh_prefix, vals[0], " { ", vals[l.size-1], "; }; ");
-    } else {
-        /* Variable */
-        sh_prefix = str_concat(5, sh_prefix, vals[0], "=\"", vals[l.size-1], "\"; ");
-    }
-    free(tmp);
-
-    return html;
+    return count;
 }
-#undef HTML_SPRINTF

-#define HEADER_ISEND(s) (strlen(s) >= 2 && s[0] == '*' && s[1] == '/')
+char **header_process_fields(FILE *fp, enum header_field_type type)
+{
+    size_t values_count = header_field_count(type);
+    char **values = ecalloc(values_count, sizeof(char*));
+
+    for (int i = 0; i < values_count; i++) {
+        values[i] = header_process_extract_field(fp);
+    }
+
+    return values;
+}
+
+/* Process a header record */
+char *header_process(FILE *fp)
+{
+    size_t name_size = 100;
+    char* name = STR_EALLOC(name_size);
+    char c;
+    while ((c = fgetc(fp)) != '\0' && c != ' ') {
+        name_size = str_pushc(name, c, name_size, 100);
+    }
+
+    enum header_field_type type;
+    if (strcmp(name, "charset") == 0)
+        type = E_HEADER_FIELD_CHARSET;
+    else if (strcmp(name, "title") == 0)
+        type = E_HEADER_FIELD_TITLE;
+    else if (strcmp(name, "link") == 0)
+        type = E_HEADER_FIELD_LINK;
+    else
+        type = E_HEADER_FIELD_META;
+
+    char **values = header_process_fields(fp, type);
+    size_t values_count = header_field_count(type);
+
+    return name;
+}
+
+#define HEADER_ISEND(b, c) (b == '*' && c == '/')
 char* header_substitute(FILE* fp) {
    char* result = str_concat(1, "<head>");

-    size_t line_size = 250;
-    char* line = STR_EALLOC(line_size);
-    char* lp;
-    char* tmp;
-    while (fgets(line, line_size, fp) != NULL) {
-        str_trimr(line, '\n', 1);
-        lp = line;
+    char b = '\0';
+    char c;
+    while ((c = fgetc(fp)) != '\0') {
        /*
         * Non-alphabetical characters (e.g. whitespace, '*') in the header are
         * ignored, advance lp to the start of input, while also making sure we
         * don't reach the end of the line ('\0') or the end of the header
         * section ('* /' without the space).
         */
-        while (!isalpha(*lp) && *lp != '\0' && !HEADER_ISEND(lp)) { lp++; }
-        if (HEADER_ISEND(lp)) break;
-        if (*lp == '\0') continue;
+        while (
+            !isalpha(c)
+            && c != '"'
+            && c != '\''
+            && c != '\0'
+            && !HEADER_ISEND(b, c)
+        ) {
+            b = c;
+            c = fgetc(fp);
+        }
+        if (c == '\0') continue;
+        if (HEADER_ISEND(b, c)) break;

-        struct str_list list = header_split(lp);
-        char* line_html = header_list_process(list);
-        str_list_free(&list);
-
-        /* concat html to result */
-        tmp = result;
-        result = str_concat(3, result, "\n\t", line_html);
-        free(tmp);
+        ungetc(c, fp);
+        header_process(fp);
    }
-    tmp = result;
-    result = str_concat(3, result, "\n", "</head>");
-    free(tmp);
    return result;
 }
 #undef HEADER_ISEND
--- a/shmd.h
+++ b/shmd.h
@ -1,14 +1,30 @@
 #include "util.h"

+
+
 /**
 * #argv0
 * value of argv[0] at the start of execution (ie. the program's name).
 */
-const char* argv0;
+const char *argv0;
+char *sh_prefix;
+
+enum header_field_type
+{
+    E_HEADER_FIELD_CHARSET,
+    E_HEADER_FIELD_TITLE,
+    E_HEADER_FIELD_LINK,
+    E_HEADER_FIELD_META
+};
+/* Read the next field from @fp */
+char *header_process_extract_field(FILE *fp);
+/* Get the number of properties for header type @type */
+size_t header_field_count(enum header_field_type type);
+/* Read space-seperated fields from @fp */
+char **header_process_fields(FILE *fp, enum header_field_type type);
+char *header_process(FILE *fp);
+char *header_substitute(FILE *fp);

-struct str_list header_split        (char* s);
-char*           header_list_process (struct str_list l);
-char*           header_substitute   (FILE* fp);

 /**
 * #command_execute
@ -17,7 +33,7 @@ char*           header_substitute   (FILE* fp);
 * @return:  stdout returned when running @command, with at most one trailing
 *           newline stripped.
 */
-char*           command_execute     (const char* command);
+char *command_execute(const char *command);
 /**
 * #command_substitute
 * Read @fp until the next unmatched `)` character and execute. This process is
@ -26,7 +42,7 @@ char*           command_execute     (const char* command);
 * @fp:     file pointer to read from.
 * @return: the output of running #execute_command on the input read.
 */
-char*           command_substitute  (FILE* fp);
+char *command_substitute  (FILE *fp);

-int             process_input       (FILE* fp);
-int             main                (int argc, char* argv[]);
+int process_input(FILE *fp);
+int main(int argc, char *argv[]);