codex/format.c

503 lines
9.8 KiB
C

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
/* input/output streams */
/* IN: stream the formatter reads its source text from; main() opens it for
 * reading from the first command-line argument. */
FILE *IN;
/* OUT: stream every emit_* function writes generated output to; main() opens
 * it for writing. */
FILE *OUT;
/* util */
/*
 * Report a fatal error and stop: writes msg followed by a newline to stderr,
 * then terminates the process with exit status 1. Never returns.
 */
void
die(char *msg)
{
    fputs(msg, stderr);
    fputc('\n', stderr);
    exit(1);
}
/* header/footer */
/*
 * Copy the whole contents of the file named by input to OUT, byte for byte.
 * main() uses this for the static page header and footer.
 *
 * If the file cannot be opened, a diagnostic naming it is printed to stderr
 * and the process exits with status 1 instead of dereferencing a NULL stream.
 */
void
write_file(char *input)
{
    FILE *f = fopen(input, "r");
    if (f == NULL) {
        perror(input);
        exit(1);
    }

    /* fgetc() returns an int so that EOF is distinct from every valid byte.
     * Storing it in a char would either stop early on byte 0xFF or, where
     * char is unsigned, never compare equal to EOF at all. */
    int c;
    while ((c = fgetc(f)) != EOF)
        fputc(c, OUT);

    fclose(f);
}
/* remove dangerous characters */
/*
 * Write one character to OUT, replacing the five characters that are special
 * in HTML (< > & ' ") with their entity form. Every other character is written
 * unchanged.
 */
void
emit_sanitized_char(char c)
{
    const char *entity = NULL;

    if (c == '<')
        entity = "&lt;";
    else if (c == '>')
        entity = "&gt;";
    else if (c == '&')
        entity = "&amp;";
    else if (c == '\'')
        entity = "&#39;";
    else if (c == '\"')
        entity = "&quot;";

    if (entity != NULL)
        fputs(entity, OUT);
    else
        fputc(c, OUT);
}
/*
 * Write the NUL-terminated string str to OUT, passing each character through
 * emit_sanitized_char() so HTML-special characters are escaped.
 */
void
emit_sanitized_string(char *str)
{
    for (size_t i = 0; str[i] != '\0'; i++)
        emit_sanitized_char(str[i]);
}
/*
 * Read up to n bytes from IN into buf and NUL-terminate the result.
 *
 * buf must have room for n + 1 bytes. If the stream ends (or a read fails)
 * before n bytes are available, reading stops there and buf holds only the
 * bytes actually read, so a short read yields a shorter string rather than
 * EOF values stored as if they were data.
 */
void
read_n_bytes(size_t n, char *buf)
{
    size_t len = 0;

    while (len < n) {
        int c = fgetc(IN);
        if (c == EOF)
            break;
        buf[len++] = (char)c;
    }
    buf[len] = '\0';
}
/*
 * Remove every trailing occurrence of c from the NUL-terminated string buf,
 * in place, by overwriting those characters with '\0'.
 *
 * Safe for an empty string and for a string made up only of c: the index is
 * checked against zero before it is decremented, so it never wraps around and
 * nothing before buf[0] is touched.
 */
void
rstrip(char *buf, char c)
{
    size_t len = strlen(buf);

    while (len > 0 && buf[len - 1] == c)
        buf[--len] = '\0';
}
/* headings */
/*
 * Work out the heading depth at the current position of IN.
 *
 * The count starts at 1 because format() has already consumed the first '*'
 * before a heading is emitted; each further '*' read from IN adds one. The
 * first character that is not '*' is consumed as well and is not put back.
 */
int
heading_level(void)
{
    int level;

    for (level = 1; fgetc(IN) == '*'; level++)
        ;
    return level;
}
/*
 * Emit a heading element: "<hN>", the sanitized rest of the current input
 * line, then "</hN>", where N comes from heading_level().
 *
 * The heading text ends at the first newline or at end of input. Stopping at
 * EOF matters for a heading on the last line of a file with no trailing
 * newline, which would otherwise never terminate the loop.
 */
void
emit_heading(void)
{
    int level = heading_level();
    fprintf(OUT, "<h%d>", level);

    int c;  /* int, not char, so EOF is distinguishable from data */
    while ((c = fgetc(IN)) != '\n' && c != EOF)
        emit_sanitized_char((char)c);

    fprintf(OUT, "</h%d>", level);

    /* We want the newline we used to break the loop to be picked up by the
     * formatter again. Only step back when a newline was actually read: at
     * EOF nothing was consumed, and seeking back would replay the last byte. */
    if (c == '\n')
        fseek(IN, -1, SEEK_CUR);
}
/* links */
/*
 * Peek at IN and report whether the next four characters are "http", i.e.
 * whether the link being parsed is an external one. The stream position is
 * restored afterwards, so nothing is consumed.
 *
 * The position is saved with ftell() and restored with SEEK_SET rather than
 * seeking back a fixed 4 bytes: fgets() stops early at a newline or at EOF,
 * and a fixed rewind would then land before the point where the peek started.
 */
bool
external_link_p(void)
{
    char buf[5] = {0};
    long start = ftell(IN);

    /* Without a known position the peek could not be undone, so do not read. */
    if (start < 0)
        return false;

    if (fgets(buf, sizeof buf, IN) == NULL)
        buf[0] = '\0';  /* nothing read: make sure buf is an empty string */
    fseek(IN, start, SEEK_SET);

    return strcmp(buf, "http") == 0;
}
/*
 * Look ahead in IN to classify an internal link. Returns true when a '}' is
 * reached before any space (the link has no separate display name), false
 * when a space comes first. Dies if the input ends before either is found.
 *
 * Afterwards the stream is rewound by the number of characters read before
 * the terminating '}' or ' '. The terminator itself is not counted, so the
 * stream ends up one character past where this function started; both
 * emit_internal_*_link() functions compensate by seeking back one more byte.
 */
bool
internal_implicit_link_p(void)
{
    long count = 0;
    bool ret = true;

    for (;;) {
        /* int, not char: a char cannot represent EOF distinctly from data. */
        int c = fgetc(IN);
        if (c == EOF) {
            die("End of file encountered while parsing link.");
        }
        else if (c == '}') {
            break;
        }
        else if (c == ' ') {
            ret = false;
            break;
        }
        count--;
    }
    fseek(IN, count, SEEK_CUR);
    return ret;
}
/*
 * Read a link's display name from IN into out: everything up to, but not
 * including, the closing '}' (which is consumed). Newlines inside the name are
 * replaced by spaces. The result is NUL-terminated.
 *
 * Dies if the input ends before '}' is found, or if the name would not fit in
 * LINKNAME_BUF_SIZE bytes. The limit is fixed at 1024 because that is the size
 * of the buffer every caller in this file passes; out must be at least that
 * large.
 */
void
get_linkname(char *out)
{
    enum { LINKNAME_BUF_SIZE = 1024 };
    size_t len = 0;
    int c;  /* int, not char, so EOF is distinguishable from data */

    while ((c = fgetc(IN)) != '}') {
        if (c == EOF)
            die("End of file encountered while parsing link name.");
        else if (c == '\n')
            c = ' ';
        /* keep one byte free for the terminating NUL */
        if (len >= LINKNAME_BUF_SIZE - 1)
            die("Link name too long.");
        out[len++] = (char)c;
    }
    out[len] = '\0';
}
/*
 * Read a link target from IN into out: an optional leading '{' is skipped,
 * then characters are copied until a space, newline or '}' is read (that
 * terminator is consumed and not stored). The result is NUL-terminated.
 *
 * Dies if the input ends first, or if the target would not fit in
 * LINK_BUF_SIZE bytes. The limit is fixed at 1024 because that is the size of
 * the buffer every caller in this file passes; out must be at least that
 * large.
 */
void
get_link(char *out)
{
    enum { LINK_BUF_SIZE = 1024 };
    size_t len = 0;
    int c = fgetc(IN);  /* int, not char, so EOF is distinguishable */

    /* some links add the initial { for some reason */
    /* Put back anything that is not '{'. At EOF nothing was consumed, so
     * there is nothing to put back. */
    if (c != '{' && c != EOF)
        fseek(IN, -1L, SEEK_CUR);

    while ((c = fgetc(IN)) != ' ' && c != '\n' && c != '}') {
        if (c == EOF)
            die("End of file encountered while parsing link.");
        /* keep one byte free for the terminating NUL */
        if (len >= LINK_BUF_SIZE - 1)
            die("Link target too long.");
        out[len++] = (char)c;
    }
    out[len] = '\0';
}
/*
 * Emit an anchor for an external link. The target is read with get_link() and
 * the display text with get_linkname(), in that order, and the anchor is
 * written to OUT with rel="external".
 *
 * NOTE(review): both strings are written as read, without the escaping that
 * emit_sanitized_string() applies elsewhere - confirm this is intended.
 */
void
emit_external_link(void)
{
    char target[1024];
    char label[1024];

    get_link(target);
    get_linkname(label);
    fprintf(OUT, "<a href=\"%s\" rel=\"external\">%s</a>", target, label);
}
/*
 * Emit an anchor for an internal link that has no separate display name.
 *
 * The stream is first stepped back one byte, then the link text is read with
 * get_link(). The href is that text with ASCII upper-case letters lowered and
 * ".html" appended; the visible text is the same text with every '-' replaced
 * by a space.
 */
void
emit_internal_implicit_link(void)
{
    char href[1024];
    char title[1024];

    fseek(IN, -1, SEEK_CUR);
    get_link(href);
    strcpy(title, href);

    /* Both strings have the same length, so one pass can adjust both. */
    for (size_t i = 0; href[i] != '\0'; i++) {
        if (href[i] >= 'A' && href[i] <= 'Z')
            href[i] = href[i] + 32;
        if (title[i] == '-')
            title[i] = ' ';
    }

    fprintf(OUT, "<a href=\"%s.html\">%s</a>", href, title);
}
/*
 * Return true if link contains a '.' anywhere, which the caller treats as
 * "this target already carries a file extension"; false otherwise.
 */
bool
explicit_link_extension(char *link)
{
    return strchr(link, '.') != NULL;
}
/*
 * Emit an anchor for an internal link that carries its own display name.
 *
 * The stream is first stepped back one byte, then the target is read with
 * get_link() and the display name with get_linkname(). A target without a '.'
 * gets ".html" appended; one that already has a '.' is used as-is.
 */
void
emit_internal_explicit_link(void)
{
    char target[1024];
    char label[1024];

    fseek(IN, -1, SEEK_CUR);
    get_link(target);
    get_linkname(label);

    if (!explicit_link_extension(target))
        fprintf(OUT, "<a href=\"%s.html\">%s</a>", target, label);
    else
        fprintf(OUT, "<a href=\"%s\">%s</a>", target, label);
}
/*
 * Emit the link that starts at the current position of IN, choosing the
 * flavour in this order: external (text starts with "http"), internal
 * implicit (no display name), otherwise internal explicit.
 */
void
emit_link(void)
{
    if (external_link_p()) {
        emit_external_link();
        return;
    }
    if (internal_implicit_link_p()) {
        emit_internal_implicit_link();
        return;
    }
    emit_internal_explicit_link();
}
/* code blocks */
/*
 * Emit a code block whose opening "``" has already been consumed. Input is
 * copied, sanitized, to OUT until the closing "``" is read.
 *
 * firstc is the character that preceded the opening backticks. When both it
 * and the character following the backticks are newlines, the block stands on
 * its own line and is wrapped in <pre>; otherwise it is inline and wrapped in
 * <code>. Dies if the input ends before the closing "``".
 */
void
emit_code_block(char firstc)
{
    /* Peek at the next character. At EOF nothing was consumed, so only step
     * back after a real read. */
    int nextc = fgetc(IN);
    if (nextc != EOF)
        fseek(IN, -1L, SEEK_CUR);

    bool pre = firstc == '\n' && nextc == '\n';
    /* if this code block is on a separate line, create a <pre>. otherwise, it
     * is inlined in a paragraph and we want a <code>. */
    if (pre)
        fprintf(OUT, "<pre>");
    else
        fprintf(OUT, "<code>");

    for (;;) {
        int c = fgetc(IN);  /* int, not char, so EOF is distinguishable */
        if (c == '`') {
            int after = fgetc(IN);
            if (after == '`')
                break;
            /* A lone backtick is ordinary content: put back what followed it.
             * Seeking back after an EOF read would replay the backtick
             * itself, over and over. */
            if (after != EOF)
                fseek(IN, -1L, SEEK_CUR);
        }
        if (c == EOF)
            die("Unexpected EOF while converting code block.");
        emit_sanitized_char((char)c);
    }

    if (pre)
        fprintf(OUT, "</pre>");
    else
        fprintf(OUT, "</code>");
}
/* blockquotes */
/*
 * Emit a <blockquote> for a run of consecutive lines that start with '>'.
 * The first '>' has already been consumed by the caller.
 *
 * Newlines inside the quote are dropped, the '>' that opens each following
 * line becomes a single space, and a space directly after a '>' is skipped.
 * The quote ends at the first line that does not start with '>' (that
 * character is put back for the caller to process) or at end of input.
 */
void
emit_blockquote(void)
{
    fprintf(OUT, "<blockquote>");

    /* ints so EOF is distinguishable from data. lastc starts as an arbitrary
     * character that is neither '\n' nor '>'. */
    int c = fgetc(IN), lastc = 'a';

    for (;;) {
        if (c == EOF)
            break;  /* input ended inside the quote: close it cleanly */
        if (lastc == '\n' && c != '>')
            break;
        else if (lastc == '>' && c == ' ')
            ;
        else if (lastc == '\n' && c == '>')
            fputc(' ', OUT);
        else if (c != '\n')
            emit_sanitized_char((char)c);
        lastc = c;
        c = fgetc(IN);
    }

    /* Return the character that ended the quote to the stream. At EOF nothing
     * was consumed, so there is nothing to return. */
    if (c != EOF)
        fseek(IN, -1L, SEEK_CUR);
    fprintf(OUT, "</blockquote>\n");
}
/*
 * Emit an <img> tag. The image filename is read from IN up to the first
 * whitespace character, which is put back for the caller, and is used as a
 * path under "../resources/img/".
 *
 * Dies if the input ends before any whitespace, or if the filename does not
 * fit the local buffer (79 characters plus the terminating NUL).
 */
void
emit_image(void)
{
    char buf[80];
    size_t len = 0;

    for (;;) {
        /* int, not char: keeps EOF distinct from data and keeps the value in
         * the range isspace() is defined for. */
        int c = fgetc(IN);
        if (c == EOF)
            die("EOF encountered while parsing image filename.");
        else if (isspace(c))
            break;
        else if (len >= sizeof buf - 1)
            die("Image filename too long.");
        else
            buf[len++] = (char)c;
    }
    buf[len] = '\0';

    fprintf(OUT, "<img src=\"../resources/img/%s\" />", buf);
    /* hand the terminating whitespace back to the formatter */
    fseek(IN, -1L, SEEK_CUR);
}
/*
 * Emit an HTML table. The opening "===" line has already been consumed by the
 * caller. Each following input line is one row and '|' separates its cells;
 * trailing spaces of a cell are dropped and the rest is sanitized. A line
 * consisting of "===" ends the table, and its newline is put back for the
 * caller.
 *
 * Cells are streamed straight to OUT instead of being collected in a fixed
 * buffer, so their length is unlimited. Spaces are held back and written only
 * once a later non-space character shows they were not trailing. If the input
 * ends before the closing "===", the table is closed at that point.
 */
void
emit_table(void)
{
    fprintf(OUT, "<table border=\"1\"><tbody>\n");

    for (;;) {
        char marker[5];
        /* Remember where this line starts so the look-ahead can be undone
         * exactly, however many bytes were actually available. */
        long row_start = ftell(IN);

        read_n_bytes(4, marker);
        if (!strcmp("===\n", marker)) {
            fseek(IN, -1L, SEEK_CUR);
            break;
        }
        if (row_start < 0 || fseek(IN, row_start, SEEK_SET) != 0)
            break;  /* cannot rewind: stop rather than misparse */

        int c = fgetc(IN);  /* int, not char, so EOF is distinguishable */
        if (c == EOF)
            break;  /* unterminated table */

        fprintf(OUT, "<tr>\n<td>");
        size_t held_spaces = 0;
        for (; c != '\n' && c != EOF; c = fgetc(IN)) {
            if (c == '|') {
                /* cell boundary: held spaces were trailing, drop them */
                fprintf(OUT, "</td>\n<td>");
                held_spaces = 0;
            }
            else if (c == ' ') {
                held_spaces++;
            }
            else {
                char one[2] = {(char)c, '\0'};
                for (; held_spaces > 0; held_spaces--)
                    fputc(' ', OUT);
                emit_sanitized_string(one);
            }
        }
        fprintf(OUT, "</td>\n</tr>\n");

        if (c == EOF)
            break;
    }

    fprintf(OUT, "</tbody></table>\n");
}
/* main formatter */
/*
 * Main formatter: read IN one character at a time until end of input and
 * write the HTML translation to OUT.
 *
 * Handled constructs:
 *   \x              the next character, sanitized, with no special meaning
 *   * at line start heading
 *   {               link
 *   $img            image; any other '$' is emitted literally
 *   _               toggles <em> / </em>
 *   ``              code block; a single backtick is dropped
 *   > at line start blockquote
 *   === line        table; any other line-leading '=' is emitted literally
 *   blank line      new paragraph ("\n<p>")
 *   newline         a single space
 * Everything else is emitted through emit_sanitized_char().
 *
 * Look-ahead is undone by saving the position with ftell() and restoring it
 * with SEEK_SET. A fixed relative seek overshoots when fewer bytes than
 * requested were available near end of input, which makes the loop re-read
 * earlier input without end.
 */
void
format(void)
{
    /* ints, not chars: fgetc() returns an int so EOF is distinct from every
     * byte value; where char is unsigned a char could never equal EOF. */
    int c, lastc = '\n';
    char *em_tags[] = {"<em>", "</em>"};
    int in_em = 0; /* boolean used as index, therefore it's an int */
    bool last_was_paragraph = false;

    while ((c = fgetc(IN)) != EOF) {
        if (c == '\\') { /* escaped character */
            int escaped = fgetc(IN);
            /* a backslash as the very last byte has nothing to escape */
            if (escaped != EOF)
                emit_sanitized_char((char)escaped);
        }
        else if (c == '*' && lastc == '\n') { /* heading */
            emit_heading();
        }
        else if (c == '{') { /* link */
            if (!last_was_paragraph && lastc == '\n')
                fprintf(OUT, "\n<p>");
            emit_link();
        }
        else if (c == '$') { /* maybe image */
            char buf[5];
            long mark = ftell(IN);
            read_n_bytes(4, buf);
            if (!strcmp(buf, "img ")) {
                emit_image();
            }
            else {
                fseek(IN, mark, SEEK_SET);
                emit_sanitized_char((char)c);
            }
        }
        else if (c == '_') {
            fprintf(OUT, "%s", em_tags[in_em]);
            in_em = !in_em;
        }
        else if (c == '`') { /* code block */
            int next = fgetc(IN);
            if (next == '`')
                emit_code_block((char)lastc);
            else if (next != EOF)  /* nothing was consumed at EOF */
                fseek(IN, -1L, SEEK_CUR);
        }
        else if (c == '>' && lastc == '\n') { /* quote */
            emit_blockquote();
        }
        else if (c == '=' && lastc == '\n') { /* maybe table */
            char buf[4];
            long mark = ftell(IN);
            read_n_bytes(3, buf);
            if (!strcmp(buf, "==\n")) {
                emit_table();
            }
            else {
                fseek(IN, mark, SEEK_SET);
                emit_sanitized_char((char)c);
            }
        }
        else if (lastc == '\n' && c == '\n') { /* new paragraph */
            fprintf(OUT, "\n<p>");
            last_was_paragraph = true;
            lastc = c;
            continue;
        }
        else if (c == '\n') { /* just a new line, same paragraph */
            fputc(' ', OUT);
        }
        else {
            emit_sanitized_char((char)c);
        }
        lastc = c;
        last_was_paragraph = false;
    }
}
/*
 * Copy the leading part of in, up to but not including the first occurrence
 * of delim, into out and NUL-terminate it. If delim does not occur, the whole
 * string is copied.
 *
 * out must have room for the copied prefix plus the terminating NUL; this
 * function has no way to check that.
 */
void
basename(char *in, char *out, char delim)
{
    size_t i = 0;

    /* Stop at the end of the string as well as at delim, so a name without
     * the delimiter does not run past its terminator. */
    for (; in[i] != '\0' && in[i] != delim; i++)
        out[i] = in[i];
    out[i] = '\0';
}
/* entrypoint */
/*
 * Usage: <program> INPUT [OUTPUT]
 *
 * Converts INPUT to HTML. With one argument the output file is named after
 * INPUT: everything before its first '.' plus ".html". With two, OUTPUT is
 * used as given. The result is resources/header.html, then the formatted
 * body, then resources/footer.html.
 *
 * Exits with status 1 and a message on stderr if no input is given, a file
 * cannot be opened, the derived name would not fit, or the final write fails.
 */
int
main(int argc, char *argv[])
{
    if (argc < 2)
        die("No filename provided, exiting.");

    IN = fopen(argv[1], "r");
    if (IN == NULL)
        die("Could not open input file.");

    if (argc == 2) {
        char bn[80] = {0};
        /* basename() cannot bound its writes, so reject names that could
         * not fit in bn before calling it. */
        if (strlen(argv[1]) >= sizeof bn)
            die("Input filename too long.");
        basename(argv[1], bn, '.');
        char out_name[85] = {0};
        snprintf(out_name, sizeof out_name, "%s.html", bn);
        OUT = fopen(out_name, "w");
    }
    else {
        OUT = fopen(argv[2], "w");
    }
    if (OUT == NULL)
        die("Could not open output file.");

    write_file("resources/header.html");
    format();
    write_file("resources/footer.html");

    fclose(IN);
    /* fclose() flushes buffered output, so this is where a failed write
     * finally shows up. */
    if (fclose(OUT) != 0)
        die("Error while writing output file.");
    return 0;
}