/*
 * codex/format.c
 *
 * 409 lines
 * 8.4 KiB
 * C
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
/* util */
/* Print msg followed by a newline on stderr, then terminate the process
 * with exit status 1.  This function does not return. */
void
die(char *msg)
{
    fputs(msg, stderr);
    fputc('\n', stderr);
    exit(1);
}
/* header/footer */
/* Copy the entire contents of the file named by input onto output.
 *
 * output: stream to append to (already open for writing).
 * input:  path of the file to copy; main() uses this for the HTML header
 *         and footer templates.
 *
 * If the file cannot be opened, the reason is reported on stderr and the
 * process exits with status 1. */
void
write_file(FILE *output, char *input)
{
    FILE *f = fopen(input, "r");
    if (f == NULL) {
        perror(input);
        exit(1);
    }

    /* fgetc() returns an int so that EOF is distinct from every valid byte.
     * Storing the result in a char would make a 0xFF byte look like EOF, or
     * make EOF undetectable on platforms where char is unsigned. */
    int c;
    while ((c = fgetc(f)) != EOF)
        fputc(c, output);

    fclose(f);
}
/* escape HTML-special characters */
/* Write c to output, replacing the five HTML-special characters
 * (< > & ' ") with their entity references.  Every other character is
 * written unchanged. */
void
emit_sanitized_char(char c, FILE *output)
{
    const char *entity = NULL;

    if (c == '<')
        entity = "&lt;";
    else if (c == '>')
        entity = "&gt;";
    else if (c == '&')
        entity = "&amp;";
    else if (c == '\'')
        entity = "&#39;";
    else if (c == '\"')
        entity = "&quot;";

    if (entity != NULL)
        fputs(entity, output);
    else
        fputc(c, output);
}
/* headings */
/* Determine the heading depth from a run of '*' characters.
 *
 * format() has already consumed the first '*' of the line, so the count
 * starts at 1 and grows by one for every further '*' read here.  The first
 * character that is not a '*' ends the run and is consumed as well. */
int
heading_level(FILE *input)
{
    int depth;

    for (depth = 1; fgetc(input) == '*'; depth++)
        ;
    return depth;
}
/* Emit one heading line as <hN>...</hN>, where N comes from
 * heading_level().  The heading text runs to the end of the line and is
 * HTML-escaped character by character.
 *
 * A heading on the last line of a file with no trailing newline is ended
 * by EOF instead of looping forever. */
void
emit_heading(FILE *input, FILE *output)
{
    int level = heading_level(input);
    int c; /* int, not char, so EOF is distinguishable from data */

    fprintf(output, "<h%d>", level);
    while ((c = fgetc(input)) != '\n' && c != EOF)
        emit_sanitized_char((char)c, output);
    fprintf(output, "</h%d>", level);

    /* We want the newline we used to break the loop to be picked up by the
     * formatter again.  Only step back if a newline was actually read; at
     * EOF nothing was consumed, so stepping back would replay the last
     * character of the heading. */
    if (c == '\n')
        fseek(input, -1, SEEK_CUR);
}
/* links */
/* Report whether the link body at the current position starts with "http",
 * i.e. is an external link.  This is a pure look-ahead: the stream is
 * returned to the position it had on entry.
 *
 * The rewind distance is the number of bytes actually read.  Rewinding a
 * fixed 4 bytes after a short read (a newline or EOF within the first four
 * bytes) would move the stream to before where it started. */
bool
external_link_p(FILE *input)
{
    char prefix[4];
    size_t got = fread(prefix, 1, sizeof prefix, input);

    /* fseek also clears the end-of-file indicator a short read may set. */
    fseek(input, -(long)got, SEEK_CUR);

    return got == sizeof prefix && memcmp(prefix, "http", sizeof prefix) == 0;
}
/* Look ahead to decide whether an internal link is "implicit", i.e. has no
 * separate display name.  Scans forward until the closing '}' (implicit,
 * returns true) or the first space (explicit, returns false).
 *
 * Stream position on return: the scan rewinds by the number of characters
 * read BEFORE the terminator, but the terminator itself was also consumed,
 * so the stream is left one byte past where it was on entry.  The
 * emit_internal_*_link functions rely on this and step back one byte
 * themselves, so the offset is deliberately kept as is.
 *
 * Terminates the program via die() if EOF is reached first. */
bool
internal_implicit_link_p(FILE *input)
{
    long consumed = 0; /* characters read before the terminator */
    bool implicit = true;

    for (;;) {
        /* int, not char: a char cannot represent EOF reliably (never
         * matches where char is unsigned, collides with 0xFF where it is
         * signed). */
        int c = fgetc(input);

        if (c == EOF)
            die("End of file encountered while parsing link.");
        if (c == '}')
            break;
        if (c == ' ') {
            implicit = false;
            break;
        }
        consumed++;
    }

    fseek(input, -consumed, SEEK_CUR);
    return implicit;
}
/* Read a link's display name from input into out, up to (and consuming)
 * the closing '}'.  Newlines inside the name are replaced by spaces.  The
 * result is NUL-terminated.
 *
 * out: destination buffer.  Every caller in this file passes a 1024-byte
 *      array, so at most LINKNAME_BUF_SIZE bytes (including the
 *      terminator) are written here.  NOTE(review): if a caller with a
 *      smaller buffer is ever added, pass the size explicitly instead.
 *
 * Terminates the program via die() on EOF or when the name does not fit. */
void
get_linkname(FILE *input, char *out)
{
    enum { LINKNAME_BUF_SIZE = 1024 };
    size_t len = 0;
    int c; /* int so EOF can be told apart from data bytes */

    while ((c = fgetc(input)) != '}') {
        if (c == EOF)
            die("End of file encountered while parsing link name.");
        if (c == '\n')
            c = ' ';
        if (len + 1 >= LINKNAME_BUF_SIZE)
            die("Link name too long.");
        out[len++] = (char)c;
    }
    out[len] = '\0';
}
/* Read a link target from input into out.  The target ends at the first
 * space, newline or '}', which is consumed but not stored.  The result is
 * NUL-terminated.
 *
 * out: destination buffer.  Every caller in this file passes a 1024-byte
 *      array, so at most LINK_BUF_SIZE bytes (including the terminator)
 *      are written here.  NOTE(review): if a caller with a smaller buffer
 *      is ever added, pass the size explicitly instead.
 *
 * Terminates the program via die() on EOF or when the target does not
 * fit. */
void
get_link(FILE *input, char *out)
{
    enum { LINK_BUF_SIZE = 1024 };
    size_t len = 0;
    int c = fgetc(input); /* int so EOF can be told apart from data bytes */

    /* some links add the initial { for some reason: skip it if present,
     * otherwise put the character back.  At EOF nothing was consumed, so
     * there is nothing to put back. */
    if (c != '{' && c != EOF)
        fseek(input, -1L, SEEK_CUR);

    while ((c = fgetc(input)) != ' ' && c != '\n' && c != '}') {
        if (c == EOF)
            die("End of file encountered while parsing link.");
        if (len + 1 >= LINK_BUF_SIZE)
            die("Link target too long.");
        out[len++] = (char)c;
    }
    out[len] = '\0';
}
/* Emit an external link of the form {http://target display name} as
 * <a href="target">[display name]</a>.
 *
 * Both the target and the display name come straight from the input
 * document, so they are HTML-escaped on output.  Otherwise a '"' in the
 * URL or a '<' / '&' in the name would break the generated markup.
 *
 * NOTE(review): a link with no display name ({http://target}) makes
 * get_link() consume the closing '}', after which get_linkname() keeps
 * reading up to the next '}' in the document.  Confirm whether bare
 * external links are meant to be supported. */
void
emit_external_link(FILE *input, FILE *output)
{
    char link[1024];
    char linkname[1024];

    get_link(input, link);
    get_linkname(input, linkname);

    fputs("<a href=\"", output);
    for (const char *p = link; *p != '\0'; p++)
        emit_sanitized_char(*p, output);
    fputs("\">[", output);
    for (const char *p = linkname; *p != '\0'; p++)
        emit_sanitized_char(*p, output);
    fputs("]</a>", output);
}
/* Emit an internal link that has no separate display name, e.g.
 * {Some-Page}, as <a href="some-page.html">{Some Page}</a>:
 *   - the href is the page name with ASCII upper-case letters lowered and
 *     ".html" appended;
 *   - the visible text is the page name with '-' replaced by ' '.
 * Both are HTML-escaped since they come from the input document. */
void
emit_internal_implicit_link(FILE *input, FILE *output)
{
    char page[1024];

    /* internal_implicit_link_p() leaves the stream one byte past the start
     * of the link body; step back so get_link() sees the whole name. */
    fseek(input, -1, SEEK_CUR);
    get_link(input, page);

    fputs("<a href=\"", output);
    for (const char *p = page; *p != '\0'; p++) {
        char ch = *p;
        /* ASCII-only lowering, independent of the current locale. */
        if (ch >= 'A' && ch <= 'Z')
            ch = (char)(ch - 'A' + 'a');
        emit_sanitized_char(ch, output);
    }
    fputs(".html\">{", output);
    for (const char *p = page; *p != '\0'; p++)
        emit_sanitized_char(*p == '-' ? ' ' : *p, output);
    fputs("}</a>", output);
}
/* Return true when link contains a '.' anywhere, which the caller treats
 * as "this target already carries a file extension". */
bool
explicit_link_extension(char *link)
{
    return strchr(link, '.') != NULL;
}
/* Emit an internal link with its own display name, e.g.
 * {page display name}, as <a href="page.html">{display name}</a>.
 * ".html" is appended to the target unless it already contains a '.'
 * (see explicit_link_extension()).
 *
 * Target and display name come from the input document and are therefore
 * HTML-escaped on output. */
void
emit_internal_explicit_link(FILE *input, FILE *output)
{
    char page[1024];
    char linkname[1024];

    /* internal_implicit_link_p() leaves the stream one byte past the start
     * of the link body; step back so get_link() sees the whole target. */
    fseek(input, -1, SEEK_CUR);
    get_link(input, page);
    get_linkname(input, linkname);

    fputs("<a href=\"", output);
    for (const char *p = page; *p != '\0'; p++)
        emit_sanitized_char(*p, output);
    if (!explicit_link_extension(page))
        fputs(".html", output);
    fputs("\">{", output);
    for (const char *p = linkname; *p != '\0'; p++)
        emit_sanitized_char(*p, output);
    fputs("}</a>", output);
}
/* Dispatch a link (the opening '{' has already been consumed) to the
 * matching emitter.  The checks run in this order: external ("http"
 * prefix), internal without display name, internal with display name. */
void
emit_link(FILE *input, FILE *output)
{
    if (external_link_p(input)) {
        emit_external_link(input, output);
        return;
    }
    if (internal_implicit_link_p(input)) {
        emit_internal_implicit_link(input, output);
        return;
    }
    emit_internal_explicit_link(input, output);
}
/* code blocks */
/* Emit a code span delimited by double backticks.  The opening pair has
 * already been consumed by the caller; this reads up to and including the
 * closing pair, HTML-escaping the content.
 *
 * firstc: the character that preceded the opening backticks.
 *
 * If the block is on a separate line (preceded AND followed by a newline)
 * it becomes a <pre>; otherwise it is inlined in a paragraph and becomes
 * a <code>.
 *
 * Terminates the program via die() if EOF is reached before the closing
 * backticks. */
void
emit_code_block(char firstc, FILE *input, FILE *output)
{
    /* Peek at the character after the opening backticks.  Only put it back
     * if something was actually read: after EOF the position has not
     * advanced, and stepping back would replay the previous byte. */
    int nextc = fgetc(input);
    if (nextc != EOF)
        fseek(input, -1L, SEEK_CUR);

    bool pre = firstc == '\n' && nextc == '\n';
    const char *tag = pre ? "pre" : "code";

    fprintf(output, "<%s>", tag);
    for (;;) {
        int c = fgetc(input); /* int so EOF is distinguishable */

        if (c == EOF)
            die("Unexpected EOF while converting code block.");
        if (c == '`') {
            int after = fgetc(input);
            if (after == '`')
                break;
            /* Single backtick: it is content.  Put back the look-ahead
             * unless it was EOF (nothing consumed; seeking back would
             * re-read this same backtick forever). */
            if (after != EOF)
                fseek(input, -1L, SEEK_CUR);
        }
        emit_sanitized_char((char)c, output);
    }
    fprintf(output, "</%s>", tag);
}
/* blockquotes */
/* Emit a run of consecutive '>'-prefixed lines as a single
 * <blockquote>...</blockquote>.  The caller has consumed the first '>'.
 *
 * Within the quote:
 *   - a '>' at the start of a continuation line becomes a single space;
 *   - a space directly after any '>' is dropped;
 *   - newlines are dropped;
 *   - everything else is HTML-escaped and copied.
 * The quote ends at the first line that does not start with '>' (that
 * character is handed back to the formatter) or at end of file. */
void
emit_blockquote(FILE *input, FILE *output)
{
    fprintf(output, "<blockquote>");

    /* int, not char, so EOF can be detected; without that check a quote on
     * the last line of a file with no trailing newline never terminates. */
    int c = fgetc(input);
    int lastc = 'a'; /* any value that is neither '\n' nor '>' */

    while (c != EOF) {
        if (lastc == '\n' && c != '>')
            break;
        else if (lastc == '>' && c == ' ')
            ;
        else if (lastc == '\n' && c == '>')
            fputc(' ', output);
        else if (c != '\n')
            emit_sanitized_char((char)c, output);
        lastc = c;
        c = fgetc(input);
    }

    /* Hand the character that ended the quote back to the formatter.  At
     * EOF nothing was consumed, so there is nothing to hand back. */
    if (c != EOF)
        fseek(input, -1L, SEEK_CUR);
    fprintf(output, "</blockquote>\n");
}
/* Emit an image reference.  The caller has consumed "$img "; this reads
 * the filename up to the next whitespace character and writes
 * <img src="../resources/img/FILENAME" />.  The terminating whitespace is
 * handed back to the formatter.
 *
 * The filename is limited to what fits in the local buffer and is
 * HTML-escaped, since it comes from the input document.
 *
 * Terminates the program via die() on EOF or when the filename is too
 * long. */
void
emit_image(FILE *input, FILE *output)
{
    char buf[80];
    size_t len = 0;

    for (;;) {
        /* int: keeps EOF distinguishable and gives isspace() the
         * unsigned-char-or-EOF value it requires. */
        int c = fgetc(input);

        if (c == EOF)
            die("EOF encountered while parsing image filename.");
        if (isspace(c))
            break;
        if (len + 1 >= sizeof buf)
            die("Image filename too long.");
        buf[len++] = (char)c;
    }
    buf[len] = '\0';

    fputs("<img src=\"../resources/img/", output);
    for (const char *p = buf; *p != '\0'; p++)
        emit_sanitized_char(*p, output);
    fputs("\" />", output);

    fseek(input, -1L, SEEK_CUR);
}
/* main formatter */
/* Convert the markup read from input into HTML written to output.
 *
 * Recognised constructs, checked in this order for every character:
 *   \x            the next character, HTML-escaped, with no special meaning
 *   * at line start   heading (see emit_heading)
 *   {             link (see emit_link); opens a <p> if at line start
 *   $img NAME     image (see emit_image); any other '$' is literal
 *   _             toggles <em> / </em>
 *   ``            code span/block (see emit_code_block); a lone backtick
 *                 is consumed and produces no output
 *   > at line start   blockquote (see emit_blockquote)
 *   blank line    newline followed by <p>
 *   newline       a single space
 * Anything else is HTML-escaped and copied.  <p> tags are opened but never
 * closed by this function.
 *
 * All look-ahead reads check for EOF before seeking back: after EOF the
 * stream position has not advanced, so an unconditional seek-back would
 * replay already-processed input and could loop forever. */
void
format(FILE *input, FILE *output)
{
    /* int, not char, so EOF is distinguishable from every data byte */
    int c, lastc = '\n';
    char *em_tags[] = {"<em>", "</em>"};
    int in_em = 0; /* boolean used as index, therefore it's an int */

    while ((c = fgetc(input)) != EOF) {
        if (c == '\\') { /* escaped character */
            int escaped = fgetc(input);
            /* a backslash as the very last byte escapes nothing */
            if (escaped != EOF)
                emit_sanitized_char((char)escaped, output);
        }
        else if (c == '*' && lastc == '\n') { /* heading */
            emit_heading(input, output);
        }
        else if (c == '{') { /* link */
            if (lastc == '\n')
                fprintf(output, "<p>");
            emit_link(input, output);
        }
        else if (c == '$') { /* maybe image */
            char buf[5];
            size_t got = 0;
            while (got < 4) {
                int ch = fgetc(input);
                if (ch == EOF)
                    break;
                buf[got++] = (char)ch;
            }
            buf[got] = '\0';
            if (!strcmp(buf, "img ")) {
                emit_image(input, output);
            }
            else {
                /* Not an image: rewind exactly what was read.  Near the
                 * end of the file that can be fewer than 4 bytes. */
                fseek(input, -(long)got, SEEK_CUR);
                emit_sanitized_char((char)c, output);
            }
        }
        else if (c == '_') {
            fprintf(output, "%s", em_tags[in_em]);
            in_em = !in_em;
        }
        else if (c == '`') { /* code block */
            int next = fgetc(input);
            if (next == '`')
                emit_code_block((char)lastc, input, output);
            else if (next != EOF)
                fseek(input, -1L, SEEK_CUR);
        }
        else if (c == '>' && lastc == '\n') { /* quote */
            emit_blockquote(input, output);
        }
        else if (lastc == '\n' && c == '\n') { /* new paragraph */
            fputc('\n', output);
            fprintf(output, "<p>");
        }
        else if (c == '\n') { /* just a new line, same paragraph */
            fputc(' ', output);
        }
        else {
            emit_sanitized_char((char)c, output);
        }
        lastc = c;
    }
}
/* Copy the part of in that precedes the first occurrence of delim into
 * out and NUL-terminate it.  If delim does not occur, the whole string is
 * copied.
 *
 * out must have room for the copied prefix plus the terminating NUL; the
 * size is not checked here. */
void
basename(char *in, char *out, char delim)
{
    size_t i;

    /* Stop at the string terminator as well as at delim, so an input
     * without delim cannot run past the end of the string. */
    for (i = 0; in[i] != '\0' && in[i] != delim; i++)
        out[i] = in[i];
    out[i] = '\0';
}
/* entrypoint */
/* Usage: PROGRAM INPUT [OUTPUT]
 *
 * Converts INPUT to HTML, wrapped in resources/header.html and
 * resources/footer.html.  When OUTPUT is omitted, the output path is the
 * part of INPUT before its first '.', with ".html" appended.
 *
 * Returns 0 on success; exits with status 1 on a missing argument or when
 * a file cannot be opened or written. */
int
main(int argc, char *argv[])
{
    /* argc counts the program name too, so "no filename" is argc < 2 */
    if (argc < 2)
        die("No filename provided, exiting.");

    FILE *in = fopen(argv[1], "r");
    if (in == NULL) {
        perror(argv[1]);
        return 1;
    }

    char *derived_name = NULL; /* heap copy, only when OUTPUT is omitted */
    char *out_path;
    if (argc == 2) {
        /* Same rule as basename(argv[1], ..., '.'): keep everything before
         * the first '.'.  Sizing the buffer from the input removes the
         * fixed-length limit and also covers names without any '.'. */
        size_t stem_len = strcspn(argv[1], ".");
        derived_name = malloc(stem_len + sizeof ".html");
        if (derived_name == NULL)
            die("Out of memory.");
        memcpy(derived_name, argv[1], stem_len);
        memcpy(derived_name + stem_len, ".html", sizeof ".html");
        /* Opening the input itself for writing would truncate it. */
        if (strcmp(derived_name, argv[1]) == 0)
            die("Input and output are the same file.");
        out_path = derived_name;
    }
    else {
        out_path = argv[2];
    }

    FILE *out = fopen(out_path, "w");
    if (out == NULL) {
        perror(out_path);
        return 1;
    }
    free(derived_name);

    write_file(out, "resources/header.html");
    format(in, out);
    write_file(out, "resources/footer.html");

    fclose(in);
    /* fclose flushes buffered output; a failure here means lost data */
    if (fclose(out) != 0) {
        perror("fclose");
        return 1;
    }
    return 0;
}