This commit is contained in:
opFez 2021-07-30 16:55:37 +02:00
commit 937414d915
2 changed files with 251 additions and 0 deletions

5
Makefile Normal file
View File

@ -0,0 +1,5 @@
# Compiler and flags: clang, all common warnings promoted to errors,
# strict C99 conformance, debug information enabled.
CC = clang
CFLAGS = -Wall -Wextra -Werror -std=c99 -pedantic-errors -g
# Build the `parser` executable from parser.c.
# $< expands to the first prerequisite (parser.c), $@ to the target (parser).
parser: parser.c
$(CC) $(CFLAGS) $< -o $@

246
parser.c Normal file
View File

@ -0,0 +1,246 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
/* util */
/*
 * Report a fatal error and terminate.
 *
 * Writes msg followed by a newline to stderr, then exits the process with
 * status 1. This function does not return.
 */
void
die(char *msg)
{
    fputs(msg, stderr);
    fputc('\n', stderr);
    exit(1);
}
/* header/footer */
/*
 * Write the opening HTML boilerplate (doctype, <html> and <body> tags,
 * one per line) to output.
 */
void
write_header(FILE *output)
{
    static const char header[] =
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<body>\n";

    fputs(header, output);
}
/*
 * Write the closing HTML boilerplate to output: a blank line followed by
 * the </body> and </html> tags, one per line.
 */
void
write_footer(FILE *output)
{
    static const char footer[] =
        "\n"
        "</body>\n"
        "</html>\n";

    fputs(footer, output);
}
/* headings */
/*
 * Determine the depth of a heading at the current position of input.
 *
 * The count starts at 1 and grows by one for every '*' read from the
 * stream. The first character that is not '*' (or end of file) stops the
 * count; that character is consumed and not put back.
 *
 * Returns the heading level (>= 1).
 */
int
heading_level(FILE *input)
{
    int level;

    for (level = 1; fgetc(input) == '*'; level++)
        ;
    return level;
}
/*
 * Emit one heading as "<hN>text</hN>".
 *
 * N comes from heading_level(), which consumes the remaining '*' markers
 * and the one character after them. The heading text is everything from
 * there up to (not including) the next newline or end of file.
 *
 * The terminating newline is pushed back onto input so the caller's main
 * loop sees it again. Nothing is pushed back when the heading is ended by
 * end of file.
 */
void
emit_heading(FILE *input, FILE *output)
{
    int level = heading_level(input);
    int c; /* int, not char: must be able to hold EOF */

    fprintf(output, "<h%d>", level);
    while ((c = fgetc(input)) != EOF && c != '\n')
        fputc(c, output);
    fprintf(output, "</h%d>", level);

    /* We want the newline we used to break the loop to be picked up by the
     * formatter again. */
    if (c == '\n')
        ungetc(c, input);
}
/* links */
/*
 * Predicate: does the text at the current position of input start with
 * "http"?
 *
 * Peeks at up to four bytes and then restores the stream position, so the
 * call consumes nothing. The position is moved back by the number of bytes
 * actually read, which keeps it correct when fewer than four bytes remain.
 *
 * Returns true for an external (http...) link target, false otherwise.
 */
bool
external_link_p(FILE *input)
{
    static const char scheme[] = "http";
    const size_t scheme_len = sizeof scheme - 1;
    char buf[sizeof scheme - 1];
    size_t got = fread(buf, 1, scheme_len, input);

    /* Undo exactly what was read; this also clears a pending EOF flag. */
    fseek(input, -(long)got, SEEK_CUR);

    return got == scheme_len && memcmp(buf, scheme, scheme_len) == 0;
}
/*
 * Predicate: is the link at the current position of input an implicit
 * internal link, i.e. "{page}" with no separate label?
 *
 * Scans forward until it meets '}' (implicit link, returns true) or ' '
 * (explicit link with a label, returns false). Hitting end of file first
 * is a fatal error reported through die().
 *
 * Stream position on return: the scan seeks back by the number of bytes
 * read before the terminator, but not over the terminator itself, so the
 * stream ends up exactly one byte past where it started. The internal-link
 * emitters in this file step back one byte before reading to compensate.
 */
bool
internal_implicit_link_p(FILE *input)
{
    long consumed = 0; /* bytes read, not counting the terminator */
    bool implicit = true;

    for (;;) {
        int c = fgetc(input); /* int, not char: must be able to hold EOF */

        if (c == EOF)
            die("End of file encountered while parsing link.");
        if (c == '}')
            break;
        if (c == ' ') {
            implicit = false;
            break;
        }
        consumed++;
    }

    fseek(input, -consumed, SEEK_CUR);
    return implicit;
}
/* Capacity of the scratch buffers for link targets and labels, including
 * the terminating NUL. */
enum { LINK_FIELD_MAX = 1024 };

/*
 * Read one link field from input into buf.
 *
 * Consumes bytes up to and including the first occurrence of delim, or up
 * to end of file, whichever comes first. At most cap - 1 bytes are stored;
 * any excess is read and discarded so the stream stays in sync. buf is
 * always NUL-terminated. cap must be at least 1.
 */
static void
read_link_field(FILE *input, int delim, char *buf, size_t cap)
{
    size_t len = 0;
    int c; /* int, not char: must be able to hold EOF */

    while ((c = fgetc(input)) != EOF && c != delim) {
        if (len + 1 < cap)
            buf[len++] = (char)c;
    }
    buf[len] = '\0';
}

/*
 * Emit an external link.
 *
 * Reads "URL LABEL}" from input (URL ends at the first space, LABEL at the
 * closing brace) and writes <a href="URL">[LABEL]</a> to output. Both
 * fields are written verbatim; no HTML escaping is performed.
 */
void
emit_external_link(FILE *input, FILE *output)
{
    char link[LINK_FIELD_MAX];
    char linkname[LINK_FIELD_MAX];

    read_link_field(input, ' ', link, sizeof link);
    read_link_field(input, '}', linkname, sizeof linkname);

    fprintf(output, "<a href=\"%s\">[%s]</a>", link, linkname);
}

/*
 * Emit an internal link whose label is the page name itself.
 *
 * Steps back one byte first (in this file internal_implicit_link_p()
 * leaves the stream one byte past the start of the link), reads "PAGE}"
 * and writes <a href="PAGE.html">{PAGE}</a> to output.
 */
void
emit_internal_implicit_link(FILE *input, FILE *output)
{
    char page[LINK_FIELD_MAX];

    fseek(input, -1, SEEK_CUR);
    read_link_field(input, '}', page, sizeof page);

    fprintf(output, "<a href=\"%s.html\">{%s}</a>", page, page);
}

/*
 * Emit an internal link with an explicit label.
 *
 * Steps back one byte first (same reason as the implicit variant), reads
 * "PAGE LABEL}" and writes <a href="PAGE.html">{LABEL}</a> to output.
 */
void
emit_internal_explicit_link(FILE *input, FILE *output)
{
    char page[LINK_FIELD_MAX];
    char linkname[LINK_FIELD_MAX];

    fseek(input, -1, SEEK_CUR);
    read_link_field(input, ' ', page, sizeof page);
    read_link_field(input, '}', linkname, sizeof linkname);

    fprintf(output, "<a href=\"%s.html\">{%s}</a>", page, linkname);
}
/*
 * Emit the link that starts at the current position of input.
 *
 * The link kind is probed in a fixed order: external first, then implicit
 * internal, with explicit internal as the fallback. The implicit probe is
 * only run when the external probe fails, because it moves the stream
 * position.
 */
void
emit_link(FILE *input, FILE *output)
{
    if (external_link_p(input)) {
        emit_external_link(input, output);
        return;
    }

    if (internal_implicit_link_p(input)) {
        emit_internal_implicit_link(input, output);
        return;
    }

    emit_internal_explicit_link(input, output);
}
/* collapse paragraphs onto one line */
/*
 * Produce a copy of input in which each paragraph sits on a single line.
 *
 * A newline that is directly followed by a non-newline character is
 * dropped (the two lines are joined with nothing in between); every other
 * byte is copied through. The last byte read is always written; for empty
 * input a single newline is written.
 *
 * The copy is written to an anonymous temporary file obtained from
 * tmpfile(), which is removed automatically when closed. The returned
 * stream is rewound and ready for reading; the caller owns it and must
 * fclose() it. input itself is not closed.
 *
 * Returns NULL if input is NULL, the temporary file cannot be created, or
 * writing to it fails.
 */
FILE *
collapse_paragraphs(FILE *input)
{
    FILE *tmp;
    int c; /* int, not char: must be able to hold EOF */
    int prev = '\n';

    if (input == NULL)
        return NULL;

    tmp = tmpfile();
    if (tmp == NULL)
        return NULL;

    while ((c = fgetc(input)) != EOF) {
        /* Skip prev only when it is a newline immediately followed by text. */
        if (prev != '\n' || c == '\n')
            fputc(prev, tmp);
        prev = c;
    }
    fputc(prev, tmp);

    if (fflush(tmp) == EOF || ferror(tmp)) {
        fclose(tmp);
        return NULL;
    }

    rewind(tmp);
    return tmp;
}
/* main formatter */
/*
 * Translate the markup read from input into HTML written to output.
 *
 * Reads input one character at a time until end of file:
 *   - '*' at the start of a line begins a heading (emit_heading);
 *   - '{' begins a link (emit_link), preceded by "<p>" when it is the
 *     first character of a line;
 *   - any other character is copied through, preceded by "<p>" when it is
 *     the first character of a line.
 *
 * "Start of a line" means the previously read character was a newline;
 * the very first character of the input counts as one.
 */
void
format(FILE *input, FILE *output)
{
    int c; /* int, not char: must be able to hold EOF */
    int lastc = '\n';

    while ((c = fgetc(input)) != EOF) {
        if (c == '*' && lastc == '\n') { /* heading */
            emit_heading(input, output);
        } else {
            if (lastc == '\n') /* new paragraph */
                fputs("<p>", output);

            if (c == '{') /* link */
                emit_link(input, output);
            else
                fputc(c, output);
        }
        lastc = c;
    }
}
/*
 * Copy the part of in that precedes the first occurrence of delim into
 * out, and NUL-terminate it.
 *
 * If delim does not occur in in, the whole string is copied. out must have
 * room for the copied prefix plus the terminating NUL (at most
 * strlen(in) + 1 bytes); the function has no way to check this itself.
 */
void
basename(char *in, char *out, char delim)
{
    size_t i = 0;

    /* Stop at the delimiter or at the end of the string, whichever is first. */
    while (in[i] != '\0' && in[i] != delim) {
        out[i] = in[i];
        i++;
    }
    out[i] = '\0';
}
/* entrypoint */
/*
 * Entry point: convert the markup file named by argv[1] to HTML.
 *
 * The output file name is the input name up to its first '.', with
 * ".html" appended (the whole input name when it contains no '.'). The
 * input is first passed through collapse_paragraphs() and the result is
 * fed to format(). Header and footer emission is currently disabled.
 *
 * Any failure (missing argument, over-long name, file that cannot be
 * opened or written) is reported through die().
 */
int
main(int argc, char *argv[])
{
    /* argv[0] is the program name, so a file argument means argc >= 2. */
    if (argc < 2)
        die("No filename provided, exiting.");

    const char *in_name = argv[1];
    char bn[80] = {0};
    char out_name[sizeof bn + sizeof ".html"] = {0};

    /* bn must be able to hold the longest possible stem plus its NUL. */
    if (strlen(in_name) >= sizeof bn)
        die("Filename too long, exiting.");

    /* Only hand the name to basename() when the delimiter is present;
     * otherwise the whole name is the stem. */
    if (strchr(in_name, '.') != NULL)
        basename(argv[1], bn, '.');
    else
        snprintf(bn, sizeof bn, "%s", in_name);
    snprintf(out_name, sizeof out_name, "%s.html", bn);

    FILE *in_initial = fopen(in_name, "r");
    if (in_initial == NULL)
        die("Could not open input file, exiting.");

    FILE *in_collapsed = collapse_paragraphs(in_initial);
    if (in_collapsed == NULL)
        die("Could not prepare input for parsing, exiting.");

    FILE *out = fopen(out_name, "w");
    if (out == NULL)
        die("Could not open output file, exiting.");

    /* write_header(out); */
    format(in_collapsed, out);
    /* write_footer(out); */

    fclose(in_initial);
    fclose(in_collapsed);
    /* fclose flushes buffered output; a failure here means lost data. */
    if (fclose(out) == EOF)
        die("Could not write output file, exiting.");

    return 0;
}