#!/bin/awk -f
#
# by: Josemar Lohn <j@lo.hn>
# lo.hn on www/gemini/gopher
#
# Based on md2html by Jesus Galan (yiyus) 2009
#
#
# Usage: md2gopher.awk file.md > file.txt

# eschtml(t): escaping hook kept from the HTML converter this script is
# based on. The "&" and "<" substitutions are disabled, so the argument
# is handed back untouched.
function eschtml(t)
{
    #gsub("&", "\\&", t);
    #gsub("<", "\\<", t);
    return t
}

# oprint(t): emit one chunk of output.
# While the global counter `nr` is non-zero the chunk is appended to the
# global buffer `otext` (preceded by a newline) instead of being printed;
# otherwise it goes straight to standard output.
function oprint(t)
{
    if (nr != 0) {
        otext = otext "\n" t
        return
    }
    print t
}

# https://unix.stackexchange.com/a/94751
#
# centralize(text, border, margin): pad `text` with spaces on both sides so
# that it is centred inside a field of (col - border - margin) characters,
# where `col` is the global output width.
#   text    string to centre
#   border  number of columns reserved by the caller around the field
#   margin  extra columns added to the left padding only
# Omitted arguments count as 0. Returns the padded string; text that is
# already wider than the field is returned unchanged.
# `room` and `k` are locals (the extra parameters), so calling this
# function no longer overwrites the global variables `L` and `i`.
function centralize(text, border, margin,    room, k)
{
    room = col - border - margin - length(text)
    # Left side gets the rounded-down half plus the margin ...
    for (k = 1; k <= int(room / 2) + margin; k++)
        text = " " text
    # ... right side gets the rounded-up half.
    for (k = 1; k <= int(room / 2 + .5); k++)
        text = text " "
    return text
}

# function from https://unix.stackexchange.com/a/282338
#
# justify(text, margin): spread the words of `text` over a line of
# (col - margin - 1) characters, where `col` is the global output width.
#   text    one line of words separated by FS
#   margin  columns already used to the left of the line
# Every gap gets int(free / gaps) spaces (at least one); the last gap
# receives whatever free space is left. A line with zero or one word is
# returned unchanged.
# The remaining parameters are locals. The words are split into a local
# array instead of assigning to $0: this function is reached from pattern
# rules through printp()/wrap(), and overwriting $0 there destroyed the
# input record that later rules still had to read.
function justify(text, margin, j, nbchar, wreturn, spcpf, spaces, nbspc,    words, nwords, k)
{
    nwords = split(text, words)
    if (nwords <= 1)
        return text

    nbchar = 0
    for (k = 1; k <= nwords; k++)
        nbchar += length(words[k])

    nbspc = (col - margin) - nbchar - 1
    spcpf = int(nbspc / (nwords - 1))

    wreturn = ""
    for (k = 1; k < nwords; k++) {
        wreturn = wreturn words[k]
        # The final gap absorbs all remaining free space.
        spaces = (nwords == 2 || k == nwords - 1) ? nbspc : spcpf
        if (spaces < 1)
            spaces = 1
        for (j = 0; j < spaces; j++)
            wreturn = wreturn " "
        nbspc -= spaces
    }
    return wreturn words[nwords]
}

# subref(id): resolve pending "<<id" placeholders in the buffered output.
# Each successful substitution of "<<" id by ref[id] inside `otext`
# decrements the pending counter `nr`. Once `nr` reaches zero and the
# buffer is non-empty, the buffer is printed and cleared.
function subref(id)
{
    while (nr > 0 && sub("<<" id, ref[id], otext))
        nr--

    if (nr == 0 && otext) {
        print otext
        otext = ""
    }
}

# nextil(t): translate the inline Markdown constructs in `t`.
# Finds the first special token, handles it, and recurses on the rest of
# the string. Returns the translated text.
#
# Shared state (globals, reset by inline()):
#   ilcode, ilcode2  inside a `code` / ``code`` span
#   stag[], ns       stack of currently open emphasis markers
#   ref[], nr        reference table and count of unresolved "<<id" marks
#
# All scratch variables are locals (the extra parameters). The function
# is recursive, and with global scratch variables a nested call (e.g. a
# link inside link text) overwrote the caller's pending remainder.
function nextil(t,    t1, tag, t2, rest, url, linktext, title, alt, id, r, ntag)
{
    if (!match(t, /[`<&\[*_\\-]|(!\[)|(\[\^)/))
        return t;

    t1 = substr(t, 1, RSTART - 1);
    tag = substr(t, RSTART, RLENGTH);
    t2 = substr(t, RSTART + RLENGTH);

    # Inside a code span everything except a backtick is literal.
    if (ilcode && tag != "`")
        return eschtml(t1 tag) nextil(t2);

    # Backslash escaping
    if (tag == "\\") {
        if (match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)) {
            tag = substr(t2, 1, 1);
            t2 = substr(t2, 2);
        }
        return t1 tag nextil(t2);
    }

    # Dashes: "--" becomes an em dash
    if (tag == "-") {
        if (sub(/^-/, "", t2))
            tag = "—";
        return t1 tag nextil(t2);
    }

    # Inline Code
    if (tag == "`") {
        if (sub(/^`/, "", t2)) {
            # Double backtick without a closing pair: emit a quote mark.
            if (!match(t2, /``/))
                return t1 "”" nextil(t2);
            ilcode2 = !ilcode2;
        }
        else if (ilcode2)
            return t1 tag nextil(t2);
        tag = "<code>";
        if (ilcode) {
            t1 = eschtml(t1);
            tag = "</code>";
        }
        ilcode = !ilcode;
        return t1 tag nextil(t2);
    }

    if (tag == "<") {
        # Autolinks
        if (match(t2, /^[^ ]+[\.@][^ ]+>/)) {
            url = eschtml(substr(t2, 1, RLENGTH - 1));
            t2 = substr(t2, RLENGTH + 1);
            linktext = url;
            if (match(url, /@/) && !match(url, /^mailto:/))
                url = "mailto:" url;
            return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2);
        }
        # Html tags
        if (match(t2, /^[A-Za-z\/!][^>]*>/)) {
            tag = tag substr(t2, RSTART, RLENGTH);
            t2 = substr(t2, RLENGTH + 1);
            return t1 tag nextil(t2);
        }
        return t1 "<" nextil(t2);
    }

    # Html special entities
    if (tag == "&") {
        if (match(t2, /^#?[A-Za-z0-9]+;/)) {
            tag = tag substr(t2, RSTART, RLENGTH);
            t2 = substr(t2, RLENGTH + 1);
            return t1 tag nextil(t2);
        }
        return t1 "&" nextil(t2);
    }

    # Images
    if (tag == "![") {
        if (!match(t2, /(\[.*\])|(\(.*\))/))
            return t1 tag nextil(t2);
        match(t2, /^[^\]]*/);
        alt = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        if (match(t2, /^\(/)) {
            # Inline
            sub(/^\(/, "", t2);
            match(t2, /^[^\)]+/);
            url = eschtml(substr(t2, 1, RLENGTH));
            t2 = substr(t2, RLENGTH + 2);
            title = "";
            if (match(url, /[ ]+".*"[ ]*$/)) {
                title = substr(url, RSTART, RLENGTH);
                url = substr(url, 1, RSTART - 1);
                match(title, /".*"/);
                title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
            }
            if (match(url, /^<.*>$/))
                url = substr(url, 2, RLENGTH - 2);
            return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2);
        }
        # Referenced
        sub(/^ ?\[/, "", t2);
        id = alt;
        if (match(t2, /^[^\]]+/))
            id = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        if (ref[id])
            r = ref[id];
        else {
            # Not defined yet: leave a placeholder for subref().
            r = "<<" id;
            nr++;
        }
        return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2);
    }

    # Footnotes
    if (tag == "[^") {
        match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
        linktext = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        return t1 "<sup class=\"fnref\"><a href=\"#fn-" linktext "\" id=\"fnref-" linktext "\">" linktext "</a></sup>" nextil(t2);
    }

    # Links
    if (tag == "[") {
        if (!match(t2, /(\[.*\])|(\(.*\))/))
            return t1 tag nextil(t2);
        match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
        linktext = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        if (match(t2, /^\(/)) {
            # Inline
            match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/);
            url = substr(t2, 2, RLENGTH - 1);
            rest = substr(t2, RLENGTH + 2);
            title = "";
            if (match(url, /[ ]+".*"[ ]*$/)) {
                title = substr(url, RSTART, RLENGTH);
                url = substr(url, 1, RSTART - 1);
                match(title, /".*"/);
                title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
            }
            if (match(url, /^<.*>$/))
                url = substr(url, 2, RLENGTH - 2);
            url = eschtml(url);
            return t1 "<a href=\"" url "\"" title ">" nextil(linktext) "</a>" nextil(rest);
        }
        # Referenced
        sub(/^ ?\[/, "", t2);
        id = linktext;
        if (match(t2, /^[^\]]+/))
            id = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        if (ref[id])
            r = ref[id];
        else {
            # Not defined yet: leave a placeholder for subref().
            r = "<<" id;
            nr++;
        }
        rest = t2;
        return t1 "<a href=\"" r "\" />" nextil(linktext) "</a>" nextil(rest);
    }

    # Emphasis: the markers are tracked on the stag[] stack but nothing is
    # emitted for them, so *text* and **text** come out as plain text.
    if (tag == "*" || tag == "_") {
        ntag = tag;
        if (substr(t2, 1, 1) == tag) {
            t2 = substr(t2, 2);
            # Closing a single marker that is directly followed by more
            # markers: give the second character back to the input.
            if (stag[ns] == tag && substr(t2, 1, 1) == tag)
                t2 = tag t2;
            else
                ntag = tag tag;
        }
        # A marker surrounded by spaces is ordinary text. Emit the marker
        # itself; previously the words "em"/"strong" were emitted here.
        if (t1 ~ / $/ && t2 ~ /^ /)
            return t1 ntag nextil(t2);
        if (stag[ns] == ntag)
            ns--;
        else
            stag[++ns] = ntag;
        return t1 nextil(t2);
    }
}

# inline(t): entry point for inline Markdown translation.
# Clears the emphasis stack depth and both code-span flags, then hands
# the text to nextil() and returns its result.
function inline(t)
{
    ns = 0
    ilcode2 = 0
    ilcode = 0
    return nextil(t)
}

#https://unix.stackexchange.com/a/337656
#
# wrap(t, align): break `t` into lines that fit the global width `col`.
#   t      text to wrap; words are separated by single spaces
#   align  "j" justifies every line except the last via justify(),
#          "c" centres every line via centralize(); a value equal to 0
#          (or unset) joins the wrapped lines without padding
# Each emitted line is prefixed with 5 spaces per level of the global
# `blockquote`. Lines are joined with RS, words with FS.
# Returns the wrapped text (no trailing RS).
#
# All scratch variables are locals (the extra parameters), so a call no
# longer overwrites globals such as `c`. The loop tests for the empty
# string explicitly, so a record that is just "0" is not mistaken for
# "no text".
function wrap(t, align,    out, line, used, brk, pad, indent, k)
{
    indent = blockquote * 5
    pad = ""
    for (k = 0; k < indent; k++)
        pad = " " pad

    out = ""
    line = ""
    used = 0
    while (t != "") {
        # brk: position of the next space, or one past the end of t.
        brk = match(t, / |$/)
        used += brk
        if (used > col - indent) {
            # The next word does not fit: flush the current line.
            if (align != 0) {
                if (align == "c")
                    out = out pad centralize(line, indent) RS
                if (align == "j")
                    out = out pad justify(line, indent) RS
            } else {
                out = out line RS
            }
            used = brk - 1
            line = ""
        } else if (line) {
            line = line FS
        }
        line = line substr(t, 1, brk - 1)
        t = substr(t, brk + 1)
    }

    # Last line: centred when requested, otherwise left as is.
    if (align == "c")
        out = out pad centralize(line, indent)
    else
        out = out pad line
    return out
}

# printp(tag): flush the pending paragraph held in the global `text`.
# Text that is empty or consists only of spaces is discarded. Otherwise
# it is run through inline(); for tag "p" it is additionally wrapped and
# justified before being handed to oprint(). `text` is always cleared.
function printp(tag)
{
    if (match(text, /^[ ]*$/)) {
        text = ""
        return
    }

    text = inline(text)
    if (tag == "p")
        oprint(wrap(text, "j"))
    else
        oprint(text)
    text = ""
}

# Initialise the converter state and build the two header rulers.
BEGIN {
    # Output geometry
    col = 70
    margin = 0

    # Block-level parser flags and counters
    blank = 0
    code = 0
    hr = 0
    html = 0
    nl = 0
    listitem = 0
    par = "p"

    # Pending text and deferred-output buffer
    nr = 0
    text = ""
    otext = ""

    # Rulers: `col` characters of "=" and of "-"
    for (c = 0; c < col; c++)
        lineheader = "=" lineheader
    for (c = 0; c < col; c++)
        lineheadersmall = "-" lineheadersmall
}

# References
#
# Link reference definition:  [^!id]: url "optional title"
# Stores `url` (plus the title, if any) in ref[id], resolves pending
# placeholders for that id and consumes the line.
!code && /^ *\[\^![^\]]*\]:[ ]+/ {
    sub(/^ *\[\^!/, "");
    match($0, /\]/);
    id = substr($0, 1, RSTART - 1);
    sub(id "\\]:[ ]+", "");

    if (match($0, /".*"$/))
        title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2);
    else
        title = "";
    sub(/[ ]+".*"$/, "");

    url = eschtml($0);
    ref[id] = url title;

    subref(id);
    next;
}

# Footnote definition:  [^id]: text
# Stores the text in fnref[id] (any trailing quoted part is dropped),
# calls subref(id) and consumes the line.
!code && /^ *\[\^[^\]]*\]:[ ]+/ {
    sub(/^ *\[\^/, "");
    match($0, /\]/);
    id = substr($0, 1, RSTART - 1);
    sub(id "\\]:[ ]+", "");
    sub(/[ ]+".*"$/, "");

    url = eschtml($0);
    fnref[id] = url;

    subref(id);
    next;
}

# List and quote blocks

# Remove indentation
# For every currently open block, strip one level of its prefix from the
# line: four spaces or a tab for a list, "> " for a blockquote. `nnl`
# ends up as the number of open blocks this line still belongs to.
# NOTE(review): the pasted source showed the indent pattern as two
# identical single-space alternatives, which looks like collapsed
# whitespace; it is restored here to Markdown's "four spaces or one tab".
{
    for (nnl = 0; nnl < nl; nnl++)
        if ((match(block[nnl + 1], /[ou]l/) && !sub(/^(    |\t)/, "")) || \
            (block[nnl + 1] == "blockquote" && !sub(/^> ?/, "")))
            break;
}

# A non-indented line that directly continues pending text and is not a
# new list item stays in the innermost open block.
nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +|\t)/ { nnl = nl; }

# Quote blocks
# Each remaining "> " prefix opens one more blockquote level; the final
# depth is recorded in `blockquote`, which wrap() reads for its margin.
{
    while (sub(/^> /, ""))
        nblock[++nnl] = "blockquote";
    blockquote = nnl;
}

# Horizontal rules
# `hr` is cleared for every line, then set when the line consists of
# three or more "-", "*" or "_" characters (optionally separated by
# spaces, with up to three leading spaces) and either follows a blank
# line or arrives with no pending text outside a code block.
{ hr = 0; }

(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ {
    hr = 1;
    nnl = 0;
    blank = 0;
    if (code) {
        #oprint("</pre></code>");
        code = 0;
    }
}

# List items

# A blank line inside a list is only remembered, not processed further.
block[nl] ~ /[ou]l/ && /^$/ {
    blank = 1;
    next;
}

{ newli = 0; }

# Unordered item: "*", "+" or "-" followed by spaces or a tab.
# NOTE(review): the pasted source showed the separator as "( +| )", whose
# second alternative is redundant; it looks like a collapsed tab and is
# restored to "( +|\t)" here and in the ordered-item rule.
!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +|\t)/ {
    sub(/^ ? ? ?[*+-]( +|\t)/, "");
    nnl++;
    nblock[nnl] = "ul";
    listtype = "ul";
    newli = 1;
}

# Ordered item: one or more "<digits>." groups followed by spaces or a tab.
(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +|\t)/ {
    sub(/^ ? ? ?([0-9]+\.)+( +|\t)/, "");
    nnl++;
    nblock[nnl] = "ol";
    listtype = "ol";
    listitem++
    newli = 1;
}

# New list item: flush the previous item's text and print the marker.
newli {
    if (blank && nnl == nl && !par)
        par = "p";
    blank = 0;
    printp(par);
    # The "- " marker is written here only for a further item of an
    # already open list; the rule that opens new blocks prints it for the
    # first item. (The braces make explicit what the original dangling
    # `if` did after its statement had been commented out.)
    if (nnl == nl && block[nl] == nblock[nl]) {
        #oprint("</li><li>");
        if (listtype == "ul")
            printf "- ";
    }
    # Ordered markers are always written here.
    if (listtype == "ol")
        printf "%s - ", listitem;
}

# First non-blank line after a blank one: close the pending paragraph.
blank && ! /^$/ {
    if (match(block[nnl], /[ou]l/) && !par)
        par = "p";
    printp(par);
    par = "p";
    blank = 0;
}

# Close old blocks and open new ones

# Block structure changed: end any code block, flush pending text and
# choose the paragraph type for the new innermost block ("" inside a
# list, "p" otherwise).
nnl != nl || nblock[nl] != block[nl] {
    if (code) {
        #oprint("</pre></code>");
        code = 0;
    }
    printp(par);
    b = (nnl > nl) ? nblock[nnl] : block[nnl];
    if (b ~ /[ou]l/)
        par = "";
    else
        par = "p";
}

# Leave blocks that this line no longer belongs to. Nothing is printed
# when a block closes in the text output.
# NOTE(review): `pblock` is never assigned in this file, so the second
# test compares block[nl] with an empty string and pops one level more
# than `nnl`; the next rule then reopens it.
nnl < nl || (nnl == nl && nblock[nl] != block[nl]) {
    while (nl > nnl || (nnl == nl && pblock[nl] != block[nl])) {
        #oprint("</li>");
        #oprint("</" block[nl] ">");
        #if(listtype == "ol")
        #    listitem = 0;
        nl--;
    }
}

# Enter newly opened blocks; an unordered list gets its "- " marker here.
nnl > nl {
    while (nl < nnl) {
        block[nl + 1] = nblock[nl + 1];
        #oprint("<" block[nl + 1] ">");
        if (block[nl + 1] ~ /[ou]l/ && listtype == "ul")
            printf "- ";
        #if(listtype == "ol")
        #printf "%s - ", listitem;
        #listitem=0
        nl++;
    }
}

# A horizontal rule is drawn as a full-width line of "=".
hr {
    oprint(lineheader);
    next;
}

# Code blocks
#
# Blank lines inside a code block are counted in `codeblanks` and written
# out only when another code line follows, so blank lines trailing the
# block are not emitted.
# The original tested `blanK`, a variable that is never assigned, so blank
# lines inside code blocks were always dropped. Testing `blank` instead
# would still lose one, because the earlier `blank && ! /^$/` rule clears
# `blank` before a code line reaches this point.
code && /^$/ {
    codeblanks++;
    blank = 1;
    next;
}

# A line indented by four spaces or a tab, arriving with no pending text,
# is code.
# NOTE(review): the pasted source showed the indent pattern as two
# identical single-space alternatives, which looks like collapsed
# whitespace; it is restored here to Markdown's "four spaces or one tab".
!text && sub(/^(    |\t)/, "") {
    # Starting a fresh block: forget blanks counted for an earlier one.
    if (!code)
        codeblanks = 0;
    for (; codeblanks > 0; codeblanks--)
        oprint("");
    blank = 0;
    #if(!code)
    #    oprint("<code><pre>");
    code = 1;
    $0 = eschtml($0);
    oprint($0);
    next;
}

# Any other line ends the code block.
code {
    #oprint("</pre></code>");
    code = 0;
    codeblanks = 0;
}

# Setex-style Headers
# A line of "=" or "-" under pending text prints that text as a header.
text && /^=+$/ { printp("h1"); next; }
text && /^-+$/ { printp("h2"); next; }

# Atx-Style headers
# Strips up to six leading "#" (and one trailing "#" per level), then
# renders by level:
#   1  title centred between two "=" rulers, framed by "=" on both sides
#   2  title wrapped and centred, followed by a "=" ruler
#   3  title centred, followed by a "-" ruler
#   4+ title centred and left in `text` for the normal paragraph flush
# Pending text is flushed with printp() first. Previously it was glued
# onto the header output without being cleared, so the same text was
# printed a second time by the next flush.
/^#+/ && (!newli || par == "p" || /^##/) {
    for (n = 0; n < 6 && sub(/^# */, ""); n++)
        sub(/#$/, "");

    # Keep the title in its own variable: printp() may reach code that
    # assigns to $0.
    heading = $0;
    printp(par);
    par = "h" n;

    if (n == 1) {
        oprint(lineheader "\n=" centralize(heading, 2) "=\n" lineheader "\n")
        next;
    }
    if (n == 2) {
        oprint("\n" wrap(heading, "c") "\n" lineheader "\n")
        next;
    }
    if (n == 3) {
        oprint("\n" centralize(heading) "\n" lineheadersmall "\n")
        next;
    }
    if (n > 3) {
        text = centralize(heading) "\n"
        next;
    }
}

# Paragraph
# An empty line ends the pending paragraph and resets the type to "p".
/^$/ {
    printp(par);
    par = "p";
    next;
}

# Add text
# Any line that reaches this point is appended to the pending paragraph,
# separated from earlier text by one space.
{
    if (text)
        text = text " " $0;
    else
        text = "" $0;
}

# alen(a): number of elements in array `a`.
# `ix` and `k` are locals; callers pass only the array.
function alen(a, ix, k)
{
    k = 0
    for (ix in a)
        k += 1
    return k
}

# End of input: flush what is still pending, print the buffered output
# with unresolved "<<id" placeholders removed, then list the footnotes.
END {
    if (code) {
        # oprint("</pre></code>");
        code = 0;
    }

    # Flush the last paragraph. This call had been commented out, so a
    # document whose final paragraph was not followed by a blank line
    # lost that paragraph.
    printp(par);

    #for(; nl > 0; nl--){
    #    if(match(block[nl], /[ou]l/))
    #        listitem=0
    #    oprint("</li>");
    #    oprint("</" block[nl] ">");
    #}

    # Drop placeholders for references that were never defined.
    gsub(/<<[^"]*/, "", otext);
    print(otext);

    # Print footnotes
    if (alen(fnref) > 0) {
        print "<ul class=\"fn-list\">";
        for (i in fnref) print "<li id=\"fn-" i "\" class=\"fn-item\"><span class=\"fn-handle\">" i ": </span><span class=\"fn-text\">" inline(fnref[i]) " <a href=\"#fnref-" i "\" class=\"fn-backref\">↩︎</a></span></li>";
        print "</ul>";
    }
}