#!/bin/awk -f
#
# by: Josemar Lohn
# lo.hn on www/gemini/gopher
#
# Based on md2html by Jesus Galan (yiyus) 2009
#
#
# Usage: md2gopher.awk file.md > file.txt
#        awk -v col=60 -f md2gopher.awk file.md > file.txt
#
# Converts Markdown into fixed-width plain text: paragraphs are wrapped and
# justified to `col` columns, headers are centred and underlined, list items
# get "- " / "N - " prefixes, and inline markup is reduced to its text.
#
# NOTE(review): this listing was reconstructed from a copy whose line breaks
# had been collapsed and whose HTML-looking string contents had been
# stripped.  Spots where the original text could not be recovered are marked
# with NOTE(review) below.
#
# Global state shared by the rules:
#   text       pending paragraph text, flushed by printp()
#   par        tag of the pending paragraph ("p", "" inside lists, "hN")
#   block[]    currently open blocks ("ul", "ol", "blockquote"), depth nl
#   nblock[]   blocks found on the current line, depth nnl
#   blank      a blank line was seen inside a list or code block
#   code       currently inside an indented code block
#   codeblanks blank lines seen inside a code block and not yet printed
#   nr/otext   deferred-output machinery used by oprint() and subref()

# Escaping hook kept from md2html; plain-text output needs no escaping,
# so the text is returned unchanged.
function eschtml(t) {
    return t;
}

# Print t immediately, or append it to otext while nr is non-zero.
function oprint(t) {
    if(nr == 0)
        print t;
    else
        otext = otext "\n" t;
}

# Pad text with spaces on both sides so that it is centred in a line of
# (col - border - margin) columns; text wider than that is returned as is.
# `margin` extra spaces are added on the left.
# https://unix.stackexchange.com/a/94751
function centralize(text, border, margin,    L, i) {
    L = col - border - margin - length(text);
    for(i = 1; i <= (int(L / 2) + margin); i++)
        text = " " text;
    for(i = 1; i <= int(L / 2 + .5); i++)
        text = text " ";
    return text;
}

# Spread the words of text over (col - margin - 1) columns by widening the
# gaps between them; the last gap absorbs the remainder.  Text with fewer
# than two words is returned unchanged.
# Words are split into a local array instead of assigning to $0, so the
# record being processed by the main rules is left untouched.
# function from https://unix.stackexchange.com/a/282338
function justify(text, margin,    i, j, n, words, nbchar, wreturn, spcpf, spaces, nbspc) {
    n = split(text, words);
    if(n <= 1)
        return text;
    nbchar = 0;
    for(i = 1; i <= n; i++)
        nbchar += length(words[i]);
    nbspc = (col - margin) - nbchar - 1;
    spcpf = int(nbspc / (n - 1));
    wreturn = "";
    for(i = 1; i < n; i++) {
        wreturn = wreturn words[i];
        spaces = (n == 2 || i == n - 1) ? nbspc : spcpf;
        if(spaces < 1)
            spaces = 1;
        for(j = 0; j < spaces; j++)
            wreturn = wreturn " ";
        nbspc -= spaces;
    }
    return wreturn words[n];
}

# Replace pending "<<id" placeholders in otext with ref[id] and flush otext
# once no placeholder is outstanding.
function subref(id) {
    for(; nr > 0 && sub("<<" id, ref[id], otext); nr--);
    if(nr == 0 && otext) {
        print otext;
        otext = "";
    }
}

# Process the next inline element of t and recurse on the remainder.
# Working variables are locals so that nested calls cannot overwrite them;
# ilcode, ilcode2, ns and stag[] are the shared state reset by inline().
function nextil(t,    t1, tag, t2, linktext, alt, pt2, ntag) {
    if(!match(t, /[`<&\[*_\\-]|(!\[)|(\[\^)/))
        return t;
    t1 = substr(t, 1, RSTART - 1);
    tag = substr(t, RSTART, RLENGTH);
    t2 = substr(t, RSTART + RLENGTH);
    # Inside inline code only a backtick is special.
    if(ilcode && tag != "`")
        return eschtml(t1 tag) nextil(t2);
    # Backslash escaping
    if(tag == "\\") {
        if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)) {
            tag = substr(t2, 1, 1);
            t2 = substr(t2, 2);
        }
        return t1 tag nextil(t2);
    }
    # Dashes: "--" becomes an em dash
    if(tag == "-") {
        if(sub(/^-/, "", t2))
            tag = "—";
        return t1 tag nextil(t2);
    }
    # Inline Code: the delimiters are dropped, the content is kept verbatim
    if(tag == "`") {
        if(sub(/^`/, "", t2)) {
            if(!match(t2, /``/))
                return t1 "”" nextil(t2);
            ilcode2 = !ilcode2;
        }
        else if(ilcode2)
            return t1 tag nextil(t2);
        if(ilcode)
            t1 = eschtml(t1);
        ilcode = !ilcode;
        return t1 nextil(t2);
    }
    if(tag == "<") {
        # Autolinks: <http://host.tld> or <user@host> -> the bare address
        if(match(t2, /^[^ \t]+[\.@][^ \t]+>/)) {
            linktext = eschtml(substr(t2, 1, RLENGTH - 1));
            t2 = substr(t2, RLENGTH + 1);
            return t1 linktext nextil(t2);
        }
        # Html tags are passed through untouched
        if(match(t2, /^[A-Za-z\/!][^>]*>/)) {
            tag = tag substr(t2, RSTART, RLENGTH);
            t2 = substr(t2, RLENGTH + 1);
            return t1 tag nextil(t2);
        }
        return t1 "<" nextil(t2);
    }
    # Html special entities are passed through untouched
    if(tag == "&") {
        if(match(t2, /^#?[A-Za-z0-9]+;/)) {
            tag = tag substr(t2, RSTART, RLENGTH);
            t2 = substr(t2, RLENGTH + 1);
            return t1 tag nextil(t2);
        }
        return t1 "&" nextil(t2);
    }
    # Images
    if(tag == "![") {
        if(!match(t2, /(\[.*\])|(\(.*\))/))
            return t1 tag nextil(t2);
        match(t2, /^[^\]]*/);
        alt = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        if(match(t2, /^\(/)) {
            # Inline: skip over "(url "title")"
            sub(/^\(/, "", t2);
            match(t2, /^[^\)]+/);
            t2 = substr(t2, RLENGTH + 2);
        }
        else {
            # Referenced: skip over "[id]"
            sub(/^ ?\[/, "", t2);
            match(t2, /^[^\]]+/);
            t2 = substr(t2, RLENGTH + 2);
        }
        # NOTE(review): the original output expression was destroyed in the
        # damaged copy (only a leading quote character survived); emitting
        # the alt text in double quotes is a guess -- confirm.
        return t1 "\"" alt "\"" nextil(t2);
    }
    # Footnotes: "[^id]" is reduced to its id
    if(tag == "[^") {
        match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
        linktext = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        return t1 linktext nextil(t2);
    }
    # Links: only the link text is emitted; the target is skipped.
    # No "<<id" placeholder is written for unresolved references, so nr is
    # not incremented here: doing so would make oprint() buffer all later
    # output until END while list bullets are still written immediately.
    if(tag == "[") {
        if(!match(t2, /(\[.*\])|(\(.*\))/))
            return t1 tag nextil(t2);
        match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
        linktext = substr(t2, 1, RLENGTH);
        t2 = substr(t2, RLENGTH + 2);
        if(match(t2, /^\(/)) {
            # Inline: skip over "(url "title")", allowing nested parentheses
            match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/);
            pt2 = substr(t2, RLENGTH + 2);
        }
        else {
            # Referenced: skip over "[id]"
            sub(/^ ?\[/, "", t2);
            match(t2, /^[^\]]+/);
            pt2 = substr(t2, RLENGTH + 2);
        }
        return t1 nextil(linktext) nextil(pt2);
    }
    # Emphasis: delimiters are removed; stag[] tracks the open ones
    if(match(tag, /[*_]/)) {
        ntag = tag;
        # Compare characters instead of building a "^*" regex, whose
        # meaning differs between awk implementations.
        if(substr(t2, 1, 1) == tag) {
            t2 = substr(t2, 2);
            if(stag[ns] == tag && substr(t2, 1, 1) == tag)
                t2 = tag t2;
            else
                ntag = tag tag;
        }
        # A delimiter with a space on both sides is literal text.
        if(match(t1, / $/) && match(t2, /^ /))
            return t1 ntag nextil(t2);
        if(stag[ns] == ntag)
            ns--;
        else
            stag[++ns] = ntag;
        return t1 nextil(t2);
    }
}

# Reset the inline-parser state and convert the inline markup of t.
function inline(t) {
    ilcode = 0;
    ilcode2 = 0;
    ns = 0;
    return nextil(t);
}

# Break t into lines that fit in col columns, indented by 5 spaces per
# open block level (blockquote).  align "j" justifies every full line,
# "c" centres every line, anything else leaves full lines as they are.
# The last line is never justified.
# https://unix.stackexchange.com/a/337656
function wrap(t, align,    final, z, y, q, c, margin_spaces, localmargin) {
    final = "";
    z = "";
    y = 0;
    margin_spaces = "";
    localmargin = blockquote * 5;
    for(c = 0; c < localmargin; c++)
        margin_spaces = " " margin_spaces;
    # Compare against "" so that a remainder such as "0" is still processed.
    while(t != "") {
        q = match(t, / |$/);   # end of the next word
        y += q;
        if(y > col - localmargin) {
            if(align == "c")
                final = final margin_spaces centralize(z, localmargin) "\n";
            else if(align == "j")
                final = final margin_spaces justify(z, localmargin) "\n";
            else
                final = final z "\n";
            y = q - 1;
            z = "";
        }
        else if(z)
            z = z " ";
        z = z substr(t, 1, q - 1);
        t = substr(t, q + 1);
    }
    if(align == "c")
        final = final margin_spaces centralize(z, localmargin);
    else
        final = final margin_spaces z;
    return final;
}

# Flush the pending paragraph text: "p" paragraphs are wrapped and
# justified, everything else is printed as a single line.
function printp(tag) {
    if(!match(text, /^[ \t]*$/)) {
        text = inline(text);
        if(tag == "p")
            oprint(wrap(text, "j"));
        else
            oprint(text);
    }
    text = "";
}

# Number of elements in array a.
function alen(a,    ix, k) {
    k = 0;
    for(ix in a)
        k++;
    return k;
}

BEGIN {
    blank = 0;
    code = 0;
    codeblanks = 0;
    hr = 0;
    html = 0;
    nl = 0;
    nr = 0;
    margin = 0;
    otext = "";
    text = "";
    par = "p";
    listitem = 0;
    # Line width; defaults to 70, can be overridden with -v col=N.
    col = int(col);
    if(col < 1)
        col = 70;
    lineheader = "";
    lineheadersmall = "";
    for(c = 0; c < col; c++) {
        lineheader = "=" lineheader;
        lineheadersmall = "-" lineheadersmall;
    }
}

# References: "[^!id]: url "title""
!code && /^ *\[\^![^\]]*\]:[ \t]+/ {
    sub(/^ *\[\^!/, "");
    # Cut at the "]:" separator by position, so ids containing regex
    # metacharacters are handled correctly.
    match($0, /\]:[ \t]+/);
    id = substr($0, 1, RSTART - 1);
    $0 = substr($0, RSTART + RLENGTH);
    title = "";
    if(match($0, /".*"$/))
        title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2);
    sub(/[ \t]+".*"$/, "");
    url = eschtml($0);
    ref[id] = url title;
    subref(id);
    next;
}
# Footnote definitions: "[^id]: text"
!code && /^ *\[\^[^\]]*\]:[ \t]+/ {
    sub(/^ *\[\^/, "");
    match($0, /\]:[ \t]+/);
    id = substr($0, 1, RSTART - 1);
    $0 = substr($0, RSTART + RLENGTH);
    sub(/[ \t]+".*"$/, "");
    url = eschtml($0);
    fnref[id] = url;
    subref(id);
    next;
}

# List and quote blocks

#   Remove indentation belonging to the blocks that are already open
{
    for(nnl = 0; nnl < nl; nnl++)
        if((match(block[nnl + 1], /[ou]l/) && !sub(/^(    |\t)/, "")) || \
            (block[nnl + 1] == "blockquote" && !sub(/^> ?/, "")))
            break;
}
# A line that continues the current paragraph stays at the current depth.
nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +|\t)/ {
    nnl = nl;
}
#   Quote blocks
{
    while(sub(/^> /, ""))
        nblock[++nnl] = "blockquote";
    blockquote = nnl;   # read by wrap() to compute the left margin
}
#   Horizontal rules
{
    hr = 0;
}
(blank || (!text && !code)) && /^ ? ? ?([-*_][ \t]*)([-*_][ \t]*)([-*_][ \t]*)+$/ {
    code = 0;
    blank = 0;
    nnl = 0;
    hr = 1;
}
#   List items
block[nl] ~ /[ou]l/ && /^$/ {
    blank = 1;
    next;
}
{
    newli = 0;
}
!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +|\t)/ {
    sub(/^ ? ? ?[*+-]( +|\t)/, "");
    nnl++;
    nblock[nnl] = "ul";
    listtype = "ul";
    newli = 1;
}
(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +|\t)/ {
    sub(/^ ? ? ?([0-9]+\.)+( +|\t)/, "");
    nnl++;
    nblock[nnl] = "ol";
    listtype = "ol";
    listitem++;
    newli = 1;
}
newli {
    if(blank && nnl == nl && !par)
        par = "p";
    blank = 0;
    printp(par);
    # The bullet of the first "ul" item is written when the block is opened
    # (see the nnl > nl rule); only the following items get theirs here.
    if(nnl == nl && block[nl] == nblock[nl] && listtype == "ul")
        printf "- ";
    # Ordered items are always numbered here.
    if(listtype == "ol")
        printf "%s - ", listitem;
}
blank && ! /^$/ {
    if(match(block[nnl], /[ou]l/) && !par)
        par = "p";
    printp(par);
    par = "p";
    blank = 0;
}

# Close old blocks and open new ones
nnl != nl || nblock[nl] != block[nl] {
    code = 0;
    printp(par);
    b = (nnl > nl) ? nblock[nnl] : block[nnl];
    par = (match(b, /[ou]l/)) ? "" : "p";
}
nnl < nl || (nnl == nl && nblock[nl] != block[nl]) {
    # pblock[] is never assigned, so once nl has dropped to nnl the loop
    # closes one more non-empty level; that level is then reopened by the
    # rule below, which is what writes the "- " bullet when returning from a
    # nested list.  Do not "fix" pblock without reworking the bullet output.
    for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--)
        ;   # plain-text output has no closing markup
}
nnl > nl {
    for(; nl < nnl; nl++) {
        block[nl + 1] = nblock[nl + 1];
        if(match(block[nl + 1], /[ou]l/) && listtype == "ul")
            printf "- ";
    }
}
hr {
    oprint(lineheader);
    next;
}

# Code blocks
code && /^$/ {
    # Remember the blank line; it is printed only if more code follows.
    codeblanks++;
    blank = 1;
    next;
}
!text && sub(/^(\t|    )/, "") {
    if(code)
        for(; codeblanks > 0; codeblanks--)
            oprint("");
    codeblanks = 0;
    blank = 0;
    code = 1;
    $0 = eschtml($0);
    oprint($0);
    next;
}
code {
    code = 0;
}

# Setex-style Headers
text && /^=+$/ {
    printp("h1");
    next;
}
text && /^-+$/ {
    printp("h2");
    next;
}

# Atx-Style headers
/^#+/ && (!newli || par == "p" || /^##/) {
    for(n = 0; n < 6 && sub(/^# */, ""); n++)
        sub(/#$/, "");
    # Flush pending text first so it is neither glued to the header nor
    # printed a second time later.
    printp(par);
    par = "h" n;
    if(n == 1) {
        oprint(lineheader "\n=" centralize($0, 2) "=\n" lineheader "\n");
        next;
    }
    if(n == 2) {
        oprint("\n" wrap($0, "c") "\n" lineheader "\n");
        next;
    }
    if(n == 3) {
        oprint("\n" centralize($0) "\n" lineheadersmall "\n");
        next;
    }
    # Levels 4-6 are centred and printed with the next paragraph flush.
    text = centralize($0) "\n";
    next;
}

# Paragraph
/^$/ {
    printp(par);
    par = "p";
    next;
}

# Add text
{
    text = (text ? text " " : "") $0;
}

END {
    code = 0;
    # Flush text still pending when the input does not end in a blank line.
    printp(par);
    gsub(/<<[^"]*/, "", otext);
    print(otext);
    # Print footnotes
    # NOTE(review): only a separator line is printed; the footnote texts
    # collected in fnref[] are not written out in the surviving copy --
    # confirm against the original script.
    if(alen(fnref) > 0)
        print "";
}