# g2e/g2e.awk
#
# 254 lines
# 5.4 KiB
# Awk

# g2e - an opinionated gempub to epub converter written in awk
# THIS IS CHAOTIC SOFTWARE BEWARE <//xj-ix.luxe/wiki/chaotic-software/>
# usage:
# awk -f g2e.awk example-gpub/
# where example-gpub/ is a directory containing an uncompressed gempub file
# Startup: read the gempub metadata, queue the index and every file it links
# to as input arguments, prepare the epub output directory and load the
# templates.
#
# Globals set here and used by the rules below:
#   gpubdir, tdir, epubodir  input, templates and output directories
#   meta[]                   key/value pairs read from metadata.txt
#   indexdirname             directory part of the index path (may be empty)
#   templates[]              template file contents keyed by path under tdir
#   content, toc, spinetoc   buffers for content.opf and toc.ncx
#
# NOTE(review): paths are passed to the shell unquoted, so directories that
# contain spaces or shell metacharacters are not supported.
BEGIN {
    gpubdir = ARGV[1]
    tdir = "templates/" # templates dir
    epubodir = "out/"
    metadatafile = gpubdir "metadata.txt"

    # read metadata file fields
    # getline returns -1 when the file cannot be read; comparing against 0
    # keeps an unreadable file from turning this into an endless loop.
    while ((getline < metadatafile) > 0) {
        key = $1; sub(":", "", key)
        value = $2
        for (i = 3; i <= NF; i++) value = value " " $i
        if (key == "cover") {
            sub(/^.+\//, "", value) # get basename
        }
        meta[key] = value
        # set index file as an argument to be read
        if (key == "index") {
            ARGV[1] = (gpubdir value) # overwrite argument 1
            # store index dirname:
            indexdirname = value
            sub(/[^\/]+\.gmi$/, "", indexdirname)
        }
    }
    close(metadatafile)
    if (!("index" in meta)) {
        print "g2e: no index entry could be read from " metadatafile > "/dev/stderr"
    }

    # read linked files from index and append as arguments to process
    indexfile = gpubdir meta["index"]
    while ((getline < indexfile) > 0) {
        if ($1 ~ "=>") ARGV[ARGC++] = gpubdir indexdirname $2
    }
    close(indexfile)

    # setup epub
    system("mkdir -p " epubodir "META-INF")
    system("mkdir -p " epubodir indexdirname "img")
    system("cp -u " tdir "container.xml " epubodir "META-INF/")
    system("cp -u " tdir "mimetype " epubodir)
    system("cp -u " tdir "style.css " epubodir)

    # read templates
    templatefiles = "find " tdir " -type f -not -name '.*'"
    while ((templatefiles | getline tpath) > 0) {
        tkey = tpath; sub(tdir, "", tkey)
        RS = "\f" # to get the whole file in one getline
        getline templates[tkey] < tpath # read template
        close(tpath)
        RS = "\n"
    }
    close(templatefiles)

    # start writing metadata files
    content = write_template("content-header.opf", meta)
    toc = write_template("toc-header.ncx", meta)
    spinetoc = " <spine toc=\"ncx\">\n"
}
# Expand a template: return templates[templatek] with every {key} placeholder
# replaced by values["key"], followed by a newline.
#   templatek  key into the global templates[] array
#   values     array of replacement strings; a missing key expands to ""
# The extra parameters (output, line, key) are awk's way of declaring local
# variables; callers pass only the first two arguments. Keeping them local
# stops the function from overwriting the global `line` and `key` variables
# that the rules of this script also use.
function write_template(templatek, values,    output, line, key) {
    output = ""
    line = templates[templatek]
    while (match(line, /\{[^{}]+\}/)) { # has {key}
        key = substr(line, RSTART + 1, RLENGTH - 2)
        output = output substr(line, 1, RSTART - 1) # before {key}
        # `in` test: a plain values[key] lookup would create the missing
        # entry in the caller's array
        if (key in values) output = output values[key]
        line = substr(line, RSTART + RLENGTH)
    }
    return output line "\n"
}
# ----------------
# index file links
# ----------------
# A link line of the index ("=> file.gmi Chapter title"): register the chapter
# in the manifest (content), the spine (spinetoc) and the table of contents
# (toc).
ARGIND == 1 && /^=>/ {
    id = $2
    sub(/\.gmi$/, "", id) # escaped dot: strip only a real ".gmi" extension
    name = $3
    for (i = 4; i <= NF; i++) name = name " " $i
    # The title is substituted into the toc-navpoint.ncx template, presumably
    # as XML text, so markup characters in it are escaped.
    gsub(/&/, "\\&amp;", name)
    gsub(/</, "\\&lt;", name)
    gsub(/>/, "\\&gt;", name)
    ch["id"] = id
    ch["name"] = name
    ch["num"]++
    ch["dir"] = indexdirname
    content = content " <item id=\"" id "\" href=\"" indexdirname id ".xhtml\" media-type=\"application/xhtml+xml\"/>\n"
    spinetoc = spinetoc " <itemref idref=\"" id "\" />\n"
    toc = toc write_template("toc-navpoint.ncx", ch)
    next
}
# Any index line that reaches this rule is not a link: it produces no output.
(ARGIND == 1) { next }
# when finished reading the index:
# Runs once, on the first record of the first content file, i.e. as soon as
# the whole index has been read: copy the images, add them to the manifest
# and write content.opf and toc.ncx.
ARGIND == 2 && FNR == 1 {
    # add images to manifest
    imgfind = "find " gpubdir indexdirname "img/ -type f -regextype awk -iregex \".+(png|jpg|gif)$\""
    # getline reads into a variable so the current record ($0, the first line
    # of the content file) stays intact for the rules that run after this one.
    while ((imgfind | getline path) > 0) {
        # strip the gempub directory prefix literally, not as a regex
        dest = path
        if (index(path, gpubdir) == 1) dest = substr(path, length(gpubdir) + 1)
        system("cp -u " path " " epubodir dest) # copy images
        # find matches the extension case-insensitively, so do the same here
        ext = tolower(dest)
        if (ext ~ /gif$/) mediatype = "image/gif"
        else if (ext ~ /jpg$/) mediatype = "image/jpeg"
        else mediatype = "image/png"
        id = dest
        sub(/^.+\//, "", id) # get basename
        properties = ""
        # exact comparison: a regex match against an empty or partial cover
        # name would flag every (or the wrong) image as the cover
        if (meta["cover"] != "" && id == meta["cover"]) {
            properties = "properties=\"cover-image\""
        }
        content = content " <item id=\"" id "\" href=\"" dest "\" media-type=\"" mediatype "\" " properties "/>\n"
    }
    close(imgfind)

    # finalize metadata files
    content = content " </manifest>\n\n" spinetoc " </spine>\n\n</package>"
    print content > (epubodir "content.opf")
    close(epubodir "content.opf")
    toc = toc " </navMap>\n</ncx>"
    print toc > (epubodir "toc.ncx")
    close(epubodir "toc.ncx")
}
# -------------
# content files
# -------------
# Finish the content file currently being built: close any open list or
# preformatted block, append the closing tags and write the buffer `out`
# to `nameout`.
function finishfile() {
    if (is_list) { append("</ul>"); is_list = 0 }
    if (is_pre) { append("</pre>"); is_pre = 0 }
    out = out " </body>\n</html>"
    print out > nameout
    # one output file per chapter: close it so file descriptors do not pile up
    close(nameout)
}
# Add one line of markup to the document buffer `out`, indented with a tab
# and terminated with a newline.
function append(line) {
    out = sprintf("%s\t%s\n", out, line)
}
# setup the writing for this content file
# First line of every content file: flush the previous document (if any),
# derive the output file name and start a new buffer from the header template.
# Index lines never reach this rule; the index rules end with `next`.
FNR == 1 {
    if (ARGIND > 2) finishfile()
    # output name: basename of the input file without its ".gmi" extension
    name = FILENAME
    sub(/^.*\//, "", name)
    sub(/\.gmi$/, "", name)
    nameout = epubodir indexdirname name ".xhtml"
    # use first line as document title; only leading heading markers are
    # removed, and markup characters are escaped because the title is
    # substituted into the header.xhtml template
    title = $0
    sub(/^#{1,3}[[:blank:]]*/, "", title)
    gsub(/&/, "\\&amp;", title)
    gsub(/</, "\\&lt;", title)
    gsub(/>/, "\\&gt;", title)
    m["title"] = title
    out = write_template("header.xhtml", m)
    is_pre = 0
    is_list = 0
    next
}
# Escape the markup characters &, < and > in the current record ($0).
# The ampersand is handled first so the entities added for < and > are not
# escaped a second time.
function sanitize() {
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
}
# --------------------------
# gemtext to html conversion
# --------------------------
# pre-formatted
# A ``` line toggles preformatted mode and emits the matching <pre> tag.
/^```/ {
    is_pre = !is_pre
    append(is_pre ? "<pre>" : "</pre>")
    next
}
# Inside a preformatted block every line is copied through, escaped only.
(is_pre) {
    sanitize()
    append($0)
    next
}
# empty lines
# An empty line produces no output; it only ends a list that is still open.
length($0) == 0 {
    if (!is_list) next
    append("</ul>")
    is_list = 0
    next
}
# lists
# A line starting with "*" is a list item; the first item opens the <ul>.
/^\*/ {
    sub(/^\*[[:space:]]*/, "") # drop the bullet marker
    if (is_list == 0) {
        is_list = 1
        append("<ul>")
    }
    sanitize()
    append("<li>" $0 "</li>")
    next
}
# headers
# A line starting with one to three "#" is a heading of that level.
# At most three "#" count as markers (gemtext has three heading levels); any
# further "#" stay in the heading text. Whitespace after the markers is
# optional.
/^#/ {
    match($0, /^#{1,3}/)
    level = RLENGTH # saved right away so later calls cannot disturb it
    sub(/^#{1,3}[[:space:]]*/, "")
    sanitize()
    append("<h" level ">" $0 "</h" level ">")
    next
}
# blockquote
# A line starting with ">" becomes a blockquote.
/^>/ {
    sub(/^>[[:space:]]*/, "") # drop the quote marker
    sanitize()
    quoted = "<blockquote>" $0 "</blockquote>"
    append(quoted)
    next
}
# links?
# A link line ("=> target optional label"):
#   - targets ending in gif/jpg/png become an <img> with the label as alt text
#   - every other target becomes a paragraph holding an <a>; local .gmi
#     targets are pointed at the converted .xhtml file instead
/^=>/ {
    sub(/^=>[[:space:]]*/, "")
    # The target and label end up inside attribute values and element text,
    # so escape markup characters and double quotes before splitting them out.
    sanitize()
    gsub(/"/, "\\&quot;")
    link = $1
    text = $2
    for (i = 3; i <= NF; i++) text = text " " $i

    if (link ~ /(gif|jpg|png)$/) {
        append("<img src=\"" link "\" alt=\"" text "\" />")
        next
    }
    # Only local files were converted; a target with a URL scheme
    # (gemini://, https://, ...) must keep its original address.
    if (link ~ /\.gmi$/ && link !~ /^[A-Za-z][A-Za-z0-9+.-]*:/) {
        sub(/gmi$/, "xhtml", link)
    }
    # the label is optional: fall back to the target so the link stays visible
    if (text == "") text = link
    append("<p><a href=\"" link "\">" text "</a></p>")
    next
}
# raw html +
# A line starting with "+" is raw html: copied to the output without escaping.
/^\+/ {
    sub(/^\+[[:space:]]*/, "") # drop the marker
    append($0)
    next
}
# paragraphs
# Any line no earlier rule consumed is a paragraph.
1 {
    sanitize()
    paragraph = "<p>" $0 "</p>"
    append(paragraph)
}
# After the last input file: write out the document still held in the buffer.
END {
    # nameout is only set once a content file has been opened; without one
    # there is nothing to write and no valid file name to write to
    if (nameout != "") finishfile()
}