254 lines
5.4 KiB
Awk
254 lines
5.4 KiB
Awk
# g2e - an opinionated gempub to epub converter written in awk
# THIS IS CHAOTIC SOFTWARE BEWARE <//xj-ix.luxe/wiki/chaotic-software/>
#
# usage:
#   awk -f g2e.awk example-gpub/
# where example-gpub/ is a directory holding the extracted (uncompressed)
# contents of a gempub file.
# note: the script relies on ARGIND, so `awk` must be GNU awk (gawk).
|
|
|
|
# Setup: read the gempub metadata, queue the index and every file it links to
# as input arguments, prepare the epub output tree and load the templates.
BEGIN {
    gpubdir = ARGV[1]
    # accept the book directory with or without its trailing slash
    if (gpubdir != "" && gpubdir !~ /\/$/) gpubdir = gpubdir "/"
    tdir = "templates/"   # templates dir
    epubodir = "out/"
    metadatafile = gpubdir "metadata.txt"

    # read metadata file fields ("key: value words...", one per line).
    # getline returns -1 on error, so the result is compared against 0:
    # a bare `while (getline < file)` never terminates on an unreadable file.
    while ((rc = (getline < metadatafile)) > 0) {
        key = $1; sub(":", "", key)
        value = $2
        for (i = 3; i <= NF; i++) value = value " " $i

        if (key == "cover") {
            sub(/^.+\//, "", value)   # get basename
        }

        meta[key] = value

        # set index file as an argument to be read
        if (key == "index") {
            ARGV[1] = (gpubdir value)   # overwrite argument 1
            # store index dirname:
            indexdirname = value
            sub(/[^/]+\.gmi$/, "", indexdirname)
        }
    }
    if (rc < 0) print "g2e: cannot read " metadatafile > "/dev/stderr"
    close(metadatafile)

    # read linked files from index and append as arguments to process
    indexfile = gpubdir meta["index"]
    while ((getline < indexfile) > 0) {
        if ($1 ~ "=>") ARGV[ARGC++] = gpubdir indexdirname $2
    }
    close(indexfile)

    # setup epub
    system("mkdir -p " epubodir "META-INF")
    system("mkdir -p " epubodir indexdirname "img")
    system("cp -u " tdir "container.xml " epubodir "META-INF/")
    system("cp -u " tdir "mimetype " epubodir)
    system("cp -u " tdir "style.css " epubodir)

    # read templates: templates[<path relative to tdir>] = whole file contents
    templatefiles = "find " tdir " -type f -not -name '.*'"
    while ((templatefiles | getline tpath) > 0) {
        # strip the directory as a literal prefix rather than as a regex
        tkey = tpath
        if (index(tkey, tdir) == 1) tkey = substr(tkey, length(tdir) + 1)

        RS = "\f"   # to get the whole file in one getline
        getline templates[tkey] < tpath   # read template
        RS = "\n"
        close(tpath)
    }
    close(templatefiles)

    # start writing metadata files
    content = write_template("content-header.opf", meta)
    toc = write_template("toc-header.ncx", meta)
    spinetoc = " <spine toc=\"ncx\">\n"
}
|
|
|
|
# Render templates[templatek], replacing every {key} with values["key"].
#   templatek  key into the global templates[] array
#   values     array mapping placeholder names to replacement text
# Returns the rendered text followed by a newline. A placeholder whose key is
# not in values is replaced by the empty string.
# output, line and key are declared as extra parameters so they are local to
# the function and do not overwrite the globals of the same name used by the
# rules of this script.
function write_template(templatek, values,    output, line, key) {
    output = ""
    line = templates[templatek]
    while (match(line, /\{[^{}]+\}/)) {   # has {key}
        key = substr(line, RSTART + 1, RLENGTH - 2)
        # text before {key}, then the value standing in for it
        output = output substr(line, 1, RSTART - 1) values[key]
        line = substr(line, RSTART + RLENGTH)
    }
    return output line "\n"
}
|
|
|
|
# ----------------
|
|
# index file links
|
|
# ----------------
|
|
|
|
# Each link line of the index ("=> file.gmi Chapter title") becomes a manifest
# item, a spine entry and a toc navPoint.
ARGIND == 1 && /^=>/ {
    id = $2
    sub(/\.gmi$/, "", id)   # escaped dot: strip only a real ".gmi" extension
    name = $3
    for (i = 4; i <= NF; i++) name = name " " $i
    # a link without a label would give an empty navPoint title
    if (name == "") name = id

    ch["id"] = id
    ch["name"] = name
    ch["num"]++
    ch["dir"] = indexdirname
    content = content " <item id=\"" id "\" href=\"" indexdirname id ".xhtml\" media-type=\"application/xhtml+xml\"/>\n"
    spinetoc = spinetoc " <itemref idref=\"" id "\" />\n"
    toc = toc write_template("toc-navpoint.ncx", ch)
    next
}
|
|
|
|
# Any other line of the index file contributes nothing to the book.
ARGIND == 1 { next }
|
|
|
|
# When finished reading the index (first line of the first content file):
# copy the images, add them to the manifest and write content.opf and toc.ncx.
ARGIND == 2 && FNR == 1 {
    # add images to manifest.
    # Reading into a variable leaves $0 (the current content line) untouched,
    # and the explicit "> 0" stops the loop if the command cannot be read.
    imgfind = "find " gpubdir indexdirname "img/ -type f -regextype awk -iregex \".+(png|jpg|gif)$\""
    while ((imgfind | getline path) > 0) {
        # path relative to the book directory (literal prefix, not a regex)
        dest = path
        if (gpubdir != "" && index(dest, gpubdir) == 1) dest = substr(dest, length(gpubdir) + 1)
        system("cp -u " path " " epubodir dest)   # copy images

        # find matches case-insensitively, so compare the extension likewise
        lower = tolower(dest)
        if (lower ~ /gif$/) mediatype = "image/gif"
        else if (lower ~ /jpg$/) mediatype = "image/jpeg"
        else mediatype = "image/png"

        id = dest
        sub(/^.+\//, "", id)   # get basename

        # exact comparison: a regex match against an empty or partial cover
        # name would flag unrelated images (or all of them) as the cover
        properties = ""
        if (meta["cover"] != "" && id == meta["cover"]) {
            properties = "properties=\"cover-image\""
        }
        content = content " <item id=\"" id "\" href=\"" dest "\" media-type=\"" mediatype "\" " properties "/>\n"
    }
    close(imgfind)

    # finalize metadata files
    content = content " </manifest>\n\n" spinetoc " </spine>\n\n</package>"
    print content > (epubodir "content.opf")
    close(epubodir "content.opf")

    toc = toc " </navMap>\n</ncx>"
    print toc > (epubodir "toc.ncx")
    close(epubodir "toc.ncx")
}
|
|
|
|
|
|
# -------------
|
|
# content files
|
|
# -------------
|
|
|
|
# Finish writing the previous content file: close any open list or
# preformatted block, append the document footer and write the accumulated
# text in `out` to `nameout`.
function finishfile() {
    # nothing to write when no content file has been started (for example an
    # index without links); printing to an empty file name is a fatal error
    if (nameout == "") return

    if (is_list) { append("</ul>"); is_list = 0 }
    if (is_pre) { append("</pre>"); is_pre = 0 }
    out = out "\t</main>\n </body>\n</html>"
    print out > nameout
    close(nameout)   # one output file per chapter: do not keep them all open
}
|
|
# Add one line of markup to the document being built in `out`,
# indented with a tab and terminated with a newline.
function append(line) {
    out = out sprintf("\t%s\n", line)
}
|
|
|
|
# Setup the writing for this content file. Only content files reach this
# rule: every line of the index (ARGIND==1) is consumed by the rules above.
FNR == 1 {
    if (ARGIND > 2) finishfile()

    # output name: input basename with its ".gmi" extension removed
    name = FILENAME
    sub(/^.*\//, "", name)
    sub(/\.gmi$/, "", name)
    nameout = epubodir indexdirname name ".xhtml"

    # use first line as document title: drop the leading heading marker and
    # escape the text, since it is inserted into XHTML by the template
    sub(/^#{1,3}[[:blank:]]*/, "")
    sanitize()
    m["title"] = $0
    out = write_template("header.xhtml", m)
    is_pre = 0
    is_list = 0
    next
}
|
|
|
|
# Escape the XML special characters of the current record ($0) in place.
# "&" is handled first so the ampersands introduced by the other two
# replacements are not escaped a second time.
# In a gsub replacement string "\\&" yields a literal "&"; a bare "&" would
# stand for the matched text.
function sanitize() {
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
}
|
|
|
|
# --------------------------
|
|
# gemtext to html conversion
|
|
# --------------------------
|
|
|
|
# A ``` line opens a preformatted block, or closes the one that is open.
/^```/ {
    is_pre = !is_pre
    append(is_pre ? "<pre>" : "</pre>")
    next
}
|
|
|
|
|
|
# Inside a preformatted block every line is copied through, escaped only.
is_pre != 0 {
    sanitize()
    append($0)
    next
}
|
|
|
|
# An empty line produces no output; it only ends a list that is still open.
length($0) == 0 {
    if (is_list) {
        append("</ul>")
        is_list = 0
    }
    next
}
|
|
|
|
# List items: lines starting with "*". The first item opens the <ul>.
/^\*/ {
    sub(/^\*[[:space:]]*/, "")   # drop the bullet marker
    if (!is_list) {
        append("<ul>")
        is_list = 1
    }
    sanitize()
    append(sprintf("<li>%s</li>", $0))
    next
}
|
|
|
|
# Headers: one to three leading "#" select <h1> to <h3>.
# The level is computed and stored before anything else runs, and it is
# capped at 3: the earlier pattern /^#{1,3}+/ stacked two repetition
# operators, which POSIX leaves undefined and which can match more than
# three "#".
/^#/ {
    match($0, /^#+/)
    hlevel = (RLENGTH > 3) ? 3 : RLENGTH
    $0 = substr($0, hlevel + 1)
    sub(/^[[:space:]]+/, "")   # whitespace after the marker is optional
    sanitize()
    append("<h" hlevel ">" $0 "</h" hlevel ">")
    next
}
|
|
|
|
# Quote lines: a leading ">" turns the rest of the line into a blockquote.
/^>/ {
    sub(/^>[[:space:]]*/, "")   # drop the quote marker
    sanitize()
    append(sprintf("<blockquote>%s</blockquote>", $0))
    next
}
|
|
|
|
# Links: "=> target [label...]".
# Targets ending in gif/jpg/png become inline images; anything else becomes a
# paragraph holding an anchor, with a trailing "gmi" rewritten to "xhtml" so
# links between chapters keep working inside the epub.
/^=>/ {
    sub(/^=>[[:space:]]*/, "")
    # escape target and label like every other text line; the double quote is
    # escaped as well because both end up inside attribute values
    sanitize()
    gsub(/"/, "\\&quot;")

    link = $1
    text = $2
    for (i = 3; i <= NF; i++) text = text " " $i

    if (link ~ /(gif|jpg|png)$/) {
        append("<img src=\"" link "\" alt=\"" text "\" />")
    }
    else {
        # an unlabeled link would otherwise render as an empty anchor
        if (text == "") text = link
        sub(/gmi$/, "xhtml", link)
        append("<p><a href=\"" link "\">" text "</a></p>")
    }
    next
}
|
|
|
|
# Raw html: a line starting with "+" is passed through without escaping,
# minus the marker and the whitespace that follows it.
/^\+/ {
    sub(/^\+[[:space:]]*/, "")
    append($0)
    next
}
|
|
|
|
# Paragraphs: whatever no earlier rule consumed is escaped and wrapped in <p>.
1 {
    sanitize()
    append(sprintf("<p>%s</p>", $0))
}
|
|
|
|
# After the last input line, flush the content file still being built.
END { finishfile() }
|