# g2e - an opinionated gempub to epub converter written in awk
# THIS IS CHAOTIC SOFTWARE BEWARE
#
# usage:
#   awk -f g2e.awk example-gpub/
# where example-gpub/ is a directory containing an uncompressed gempub file

# Startup: read the gempub metadata, queue the index file and every file it
# links to as input arguments, create the epub output tree, load the
# templates and start the content/toc documents.
BEGIN {
    # without a directory argument ARGV[ARGC++] below would overwrite ARGV[1]
    if (ARGC < 2) {
        print "usage: awk -f g2e.awk example-gpub/" > "/dev/stderr"
        exit 1
    }

    gpubdir = ARGV[1]
    # every path is built by plain concatenation, so make sure the directory
    # ends with a slash even when the caller left it off
    if (gpubdir != "" && gpubdir !~ /\/$/)
        gpubdir = gpubdir "/"

    tdir = "templates/"     # templates dir
    epubodir = "out/"
    metadatafile = gpubdir "metadata.txt"

    # read metadata file fields.
    # getline returns -1 on a read error, so its result has to be compared
    # with 0: a bare `while (getline < file)` loops forever when the file
    # cannot be opened.
    while ((rc = (getline < metadatafile)) > 0) {
        key = $1
        sub(":", "", key)
        value = $2
        for (i = 3; i <= NF; i++)
            value = value " " $i
        if (key == "cover")
            sub(/^.+\//, "", value)     # get basename
        meta[key] = value

        # set index file as an argument to be read
        if (key == "index") {
            ARGV[1] = (gpubdir value)   # overwrite argument 1
            # store index dirname:
            indexdirname = value
            sub(/[^/]+\.gmi$/, "", indexdirname)
        }
    }
    close(metadatafile)
    if (rc < 0) {
        print "g2e: cannot read " metadatafile > "/dev/stderr"
        exit 1
    }

    # read linked files from index and append as arguments to process
    indexfile = gpubdir meta["index"]
    while ((getline < indexfile) > 0) {
        if ($1 ~ "=>")
            ARGV[ARGC++] = gpubdir indexdirname $2
    }
    close(indexfile)

    # setup epub
    system("mkdir -p " epubodir "META-INF")
    # indexdirname comes from the metadata file, so single-quote it for the shell
    imgdir = epubodir indexdirname "img"
    gsub(/'/, "'\\''", imgdir)
    system("mkdir -p '" imgdir "'")
    system("cp -u " tdir "container.xml " epubodir "META-INF/")
    system("cp -u " tdir "mimetype " epubodir)
    system("cp -u " tdir "style.css " epubodir)

    # read templates: templates[<path below tdir>] = whole file contents
    templatefiles = "find " tdir " -type f -not -name '.*'"
    while ((templatefiles | getline tpath) > 0) {
        tkey = tpath
        if (index(tkey, tdir) == 1)
            tkey = substr(tkey, length(tdir) + 1)
        RS = "\f"   # to get the whole file in one getline
        getline templates[tkey] < tpath     # read template
        RS = "\n"
        close(tpath)
    }
    close(templatefiles)

    # start writing metadata files
    content = write_template("content-header.opf", meta)
    toc = write_template("toc-header.ncx", meta)
    spinetoc = " \n"
}

# write the template into output, replacing {keys} with contents of values["keys"]
#
#   templatek  index into the global templates[] array
#   values     array looked up for every {key} found in the template
#
# Returns the expanded text followed by a newline.  A key missing from
# values expands to the empty string.  output, line and key are declared as
# extra parameters so they are local and do not overwrite the globals of the
# same name; callers keep passing two arguments.
function write_template(templatek, values,    output, line, key) {
    output = ""
    line = templates[templatek]
    while (match(line, /\{[^{}]+\}/)) {     # has {key}
        key = substr(line, RSTART + 1, RLENGTH - 2)
        # text before {key}, then its value
        output = output substr(line, 1, RSTART - 1) values[key]
        line = substr(line, RSTART + RLENGTH)
    }
    return output line "\n"
}
#
# ----------------
# index file links
# ----------------

# A link line of the index file: record the chapter in ch[] and extend the
# manifest, spine and toc strings.
# NOTE(review): the strings appended to content and spinetoc hold only
# whitespace and do not use id/name — markup seems to be missing; confirm.
ARGIND == 1 && /^=>/ {
    id = $2
    sub(/\.gmi$/, "", id)   # dot escaped so only a real ".gmi" suffix is removed

    name = $3
    for (i = 4; i <= NF; i++)
        name = name " " $i

    ch["id"] = id
    ch["name"] = name
    ch["num"]++
    ch["dir"] = indexdirname

    content = content " \n"
    spinetoc = spinetoc " \n"
    toc = toc write_template("toc-navpoint.ncx", ch)
    next
}

ARGIND == 1 {   # skip other lines of the index
    next
}

# when finished reading the index:
# runs once, on the first line of the second input file; it does not end
# with `next`, so that line still reaches the per-file setup rule below.
ARGIND == 2 && FNR == 1 {
    # add images to manifest
    # Reading into a variable leaves $0 untouched, so the current record
    # does not have to be saved and restored around the loop.
    imgfind = "find " gpubdir indexdirname "img/ -type f -regextype awk -iregex \".+(png|jpg|gif)$\""
    while ((imgfind | getline path) > 0) {
        # path relative to the gempub directory
        dest = path
        if (gpubdir != "" && index(dest, gpubdir) == 1)
            dest = substr(dest, length(gpubdir) + 1)
        system("cp -u " path " " epubodir dest)     # copy images

        # find matches case-insensitively, so compare the extension the same way
        lower = tolower(dest)
        if (lower ~ /gif$/)
            mediatype = "image/gif"
        else if (lower ~ /jpg$/)
            mediatype = "image/jpeg"
        else
            mediatype = "image/png"

        id = dest
        sub(/^.+\//, "", id)    # get basename

        # exact comparison: a regex match would flag every image when no
        # cover is set, and any image whose name merely contains the cover name
        properties = ""
        if (meta["cover"] != "" && id == meta["cover"]) {
            properties = "properties=\"cover-image\""
        }
        # NOTE(review): id, mediatype and properties are not used in the
        # appended string — manifest markup seems to be missing; confirm.
        content = content " \n"
    }
    close(imgfind)

    # finalize metadata files
    content = content " \n\n" spinetoc " \n\n"
    print content > (epubodir "content.opf")
    toc = toc " \n"
    print toc > (epubodir "toc.ncx")
}

# -------------
# content files
# -------------

# finish writing the previous content file: append the trailer to the
# accumulated text and write it to nameout
function finishfile() {
    out = out "\t\n \n"
    print out > nameout
}

# add one line to the current document, indented with a tab
function append(line) {
    out = out "\t" line "\n"
}

# setup the writing for this content file
FNR == 1 {
    if (ARGIND > 2)
        finishfile()

    # output name: input basename without ".gmi", placed under the index dir
    id = FILENAME
    name = id
    sub(/^.*\//, "", name)
    sub(/\.gmi$/, "", name)
    nameout = epubodir indexdirname name ".xhtml"

    # use first line as document title (leading heading marker removed);
    # escape it because it is inserted into the XHTML header template
    sub(/^#{1,3}[[:blank:]]+/, "", $0)
    sanitize()
    m["title"] = $0
    out = write_template("header.xhtml", m)

    is_pre = 0
    is_list = 0
    test = "hola"   # NOTE(review): nothing in view reads this; looks like a debugging leftover
    next
}

# escape the XML special characters of the current record ($0) in place.
# "\\&" yields a literal ampersand in a gsub replacement; "&" is handled
# first so the entities produced for "<" and ">" are not escaped again.
function sanitize() {
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
}

# --------------------------
# gemtext to html conversion
# --------------------------

# pre-formatted: a ``` line toggles the mode
# NOTE(review): both branches append the same newline-only text, which was
# split across physical lines (an unterminated string in awk); the opening
# and closing markup seems to be missing — confirm.
/^```/ {
    is_pre = !is_pre
    if (is_pre)
        append("\n")
    else
        append("\n")
    next
}

# inside a pre-formatted block lines are escaped and copied as they are
is_pre {
    sanitize()
    append($0)
    next
}

# empty lines: close a list if one is open, otherwise drop the line
/^$/ {
    if (is_list) {
        append("")
        is_list = 0
    }
    next
}

# lists
# (this rule continues beyond the lines covered here; its opening is kept verbatim)
sub(/^\*[[:space:]]*/,""){ if(!is_list){ append("