Add code and a rambly readme

This commit is contained in:
rmgr 2022-02-12 20:23:04 +10:30
parent 6db21e9cbd
commit 68709c850e
2 changed files with 354 additions and 0 deletions

View File

@ -1,2 +1,5 @@
# nb_wiki.py
The bespoke and very brittle software powering https://wiki.rmgr.dev
It's designed as a wrapper for the wonderful [nb](https://github.com/xwmx/nb) CLI notebook application. To use it, add a file called `.publish` to any notebook you want to publish, then run the program and pray it works. It's what I do!

351
nb_wiki.py Normal file
View File

@ -0,0 +1,351 @@
import os
import argparse
import http.server
import socketserver
import shutil
list_of_tags = {}
copy_extensions = [".gif",".jpg",".png", ".html"]
output_dir = ""
base_url = ""
template_location = ""
def splitall(path):
allparts = []
while 1:
parts = os.path.split(path)
if parts[0] == path: # sentinel for absolute paths
allparts.insert(0, parts[0])
break
elif parts[1] == path: # sentinel for relative paths
allparts.insert(0, parts[1])
break
else:
path = parts[0]
allparts.insert(0, parts[1])
return allparts
def load_files(directory):
file_list = []
directory = os.path.expanduser(directory)
for d in os.listdir(directory):
if os.path.isfile(os.path.join(directory, d, ".publish")):
for current_dir, subdirs, files in os.walk(os.path.join(directory, d)):
for f in files:
extension = os.path.splitext(f)[1]
if extension in [ ".md"] + copy_extensions:
file_list.append(os.path.join(current_dir, f))
return file_list
def make_temp_directory():
if os.path.isdir(output_dir):
shutil.rmtree(output_dir)
if not os.path.exists(output_dir):
os.mkdir(output_dir)
def process_files(base_dir, files):
global output_dir
global base_url
base_dir = os.path.expanduser(base_dir)
for file_path in files:
if os.path.splitext(file_path)[1] in copy_extensions:
other_bit = file_path.replace(base_dir, "")
output_path = os.path.join(output_dir, other_bit)
_p,_f = os.path.split(output_path)
os.makedirs(_p, exist_ok=True)
shutil.copy(file_path, output_path)
continue
f = open(file_path, "r")
temp_title = f.readline()
f.close()
other_bit = ""
if temp_title[0] != '#':
other_bit = file_path.replace(base_dir,"")
other_bit = os.path.splitext(other_bit)[0] + ".html"
else:
other_bit = file_path.replace(base_dir,"")
_p,_f = os.path.split(file_path)
other_bit = other_bit.replace(_f,temp_title[2:len(temp_title)].strip() + ".html")
#other_bit = file_path.replace(base_dir,"")
#other_bit = os.path.splitext(other_bit)[0] + ".html"
output_path = os.path.join(output_dir, other_bit)
path, fname = os.path.split(output_path)
os.makedirs(path, exist_ok=True)
template_file = open(template_location, "r")
template = template_file.read()
split_path = splitall(file_path.replace(base_dir,""))
#breadcrumbs = ""
breadcrumbs = '> <a href="/">/</a> '
for d in range(0,len(split_path)-1):
breadcrumbs += '> <a href="/' + "/".join(split_path[0:d + 1]) + '">' + split_path[d] + '</a> '
template = template.replace("{{breadcrumbs}}", breadcrumbs)
template = template.replace('{{pagetitle}}',os.path.splitext(fname)[0])
f = open(file_path, "r")
file_content = f.read()
ul_lines = []
doing_list = False
output = open(output_path, "w+")
output.write(template.split("{{content}}")[0])
output.close()
for line in file_content.split("\n"):
if doing_list:
if line[0:1] == "*":
ul_lines.append(line)
else:
process_list(output_path, ul_lines, os.path.splitext(other_bit)[0], base_dir, other_bit)
ul_lines = []
doing_list = False
else:
if line[0:1] == "#" and not doing_list and line[1:2] in [' ', '#']:
#We got a header maybe
if line[1:2] in [' ', '#']:
header_depth = 0
for c in line:
if c == "#":
header_depth += 1
process_header(output_path, line, header_depth)
elif line[0:1] == "*":
#We got a list
doing_list = True
ul_lines = []
ul_lines.append(line)
else:
if len(line) > 0:
output = open(output_path, "a+")
output.write("<p>")
output.close()
search_line_for_links(output_path, line, base_dir, os.path.splitext(other_bit)[0])
output = open(output_path, "a+")
output.write("</p>")
output.close()
output = open(output_path, "a+")
output.write(template.split("{{content}}")[1])
output.close()
#return
def search_line_for_links(output_path, line, base_dir, page_title):
found_link = False
filename = output_path.replace(base_dir, '')
link_text = ""
found_tag = False
found_image = False
output = open(output_path, "a+")
for i in range(0, len(line)):
c = line[i]
if c == "[":
if line[i + 1] == "[":
found_link = True
for u in range(i + 2, len(line)):
if line[u] == "]":
process_link(output, link_text)
break
else:
link_text += line[u]
elif c == "#" and not line[i+1].isspace():
found_tag = True
link_text = ""
for u in range(i, len(line)):
if line[u] in ['\r','\n',' '] or u > len(line):
#process_tag(output, filename, link_text)
break
else:
link_text += line[u]
if len(link_text) > 0:
process_tag(output, page_title, filename, base_dir, link_text)
elif c == "<":
found_link = True
for u in range(i + 1, len(line)):
if line[u] == ">":
process_external_link(output, link_text)
break
else:
link_text += line[u]
elif c == "!":
alt_text = ""
image_link = ""
if len(line) > (i + 1) and line[i + 1] == "[":
found_image = True
for u in range(i+2, len(line)):
if line[u] == "]":
break
else:
alt_text += line[u]
for u in range(i + len(alt_text) + 4, len(line)):
if line[u] == ")":
process_image(output, image_link, alt_text)
break
else:
image_link += line[u]
elif found_tag and c in ['\r','\n',' ']:
found_tag = False
elif found_link and c in ["]", ">"]:
if line[i-1] in ["]", ">"]:
found_link = False
elif found_image and line[i-1] in [")"]:
found_image = False
elif not found_link and not found_tag and not found_image:
output.write(c)
output.close()
def process_image(output, image_link, alt_text):
image_link = image_link.replace(":", "/")
if image_link[0] != "/":
image_link = "/" + image_link
output.write('<img src="' + image_link + '" alt="' + alt_text + '" title="' + alt_text + '"/>')
def process_tag(output, page_title, parent_link, base_dir , tag_text):
base_dir = os.path.expanduser(base_dir)
output.write('<a class="tag" href="' + base_url + "/" + tag_text.replace("#","") + '.html">'+ tag_text+ '</a>')
output_path = base_url
other_bit = parent_link.replace(base_dir,"").replace(output_dir, "")
other_bit = os.path.splitext(other_bit)[0] + ".html"
output_path = os.path.join(output_path, other_bit)
if tag_text in list_of_tags:
list_of_tags[tag_text].append({"path": output_path, "title": page_title})
else:
list_of_tags[tag_text] = [{"path": output_path, "title": page_title}]
def process_link(output, link_text):
link_text = link_text.replace(":","/")
if not "|" in link_text:
output.write('<a href="' + os.path.join(base_url,link_text) + '.html">' + link_text + '</a>')
else:
split_link = link_text.split("|")
output.write('<a href="' + os.path.join(base_url, split_link[0]) + '.html">' + split_link[1] + '</a>')
def process_external_link(output, link_text):
output.write('<a href="' + link_text + '">' + link_text + '</a>')
def process_list(output_path, lines, page_title, base_dir, other_bit):
o = open(output_path, "a+")
o.write("<ul>")
o.close()
for line in lines:
o = open(output_path, "a+")
o.write("<li>")
o.close()
#search_line_for_links(output_path,line[2:len(line)], page_title)
search_line_for_links(output_path, line[2:len(line)], base_dir, os.path.splitext(other_bit)[0])
o = open(output_path, "a+")
o.write("</li>")
o.close()
o = open(output_path, "a+")
o.write("</ul>")
o.close()
def process_header(output_path, line, header_depth):
o = open(output_path, "a+")
o.write("<h" + str(header_depth) + ">" + line[header_depth + 1: len(line)] + "</h" + str(header_depth) + ">")
o.close()
# Construct an index page which lists the published notebooks
def build_index(base_dir):
base_dir = os.path.expanduser(base_dir)
list_of_notebooks = []
output_file = os.path.join(output_dir, "index.html")
template_file = open(template_location, "r")
template = template_file.read()
for directory in os.listdir(base_dir):
if os.path.isfile(os.path.join(base_dir, directory, ".publish")):
list_of_notebooks.append(directory)
o = open(output_file, "a+")
template = template.replace("{{breadcrumbs}}","")
template = template.replace('{{pagetitle}}', "rmgr's wiki")
o.write(template.split("{{content}}")[0])
o.write('<ul>')
for notebook in list_of_notebooks:
o.write('<li><a href="' + os.path.join(base_url, notebook, 'index.html') + '">📂 ' + notebook + '</a></li>')
o.write('</ul>')
o.write(template.split("{{content}}")[1])
o.close()
# Construct an index page for a given directory
def build_directories(base_dir):
dir_list = []
base_dir = os.path.expanduser(base_dir)
for directory in os.listdir(base_dir):
for current_dir, subdirs, files in os.walk(os.path.join(base_dir, directory)):
dir_list.append(current_dir)
for directory in dir_list:
output_file = os.path.join(directory, "index.html")
o = open(output_file, "a+")
template_file = open(template_location, "r")
template = template_file.read()
split_path = splitall(directory.replace(base_dir,"")[1:])
breadcrumbs = '> <a href="/">/</a> '
for d in range(0,len(split_path)-1):
breadcrumbs += '> <a href="/' + "/".join(split_path[0:d + 1]) + '">' + split_path[d] + '</a> '
template = template.replace("{{breadcrumbs}}", breadcrumbs)
template = template.replace('{{pagetitle}}',split_path[len(split_path)-1])
subdir_list = os.listdir(directory)
o.write(template.split("{{content}}")[0])
o.write("<h1>" + split_path[len(split_path) - 1] + "</h1>")
o.write('<ul class="directory-listing">')
for subdir in subdir_list:
if subdir[0] != '.' and subdir != "index.html":
if not os.path.isfile(os.path.join(directory, subdir)):
o.write('<li><a href="' + os.path.join(base_url,directory.replace(base_dir,''), subdir, "index.html") + '">📂 ' + subdir + '</a></li>')
else:
o.write('<li><a href="' + os.path.join(base_url,directory.replace(base_dir,''), subdir) + '">' + os.path.splitext(subdir)[0] + '</a></li>')
o.write('</ul>')
o.write(template.split("{{content}}")[1])
o.close()
def build_tag_pages(output_path):
for tag in list_of_tags.keys():
template_file = open(template_location, "r")
template = template_file.read()
template = template.replace("{{breadcrumbs}}","")
template = template.replace('{{pagetitle}}',tag)
f = open(os.path.join(output_path, tag.replace("#","") + ".html"), "a+")
f.write(template.split("{{content}}")[0])
for obj in list_of_tags[tag]:
f.write('<li><a href="' + obj["path"] + '">' + obj["title"] + '</a></li>')
f.write(template.split("{{content}}")[1])
f.close()
class Handler(http.server.SimpleHTTPRequestHandler):
def __init__(self,*args,**kwargs):
super().__init__(*args, directory="tmp",**kwargs)
def run_wiki(_output_dir, _base_url, serve, _template):
global output_dir
global base_url
global template_location
template_location = _template
output_dir = _output_dir
base_url = _base_url
base_dir = "~/.nb"
if base_dir[len(base_dir)-1:len(base_dir)] != "/":
base_dir += "/"
files = load_files(base_dir)
make_temp_directory()
process_files(base_dir, files )
build_index(base_dir)
build_directories(output_dir)
build_tag_pages(output_dir)
if serve:
with socketserver.TCPServer(("",8111), Handler) as httpd:
print("serving at port 8111")
try:
httpd.serve_forever()
except KeyboardInterrupt:
print('shutting down')
httpd.server_close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Process nb directory tree and publish any notebooks containing a .publish file.')
parser.add_argument('-o', '--output', default="./tmp", type=str, help='The directory to dump the generated html')
parser.add_argument('-b', '--base-url', default="http://192.168.1.103:8111", type=str, help='The base url for internal links')
parser.add_argument('--serve', default=False, type=bool, help='Fire up a web server after generation to preview changes.')
parser.add_argument('-t', '--template', default="templates/default.html", type=str, help="Directory containing a template file.")
args = parser.parse_args()
run_wiki(args.output, args.base_url, args.serve, args.template)