From 68709c850e28ac0243eb216ff78d43b2ce6a6287 Mon Sep 17 00:00:00 2001
From: rmgr
Date: Sat, 12 Feb 2022 20:23:04 +1030
Subject: [PATCH] Add code and a rambly readme

---
 README.md  |   3 +
 nb_wiki.py | 351 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 354 insertions(+)
 create mode 100644 nb_wiki.py

diff --git a/README.md b/README.md
index 4ac55f4..afc0f43 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,5 @@
 # nb_wiki.py
+The bespoke and very brittle software powering https://wiki.rmgr.dev
+
+It's designed as a wrapper for the wonderful [nb](https://github.com/xwmx/nb) CLI notebook application. To use it, simply add a file called .publish to any notebook you want to publish, then run the program and pray it works. It's what I do!
 
diff --git a/nb_wiki.py b/nb_wiki.py
new file mode 100644
index 0000000..32a09cb
--- /dev/null
+++ b/nb_wiki.py
@@ -0,0 +1,351 @@
+import os
+import argparse
+import http.server
+import socketserver
+import shutil
+
+list_of_tags = {}
+copy_extensions = [".gif", ".jpg", ".png", ".html"]
+
+output_dir = ""
+base_url = ""
+template_location = ""
+
+# Split a path into all of its components.
+def splitall(path):
+    allparts = []
+    while 1:
+        parts = os.path.split(path)
+        if parts[0] == path:  # sentinel for absolute paths
+            allparts.insert(0, parts[0])
+            break
+        elif parts[1] == path:  # sentinel for relative paths
+            allparts.insert(0, parts[1])
+            break
+        else:
+            path = parts[0]
+            allparts.insert(0, parts[1])
+    return allparts
+
+# Collect markdown and asset files from every notebook marked with a .publish file.
+def load_files(directory):
+    file_list = []
+    directory = os.path.expanduser(directory)
+    for d in os.listdir(directory):
+        if os.path.isfile(os.path.join(directory, d, ".publish")):
+            for current_dir, subdirs, files in os.walk(os.path.join(directory, d)):
+                for f in files:
+                    extension = os.path.splitext(f)[1]
+                    if extension in [".md"] + copy_extensions:
+                        file_list.append(os.path.join(current_dir, f))
+    return file_list
+
+def make_temp_directory():
+    if os.path.isdir(output_dir):
+        shutil.rmtree(output_dir)
+    if not os.path.exists(output_dir):
+        os.mkdir(output_dir)
+
+# Render each collected file into the output directory.
+def process_files(base_dir, files):
+    global output_dir
+    global base_url
+    base_dir = os.path.expanduser(base_dir)
+    for file_path in files:
+        # Static assets are copied across verbatim.
+        if os.path.splitext(file_path)[1] in copy_extensions:
+            other_bit = file_path.replace(base_dir, "")
+            output_path = os.path.join(output_dir, other_bit)
+            _p, _f = os.path.split(output_path)
+            os.makedirs(_p, exist_ok=True)
+            shutil.copy(file_path, output_path)
+            continue
+        # Name the output page after the first-line heading if there is one.
+        f = open(file_path, "r")
+        temp_title = f.readline()
+        f.close()
+        other_bit = ""
+        if temp_title[0] != '#':
+            other_bit = file_path.replace(base_dir, "")
+            other_bit = os.path.splitext(other_bit)[0] + ".html"
+        else:
+            other_bit = file_path.replace(base_dir, "")
+            _p, _f = os.path.split(file_path)
+            other_bit = other_bit.replace(_f, temp_title[2:len(temp_title)].strip() + ".html")
+        #other_bit = file_path.replace(base_dir,"")
+        #other_bit = os.path.splitext(other_bit)[0] + ".html"
+        output_path = os.path.join(output_dir, other_bit)
+        path, fname = os.path.split(output_path)
+        os.makedirs(path, exist_ok=True)
+
+        template_file = open(template_location, "r")
+        template = template_file.read()
+        split_path = splitall(file_path.replace(base_dir, ""))
+        #breadcrumbs = ""
+        breadcrumbs = '> / '
+        for d in range(0, len(split_path) - 1):
+            breadcrumbs += '> ' + split_path[d] + ' '
+        template = template.replace("{{breadcrumbs}}", breadcrumbs)
+        template = template.replace('{{pagetitle}}', os.path.splitext(fname)[0])
+        f = open(file_path, "r")
+        file_content = f.read()
+        ul_lines = []
+        doing_list = False
+        output = open(output_path, "w+")
+        output.write(template.split("{{content}}")[0])
+        output.close()
+        for line in file_content.split("\n"):
+            if doing_list:
+                if line[0:1] == "*":
+                    ul_lines.append(line)
+                else:
+                    process_list(output_path, ul_lines, os.path.splitext(other_bit)[0], base_dir, other_bit)
+                    ul_lines = []
+                    doing_list = False
+            else:
+                if line[0:1] == "#" and not doing_list and line[1:2] in [' ', '#']:
+                    #We got a header maybe
+                    if line[1:2] in [' ', '#']:
+                        header_depth = 0
+                        for c in line:
+                            if c == "#":
+                                header_depth += 1
+                        process_header(output_path, line, header_depth)
+                elif line[0:1] == "*":
+                    #We got a list
+                    doing_list = True
+                    ul_lines = []
+                    ul_lines.append(line)
+                else:
+                    if len(line) > 0:
+                        output = open(output_path, "a+")
+                        output.write("<p>")
+                        output.close()
+                        search_line_for_links(output_path, line, base_dir, os.path.splitext(other_bit)[0])
+                        output = open(output_path, "a+")
+                        output.write("</p>")
+                        output.close()
+
+        output = open(output_path, "a+")
+        output.write(template.split("{{content}}")[1])
+        output.close()
+        #return
+
+# Scan a markdown line for wiki links, tags, external links and images,
+# writing everything else straight through to the output file.
+def search_line_for_links(output_path, line, base_dir, page_title):
+    found_link = False
+    filename = output_path.replace(base_dir, '')
+    link_text = ""
+    found_tag = False
+    found_image = False
+    output = open(output_path, "a+")
+    for i in range(0, len(line)):
+        c = line[i]
+        if c == "[":
+            if line[i + 1] == "[":
+                found_link = True
+                for u in range(i + 2, len(line)):
+                    if line[u] == "]":
+                        process_link(output, link_text)
+                        break
+                    else:
+                        link_text += line[u]
+        elif c == "#" and not line[i + 1].isspace():
+            found_tag = True
+            link_text = ""
+            for u in range(i, len(line)):
+                if line[u] in ['\r', '\n', ' '] or u > len(line):
+                    #process_tag(output, filename, link_text)
+                    break
+                else:
+                    link_text += line[u]
+            if len(link_text) > 0:
+                process_tag(output, page_title, filename, base_dir, link_text)
+        elif c == "<":
+            found_link = True
+            for u in range(i + 1, len(line)):
+                if line[u] == ">":
+                    process_external_link(output, link_text)
+                    break
+                else:
+                    link_text += line[u]
+        elif c == "!":
+            alt_text = ""
+            image_link = ""
+            if len(line) > (i + 1) and line[i + 1] == "[":
+                found_image = True
+                for u in range(i + 2, len(line)):
+                    if line[u] == "]":
+                        break
+                    else:
+                        alt_text += line[u]
+                for u in range(i + len(alt_text) + 4, len(line)):
+                    if line[u] == ")":
+                        process_image(output, image_link, alt_text)
+                        break
+                    else:
+                        image_link += line[u]
+        elif found_tag and c in ['\r', '\n', ' ']:
+            found_tag = False
+        elif found_link and c in ["]", ">"]:
+            if line[i - 1] in ["]", ">"]:
+                found_link = False
+        elif found_image and line[i - 1] in [")"]:
+            found_image = False
+        elif not found_link and not found_tag and not found_image:
+            output.write(c)
+    output.close()
+
+def process_image(output, image_link, alt_text):
+    image_link = image_link.replace(":", "/")
+    if image_link[0] != "/":
+        image_link = "/" + image_link
+    output.write('<img src="' + image_link + '" alt="' + alt_text + '"/>')
+
+def process_tag(output, page_title, parent_link, base_dir, tag_text):
+    base_dir = os.path.expanduser(base_dir)
+    output.write('<a href="/' + tag_text.replace("#", "") + '.html">' + tag_text + '</a>')
+    output_path = base_url
+    other_bit = parent_link.replace(base_dir, "").replace(output_dir, "")
+    other_bit = os.path.splitext(other_bit)[0] + ".html"
+    output_path = os.path.join(output_path, other_bit)
+    if tag_text in list_of_tags:
+        list_of_tags[tag_text].append({"path": output_path, "title": page_title})
+    else:
+        list_of_tags[tag_text] = [{"path": output_path, "title": page_title}]
+
+def process_link(output, link_text):
+    link_text = link_text.replace(":", "/")
+    if not "|" in link_text:
+        output.write('<a href="/' + link_text + '.html">' + link_text + '</a>')
+    else:
+        split_link = link_text.split("|")
+        output.write('<a href="/' + split_link[0] + '.html">' + split_link[1] + '</a>')
+
+def process_external_link(output, link_text):
+    output.write('<a href="' + link_text + '">' + link_text + '</a>')
+
+def process_list(output_path, lines, page_title, base_dir, other_bit):
+    o = open(output_path, "a+")
+    o.write("<ul>")
+    o.close()
+    for item in lines:
+        o = open(output_path, "a+")
+        o.write("<li>")
+        o.close()
+        search_line_for_links(output_path, item[1:].strip(), base_dir, page_title)
+        o = open(output_path, "a+")
+        o.write("</li>")
+        o.close()
+    o = open(output_path, "a+")
+    o.write("</ul>")
+    o.close()
+
+def process_header(output_path, line, header_depth):
+    o = open(output_path, "a+")
+    o.write("<h" + str(header_depth) + ">" + line[header_depth + 1: len(line)] + "</h" + str(header_depth) + ">")
+    o.close()
+
+# Construct an index page which lists the published notebooks
+def build_index(base_dir):
+    base_dir = os.path.expanduser(base_dir)
+    list_of_notebooks = []
+    output_file = os.path.join(output_dir, "index.html")
+    template_file = open(template_location, "r")
+    template = template_file.read()
+    for directory in os.listdir(base_dir):
+        if os.path.isfile(os.path.join(base_dir, directory, ".publish")):
+            list_of_notebooks.append(directory)
+    o = open(output_file, "a+")
+    template = template.replace("{{breadcrumbs}}", "")
+    template = template.replace('{{pagetitle}}', "rmgr's wiki")
+    o.write(template.split("{{content}}")[0])
+    o.write('<ul>')
+    for notebook in list_of_notebooks:
+        o.write('<li><a href="/' + notebook + '/index.html">' + notebook + '</a></li>')
+    o.write('</ul>')
+    o.write(template.split("{{content}}")[1])
+    o.close()
+
+# Construct an index page for a given directory
+def build_directories(base_dir):
+    dir_list = []
+    base_dir = os.path.expanduser(base_dir)
+    for directory in os.listdir(base_dir):
+        for current_dir, subdirs, files in os.walk(os.path.join(base_dir, directory)):
+            dir_list.append(current_dir)
+    for directory in dir_list:
+        output_file = os.path.join(directory, "index.html")
+        o = open(output_file, "a+")
+        template_file = open(template_location, "r")
+        template = template_file.read()
+        split_path = splitall(directory.replace(base_dir, "")[1:])
+        breadcrumbs = '> / '
+        for d in range(0, len(split_path) - 1):
+            breadcrumbs += '> ' + split_path[d] + ' '
+        template = template.replace("{{breadcrumbs}}", breadcrumbs)
+        template = template.replace('{{pagetitle}}', split_path[len(split_path) - 1])
+        subdir_list = os.listdir(directory)
+        o.write(template.split("{{content}}")[0])
+        o.write("<h1>" + split_path[len(split_path) - 1] + "</h1>")
+        o.write('<ul>')
+        for item in subdir_list:
+            if item == "index.html":
+                continue
+            o.write('<li><a href="' + item + '">' + item + '</a></li>')
+        o.write('</ul>')
+        o.write(template.split("{{content}}")[1])
+        o.close()
+
+# Build one page per tag listing every page that mentions it.
+def build_tag_pages(output_path):
+    for tag in list_of_tags.keys():
+        template_file = open(template_location, "r")
+        template = template_file.read()
+        template = template.replace("{{breadcrumbs}}", "")
+        template = template.replace('{{pagetitle}}', tag)
+        f = open(os.path.join(output_path, tag.replace("#", "") + ".html"), "a+")
+        f.write(template.split("{{content}}")[0])
+        for obj in list_of_tags[tag]:
+            f.write('<li><a href="' + obj["path"] + '">' + obj["title"] + '</a></li>')
+        f.write(template.split("{{content}}")[1])
+        f.close()
+
+class Handler(http.server.SimpleHTTPRequestHandler):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, directory="tmp", **kwargs)
+
+def run_wiki(_output_dir, _base_url, serve, _template):
+    global output_dir
+    global base_url
+    global template_location
+    template_location = _template
+    output_dir = _output_dir
+    base_url = _base_url
+    base_dir = "~/.nb"
+    if base_dir[len(base_dir) - 1:len(base_dir)] != "/":
+        base_dir += "/"
+    files = load_files(base_dir)
+    make_temp_directory()
+    process_files(base_dir, files)
+    build_index(base_dir)
+    build_directories(output_dir)
+    build_tag_pages(output_dir)
+    if serve:
+        with socketserver.TCPServer(("", 8111), Handler) as httpd:
+            print("serving at port 8111")
+            try:
+                httpd.serve_forever()
+            except KeyboardInterrupt:
+                print('shutting down')
+            httpd.server_close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Process nb directory tree and publish any notebooks containing a .publish file.')
+    parser.add_argument('-o', '--output', default="./tmp", type=str, help='The directory to dump the generated html')
+    parser.add_argument('-b', '--base-url', default="http://192.168.1.103:8111", type=str, help='The base url for internal links')
+    parser.add_argument('--serve', action='store_true', help='Fire up a web server after generation to preview changes.')
+    parser.add_argument('-t', '--template', default="templates/default.html", type=str, help='Path to the template file.')
+    args = parser.parse_args()
+    run_wiki(args.output, args.base_url, args.serve, args.template)
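
For anyone trying the patch out, here is a minimal usage sketch, not part of the patch itself. It assumes the file above is saved as nb_wiki.py on the import path, that ~/.nb contains at least one notebook with a .publish marker (as the README describes), and it writes a throwaway template containing the three placeholders ({{pagetitle}}, {{breadcrumbs}}, {{content}}) that process_files substitutes. The output directory and base URL are illustrative values, not anything mandated by the script.

```python
# usage_sketch.py -- illustrative only, not part of the patch.
# Assumes nb_wiki.py (above) is importable and that ~/.nb holds a
# notebook containing a .publish file.
import os
import nb_wiki

# A throwaway template with the three placeholders the generator fills in.
os.makedirs("templates", exist_ok=True)
with open("templates/default.html", "w") as t:
    t.write(
        "<html><head><title>{{pagetitle}}</title></head>"
        "<body><nav>{{breadcrumbs}}</nav>{{content}}</body></html>"
    )

# Generate the site into ./tmp and preview it on http://localhost:8111.
nb_wiki.run_wiki("./tmp", "http://localhost:8111", True, "templates/default.html")
```

Running `python nb_wiki.py --serve` from the repository root should do the same thing through the argparse entry point, with -o, -b and -t overriding the defaults.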