From 68709c850e28ac0243eb216ff78d43b2ce6a6287 Mon Sep 17 00:00:00 2001
From: rmgr
Date: Sat, 12 Feb 2022 20:23:04 +1030
Subject: [PATCH] Add code and a rambly readme

---
 README.md  |   3 +
 nb_wiki.py | 351 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 354 insertions(+)
 create mode 100644 nb_wiki.py

diff --git a/README.md b/README.md
index 4ac55f4..afc0f43 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,5 @@
 # nb_wiki.py
+The bespoke and very brittle software powering https://wiki.rmgr.dev
+
+It's designed as a wrapper for the wonderful [nb](https://github.com/xwmx/nb) CLI notebook application. To use it, simply add a file called .publish to any notebook you want to publish, then run the program and pray it works. It's what I do!
 
diff --git a/nb_wiki.py b/nb_wiki.py
new file mode 100644
index 0000000..32a09cb
--- /dev/null
+++ b/nb_wiki.py
@@ -0,0 +1,351 @@
+import os
+import argparse
+import http.server
+import socketserver
+import shutil
+
+list_of_tags = {}
+copy_extensions = [".gif", ".jpg", ".png", ".html"]
+
+output_dir = ""
+base_url = ""
+template_location = ""
+
+# Split a path into all of its components.
+def splitall(path):
+    allparts = []
+    while 1:
+        parts = os.path.split(path)
+        if parts[0] == path:  # sentinel for absolute paths
+            allparts.insert(0, parts[0])
+            break
+        elif parts[1] == path:  # sentinel for relative paths
+            allparts.insert(0, parts[1])
+            break
+        else:
+            path = parts[0]
+            allparts.insert(0, parts[1])
+    return allparts
+
+# Collect markdown and asset files from every notebook marked with a .publish file.
+def load_files(directory):
+    file_list = []
+    directory = os.path.expanduser(directory)
+    for d in os.listdir(directory):
+        if os.path.isfile(os.path.join(directory, d, ".publish")):
+            for current_dir, subdirs, files in os.walk(os.path.join(directory, d)):
+                for f in files:
+                    extension = os.path.splitext(f)[1]
+                    if extension in [".md"] + copy_extensions:
+                        file_list.append(os.path.join(current_dir, f))
+    return file_list
+
+def make_temp_directory():
+    if os.path.isdir(output_dir):
+        shutil.rmtree(output_dir)
+    if not os.path.exists(output_dir):
+        os.mkdir(output_dir)
+
+# Render each collected file into the output directory.
+def process_files(base_dir, files):
+    global output_dir
+    global base_url
+    base_dir = os.path.expanduser(base_dir)
+    for file_path in files:
+        # Static assets are copied across verbatim.
+        if os.path.splitext(file_path)[1] in copy_extensions:
+            other_bit = file_path.replace(base_dir, "")
+            output_path = os.path.join(output_dir, other_bit)
+            _p, _f = os.path.split(output_path)
+            os.makedirs(_p, exist_ok=True)
+            shutil.copy(file_path, output_path)
+            continue
+        # Name the output page after the first-line heading if there is one.
+        f = open(file_path, "r")
+        temp_title = f.readline()
+        f.close()
+        other_bit = ""
+        if temp_title[0] != '#':
+            other_bit = file_path.replace(base_dir, "")
+            other_bit = os.path.splitext(other_bit)[0] + ".html"
+        else:
+            other_bit = file_path.replace(base_dir, "")
+            _p, _f = os.path.split(file_path)
+            other_bit = other_bit.replace(_f, temp_title[2:len(temp_title)].strip() + ".html")
+        #other_bit = file_path.replace(base_dir,"")
+        #other_bit = os.path.splitext(other_bit)[0] + ".html"
+        output_path = os.path.join(output_dir, other_bit)
+        path, fname = os.path.split(output_path)
+        os.makedirs(path, exist_ok=True)
+
+        template_file = open(template_location, "r")
+        template = template_file.read()
+        split_path = splitall(file_path.replace(base_dir, ""))
+        #breadcrumbs = ""
+        breadcrumbs = '> / '
+        for d in range(0, len(split_path) - 1):
+            breadcrumbs += '> ' + split_path[d] + ' '
+        template = template.replace("{{breadcrumbs}}", breadcrumbs)
+        template = template.replace('{{pagetitle}}', os.path.splitext(fname)[0])
+        f = open(file_path, "r")
+        file_content = f.read()
+        ul_lines = []
+        doing_list = False
+        output = open(output_path, "w+")
+        output.write(template.split("{{content}}")[0])
+        output.close()
+        for line in file_content.split("\n"):
+            if doing_list:
+                if line[0:1] == "*":
+                    ul_lines.append(line)
+                else:
+                    process_list(output_path, ul_lines, os.path.splitext(other_bit)[0], base_dir, other_bit)
+                    ul_lines = []
+                    doing_list = False
+            else:
+                if line[0:1] == "#" and not doing_list and line[1:2] in [' ', '#']:
+                    #We got a header maybe
+                    if line[1:2] in [' ', '#']:
+                        header_depth = 0
+                        for c in line:
+                            if c == "#":
+                                header_depth += 1
+                        process_header(output_path, line, header_depth)
+                elif line[0:1] == "*":
+                    #We got a list
+                    doing_list = True
+                    ul_lines = []
+                    ul_lines.append(line)
+                else:
+                    if len(line) > 0:
+                        output = open(output_path, "a+")
+                        output.write("<p>")
+                        output.close()
+                        search_line_for_links(output_path, line, base_dir, os.path.splitext(other_bit)[0])
+                        output = open(output_path, "a+")
+                        output.write("</p>")
+                        output.close()
+
+        output = open(output_path, "a+")
+        output.write(template.split("{{content}}")[1])
+        output.close()
+        #return
+
+# Scan a markdown line for wiki links, tags, external links and images,
+# writing everything else straight through to the output file.
+def search_line_for_links(output_path, line, base_dir, page_title):
+    found_link = False
+    filename = output_path.replace(base_dir, '')
+    link_text = ""
+    found_tag = False
+    found_image = False
+    output = open(output_path, "a+")
+    for i in range(0, len(line)):
+        c = line[i]
+        if c == "[":
+            if line[i + 1] == "[":
+                found_link = True
+                for u in range(i + 2, len(line)):
+                    if line[u] == "]":
+                        process_link(output, link_text)
+                        break
+                    else:
+                        link_text += line[u]
+        elif c == "#" and not line[i + 1].isspace():
+            found_tag = True
+            link_text = ""
+            for u in range(i, len(line)):
+                if line[u] in ['\r', '\n', ' '] or u > len(line):
+                    #process_tag(output, filename, link_text)
+                    break
+                else:
+                    link_text += line[u]
+            if len(link_text) > 0:
+                process_tag(output, page_title, filename, base_dir, link_text)
+        elif c == "<":
+            found_link = True
+            for u in range(i + 1, len(line)):
+                if line[u] == ">":
+                    process_external_link(output, link_text)
+                    break
+                else:
+                    link_text += line[u]
+        elif c == "!":
+            alt_text = ""
+            image_link = ""
+            if len(line) > (i + 1) and line[i + 1] == "[":
+                found_image = True
+                for u in range(i + 2, len(line)):
+                    if line[u] == "]":
+                        break
+                    else:
+                        alt_text += line[u]
+                for u in range(i + len(alt_text) + 4, len(line)):
+                    if line[u] == ")":
+                        process_image(output, image_link, alt_text)
+                        break
+                    else:
+                        image_link += line[u]
+        elif found_tag and c in ['\r', '\n', ' ']:
+            found_tag = False
+        elif found_link and c in ["]", ">"]:
+            if line[i - 1] in ["]", ">"]:
+                found_link = False
+        elif found_image and line[i - 1] in [")"]:
+            found_image = False
+        elif not found_link and not found_tag and not found_image:
+            output.write(c)
+    output.close()
+
+def process_image(output, image_link, alt_text):
+    image_link = image_link.replace(":", "/")
+    if image_link[0] != "/":
+        image_link = "/" + image_link
+    output.write('<img src="' + image_link + '" alt="' + alt_text + '"/>')
+
+def process_tag(output, page_title, parent_link, base_dir, tag_text):
+    base_dir = os.path.expanduser(base_dir)
+    output.write('<a href="/' + tag_text.replace("#", "") + '.html">' + tag_text + '</a>')
+    output_path = base_url
+    other_bit = parent_link.replace(base_dir, "").replace(output_dir, "")
+    other_bit = os.path.splitext(other_bit)[0] + ".html"
+    output_path = os.path.join(output_path, other_bit)
+    if tag_text in list_of_tags:
+        list_of_tags[tag_text].append({"path": output_path, "title": page_title})
+    else:
+        list_of_tags[tag_text] = [{"path": output_path, "title": page_title}]
+
+def process_link(output, link_text):
+    link_text = link_text.replace(":", "/")
+    if not "|" in link_text:
+        output.write('<a href="/' + link_text + '.html">' + link_text + '</a>')
+    else:
+        split_link = link_text.split("|")
+        output.write('<a href="/' + split_link[0] + '.html">' + split_link[1] + '</a>')
+
+def process_external_link(output, link_text):
+    output.write('<a href="' + link_text + '">' + link_text + '</a>')
+
+def process_list(output_path, lines, page_title, base_dir, other_bit):
+    o = open(output_path, "a+")
+    o.write("<ul>")
+    o.close()
+    for item in lines:
+        o = open(output_path, "a+")
+        o.write("<li>")
+        o.close()
+        search_line_for_links(output_path, item[1:].strip(), base_dir, page_title)
+        o = open(output_path, "a+")
+        o.write("</li>")
+        o.close()
+    o = open(output_path, "a+")
+    o.write("</ul>")
+    o.close()
+
+def process_header(output_path, line, header_depth):
+    o = open(output_path, "a+")
+    o.write("<h" + str(header_depth) + ">" + line[header_depth + 1: len(line)] + "</h" + str(header_depth) + ">")
+    o.close()
+
+# Construct an index page which lists the published notebooks
+def build_index(base_dir):
+    base_dir = os.path.expanduser(base_dir)
+    list_of_notebooks = []
+    output_file = os.path.join(output_dir, "index.html")
+    template_file = open(template_location, "r")
+    template = template_file.read()
+    for directory in os.listdir(base_dir):
+        if os.path.isfile(os.path.join(base_dir, directory, ".publish")):
+            list_of_notebooks.append(directory)
+    o = open(output_file, "a+")
+    template = template.replace("{{breadcrumbs}}", "")
+    template = template.replace('{{pagetitle}}', "rmgr's wiki")
+    o.write(template.split("{{content}}")[0])
+    o.write('<ul>')
+    for notebook in list_of_notebooks:
+        o.write('<li><a href="/' + notebook + '/index.html">' + notebook + '</a></li>')
+    o.write('</ul>')
+    o.write(template.split("{{content}}")[1])
+    o.close()
+
+# Construct an index page for a given directory
+def build_directories(base_dir):
+    dir_list = []
+    base_dir = os.path.expanduser(base_dir)
+    for directory in os.listdir(base_dir):
+        for current_dir, subdirs, files in os.walk(os.path.join(base_dir, directory)):
+            dir_list.append(current_dir)
+    for directory in dir_list:
+        output_file = os.path.join(directory, "index.html")
+        o = open(output_file, "a+")
+        template_file = open(template_location, "r")
+        template = template_file.read()
+        split_path = splitall(directory.replace(base_dir, "")[1:])
+        breadcrumbs = '> / '
+        for d in range(0, len(split_path) - 1):
+            breadcrumbs += '> ' + split_path[d] + ' '
+        template = template.replace("{{breadcrumbs}}", breadcrumbs)
+        template = template.replace('{{pagetitle}}', split_path[len(split_path) - 1])
+        subdir_list = os.listdir(directory)
+        o.write(template.split("{{content}}")[0])
+        o.write("<h1>" + split_path[len(split_path) - 1] + "</h1>")
+        o.write('<ul>')
+        for item in subdir_list:
+            if item == "index.html":
+                continue
+            o.write('<li><a href="' + item + '">' + item + '</a></li>')
+        o.write('</ul>')
+        o.write(template.split("{{content}}")[1])
+        o.close()
+
+# Build one page per tag listing every page that mentions it.
+def build_tag_pages(output_path):
+    for tag in list_of_tags.keys():
+        template_file = open(template_location, "r")
+        template = template_file.read()
+        template = template.replace("{{breadcrumbs}}", "")
+        template = template.replace('{{pagetitle}}', tag)
+        f = open(os.path.join(output_path, tag.replace("#", "") + ".html"), "a+")
+        f.write(template.split("{{content}}")[0])
+        for obj in list_of_tags[tag]:
+            f.write('<li><a href="' + obj["path"] + '">' + obj["title"] + '</a></li>')
+        f.write(template.split("{{content}}")[1])
+        f.close()
+
+class Handler(http.server.SimpleHTTPRequestHandler):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, directory="tmp", **kwargs)
+
+def run_wiki(_output_dir, _base_url, serve, _template):
+    global output_dir
+    global base_url
+    global template_location
+    template_location = _template
+    output_dir = _output_dir
+    base_url = _base_url
+    base_dir = "~/.nb"
+    if base_dir[len(base_dir) - 1:len(base_dir)] != "/":
+        base_dir += "/"
+    files = load_files(base_dir)
+    make_temp_directory()
+    process_files(base_dir, files)
+    build_index(base_dir)
+    build_directories(output_dir)
+    build_tag_pages(output_dir)
+    if serve:
+        with socketserver.TCPServer(("", 8111), Handler) as httpd:
+            print("serving at port 8111")
+            try:
+                httpd.serve_forever()
+            except KeyboardInterrupt:
+                print('shutting down')
+            httpd.server_close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Process nb directory tree and publish any notebooks containing a .publish file.')
+    parser.add_argument('-o', '--output', default="./tmp", type=str, help='The directory to dump the generated html')
+    parser.add_argument('-b', '--base-url', default="http://192.168.1.103:8111", type=str, help='The base url for internal links')
+    parser.add_argument('--serve', action='store_true', help='Fire up a web server after generation to preview changes.')
+    parser.add_argument('-t', '--template', default="templates/default.html", type=str, help='Path to the template file.')
+    args = parser.parse_args()
+    run_wiki(args.output, args.base_url, args.serve, args.template)
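
For anyone trying the patch out, here is a minimal usage sketch, not part of the patch itself. It assumes the file above is saved as nb_wiki.py on the import path, that ~/.nb contains at least one notebook with a .publish marker (as the README describes), and it writes a throwaway template containing the three placeholders ({{pagetitle}}, {{breadcrumbs}}, {{content}}) that process_files substitutes. The output directory and base URL are illustrative values, not anything mandated by the script.

```python
# usage_sketch.py -- illustrative only, not part of the patch.
# Assumes nb_wiki.py (above) is importable and that ~/.nb holds a
# notebook containing a .publish file.
import os
import nb_wiki

# A throwaway template with the three placeholders the generator fills in.
os.makedirs("templates", exist_ok=True)
with open("templates/default.html", "w") as t:
    t.write(
        "<html><head><title>{{pagetitle}}</title></head>"
        "<body><nav>{{breadcrumbs}}</nav>{{content}}</body></html>"
    )

# Generate the site into ./tmp and preview it on http://localhost:8111.
nb_wiki.run_wiki("./tmp", "http://localhost:8111", True, "templates/default.html")
```

Running `python nb_wiki.py --serve` from the repository root should do the same thing through the argparse entry point, with -o, -b and -t overriding the defaults.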