"""Build the Whoosh full-text search index for the tilde.team wiki.

Every regular file in ``PAGES_DIR`` is parsed as a front-matter document,
its Markdown body is reduced to plain text, and one document per page
(url, title, text) is written to a freshly created index in ``INDEX_DIR``.
"""
import os
import os.path
import sys

import frontmatter
from bs4 import BeautifulSoup
from markdown import markdown
from whoosh import index
from whoosh.fields import Schema, ID, TEXT, NUMERIC

# Directory holding the wiki page sources (one file per page).
PAGES_DIR = "/var/www/tilde.team/wiki/pages"
# Directory (relative to the current working directory) that holds the index.
INDEX_DIR = "_index"
# Public URL prefix; the page file name without its extension is appended.
BASE_URL = "https://tilde.team/wiki/"


def textify(md):
    """Return the plain text of the Markdown source *md*.

    The Markdown is rendered to HTML with ``markdown`` and the markup is then
    dropped by taking the ``.text`` of a BeautifulSoup (lxml) parse.
    """
    return BeautifulSoup(markdown(md), "lxml").text


# url and title are stored so they can be returned with search hits; text is
# only indexed for searching.
schema = Schema(
    url=ID(stored=True),
    title=ID(stored=True),
    text=TEXT,
)

# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(INDEX_DIR, exist_ok=True)

_index = index.create_in(INDEX_DIR, schema)
writer = _index.writer()

try:
    # Sorted so that repeated builds add documents in the same order.
    pages = sorted(os.listdir(PAGES_DIR))
    for page in pages:
        path = os.path.join(PAGES_DIR, page)
        # Sub-directories and other non-files cannot be opened as pages.
        if not os.path.isfile(path):
            continue

        # Explicit encoding so decoding does not depend on the host locale.
        with open(path, encoding="utf-8") as f:
            post = frontmatter.load(f)

        slug = os.path.splitext(page)[0]
        url = BASE_URL + slug

        title = post.get("title")
        if title is None:
            # One untitled page should not abort the whole build; index it
            # under its file name and tell the operator.
            print(
                "warning: %s has no title, using %r" % (path, slug),
                file=sys.stderr,
            )
            title = slug

        text = textify(post.content)
        # str(): front matter values are not guaranteed to be strings (YAML
        # may yield numbers or dates), while the ID field needs text.
        writer.add_document(title=str(title), text=text, url=url)
except BaseException:
    # Drop the pending changes and release the writer before propagating the
    # error (including Ctrl-C), instead of leaving a half-written index.
    writer.cancel()
    raise
else:
    writer.commit(optimize=True)