# teamwikibot/create_index.py
#
# 34 lines
# 816 B
# Python

from whoosh.fields import Schema, ID, TEXT, NUMERIC
from whoosh import index
from bs4 import BeautifulSoup
from markdown import markdown
import os, os.path, frontmatter
import sys
def textify(md):
    """Return the plain-text content of a Markdown string.

    The Markdown is first rendered to HTML, the HTML is parsed with
    BeautifulSoup using the "lxml" parser, and the text of the parsed
    document is returned.
    """
    rendered_html = markdown(md)
    soup = BeautifulSoup(rendered_html, "lxml")
    return soup.text
# Index schema: `url` and `title` are ID fields whose values are stored in the
# index; `text` is a TEXT field (passed as the field class, without
# stored=True).
schema = Schema(url=ID(stored=True), title=ID(stored=True), text=TEXT)
# Directory holding the wiki's source pages; each file is loaded with
# python-frontmatter below.
PAGES_DIR = "/var/www/tilde.team/wiki/pages"

# Make sure the index directory exists. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("_index", exist_ok=True)

# Create the index in "_index" using the schema defined above and open a
# writer on it.
_index = index.create_in("_index", schema)
writer = _index.writer()

# Sorted so that documents are added in a reproducible order.
pages = sorted(os.listdir(PAGES_DIR))
try:
    for page in pages:
        path = os.path.join(PAGES_DIR, page)
        # Skip sub-directories and other non-file entries; open() cannot
        # read them and would abort the whole build.
        if not os.path.isfile(path):
            continue
        # Explicit UTF-8 so decoding does not depend on the process locale.
        with open(path, encoding="utf-8") as f:
            post = frontmatter.load(f)
        # The page URL is the site root plus the file name without extension.
        slug = os.path.splitext(page)[0]
        url = "https://tilde.wiki/" + slug
        # Fall back to the file's base name when the front matter has no
        # "title" key, instead of failing the entire run on one page.
        title = post.get("title", slug)
        text = textify(post.content)
        writer.add_document(title=title, text=text, url=url)
except Exception:
    # Abandon the partial write and close the writer before propagating
    # the error, so a failed run does not leave the writer open.
    writer.cancel()
    raise
writer.commit(optimize=True)