106 lines
3.7 KiB
Python
106 lines
3.7 KiB
Python
#! /usr/bin/env python
|
|
#
|
|
# mill.py, Markdown interface for llama.cpp
|
|
# Copyright (C) 2024 unworriedsafari <unworriedsafari@tilde.club>
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>
|
|
|
|
r"""
|
|
## CGI install + usage
|
|
|
|
1. Clone the Git repo or download a release tarball and unpack it.
|
|
2. Set the environment variable `MILL_LLAMACPP_MAIN` to the path of
|
|
`llama.cpp/main` or your wrapper around it.
|
|
3. Start your CGI web server.
|
|
|
|
```bash
|
|
mkdir -pv public_html/cgi-bin
|
|
cp -v mill_cgi.py public_html/cgi-bin
|
|
cp -v mill.py public_html/cgi-bin
|
|
cp -v mill_readme.py public_html/cgi-bin
|
|
cp -v mill_lang_markdown.py public_html/cgi-bin
|
|
cp -v mill_llm_llama_cpp.py public_html/cgi-bin
|
|
cp -v mill_example_markdown_llama_cpp.py public_html/cgi-bin
|
|
chmod +x public_html/cgi-bin/mill_cgi.py
|
|
export MILL_LLAMACPP_MAIN=/path/to/llama.cpp/main
|
|
python -m http.server --cgi -d public_html
|
|
```
|
|
|
|
`mill.py` doesn't come with a web interface, but it should work well with
|
|
generic HTTP tools. Here is an example `curl` invocation:
|
|
|
|
```bash
|
|
cat document.md | curl -s -N -X POST --data-binary @- \
|
|
--dump-header /dev/null http://host/path/to/cgi-bin/mill_cgi.py
|
|
```
|
|
|
|
On Android, I can recommend [HTTP
|
|
Shortcuts](https://github.com/Waboodoo/HTTP-Shortcuts). You can for example use
|
|
it to send your phone's clipboard directly to the CGI tool and copy the HTTP
|
|
response automatically back to the clipboard.
|
|
|
|
Use the `language` and `llm_engine` query-string parameters to select a
|
|
different language or LLM engine.
|
|
"""
|
|
|
|
import contextlib, io, mill, mill_readme, os, sys, urllib.parse
|
|
|
|
|
|
if __name__ == '__main__':
    # CGI entry point.
    #
    # * GET  -> print the README for the selected language / LLM engine.
    # * POST -> read the request body, pass its lines to mill.main() and
    #           echo the result back using the request's content type.
    #
    # The exit status is 1 for an unsupported request method or when
    # mill.main() raises; otherwise it is whatever mill.main() returns.

    # Use .get() so a missing REQUEST_METHOD yields the error response below
    # instead of an unhandled KeyError.
    request_method = os.environ.get('REQUEST_METHOD', '').upper()

    if request_method not in ('GET', 'POST'):
        print('Content-type: text/plain')
        print()
        print('Need either a GET or POST request.')
        sys.exit(1)

    args = urllib.parse.parse_qs(os.environ.get('QUERY_STRING', ''))

    # parse_qs() maps every key to a *list* of values. Unwrap the first value;
    # interpolating the list itself below would produce a module name such as
    # "lang_['markdown']". The defaults are wrapped in a list so both paths
    # are unwrapped the same way.
    language = args.get('language', [mill.default_language()])[0]
    llm_engine = args.get('llm_engine', [mill.default_llm_engine()])[0]

    if request_method == 'GET':
        print('Content-type: text/markdown')
        print()
        mill_readme.print_readme(language, llm_engine)
        sys.exit(0)

    # NOTE(review): `language` and `llm_engine` come straight from the query
    # string -- confirm that mill.load_module() restricts which modules can be
    # loaded.
    language_mod = mill.load_module(f'lang_{language}')
    llm_engine_mod = mill.load_module(f'llm_{llm_engine}')

    # A missing or empty CONTENT_LENGTH is treated as an empty request body.
    content_length = int(os.environ.get('CONTENT_LENGTH') or 0)
    input_text = sys.stdin.buffer.read(content_length).decode()

    # Match the behavior of sys.stdin.readlines(): every element ends with the
    # line separator.
    input_lines = [line + os.linesep for line in input_text.split(os.linesep)]
    if input_lines[-1] == os.linesep:
        # str.split() always yields a trailing empty string when the input
        # ended with a newline (or was empty); that element is now a spurious
        # blank line, so drop it.
        input_lines.pop()

    # Resolve the response content type *before* emitting anything, so a
    # missing CONTENT_TYPE cannot abort the header block half-way.
    content_type = os.environ.get('CONTENT_TYPE', 'text/plain')

    # Capture everything written to stderr while mill runs; it is only shown
    # to the client when the run fails.
    with contextlib.redirect_stderr(io.StringIO()) as main_err:
        try:
            print(f'Content-type: {content_type}')
            print()
            exit_code = mill.main(language_mod, llm_engine_mod, input_lines)
        except Exception as e:
            # sys.stderr is the redirected buffer here, so this text ends up
            # in main_err together with any earlier diagnostics.
            print('EXCEPTION', file=sys.stderr)
            print(e, file=sys.stderr)
            exit_code = 1

    if exit_code:
        print(main_err.getvalue())

    sys.exit(exit_code)
|