# mill.py, Markdown interface for llama.cpp
# Copyright (C) 2024 unworriedsafari <unworriedsafari@tilde.club>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>
"""
## Adding support for other languages
To add support for another language:
1. Create a new Python module named `mill_lang_<language_id>` where all
non-alphanumeric characters of `language_id` are replaced by underscores.
2. Implement a `parse` function similar to the one in `mill_lang_markdown.py`
(a sketch follows this list).
3. Add a docstring to the module. This docstring serves as the module's README.
4. Put your module anywhere on the Python path of `mill.py`.
5. When using the CLI interface, pass the `-l <language_id>` argument.
6. When using the CGI interface, pass the `language=<language_id>` query-string
parameter.
If the environment variable `MILL_DEFAULT_LANGUAGE` is set to `<language_id>`,
`mill.py` uses that language by default.
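For illustration only, here is a minimal sketch of such a module, inferred from
how `main()` below drives the parse result; the module name `mill_lang_example`,
the `Parsed` class and its behaviour are invented for this sketch, and
`mill_lang_markdown.py` remains the authoritative reference:
```python
# mill_lang_example.py -- hypothetical module for a language with id "example".
# A sketch of the interface mill.py appears to expect, not the real contract.
import contextlib

@contextlib.contextmanager
def parse(input_lines):
    # mill.py uses the yielded object's returncode, llm_vars, prompt,
    # print_generated_text() and print_message_template() (see main() below).
    class Parsed:
        returncode = 0
        llm_vars = {}                    # engine options gathered from the input
        prompt = ''.join(input_lines)    # text handed to the LLM engine

        def print_generated_text(self, text):
            print(text, end='', flush=True)

        def print_message_template(self):
            print()                      # emit whatever starts the next message

    yield Parsed()
```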
## Adding support for other LLM engines
Adding support for another LLM engine is similar to adding support for another
language:
1. Create a new Python module named `mill_llm_<llm_id>` where all
non-alphanumeric characters of `llm_id` are replaced by underscores.
2. Implement a `generate` function similar to the one in
`mill_llm_llama_cpp.py` (a sketch follows this list).
3. Add a docstring to the module. This docstring serves as the module's README.
4. Put your module anywhere on the Python path of `mill.py`.
5. When using the CLI interface, pass the `-e <llm_id>` argument.
6. When using the CGI interface, pass the `llm_engine=<llm_id>` query-string
parameter.
If the environment variable `MILL_DEFAULT_LLM` is set to `<llm_id>`, `mill.py`
uses that LLM engine by default.
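Again purely as an illustration, a sketch of an engine module, based on how
`main()` below reads from the generator; the module name `mill_llm_echo` and
its echoing behaviour are made up, and `mill_llm_llama_cpp.py` remains the
authoritative reference:
```python
# mill_llm_echo.py -- hypothetical engine with id "echo" that replays the prompt.
import contextlib, io

@contextlib.contextmanager
def generate(llm_vars, prompt):
    # mill.py reads the yielded object one character at a time with read(1)
    # and checks returncode afterwards (see main() below). A real engine
    # would start a subprocess here and yield a wrapper around its stdout.
    class Echo(io.StringIO):
        returncode = 0

    yield Echo(prompt)
```
Yielding from a context manager gives the engine a place to clean up
(presumably terminating the subprocess, in the llama.cpp case) once `main()`
has consumed the stream.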
## Adding example documentation
It's possible to add example documentation for uses of specific combinations of
`language_id` and `llm_id`.
1. Create a new Python module named `mill_example_<language_id>_<llm_id>`.
2. Create a global `example` variable in it and give it a string value. This
value is printed in the README below the 'Features' list.
3. Create a global `runnable_example` variable in it and give it a string
value. This value is printed at the end of the README.
The `example` variable is pure documentation. The `runnable_example` variable,
by contrast, is meant to hold text that `mill.py` can execute, turning the
README into an executable document.
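A hypothetical skeleton of such a module; the module name and the string
contents are placeholders only:
```python
# mill_example_markdown_llama_cpp.py -- hypothetical example module.
example = '''
A short write-up of a typical session, printed in the README below the
'Features' list.
'''

runnable_example = '''
A document that mill.py itself can execute, printed at the end of the README.
'''
```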
"""
import importlib, os, re


def default_language():
    return os.environ.get('MILL_DEFAULT_LANGUAGE', 'markdown')


def default_llm_engine():
    return os.environ.get('MILL_DEFAULT_LLM', 'llama.cpp')


def load_module(name):
    # e.g. load_module('llm_llama.cpp') imports and returns mill_llm_llama_cpp
    module = 'mill_' + re.sub(r'\W', '_', name).lower()
    return importlib.import_module(module)


def main(language, llm_engine, input_lines):
    with language.parse(input_lines) as p:
        if p.returncode != 0:
            return p.returncode

        with llm_engine.generate(p.llm_vars, p.prompt) as g:
            # Stream the engine's output word by word: accumulate characters
            # until a non-word character or the end of the stream, then flush.
            word = ''
            while True:
                char = g.read(1)
                word += char
                if not char:
                    if word:
                        p.print_generated_text(word)
                        word = ''
                    break
                if re.match(r'\W', char):
                    p.print_generated_text(word)
                    word = ''

        if g.returncode == 0:
            p.print_message_template()
        return g.returncode