# bubble/utils.py

import datetime
import re
import time
import urllib.parse as urlparse
UTC = datetime.timezone.utc
GEMTEXT_MARKUP = re.compile(r'^(\s*=>\s*|\* |>\s*|##?#?)')
INNER_LINK_PREFIX = ''

class GeminiError(Exception):
    def __init__(self, code, msg):
        super().__init__(msg)
        self.code = code

def unescape_ini_gemtext(src):
    # Reverse the escaping used for gemtext kept in INI-style storage:
    # '|' stands for an empty line and '&' replaces the '#' heading prefix.
    unesc = []
    for line in src.split('\n'):
        line = line.strip()
        if line == '|':
            line = ''
        elif line.startswith('&&&'):
            line = '###' + line[3:]
        elif line.startswith('&&'):
            line = '##' + line[2:]
        elif line.startswith('&'):
            line = '#' + line[1:]
        unesc.append(line)
    return '\n'.join(unesc)
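
# Illustrative example (not part of the original module): '&'-escaped headings
# and the '|' empty-line marker are restored to plain gemtext:
#   unescape_ini_gemtext('&& News\n|\n* item') == '## News\n\n* item'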

def is_valid_name(name):
    if len(name) < 2 or len(name) > 30:
        return False
    return re.match(r'^[\w-]+$', name) is not None

def plural_s(i, suffix='s'):
    return '' if i == 1 else suffix

def parse_at_names(text) -> list:
    # Collect unique @names mentioned in the text, lowercased.
    names = set()
    pattern = re.compile(r'@([\w-]+)')
    pos = 0
    while pos < len(text):
        found = pattern.search(text, pos)
        if not found:
            break
        names.add(found[1].lower())
        pos = found.end()
    return list(names)
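
# Illustrative example (added, not in the original module): mentions are
# deduplicated and lowercased; the order of the returned list is not
# guaranteed, since a set is used internally:
#   sorted(parse_at_names('Thanks @Alice and @bob-smith!')) == ['alice', 'bob-smith']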

def parse_likely_commit_hashes(text) -> list:
    # Collect unique hex strings of at least seven characters that look like
    # commit hashes.
    hashes = set()
    pattern = re.compile(r'\b[0-9a-fA-F]{7,}\b')
    pos = 0
    while pos < len(text):
        found = pattern.search(text, pos)
        if not found:
            break
        hashes.add(found[0].lower())
        pos = found.end()
    return list(hashes)

def clean_text(text):
    # Clean up the text: ensure that preformatted blocks are closed.
    pre = False
    for line in text.split('\n'):
        if line[:3] == '```':
            pre = not pre
    if pre:
        # Close the dangling preformatted block.
        if not text.endswith('\n'):
            text += '\n'
        text += '```'
    return text.rstrip()
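
# Illustrative example (added): an unterminated preformatted block is closed so
# it cannot swallow the rest of the rendered page:
#   clean_text('Intro\n```\ncode sample') == 'Intro\n```\ncode sample\n```'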

def clean_title(title):
    # Strip `=>` and other Gemini syntax.
    cleaned = []
    pre = False
    unlabeled_link_pattern = re.compile(r'(\w+://[^ ]+) — \1')
    for line in title.split('\n'):
        if line[:3] == '```':
            if not pre:
                pre_label = line[3:].strip()
                if len(pre_label) == 0:
                    pre_label = 'preformatted'
                line = f'[{pre_label}]'
                cleaned.append(line)
            pre = not pre
            continue
        if pre:
            continue
        found = GEMTEXT_MARKUP.match(line)
        if found:
            line = line[found.end():]
        line = unlabeled_link_pattern.sub(r'\1', line)
        line = line.replace('\t', ' ')
        cleaned.append(line)
    title = ' '.join(cleaned).strip()
    return title
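
# Illustrative example (added): markup is stripped and the lines are joined
# into a single-line title:
#   clean_title('# Hello\n=> gemini://example.com Example') == 'Hello gemini://example.com Example'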

def clean_tinylog(text):
    # Clean it up as per the Tinylog specification.
    clean = []
    pre = False
    for line in text.split('\n'):
        if line.startswith('```'):
            clean.append(line)
            pre = not pre
            continue
        if pre:
            clean.append(line)
            continue
        m = re.search(r'^(##?)[^#]', line)  # only level 3 headings allowed
        if m:
            line = '###' + line[len(m[1]):]
        clean.append(line)
    return '\n'.join(clean)
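
# Illustrative example (added): level 1/2 headings outside preformatted blocks
# are demoted to level 3, the only level Tinylog entries allow:
#   clean_tinylog('## 2023-05-09 Entry') == '### 2023-05-09 Entry'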

def prefix_links(src, prefix):
    """Add a prefix to link labels."""
    if not prefix:
        return src
    lines = []
    pattern = re.compile(r'^\s*=>\s*([^ ]+)(\s+(.*))?$')
    for line in src.split('\n'):
        m = pattern.match(line)
        if m:
            label = m[3].strip() if m[3] and len(m[3]) else ''
            if len(label) == 0:
                label = m[1]
                # Omit the gemini scheme.
                if label.startswith('gemini://'):
                    label = label[9:]
            line = f'=> {m[1]} {prefix}{label}'
        lines.append(line)
    return '\n'.join(lines)
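
# Illustrative example (added): an unlabeled link gets its URL, minus the
# gemini scheme, as the label, with the given prefix prepended:
#   prefix_links('=> gemini://example.com/page', 'Re: ') == '=> gemini://example.com/page Re: example.com/page'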

def shorten_text(text, n):
    """Truncate and cut at a whitespace or word boundary."""
    text = text[:n]
    if len(text) == n:
        if text[-1] == ' ':
            return text.strip()
        m = re.search(r'[\w,.]+$', text)
        if m:
            return text[:m.start()].rstrip() + '...'
    return text.strip()
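
# Illustrative example (added): the cut falls back to the last word boundary
# and an ellipsis marks the truncation:
#   shorten_text('The quick brown fox jumps', 18) == 'The quick brown...'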

def time_delta_text(sec, date_ts, suffix='ago', now='Now',
                    date_prefix='', date_fmt='%Y-%m-%d', date_sep=' · '):
    # Describe an age of `sec` seconds in human-readable terms; for ages of a
    # day or more, prepend the date formatted from `date_ts`.
    if sec < 2:
        return now
    if sec < 60:
        return f'{sec} seconds {suffix}'
    mins = int(sec / 60)
    if sec < 3600:
        return f'{mins} minute{plural_s(mins)} {suffix}'
    hours = int(sec / 3600)
    if hours <= 24:
        return f'{hours} hour{plural_s(hours)} {suffix}'
    days = round(sec / 3600 / 24)
    dt = datetime.datetime.fromtimestamp(date_ts, UTC)
    age = date_prefix + dt.strftime(date_fmt)
    if days < 14:
        return age + f'{date_sep}{days} day{plural_s(days)} {suffix}'
    weeks = round(days / 7)
    if weeks <= 8:
        return age + f'{date_sep}{weeks} week{plural_s(weeks)} {suffix}'
    months = round(days / (365 / 12))  # average month length
    if months < 12:
        return age + f'{date_sep}{months} month{plural_s(months)} {suffix}'
    years = round(days / 365)
    return age + f'{date_sep}{years} year{plural_s(years)} {suffix}'


def ago_text(ts, suffix='ago', now='Now'):
    sec = max(0, int(time.time()) - ts)
    return time_delta_text(sec, ts, suffix, now)
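
# Illustrative example (added): short ages only describe the delta; for ages of
# a day or more the formatted date is prepended (e.g. '2023-05-09 · 3 days ago'):
#   time_delta_text(90, 0) == '1 minute ago'           # date_ts unused for short deltas
#   ago_text(int(time.time()) - 7200) == '2 hours ago'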

def is_empty_query(req):
    return req.query is None or len(req.query) == 0


def clean_query(req):
    if req.query is None:
        return ''
    return clean_text(urlparse.unquote(req.query)).strip()


def nonzero(value):
    return 1 if value else 0

def parse_link_segment_query(req) -> tuple:
    if req.query is None:
        return '', ''
    q = urlparse.unquote(req.query).replace('\n', ' ')
    found = re.match(r'^\s*(=>)?\s*([^\s]+)(\s+(.+))?\s*$', q)
    if not found:
        raise GeminiError(59, 'Invalid link syntax (enter URL followed by label, separated with space)')
    seg_url = found.group(2)
    if '://' not in seg_url:
        seg_url = 'gemini://' + seg_url
    parsed = urlparse.urlparse(seg_url)
    if not parsed.scheme or not parsed.netloc:
        raise GeminiError(59, 'Invalid URL')
    if found[4]:
        seg_text = clean_title(found[4])
    else:
        seg_text = ''
    return seg_url, seg_text
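
# Illustrative example (added): given a request whose query string decodes to
# 'example.com My Site', the missing scheme defaults to gemini:// and the
# result is ('gemini://example.com', 'My Site').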

def form_link(url_label: tuple):
    url, label = url_label
    if len(url) and len(label):
        return url + ' ' + label
    if len(label) == 0:
        return url
    return ''