# bubble/utils.py

import datetime
import re
import time
import urllib.parse as urlparse
UTC = datetime.timezone.utc
GEMTEXT_MARKUP = re.compile(r'^(\s*=>\s*|\* |>\s*|##?#?)')
INNER_LINK_PREFIX = ''

class GeminiError(Exception):
    def __init__(self, code, msg):
        super().__init__(msg)
        self.code = code

def unescape_ini_gemtext(src):
    # Reverse the escaping used for gemtext kept in INI-style storage:
    # '|' stands for an empty line and '&' replaces the '#' heading prefix.
    unesc = []
    for line in src.split('\n'):
        line = line.strip()
        if line == '|':
            line = ''
        elif line.startswith('&&&'):
            line = '###' + line[3:]
        elif line.startswith('&&'):
            line = '##' + line[2:]
        elif line.startswith('&'):
            line = '#' + line[1:]
        unesc.append(line)
    return '\n'.join(unesc)
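
# Illustrative example (not part of the original module): '&'-escaped headings
# and the '|' empty-line marker are restored to plain gemtext:
#   unescape_ini_gemtext('&& News\n|\n* item') == '## News\n\n* item'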

def is_valid_name(name):
    if len(name) < 2 or len(name) > 30:
        return False
    return re.match(r'^[\w-]+$', name) is not None

def plural_s(i, suffix='s'):
    return '' if i == 1 else suffix

def parse_at_names(text) -> list:
    # Collect unique @names mentioned in the text, lowercased.
    names = set()
    pattern = re.compile(r'@([\w-]+)')
    pos = 0
    while pos < len(text):
        found = pattern.search(text, pos)
        if not found:
            break
        names.add(found[1].lower())
        pos = found.end()
    return list(names)
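
# Illustrative example (added, not in the original module): mentions are
# deduplicated and lowercased; the order of the returned list is not
# guaranteed, since a set is used internally:
#   sorted(parse_at_names('Thanks @Alice and @bob-smith!')) == ['alice', 'bob-smith']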

def parse_likely_commit_hashes(text) -> list:
    # Collect unique hex strings of at least seven characters that look like
    # commit hashes.
    hashes = set()
    pattern = re.compile(r'\b[0-9a-fA-F]{7,}\b')
    pos = 0
    while pos < len(text):
        found = pattern.search(text, pos)
        if not found:
            break
        hashes.add(found[0].lower())
        pos = found.end()
    return list(hashes)

def clean_text(text):
    # Clean up the text: ensure that preformatted blocks are closed.
    pre = False
    for line in text.split('\n'):
        if line[:3] == '```':
            pre = not pre
    if pre:
        # Close the dangling preformatted block.
        if not text.endswith('\n'):
            text += '\n'
        text += '```'
    return text.rstrip()
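
# Illustrative example (added): an unterminated preformatted block is closed so
# it cannot swallow the rest of the rendered page:
#   clean_text('Intro\n```\ncode sample') == 'Intro\n```\ncode sample\n```'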

def clean_title(title):
    # Strip `=>` and other Gemini syntax.
    cleaned = []
    pre = False
    unlabeled_link_pattern = re.compile(r'(\w+://[^ ]+) — \1')
    for line in title.split('\n'):
        if line[:3] == '```':
            if not pre:
                pre_label = line[3:].strip()
                if len(pre_label) == 0:
                    pre_label = 'preformatted'
                line = f'[{pre_label}]'
                cleaned.append(line)
            pre = not pre
            continue
        if pre:
            continue
        found = GEMTEXT_MARKUP.match(line)
        if found:
            line = line[found.end():]
        line = unlabeled_link_pattern.sub(r'\1', line)
        line = line.replace('\t', ' ')
        cleaned.append(line)
    title = ' '.join(cleaned).strip()
    return title
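
# Illustrative example (added): markup is stripped and the lines are joined
# into a single-line title:
#   clean_title('# Hello\n=> gemini://example.com Example') == 'Hello gemini://example.com Example'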

def clean_tinylog(text):
    # Clean it up as per the Tinylog specification.
    clean = []
    pre = False
    for line in text.split('\n'):
        if line.startswith('```'):
            clean.append(line)
            pre = not pre
            continue
        if pre:
            clean.append(line)
            continue
        m = re.search(r'^(##?)[^#]', line)  # only level 3 headings allowed
        if m:
            line = '###' + line[len(m[1]):]
        clean.append(line)
    return '\n'.join(clean)
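
# Illustrative example (added): level 1/2 headings outside preformatted blocks
# are demoted to level 3, the only level Tinylog entries allow:
#   clean_tinylog('## 2023-05-09 Entry') == '### 2023-05-09 Entry'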

def prefix_links(src, prefix):
    """Add a prefix to link labels."""
    if not prefix:
        return src
    lines = []
    pattern = re.compile(r'^\s*=>\s*([^ ]+)(\s+(.*))?$')
    for line in src.split('\n'):
        m = pattern.match(line)
        if m:
            label = m[3].strip() if m[3] and len(m[3]) else ''
            if len(label) == 0:
                label = m[1]
                # Omit the gemini scheme.
                if label.startswith('gemini://'):
                    label = label[9:]
            line = f'=> {m[1]} {prefix}{label}'
        lines.append(line)
    return '\n'.join(lines)
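
# Illustrative example (added): an unlabeled link gets its URL, minus the
# gemini scheme, as the label, with the given prefix prepended:
#   prefix_links('=> gemini://example.com/page', 'Re: ') == '=> gemini://example.com/page Re: example.com/page'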

def shorten_text(text, n):
    """Truncate and cut at a whitespace or word boundary."""
    text = text[:n]
    if len(text) == n:
        if text[-1] == ' ':
            return text.strip()
        m = re.search(r'[\w,.]+$', text)
        if m:
            return text[:m.start()].rstrip() + '...'
    return text.strip()
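
# Illustrative example (added): the cut falls back to the last word boundary
# and an ellipsis marks the truncation:
#   shorten_text('The quick brown fox jumps', 18) == 'The quick brown...'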

def time_delta_text(sec, date_ts, suffix='ago', now='Now',
                    date_prefix='', date_fmt='%Y-%m-%d', date_sep=' · '):
    # Describe an age of `sec` seconds in human-readable terms; for ages of a
    # day or more, prepend the date formatted from `date_ts`.
    if sec < 2:
        return now
    if sec < 60:
        return f'{sec} seconds {suffix}'
    mins = int(sec / 60)
    if sec < 3600:
        return f'{mins} minute{plural_s(mins)} {suffix}'
    hours = int(sec / 3600)
    if hours <= 24:
        return f'{hours} hour{plural_s(hours)} {suffix}'
    days = round(sec / 3600 / 24)
    dt = datetime.datetime.fromtimestamp(date_ts, UTC)
    age = date_prefix + dt.strftime(date_fmt)
    if days < 14:
        return age + f'{date_sep}{days} day{plural_s(days)} {suffix}'
    weeks = round(days / 7)
    if weeks <= 8:
        return age + f'{date_sep}{weeks} week{plural_s(weeks)} {suffix}'
    months = round(days / (365 / 12))  # average month length
    if months < 12:
        return age + f'{date_sep}{months} month{plural_s(months)} {suffix}'
    years = round(days / 365)
    return age + f'{date_sep}{years} year{plural_s(years)} {suffix}'


def ago_text(ts, suffix='ago', now='Now'):
    sec = max(0, int(time.time()) - ts)
    return time_delta_text(sec, ts, suffix, now)
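
# Illustrative example (added): short ages only describe the delta; for ages of
# a day or more the formatted date is prepended (e.g. '2023-05-09 · 3 days ago'):
#   time_delta_text(90, 0) == '1 minute ago'           # date_ts unused for short deltas
#   ago_text(int(time.time()) - 7200) == '2 hours ago'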

def is_empty_query(req):
    return req.query is None or len(req.query) == 0


def clean_query(req):
    if req.query is None:
        return ''
    return clean_text(urlparse.unquote(req.query)).strip()


def nonzero(value):
    return 1 if value else 0

def parse_link_segment_query(req) -> tuple:
    if req.query is None:
        return '', ''
    q = urlparse.unquote(req.query).replace('\n', ' ')
    found = re.match(r'^\s*(=>)?\s*([^\s]+)(\s+(.+))?\s*$', q)
    if not found:
        raise GeminiError(59, 'Invalid link syntax (enter URL followed by label, separated with space)')
    seg_url = found.group(2)
    if '://' not in seg_url:
        seg_url = 'gemini://' + seg_url
    parsed = urlparse.urlparse(seg_url)
    if not parsed.scheme or not parsed.netloc:
        raise GeminiError(59, 'Invalid URL')
    if found[4]:
        seg_text = clean_title(found[4])
    else:
        seg_text = ''
    return seg_url, seg_text
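
# Illustrative example (added): given a request whose query string decodes to
# 'example.com My Site', the missing scheme defaults to gemini:// and the
# result is ('gemini://example.com', 'My Site').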

def form_link(url_label: tuple):
    url, label = url_label
    if len(url) and len(label):
        return url + ' ' + label
    if len(label) == 0:
        return url
    return ''