Add CI checks, closes #6
continuous-integration/drone/push: Build is passing

Reviewed-on: #7
lucidiot 2021-07-24 16:16:03 +00:00
commit c17e732462
6 changed files with 103 additions and 28 deletions

.drone.yml (new file, 29 additions)

@@ -0,0 +1,29 @@
---
kind: pipeline
type: docker
name: default
steps:
  - name: flake8
    image: python:3-alpine
    depends_on:
      - clone
    commands:
      - pip install -r requirements-dev.txt
      - flake8 .
  - name: isort
    image: python:3-alpine
    depends_on:
      - clone
    commands:
      - pip install -r requirements-dev.txt
      - isort --check --diff .
  - name: mypy
    image: python:3-alpine
    depends_on:
      - clone
    commands:
      - pip install -r requirements-dev.txt
      - mypy .
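Every step starts from a clean python:3-alpine container, installs the dev requirements, and runs exactly one checker; since each step depends only on clone, the three linters run independently of one another. For contributors without a local Drone runner, a rough equivalent could be a small helper along these lines (a hypothetical sketch, not part of this commit; it assumes the tools from requirements-dev.txt are already installed in the current environment):

#!/usr/bin/env python3
"""Run the same checks as the Drone pipeline against the working copy."""
import subprocess
import sys

# Mirrors the `commands` of the flake8, isort and mypy steps in .drone.yml,
# minus the per-step `pip install`.
CHECKS = [
    ['flake8', '.'],
    ['isort', '--check', '--diff', '.'],
    ['mypy', '.'],
]


def main() -> int:
    exit_code = 0
    for command in CHECKS:
        print('$', ' '.join(command))
        # Keep going after a failure so every checker gets to report,
        # just like the independent pipeline steps.
        exit_code = max(exit_code, subprocess.run(command).returncode)
    return exit_code


if __name__ == '__main__':
    sys.exit(main())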

.flake8 (new file, 5 additions)

@@ -0,0 +1,5 @@
[flake8]
exclude=build,.cache,.eggs,.git
# Override the errors that Flake8 ignores by default to lint very hard.
# Only ignore W503, which is deprecated and conflicts with W504.
ignore=W503

README.md (modified)

@@ -1,5 +1,7 @@
 <h1 align="center">fprss</h1>
+[![Build Status](https://drone.tildegit.org/api/badges/lucidiot/fprss/status.svg)](https://drone.tildegit.org/lucidiot/fprss)
 A simple script to make a `#fridaypostcard` RSS feed.
 ## Dependencies

main Python script (modified)

@@ -1,24 +1,33 @@
 #!/usr/bin/env python3
-from collections import namedtuple
-from datetime import datetime, timezone
-from pathlib import Path
-from requests.exceptions import HTTPError
-from typing import Iterator, Optional
-from urllib.parse import urlparse
 import json
 import re
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, Iterator, NamedTuple, Optional, Tuple
+from urllib.parse import urlparse
 import requests
-import xmltodict
+import xmltodict  # type: ignore
+from requests.exceptions import HTTPError
 RSS_DATE_FORMAT = '%a, %d %b %Y %T %z'
-SANITIZE_REGEX = re.compile(r'(?:[\002\017\021\026\035\036\037]|\003(?:[0-9]{1,2}(?:,[0-9]{1,2})?)?|\004(?:[0-9A-F]{,6})?)', re.IGNORECASE)
-URL_REGEX = re.compile(r'https?://[A-Za-z0-9-._~:/?#[\]%@!$&\'()*+,;=]+', re.IGNORECASE)
+SANITIZE_REGEX = re.compile(
+    r'(?:[\002\017\021\026\035\036\037]'
+    r'|\003(?:[0-9]{1,2}(?:,[0-9]{1,2})?)?'
+    r'|\004(?:[0-9A-F]{,6})?)',
+    re.IGNORECASE
+)
+URL_REGEX = re.compile(
+    r'https?://[A-Za-z0-9-._~:/?#[\]%@!$&\'()*+,;=]+',
+    re.IGNORECASE
+)
 LOG_FILE = Path('~archangelic/irc/log').expanduser()
 CACHE_FILE = Path(__file__).absolute().parent / 'cache.json'
 OUTPUT_PATH = Path('~/public_html/fridaypostcard.xml').expanduser()
 IGNORE_USERNAMES = {'quote_bot'}
-# We cannot safely assume we will know all image extensions, but there are some obvious and common extensions that we can ignore.
+# We cannot safely assume we will know all image extensions,
+# but there are some obvious and common extensions that we can ignore.
 IGNORE_EXTENSIONS = {'.html', '.htm', '.xml', '.json'}
 KNOWN_MIME_TYPES = {
     '.jpg': 'image/jpeg',
@@ -31,10 +40,17 @@ KNOWN_MIME_TYPES = {
     '.webp': 'image/webp',
 }
-Postcard = namedtuple('Postcard', ['timestamp', 'username', 'url', 'message', 'mime_type', 'length'])
 # MIME type and length cache to avoid making hundreds of requests each time
-cache = {}
+cache: Dict[str, Tuple[str, str]] = {}
+class Postcard(NamedTuple):
+    timestamp: int
+    username: str
+    url: str
+    message: str
+    mime_type: str
+    length: str
 def get_logs() -> Iterator[str]:
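The switch from collections.namedtuple to a typing.NamedTuple subclass, together with the Dict[str, Tuple[str, str]] annotation on the cache, is what gives the new mypy step something to check: field access and cache values now carry concrete types. A minimal standalone illustration of the pattern (example names only; this is not code from the repository):

from typing import Dict, NamedTuple, Tuple


class Attachment(NamedTuple):
    mime_type: str
    length: str


# (mime_type, length) pairs keyed by URL, mirroring the cache annotation above.
metadata: Dict[str, Tuple[str, str]] = {}
metadata['http://example.com/a.jpg'] = ('image/jpeg', '1024')  # accepted
# metadata['http://example.com/b.jpg'] = ['image/jpeg', '0']   # rejected by mypy: a list is not a Tuple[str, str]

print(Attachment(mime_type='image/jpeg', length='1024').mime_type)  # attribute access is type-checked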
@@ -52,25 +68,28 @@ def parse_log(log: str) -> Optional[Postcard]:
     timestamp, username, message = log.split("\t", 3)
     if username in IGNORE_USERNAMES:
-        return
+        return None
     message = sanitize_message(message)
     match = URL_REGEX.search(message)
     # Ignore messages with invalid URLs
     if not match:
-        return
+        return None
     url_str = match.group()
-    message = message.replace(url_str, '').replace('#fridaypostcard', '').strip()
+    message = message \
+        .replace(url_str, '') \
+        .replace('#fridaypostcard', '') \
+        .strip()
     try:
         url = urlparse(url_str)
-    except:
-        return
+    except Exception:
+        return None
     extension = Path(url.path).suffix
     if extension in IGNORE_EXTENSIONS:
-        return
+        return None
     # Force-replace https with http to ensure PSP compatibility
     url_str = url_str.replace('https', 'http')
@@ -81,24 +100,32 @@ def parse_log(log: str) -> Optional[Postcard]:
     if extension not in KNOWN_MIME_TYPES:
         url_str += '.jpg'
-    mime_type, length = cache.get(url_str, ['', '0'])
+    mime_type, length = cache.get(url_str, ('', '0'))
     if not mime_type:
         try:
-            with requests.get(url_str, allow_redirects=True, stream=True, timeout=5) as resp:
+            with requests.get(
+                    url_str,
+                    allow_redirects=True,
+                    stream=True,
+                    timeout=5) as resp:
                 resp.raise_for_status()
                 length = resp.headers.get('Content-Length', '0')
-                mime_type = resp.headers.get('Content-Type', KNOWN_MIME_TYPES.get(extension, ''))
+                mime_type = resp.headers.get(
+                    'Content-Type',
+                    KNOWN_MIME_TYPES.get(extension, '')
+                )
         except HTTPError as e:
-            # Dirty hack to avoid repeating lots of requests for images that are now broken.
+            # Dirty hack to avoid repeating lots of requests
+            # for images that are now broken.
             if e.response.status_code >= 400 and e.response.status_code <= 500:
                 mime_type = KNOWN_MIME_TYPES.get(extension, 'image/x-error')
                 length = '0'
-                cache[url_str] = [mime_type, length]
-            return
+                cache[url_str] = (mime_type, length)
+            return None
         except Exception:
-            return
-    cache[url_str] = [mime_type, length]
+            return None
+    cache[url_str] = (mime_type, length)
     return Postcard(
         timestamp=int(timestamp),
@@ -177,7 +204,13 @@ def main():
             "item": list({
                 # Unique by GUID
                 item["guid"]: item
-                for item in map(to_item, filter(None, map(parse_log, get_logs())))
+                for item in map(
+                    to_item,
+                    filter(
+                        None,
+                        map(parse_log, get_logs())
+                    )
+                )
             }.values()),
         }
     }
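The main() change only re-wraps the deduplication expression to satisfy the line-length check; the underlying idiom is unchanged: building a dict keyed by GUID and taking its values keeps one item per GUID, with later duplicates overwriting earlier ones. A small self-contained example of that idiom (illustrative data, not from the feed):

# Deduplicate dicts by a key, keeping the last occurrence per key.
items = [
    {'guid': 'a', 'title': 'first'},
    {'guid': 'b', 'title': 'second'},
    {'guid': 'a', 'title': 'third'},
]
unique = list({item['guid']: item for item in items}.values())
print(unique)
# [{'guid': 'a', 'title': 'third'}, {'guid': 'b', 'title': 'second'}]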

requirements-dev.txt (new file, 4 additions)

@@ -0,0 +1,4 @@
flake8~=3.9
isort>=5.9
mypy~=0.910
types-requests==2.25.0

requirements.txt (new file, 2 additions)

@@ -0,0 +1,2 @@
requests~=2.22
xmltodict==0.12
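The requirements files mix pinning styles: compatible-release pins (~=), a minimum-only pin (isort>=5.9), and exact pins (==). For example, requests~=2.22 accepts any 2.x release from 2.22 onwards, while xmltodict and types-requests are held to a single version. A quick way to check what a specifier admits is the packaging library (not a dependency of this project; install it separately if you want to try this):

from packaging.specifiers import SpecifierSet

compatible = SpecifierSet('~=2.22')  # equivalent to >=2.22, <3.0
print('2.25.1' in compatible)  # True
print('3.0.0' in compatible)   # False

exact = SpecifierSet('==0.12')
print('0.12.0' in exact)       # True: PEP 440 treats 0.12 and 0.12.0 as equal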