# simpleertube/peertube.py

from bs4 import BeautifulSoup
import requests
import json
import sys
from datetime import datetime
from utils import FailedRequest
# Helper class for in-memory caching. Any other cache implementing the same
# get(key, ttl) / set(key, value) API can be substituted.
# Default TTL: 3600s (1h)
class Cache:
    def __init__(self, ttl=3600):
        """Create an empty cache with a default time-to-live in seconds."""
        # Maps key -> (value, datetime of last update)
        self.dict = {}
        self.ttl = ttl

    def get(self, key, ttl=None):
        """Return the cached value for `key`, or None when absent or expired.

        `ttl` overrides the default expiry for this single lookup.
        Use ttl=0 to disable the cache (the entry is always treated as
        expired, regardless of clock resolution).
        """
        if ttl is None:
            ttl = self.ttl
        if ttl == 0:
            # Cache explicitly disabled for this lookup. Checked up front so
            # a zero-duration clock diff can never return stale data.
            return None
        if key not in self.dict:
            # No data
            return None
        value, last_time_updated = self.dict[key]
        time_diff = datetime.now() - last_time_updated
        if time_diff.total_seconds() > ttl:
            # Outdated data
            return None
        # Data still valid according to TTL
        return value

    def set(self, key, value):
        """Store `value` under `key`, stamped with the current time."""
        self.dict[key] = [value, datetime.now()]
class API:
    """Client for the Peertube REST API with pluggable caching.

    Initialized with a caching backend and a default TTL (seconds), which can
    be overridden per request via each method's `ttl` argument. The caching
    backend must implement get(key, ttl) and set(key, value) — see Cache.
    The SepiaSearch instance used for cross-instance search can also be
    specified here.

    NOTE: cached()/save() derive the cache namespace from the *immediate
    caller's* function name (sys._getframe(1)), so they must be called
    directly from the public API method, never through a shared helper —
    otherwise distinct endpoints would collide on the same cache keys.
    """

    def __init__(self, cache, ttl=3600, search="https://search.joinpeertube.org"):
        self.cache = cache
        self.ttl = ttl
        # If search instance has no protocol set, assume https://
        if not search.startswith("http"):
            search = "https://" + search
        # Remove trailing slash
        if search.endswith('/'):
            self.search_source = search[0:-1]
        else:
            self.search_source = search

    def request(self, url):
        """GET `url` and parse the body as JSON; never raises.

        Returns the parsed JSON on success. Returns a FailedRequest when the
        fetch fails, the body is not valid JSON, or the JSON carries an
        "error" field. All failures are logged. Only use with JSON endpoints;
        for plaintext use self.plaintext_request().
        """
        print("[DEBUG] Requesting JSON URL %s" % url)
        try:
            # If serialization fails, we'll end up in `except` block
            parsed_response = json.loads(requests.get(url).text)
            if "error" in parsed_response:
                print("[WARN] Remote peertube returned error for %s:\n%s" % (url, parsed_response["error"]))
                return FailedRequest("Remote peertube server returned an error for URL %s:\n%s" % (url, parsed_response["error"]))
            return parsed_response
        except Exception as e:
            print("[WARN] Error fetching page \"%s\":\n%s" % (url, e))
            return FailedRequest(e)

    def plaintext_request(self, url):
        """GET `url` and return the raw body text; never raises.

        Returns a FailedRequest (and logs) on any error. For JSON endpoints
        use self.request() instead.
        """
        print("[DEBUG] Requesting plaintext URL %s" % url)
        try:
            return requests.get(url).text
        except Exception as e:
            print("[WARN] Error fetching page \"%s\":\n%s" % (url, e))
            return FailedRequest(e)

    def cached(self, key, ttl=None):
        """Look up `key` in the cache, or None when absent/expired.

        The stored key is automatically prefixed "peertube-FUNC-" where FUNC
        is the *calling* method's name, namespacing entries per endpoint.
        """
        if ttl is None:
            ttl = self.ttl
        caller = sys._getframe(1).f_code.co_name
        key_name = "peertube-" + caller + "-" + key
        res = self.cache.get(key_name, ttl=ttl)
        if res is None:
            print("[CACHE] Entry not found for %s: %s" % (caller, key))
            return None
        print("[CACHE] Found entry for %s: %s" % (caller, key))
        return res

    def save(self, key, value):
        """Store `value` in the cache under the caller-namespaced `key`.

        Mirrors cached(): the key is prefixed "peertube-FUNC-" where FUNC is
        the calling method's name.
        """
        caller = sys._getframe(1).f_code.co_name
        key_name = "peertube-" + caller + "-" + key
        print("[CACHE] Saving entry for %s: %s" % (caller, key))
        self.cache.set(key_name, value)

    def instance_name(self, domain, ttl=None):
        """Fetch an instance's display name from its HTML <title>; cached.

        Falls back to "PeerTube Instance" when the page has no <title>;
        returns (and caches) a FailedRequest when the page cannot be fetched.
        """
        cached = self.cached(domain, ttl=ttl)
        if cached is None:
            url = "https://" + domain
            result = self.plaintext_request(url)
            if not isinstance(result, FailedRequest):
                # Parse the page we already fetched. (Previously this issued
                # a second, unguarded requests.get() for the same URL, which
                # doubled traffic and could raise past the safe wrapper.)
                soup = BeautifulSoup(result, "lxml")
                title = soup.find('title')
                if title:
                    result = title.text
                else:
                    result = "PeerTube Instance"
            # Failures are cached too, consistent with the other endpoints.
            self.save(domain, result)
            return result
        return cached

    def search(self, query, start=0, count=10, ttl=None):
        """Search the configured self.search_source (SepiaSearch) for `query`.

        Returns the `data` list of `count` results after offset `start`, or a
        FailedRequest. The raw API response is what gets cached.
        """
        cache_key = "%s/%s/%s" % (str(start), str(count), query)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = self.search_source + "/api/v1/search/videos?search=" + query + "&start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            self.save(cache_key, res)
            if isinstance(res, FailedRequest):
                return res
            else:
                return res["data"]
        elif isinstance(cached, FailedRequest):
            return cached
        else:
            return cached["data"]

    def search_instance(self, domain, term, start=0, count=10, ttl=None):
        """Search a specific Peertube instance for `term`; cached.

        Returns `count` local results after offset `start`, sorted by match.
        Slightly different URL format from SepiaSearch (self.search()).
        """
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, term)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            # TODO: Here HTTPS is assumed
            url = "https://" + domain + "/api/v1/search/videos?start=" + str(start) + "&count=" + str(count) + "&search=" + term + "&sort=-match&searchTarget=local"
            res = self.request(url)
            self.save(cache_key, res)
            if isinstance(res, FailedRequest):
                return res
            else:
                return res["data"]
        elif isinstance(cached, FailedRequest):
            return cached
        else:
            return cached["data"]

    def instance_videos(self, domain, start=0, count=10, filt="local", ttl=None):
        """List an instance's videos; cached.

        Default filter is "local"; other filters: "trending", "most-liked",
        "recently-added". Returns the `data` list or a FailedRequest (also
        for an unknown `filt`).
        """
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), filt, domain)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            # Map the friendly filter name onto the API's sort/filter params.
            if filt == "trending":
                filt = None
                sort = "-trending"
            elif filt == "most-liked":
                filt = None
                sort = "-likes"
            elif filt == "local":
                sort = "-publishedAt"
                filt = "local"
            elif filt == "recently-added":
                sort = "publishedAt"
                filt = None
            else:
                return FailedRequest("instance_videos called with bogus filter: %s" % filt)
            url = "https://" + domain + "/api/v1/videos?sort=" + sort + "&start=" + str(start) + "&count=" + str(count)
            if filt: url = url + "&filter=" + filt
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                # Extract `data` entry from API response
                res = res["data"]
            self.save(cache_key, res)
            return res
        else:
            return cached

    def video(self, domain, id, ttl=None):
        """Fetch a single video's full JSON description; cached."""
        cache_key = "%s/%s" % (domain, id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/videos/" + id
            res = self.request(url)
            self.save(cache_key, res)
            return res
        return cached

    def video_captions(self, domain, id, ttl=None):
        """List a video's caption descriptors (`data` entry); cached."""
        cache_key = "%s/%s" % (domain, id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/videos/" + id + "/captions"
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def video_captions_proxy(self, domain, caption_id, ttl=None):
        """Fetch a caption file's raw text through this server; cached.

        URL is hardcoded to prevent further proxying. URL may change with
        updates, see captions API, e.g.
        https://kolektiva.media/api/v1/videos/9c9de5e8-0a1e-484a-b099-e80766180a6d/captions
        TODO: What if the captionPath doesn't follow this format on an
        instance? Should we really proxy ANYTHING returned by API?
        """
        cache_key = "%s/%s" % (domain, caption_id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/lazy-static/video-captions/" + caption_id
            res = self.plaintext_request(url)
            self.save(cache_key, res)
            return res
        return cached

    def video_comments(self, domain, id, ttl=None):
        """List a video's comment threads (`data` entry); cached."""
        cache_key = "%s/%s" % (domain, id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/videos/" + id + "/comment-threads"
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def account(self, domain, name, ttl=None):
        """Fetch an account's JSON description; cached."""
        cache_key = "%s/%s" % (domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/accounts/" + name
            res = self.request(url)
            self.save(cache_key, res)
            return res
        return cached

    def account_channels(self, domain, name, start=0, count=10, ttl=None):
        """List an account's video channels (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/accounts/" + name + "/video-channels?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def account_videos(self, domain, name, start=0, count=10, ttl=None):
        """List an account's videos (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/accounts/" + name + "/videos?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def channel(self, domain, name, ttl=None):
        """Fetch a video channel's JSON description; cached."""
        cache_key = "%s/%s" % (domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/video-channels/" + name
            res = self.request(url)
            self.save(cache_key, res)
            return res
        return cached

    def channel_videos(self, domain, name, start=0, count=10, ttl=None):
        """List a channel's videos (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/video-channels/" + name + "/videos?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def channel_playlists(self, domain, name, start=0, count=10, ttl=None):
        """List a channel's playlists (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/video-channels/" + name + "/video-playlists?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached