# simpleertube/peertube.py

from bs4 import BeautifulSoup
import requests
import json
import sys
from datetime import datetime
from utils import FailedRequest
# Helper class for in-memory caching. Any other cache implementing the same
# get(key, ttl) / set(key, value) API can be substituted.
# Default TTL: 3600s (1h)
class Cache:
    def __init__(self, ttl=3600):
        """Create an empty cache with a default time-to-live in seconds."""
        # Maps key -> (value, datetime of last update)
        self.dict = {}
        self.ttl = ttl

    def get(self, key, ttl=None):
        """Return the cached value for `key`, or None when absent or expired.

        `ttl` overrides the default expiry for this single lookup.
        Use ttl=0 to disable the cache (the entry is always treated as
        expired, regardless of clock resolution).
        """
        if ttl is None:
            ttl = self.ttl
        if ttl == 0:
            # Cache explicitly disabled for this lookup. Checked up front so
            # a zero-duration clock diff can never return stale data.
            return None
        if key not in self.dict:
            # No data
            return None
        value, last_time_updated = self.dict[key]
        time_diff = datetime.now() - last_time_updated
        if time_diff.total_seconds() > ttl:
            # Outdated data
            return None
        # Data still valid according to TTL
        return value

    def set(self, key, value):
        """Store `value` under `key`, stamped with the current time."""
        self.dict[key] = [value, datetime.now()]
class API:
    """Client for the Peertube REST API with pluggable caching.

    Initialized with a caching backend and a default TTL (seconds), which can
    be overridden per request via each method's `ttl` argument. The caching
    backend must implement get(key, ttl) and set(key, value) — see Cache.
    The SepiaSearch instance used for cross-instance search can also be
    specified here.

    NOTE: cached()/save() derive the cache namespace from the *immediate
    caller's* function name (sys._getframe(1)), so they must be called
    directly from the public API method, never through a shared helper —
    otherwise distinct endpoints would collide on the same cache keys.
    """

    def __init__(self, cache, ttl=3600, search="https://search.joinpeertube.org"):
        self.cache = cache
        self.ttl = ttl
        # If search instance has no protocol set, assume https://
        if not search.startswith("http"):
            search = "https://" + search
        # Remove trailing slash
        if search.endswith('/'):
            self.search_source = search[0:-1]
        else:
            self.search_source = search

    def request(self, url):
        """GET `url` and parse the body as JSON; never raises.

        Returns the parsed JSON on success. Returns a FailedRequest when the
        fetch fails, the body is not valid JSON, or the JSON carries an
        "error" field. All failures are logged. Only use with JSON endpoints;
        for plaintext use self.plaintext_request().
        """
        print("[DEBUG] Requesting JSON URL %s" % url)
        try:
            # If serialization fails, we'll end up in `except` block
            parsed_response = json.loads(requests.get(url).text)
            if "error" in parsed_response:
                print("[WARN] Remote peertube returned error for %s:\n%s" % (url, parsed_response["error"]))
                return FailedRequest("Remote peertube server returned an error for URL %s:\n%s" % (url, parsed_response["error"]))
            return parsed_response
        except Exception as e:
            print("[WARN] Error fetching page \"%s\":\n%s" % (url, e))
            return FailedRequest(e)

    def plaintext_request(self, url):
        """GET `url` and return the raw body text; never raises.

        Returns a FailedRequest (and logs) on any error. For JSON endpoints
        use self.request() instead.
        """
        print("[DEBUG] Requesting plaintext URL %s" % url)
        try:
            return requests.get(url).text
        except Exception as e:
            print("[WARN] Error fetching page \"%s\":\n%s" % (url, e))
            return FailedRequest(e)

    def cached(self, key, ttl=None):
        """Look up `key` in the cache, or None when absent/expired.

        The stored key is automatically prefixed "peertube-FUNC-" where FUNC
        is the *calling* method's name, namespacing entries per endpoint.
        """
        if ttl is None:
            ttl = self.ttl
        caller = sys._getframe(1).f_code.co_name
        key_name = "peertube-" + caller + "-" + key
        res = self.cache.get(key_name, ttl=ttl)
        if res is None:
            print("[CACHE] Entry not found for %s: %s" % (caller, key))
            return None
        print("[CACHE] Found entry for %s: %s" % (caller, key))
        return res

    def save(self, key, value):
        """Store `value` in the cache under the caller-namespaced `key`.

        Mirrors cached(): the key is prefixed "peertube-FUNC-" where FUNC is
        the calling method's name.
        """
        caller = sys._getframe(1).f_code.co_name
        key_name = "peertube-" + caller + "-" + key
        print("[CACHE] Saving entry for %s: %s" % (caller, key))
        self.cache.set(key_name, value)

    def instance_name(self, domain, ttl=None):
        """Fetch an instance's display name from its HTML <title>; cached.

        Falls back to "PeerTube Instance" when the page has no <title>;
        returns (and caches) a FailedRequest when the page cannot be fetched.
        """
        cached = self.cached(domain, ttl=ttl)
        if cached is None:
            url = "https://" + domain
            result = self.plaintext_request(url)
            if not isinstance(result, FailedRequest):
                # Parse the page we already fetched. (Previously this issued
                # a second, unguarded requests.get() for the same URL, which
                # doubled traffic and could raise past the safe wrapper.)
                soup = BeautifulSoup(result, "lxml")
                title = soup.find('title')
                if title:
                    result = title.text
                else:
                    result = "PeerTube Instance"
            # Failures are cached too, consistent with the other endpoints.
            self.save(domain, result)
            return result
        return cached

    def search(self, query, start=0, count=10, ttl=None):
        """Search the configured self.search_source (SepiaSearch) for `query`.

        Returns the `data` list of `count` results after offset `start`, or a
        FailedRequest. The raw API response is what gets cached.
        """
        cache_key = "%s/%s/%s" % (str(start), str(count), query)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = self.search_source + "/api/v1/search/videos?search=" + query + "&start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            self.save(cache_key, res)
            if isinstance(res, FailedRequest):
                return res
            else:
                return res["data"]
        elif isinstance(cached, FailedRequest):
            return cached
        else:
            return cached["data"]

    def search_instance(self, domain, term, start=0, count=10, ttl=None):
        """Search a specific Peertube instance for `term`; cached.

        Returns `count` local results after offset `start`, sorted by match.
        Slightly different URL format from SepiaSearch (self.search()).
        """
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, term)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            # TODO: Here HTTPS is assumed
            url = "https://" + domain + "/api/v1/search/videos?start=" + str(start) + "&count=" + str(count) + "&search=" + term + "&sort=-match&searchTarget=local"
            res = self.request(url)
            self.save(cache_key, res)
            if isinstance(res, FailedRequest):
                return res
            else:
                return res["data"]
        elif isinstance(cached, FailedRequest):
            return cached
        else:
            return cached["data"]

    def instance_videos(self, domain, start=0, count=10, filt="local", ttl=None):
        """List an instance's videos; cached.

        Default filter is "local"; other filters: "trending", "most-liked",
        "recently-added". Returns the `data` list or a FailedRequest (also
        for an unknown `filt`).
        """
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), filt, domain)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            # Map the friendly filter name onto the API's sort/filter params.
            if filt == "trending":
                filt = None
                sort = "-trending"
            elif filt == "most-liked":
                filt = None
                sort = "-likes"
            elif filt == "local":
                sort = "-publishedAt"
                filt = "local"
            elif filt == "recently-added":
                sort = "publishedAt"
                filt = None
            else:
                return FailedRequest("instance_videos called with bogus filter: %s" % filt)
            url = "https://" + domain + "/api/v1/videos?sort=" + sort + "&start=" + str(start) + "&count=" + str(count)
            if filt: url = url + "&filter=" + filt
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                # Extract `data` entry from API response
                res = res["data"]
            self.save(cache_key, res)
            return res
        else:
            return cached

    def video(self, domain, id, ttl=None):
        """Fetch a single video's full JSON description; cached."""
        cache_key = "%s/%s" % (domain, id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/videos/" + id
            res = self.request(url)
            self.save(cache_key, res)
            return res
        return cached

    def video_captions(self, domain, id, ttl=None):
        """List a video's caption descriptors (`data` entry); cached."""
        cache_key = "%s/%s" % (domain, id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/videos/" + id + "/captions"
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def video_captions_proxy(self, domain, caption_id, ttl=None):
        """Fetch a caption file's raw text through this server; cached.

        URL is hardcoded to prevent further proxying. URL may change with
        updates, see captions API, e.g.
        https://kolektiva.media/api/v1/videos/9c9de5e8-0a1e-484a-b099-e80766180a6d/captions
        TODO: What if the captionPath doesn't follow this format on an
        instance? Should we really proxy ANYTHING returned by API?
        """
        cache_key = "%s/%s" % (domain, caption_id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/lazy-static/video-captions/" + caption_id
            res = self.plaintext_request(url)
            self.save(cache_key, res)
            return res
        return cached

    def video_comments(self, domain, id, ttl=None):
        """List a video's comment threads (`data` entry); cached."""
        cache_key = "%s/%s" % (domain, id)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/videos/" + id + "/comment-threads"
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def account(self, domain, name, ttl=None):
        """Fetch an account's JSON description; cached."""
        cache_key = "%s/%s" % (domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/accounts/" + name
            res = self.request(url)
            self.save(cache_key, res)
            return res
        return cached

    def account_channels(self, domain, name, start=0, count=10, ttl=None):
        """List an account's video channels (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/accounts/" + name + "/video-channels?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def account_videos(self, domain, name, start=0, count=10, ttl=None):
        """List an account's videos (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/accounts/" + name + "/videos?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def channel(self, domain, name, ttl=None):
        """Fetch a video channel's JSON description; cached."""
        cache_key = "%s/%s" % (domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/video-channels/" + name
            res = self.request(url)
            self.save(cache_key, res)
            return res
        return cached

    def channel_videos(self, domain, name, start=0, count=10, ttl=None):
        """List a channel's videos (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/video-channels/" + name + "/videos?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached

    def channel_playlists(self, domain, name, start=0, count=10, ttl=None):
        """List a channel's playlists (`data` entry); cached."""
        cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
        cached = self.cached(cache_key, ttl=ttl)
        if cached is None:
            url = "https://" + domain + "/api/v1/video-channels/" + name + "/video-playlists?start=" + str(start) + "&count=" + str(count)
            res = self.request(url)
            if not isinstance(res, FailedRequest):
                res = res["data"]
            self.save(cache_key, res)
            return res
        return cached