# PeerTube API client with pluggable caching (see Cache below).
import json
import sys
from datetime import datetime
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup

from utils import FailedRequest
# Helper Class for using caches, you can use any other cache that implements the same API
|
|
# Default TTL: 3600s (1h)
|
|
class Cache:
|
|
def __init__(self, ttl=3600):
|
|
self.dict = {}
|
|
self.ttl = ttl
|
|
|
|
# Use ttl=0 to disable
|
|
def get(self, key, ttl=None):
|
|
if ttl == None: ttl = self.ttl
|
|
if key in self.dict:
|
|
last_time_updated = (self.dict[key])[1]
|
|
time_diff = datetime.now() - last_time_updated
|
|
|
|
if time_diff.total_seconds() > ttl:
|
|
# Outdated data
|
|
return None
|
|
# Data still valid according to TTL
|
|
return (self.dict[key])[0]
|
|
|
|
else:
|
|
# No data
|
|
return None
|
|
|
|
def set(self, key, value):
|
|
self.dict[key] = [ value, datetime.now() ]
|
|
|
|
class API:
|
|
# The PeertubeAPI is initialized with a caching backend and a default TTL, that can be overriden in specific
|
|
# API request calls. The caching backend should implement a get(key, ttl) and set(key, value) API.
|
|
# Also can specify the SepiaSearch instance here
|
|
def __init__(self, cache, ttl=3600, search="https://search.joinpeertube.org"):
|
|
self.cache = cache
|
|
self.ttl = ttl
|
|
# If search instance has no protocol set, assume https://
|
|
if not search.startswith("http"): search = "https://" + search
|
|
# Remove trailing slash
|
|
if search.endswith('/'): self.search_source = search[0:-1]
|
|
else: self.search_source = search
|
|
|
|
# Wrapper around requests.get() so that it cannot fail
|
|
# If the request succeeds:
|
|
# - parse as JSON, or return None
|
|
# - if "error" field in JSON, return None
|
|
# - return the parsed JSON
|
|
# Else: return None
|
|
# In all cases where the function returns None, the errors are logged
|
|
# Use ttl=0 to disable Cache for this request
|
|
# Only use with JSON requests, otherwise use self.(plaintext_request)
|
|
def request(self, url):
|
|
print("[DEBUG] Requesting JSON URL %s" % url)
|
|
try:
|
|
# If serialization fails, we'll end up in `except` block
|
|
parsed_response = json.loads(requests.get(url).text)
|
|
if "error" in parsed_response:
|
|
print("[WARN] Remote peertube returned error for %s:\n%s" % (url, parsed_response["error"]))
|
|
return FailedRequest("Remote peertube server returned an error for URL %s:\n%s" % (url, parsed_response["error"]))
|
|
return parsed_response
|
|
except Exception as e:
|
|
print("[WARN] Error fetching page \"%s\":\n%s" % (url, e))
|
|
return FailedRequest(e)
|
|
|
|
# Wrapper around requests.get() so that it cannot fail
|
|
# Use with plaintext requests, for JSON requests use self.request()
|
|
def plaintext_request(self, url):
|
|
print("[DEBUG] Requesting plaintext URL %s" % url)
|
|
try:
|
|
return requests.get(url).text
|
|
except Exception as e:
|
|
print("[WARN] Error fetching page \"%s\":\n%s" % (url, e))
|
|
return FailedRequest(e)
|
|
|
|
# Returns an entry from cache, automatically prefixing "peertube-FUNC-" where FUNC is the caller method name
|
|
def cached(self, key, ttl=None):
|
|
if ttl == None: ttl = self.ttl
|
|
caller = sys._getframe(1).f_code.co_name
|
|
key_name = "peertube-" + caller + "-" + key
|
|
res = self.cache.get(key_name, ttl=ttl)
|
|
if res == None:
|
|
print("[CACHE] Entry not found for %s: %s" % (caller, key))
|
|
return None
|
|
print("[CACHE] Found entry for %s: %s" % (caller, key))
|
|
return res
|
|
|
|
# Save an entry into cache, automatically prefixing "peertube-FUNC-" where FUNC is the caller method name
|
|
def save(self, key, value):
|
|
caller = sys._getframe(1).f_code.co_name
|
|
key_name = "peertube-" + caller + "-" + key
|
|
print("[CACHE] Saving entry for %s: %s" % (caller, key))
|
|
self.cache.set(key_name, value)
|
|
|
|
# Fetch instance name from its HTML source
|
|
def instance_name(self, domain, ttl=None):
|
|
cached = self.cached(domain, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain
|
|
result = self.plaintext_request(url)
|
|
if not isinstance(result, FailedRequest):
|
|
soup = BeautifulSoup(requests.get("https://" + domain).text, "lxml")
|
|
title = soup.find('title')
|
|
if title:
|
|
result = title.text
|
|
else:
|
|
result = "PeerTube Instance"
|
|
self.save(domain, result)
|
|
return result
|
|
return cached
|
|
|
|
# Search the configured self.search_source for `query`, returning `count` items after `start`
|
|
def search(self, query, start=0, count=10, ttl=None):
|
|
cache_key = "%s/%s/%s" % (str(start), str(count), query)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = self.search_source + "/api/v1/search/videos?search=" + query + "&start=" + str(start) + "&count=" + str(count)
|
|
res = self.request(url)
|
|
self.save(cache_key, res)
|
|
if isinstance(res, FailedRequest):
|
|
return res
|
|
else:
|
|
return res["data"]
|
|
elif isinstance(cached, FailedRequest):
|
|
return cached
|
|
else:
|
|
return cached["data"]
|
|
|
|
# Search a specific Peertube instance for `query`,
|
|
# returning `count` items after `start`. Slightly different URL format from SepiaSearch (self.search())
|
|
def search_instance(self, domain, term, start=0, count=10, ttl=None):
|
|
cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, term)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
# TODO: Here HTTPS is assumed
|
|
url = "https://" + domain + "/api/v1/search/videos?start=" + str(start) + "&count=" + str(count) + "&search=" + term + "&sort=-match&searchTarget=local"
|
|
res = self.request(url)
|
|
self.save(cache_key, res)
|
|
if isinstance(res, FailedRequest):
|
|
return res
|
|
else:
|
|
return res["data"]
|
|
elif isinstance(cached, FailedRequest):
|
|
return cached
|
|
else:
|
|
return cached["data"]
|
|
|
|
# Default filter is local, other filters are: trending, most-liked, recently-added, local
|
|
def instance_videos(self, domain, start=0, count=10, filt="local", ttl=None):
|
|
cache_key = "%s/%s/%s/%s" % (str(start), str(count), filt, domain)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
if filt == "trending":
|
|
filt = None
|
|
sort = "-trending"
|
|
elif filt == "most-liked":
|
|
filt = None
|
|
sort = "-likes"
|
|
elif filt == "local":
|
|
sort = "-publishedAt"
|
|
filt = "local"
|
|
elif filt == "recently-added":
|
|
sort = "publishedAt"
|
|
filt = None
|
|
else:
|
|
return FailedRequest("instance_videos called with bogus filter: %s" % filt)
|
|
url = "https://" + domain + "/api/v1/videos?sort=" + sort + "&start=" + str(start) + "&count=" + str(count)
|
|
if filt: url = url + "&filter=" + filt
|
|
res = self.request(url)
|
|
if not isinstance(res, FailedRequest):
|
|
# Extract `data` entry from API response
|
|
res = res["data"]
|
|
self.save(cache_key, res)
|
|
return res
|
|
else:
|
|
return cached
|
|
|
|
def video(self, domain, id, ttl=None):
|
|
cache_key = "%s/%s" % (domain, id)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/videos/" + id
|
|
res = self.request(url)
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def video_captions(self, domain, id, ttl=None):
|
|
cache_key = "%s/%s" % (domain, id)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/videos/" + id + "/captions"
|
|
res = self.request(url)
|
|
if not isinstance(res, FailedRequest):
|
|
res = res["data"]
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def video_captions_proxy(self, domain, caption_id, ttl=None):
|
|
# URL is hardcoded to prevent further proxying. URL may change with updates, see captions API
|
|
# eg. https://kolektiva.media/api/v1/videos/9c9de5e8-0a1e-484a-b099-e80766180a6d/captions
|
|
# TODO: What if the captionPath doesn't follow this format on an instance? Should we really proxy ANYTHING returned by API?
|
|
cache_key = "%s/%s" % (domain, caption_id)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/lazy-static/video-captions/" + caption_id
|
|
res = self.plaintext_request(url)
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def video_comments(self, domain, id, ttl=None):
|
|
cache_key = "%s/%s" % (domain, id)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/videos/" + id + "/comment-threads"
|
|
res = self.request(url)
|
|
if not isinstance(res, FailedRequest):
|
|
res = res["data"]
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def account(self, domain, name, ttl=None):
|
|
cache_key = "%s/%s" % (domain, name)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/accounts/" + name
|
|
res = self.request(url)
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def account_channels(self, domain, name, start=0, count=10, ttl=None):
|
|
cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/accounts/" + name + "/video-channels?start=" + str(start) + "&count=" + str(count)
|
|
res = self.request(url)
|
|
if not isinstance(res, FailedRequest):
|
|
res = res["data"]
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def account_videos(self, domain, name, start=0, count=10, ttl=None):
|
|
cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/accounts/" + name + "/videos?start=" + str(start) + "&count=" + str(count)
|
|
res = self.request(url)
|
|
if not isinstance(res, FailedRequest):
|
|
res = res["data"]
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def channel(self, domain, name, ttl=None):
|
|
cache_key = "%s/%s" % (domain, name)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/video-channels/" + name
|
|
res = self.request(url)
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def channel_videos(self, domain, name, start=0, count=10, ttl=None):
|
|
cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/video-channels/" + name + "/videos?start=" + str(start) + "&count=" + str(count)
|
|
res = self.request(url)
|
|
if not isinstance(res, FailedRequest):
|
|
res = res["data"]
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|
|
|
|
def channel_playlists(self, domain, name, start=0, count=10, ttl=None):
|
|
cache_key = "%s/%s/%s/%s" % (str(start), str(count), domain, name)
|
|
cached = self.cached(cache_key, ttl=ttl)
|
|
if cached == None:
|
|
url = "https://" + domain + "/api/v1/video-channels/" + name + "/video-playlists?start=" + str(start) + "&count=" + str(count)
|
|
res = self.request(url)
|
|
if not isinstance(res, FailedRequest):
|
|
res = res["data"]
|
|
self.save(cache_key, res)
|
|
return res
|
|
return cached
|