diff --git a/CHANGELOG b/CHANGELOG index 8eb9b0b..590372f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,8 +1,10 @@ # Offpunk History -## 0.2.1 or 0.3 - Unreleased +## 0.3 - Unreleased New Features: +- support for RSS and Atom feed (you can subscribe to them) - "less full" allows to see the full html page instead of only the article view + (also works with feeds to see descriptions of each post instead of a simple list) - Option --depth to customize your sync. Be warned, more than 1 is crazy. - Option --disable-http to allows deep syncing of gemini-only Other Small Improvements: diff --git a/README.md b/README.md index 08c38a0..9840b06 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,8 @@ Nice to have but not priority: * TODO0: Hard - Make a manual within the git repository and have it automatically deployed as a website. * TODO1: Easy - Update blackbox to reflect cache hits. * TODO2: Hard - "pdf" - Implement retrieving PDF version of pages -* TODO3: Easy - Transparent privacy redirects (twitter->nitter, etc) -* TODO4: Easy - Rendering themes to allow customizing of colors ? (if any interest in the feature) +* TODO3: Medium - Transparent privacy redirects (twitter->nitter, etc) +* TODO4: Medium - Rendering themes to allow customizing of colors ? (if any interest in the feature) * TODO5: Medium - Support for Gopher * TODO6: Hard - "search" - Offline search engine to search in the cache (hard, no idea on how to do that) * TODO7: Easy - "share" - send a page by email @@ -72,6 +72,7 @@ To avoid using unstable or too recent libraries, the rule of thumb is that a lib * [Python-xdg](https://www.freedesktop.org/wiki/Software/pyxdg) will place your data, config and cache in place recommended by the XDG specs (usually it’s .local/share/offpunk, .config/offpunk and .cache/offpunk). Without it, look for ~/.offpunk or ~/.config/offpunk while the cache will be in ~/.cache/offpunk/. If installation is done later, some config files may need to be migrated by hand. 
class FeedRenderer():
    """Render an RSS/Atom feed as a text page with numbered entry links.

    The raw feed is kept in ``self.body``.  Rendering produces one
    ``## title [n]`` heading per entry, where ``n`` is the 1-based position
    of the entry link in the list returned by ``get_links()``.

    Parsing relies on the optional ``feedparser`` module; the module-level
    flag ``_DO_FEED`` tells whether it could be imported.
    """

    def __init__(self,content):
        """Store the raw feed; nothing is parsed until it is requested."""
        self.body = content
        # Cache of the short (list only) rendering, filled by get_body().
        self.rendered_text = None
        # Entry links of the last rendering; None means "never rendered".
        self.links = None
        # Currently unused: get_title() still returns a placeholder.
        self.title = None

    def get_body(self,readable=True):
        """Return the rendered feed.

        readable=True returns the cached list of entries (heading and
        byline only).  readable=False renders the full view, which also
        includes each entry summary, and is not cached.
        """
        if not readable:
            return self.render_feed(self.body,full=True)
        # Compare against None so that the cache is honoured even if the
        # rendered text happens to be falsy.
        if self.rendered_text is None:
            self.rendered_text = self.render_feed(self.body)
        return self.rendered_text

    def get_links(self):
        """Return the list of entry links, rendering the feed if needed."""
        # An empty list is a valid result (feed without entries): only
        # render when no rendering has happened at all.
        if self.links is None:
            self.render_feed(self.body)
        return self.links

    def get_title(self):
        """Return the heading line placed at the top of the page."""
        # NOTE(review): placeholder, the real feed title is not extracted yet.
        return "# Title of the feed"

    def render_feed(self,content,full=False):
        """Parse ``content`` and return it as text, resetting ``self.links``.

        With full=True the summary of each entry is appended after its
        byline.  When feedparser is not available, an installation hint is
        returned instead and ``self.links`` is left empty.
        """
        self.links = []
        if not _DO_FEED:
            return "Please install python-feedparser to handle RSS/Atom feeds\n"
        parsed = feedparser.parse(content)
        return self._render_parsed(parsed,full=full)

    def _render_parsed(self,parsed,full=False):
        """Turn an already parsed feed into text and rebuild ``self.links``."""
        self.links = []
        parts = [self.get_title(), "\n"]
        if parsed.bozo:
            parts.append("Invalid RSS feed\n\n")
            # bozo_exception is an exception instance, not a string: it has
            # to be converted before being added to the page.
            parts.append(str(getattr(parsed,"bozo_exception","")))
            return "".join(parts)
        for entry in parsed.entries:
            parts.append(self._render_entry(entry,full))
        return "".join(parts)

    def _render_entry(self,entry,full):
        """Return the text of one entry, registering its link if it has one."""
        # Every field below is optional in RSS/Atom and a missing one raises
        # AttributeError on direct access, hence the getattr() defaults.
        title = getattr(entry,"title",None) or "Untitled"
        link = getattr(entry,"link",None)
        if link:
            self.links.append(link)
            text = "## %s [%s] \n"%(title,len(self.links))
        else:
            # No link: nothing to number, only show the heading.
            text = "## %s \n"%title
        byline = []
        author = getattr(entry,"author",None)
        if author:
            byline.append("by %s"%author)
        published = getattr(entry,"published",None)
        if published:
            byline.append("on %s"%published)
        if byline:
            text += " ".join(byline) + "\n"
        text += "\n"
        summary = getattr(entry,"summary",None)
        if full and summary:
            text += summary + "\n\n"
        return text