Basic support for RSS/Atom feeds

2022-02-06 14:08:25 +01:00 · 2022-02-06 14:08:25 +01:00 · 8b1e28427b
parent 81f3ff9f5b
commit 8b1e28427b
3 changed files with 66 additions and 9 deletions
--- a/4
+++ b/4
@ -1,8 +1,10 @@
 # Offpunk History

-## 0.2.1 or 0.3 - Unreleased
+## 0.3 - Unreleased
 New Features:
+- support for RSS and Atom feeds (you can subscribe to them)
 - "less full" allows to see the full html page instead of only the article view
+ 	(also works with feeds to see descriptions of each post instead of a simple list)
 - Option --depth to customize your sync. Be warned, more than 1 is crazy.
 - Option --disable-http to allow deep syncing of gemini-only
 Other Small Improvements:
--- a/README.md
+++ b/README.md
@ -47,8 +47,8 @@ Nice to have but not priority:
 * TODO0: Hard - Make a manual within the git repository and have it automatically deployed as a website.
 * TODO1: Easy - Update blackbox to reflect cache hits.
 * TODO2: Hard - "pdf" - Implement retrieving PDF version of pages
-* TODO3: Easy - Transparent privacy redirects (twitter->nitter, etc)
-* TODO4: Easy - Rendering themes to allow customizing of colors ? (if any interest in the feature)
+* TODO3: Medium - Transparent privacy redirects (twitter->nitter, etc)
+* TODO4: Medium - Rendering themes to allow customizing of colors ? (if any interest in the feature)
 * TODO5: Medium - Support for Gopher
 * TODO6: Hard - "search" - Offline search engine to search in the cache (hard, no idea on how to do that)
 * TODO7: Easy - "share" - send a page by email
@ -72,6 +72,7 @@ To avoid using unstable or too recent libraries, the rule of thumb is that a lib
 * [Python-xdg](https://www.freedesktop.org/wiki/Software/pyxdg) will place your data, config and cache in place recommended by the XDG specs (usually it’s .local/share/offpunk, .config/offpunk and .cache/offpunk). Without it, look for ~/.offpunk or ~/.config/offpunk while the cache will be in ~/.cache/offpunk/. If installation is done later, some config files may need to be migrated by hand.
 * [Python-requests](http://python-requests.org) is needed to handle http/https requests natively (apt-get install python3-requests). Without it, http links will be opened in an external browser
 * [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup) and [Readability](https://github.com/buriy/python-readability) are both needed to render HTML. Without them, HTML will not be rendered or be sent to an external parser like Lynx. (apt-get install python3-bs4 python3-readability or pip3 install readability-lxml)
+* [Python-feedparser](https://github.com/kurtmckee/feedparser) will allow parsing of RSS/Atom feeds and thus subscriptions to them. (apt-get install python3-feedparser)
 * The [ansiwrap library](https://pypi.org/project/ansiwrap/) may result in
  neater display of text which makes use of ANSI escape codes to control colour (not in Debian?).
 * The [cryptography library](https://pypi.org/project/cryptography/) will
--- a/offpunk.py
+++ b/offpunk.py
@ -12,6 +12,8 @@
 #  - Björn Wärmedal <bjorn.warmedal@gmail.com>
 #  - <jake@rmgr.dev>

+_VERSION = "0.2"
+
 import argparse
 import cmd
 import cgi
@ -80,7 +82,12 @@ try:
    _DO_HTML = True
 except ModuleNotFoundError:
    _DO_HTML = False
-_VERSION = "0.2"
+
+try:
+    import feedparser
+    _DO_FEED = True
+except ModuleNotFoundError:
+    _DO_FEED = False

 ## Config directories
 # There are two conflicting xdg modules, we try to work with both
@ -301,6 +308,53 @@ class GemtextRenderer():
                rendered_text += wrap_line(line).rstrip() + "\n"
        return rendered_text, links

+class FeedRenderer():
+    """Render an RSS/Atom feed as a text page with numbered entry links.
+
+    Parsing relies on the optional feedparser module. When it is not
+    available (_DO_FEED is False), the rendered page is only a hint asking
+    to install it and the list of links stays empty.
+    """
+
+    def __init__(self,content):
+        # Raw feed content, handed to feedparser.parse() when rendering.
+        self.body = content
+        # Cache of the short rendering (entry titles without summaries).
+        self.rendered_text = None
+        # Entry URLs filled by render_feed(); None until a first rendering.
+        self.links = None
+        self.title = None
+
+    def get_body(self,readable=True):
+        """Return the rendered feed as a string.
+
+        With readable=True the short rendering is computed once and cached.
+        With readable=False the feed is rendered again with the summary of
+        each entry included.
+        """
+        if readable:
+            if self.rendered_text is None:
+                self.rendered_text = self.render_feed(self.body)
+            return self.rendered_text
+        else:
+            return self.render_feed(self.body,full=True)
+
+    def get_links(self):
+        """Return the list of entry links, rendering the feed if needed."""
+        # Compare with None: an empty list is a valid result (feed without
+        # entries) and must not trigger a new parsing on every call.
+        if self.links is None:
+            self.render_feed(self.body)
+        return self.links
+
+    def get_title(self):
+        """Return the heading of the page (a fixed placeholder for now)."""
+        return "# Title of the feed"
+
+    def render_feed(self,content,full=False):
+        """Build the text page for the feed and refresh self.links.
+
+        content is the raw feed passed to feedparser.parse().
+        full adds the summary of each entry below its heading.
+        Returns the page as a string. self.links is reset on every call and
+        receives the link of each entry, in page order, so that the number
+        displayed next to a title is the 1-based position of its link.
+        """
+        self.links = []
+        if not _DO_FEED:
+            return "Please install python-feedparser to handle RSS/Atom feeds\n"
+        parsed = feedparser.parse(content)
+        page = self.get_title()
+        page += "\n"
+        if parsed.bozo:
+            page += "Invalid RSS feed\n\n"
+            # bozo_exception is an exception object: it has to be converted
+            # explicitly, "str + exception" raises a TypeError.
+            page += str(parsed.bozo_exception)
+        else:
+            for i in parsed.entries:
+                # Entries are dict-like. Using get() instead of attribute
+                # access avoids an AttributeError when a feed omits one of
+                # these optional elements.
+                link = i.get("link")
+                title = i.get("title") or link or "Untitled"
+                if link:
+                    self.links.append(link)
+                    page += "## %s [%s] \n"%(title,len(self.links))
+                else:
+                    # No link to follow: do not display a misleading number.
+                    page += "## %s\n"%title
+                author = i.get("author")
+                published = i.get("published")
+                if author and published:
+                    page += "by %s on %s\n\n"%(author,published)
+                elif author:
+                    page += "by %s\n\n"%author
+                elif published:
+                    page += "on %s\n\n"%published
+                else:
+                    page += "\n"
+                if full:
+                    summary = i.get("summary")
+                    if summary:
+                        page += summary
+                        page += "\n\n"
+        return page
+
+
+
 class HtmlRenderer():
    def __init__(self,content):
        self.body = content
@ -489,7 +543,7 @@ class HtmlRenderer():
 _FORMAT_RENDERERS = {
    "text/gemini":  GemtextRenderer,
    "text/html" :   HtmlRenderer,
-    "text/xml" : HtmlRenderer
+    "text/xml" : FeedRenderer
 }
 # Offpunk is organized as follow:
 # - a GeminiClient instance which handles the browsing of GeminiItems (= pages).
@ -750,14 +804,14 @@ class GeminiItem():
                path = self.path
            else:
                path = self._cache_path
-            if _HAS_MAGIC:
+            if path.endswith(".gmi"):
+                mime = "text/gemini"
+            elif _HAS_MAGIC:
                mime = magic.from_file(path,mime=True)
            else:
                mime,encoding = mimetypes.guess_type(path,strict=False)
            #gmi Mimetype is not recognized yet
-            if not mime and path.endswith(".gmi"):
-                mime = "text/gemini"
-            elif not _HAS_MAGIC :
+            if not mime and not _HAS_MAGIC :
                print("Cannot guess the mime type of the file. Install Python-magic")
            if mime.startswith("text") and mime not in _FORMAT_RENDERERS:
                #by default, we consider it’s gemini except for html