Use my Diffbot replacement built on php-readability

This commit is contained in:
Ben Harris 2022-08-17 15:43:48 -04:00
parent 8fce00add7
commit c3b9cab8a9
1 changed file with 5 additions and 25 deletions

View File

@ -1,26 +1,15 @@
# typed: false
class DiffBot
cattr_accessor :DIFFBOT_API_KEY
# this needs to be overridden in config/initializers/production.rb
@@DIFFBOT_API_KEY = nil
DIFFBOT_API_URL = "http://www.diffbot.com/api/article".freeze
# source https://tildegit.org/ben/tilde/src/branch/master/fulltext/fulltext.php
API_URL = "https://tilde.team/~ben/fulltext/fulltext.php".freeze
def self.get_story_text(story)
if !@@DIFFBOT_API_KEY
return
end
return "" if story.url.nil?
# XXX: diffbot tries to read pdfs as text, so disable for now
if /\.pdf$/i.match?(story.url.to_s)
if story.url.to_s.match(/\.pdf$/i)
return nil
end
db_url = "#{DIFFBOT_API_URL}?token=#{@@DIFFBOT_API_KEY}&url=#{CGI.escape(story.url)}"
db_url = "#{API_URL}?url=#{CGI.escape(story.url)}"
begin
s = Sponge.new
@ -28,16 +17,7 @@ class DiffBot
s.timeout = 45
res = s.fetch(db_url).body
if res.present?
j = JSON.parse(res)
# turn newlines into double newlines, so they become paragraphs
j["text"] = j["text"].to_s.gsub("\n", "\n\n")
while j["text"].include?("\n\n\n")
j["text"].gsub!("\n\n\n", "\n\n")
end
return j["text"]
return res
end
rescue => e
Rails.logger.error "error fetching #{db_url} #{e.backtrace.first} #{e.message}"