use my diffbot replacement using php-readability
This commit is contained in:
parent
8fce00add7
commit
c3b9cab8a9
|
@ -1,26 +1,15 @@
|
||||||
# typed: false
|
# typed: false
|
||||||
|
|
||||||
class DiffBot
|
class DiffBot
|
||||||
cattr_accessor :DIFFBOT_API_KEY
|
# source https://tildegit.org/ben/tilde/src/branch/master/fulltext/fulltext.php
|
||||||
|
API_URL = "https://tilde.team/~ben/fulltext/fulltext.php".freeze
|
||||||
# this needs to be overridden in config/initializers/production.rb
|
|
||||||
@@DIFFBOT_API_KEY = nil
|
|
||||||
|
|
||||||
DIFFBOT_API_URL = "http://www.diffbot.com/api/article".freeze
|
|
||||||
|
|
||||||
def self.get_story_text(story)
|
def self.get_story_text(story)
|
||||||
if !@@DIFFBOT_API_KEY
|
if story.url.to_s.match(/\.pdf$/i)
|
||||||
return
|
|
||||||
end
|
|
||||||
|
|
||||||
return "" if story.url.nil?
|
|
||||||
|
|
||||||
# XXX: diffbot tries to read pdfs as text, so disable for now
|
|
||||||
if /\.pdf$/i.match?(story.url.to_s)
|
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
|
||||||
db_url = "#{DIFFBOT_API_URL}?token=#{@@DIFFBOT_API_KEY}&url=#{CGI.escape(story.url)}"
|
db_url = "#{API_URL}?url=#{CGI.escape(story.url)}"
|
||||||
|
|
||||||
begin
|
begin
|
||||||
s = Sponge.new
|
s = Sponge.new
|
||||||
|
@ -28,16 +17,7 @@ class DiffBot
|
||||||
s.timeout = 45
|
s.timeout = 45
|
||||||
res = s.fetch(db_url).body
|
res = s.fetch(db_url).body
|
||||||
if res.present?
|
if res.present?
|
||||||
j = JSON.parse(res)
|
return res
|
||||||
|
|
||||||
# turn newlines into double newlines, so they become paragraphs
|
|
||||||
j["text"] = j["text"].to_s.gsub("\n", "\n\n")
|
|
||||||
|
|
||||||
while j["text"].include?("\n\n\n")
|
|
||||||
j["text"].gsub!("\n\n\n", "\n\n")
|
|
||||||
end
|
|
||||||
|
|
||||||
return j["text"]
|
|
||||||
end
|
end
|
||||||
rescue => e
|
rescue => e
|
||||||
Rails.logger.error "error fetching #{db_url} #{e.backtrace.first} #{e.message}"
|
Rails.logger.error "error fetching #{db_url} #{e.backtrace.first} #{e.message}"
|
||||||
|
|
Loading…
Reference in New Issue