Use my DiffBot replacement built on php-readability
This commit is contained in:
parent
81c28ceacd
commit
578e9f3fc1
|
@@ -1,22 +1,13 @@
|
|||
class DiffBot
|
||||
cattr_accessor :DIFFBOT_API_KEY
|
||||
|
||||
# this needs to be overridden in config/initializers/production.rb
|
||||
@@DIFFBOT_API_KEY = nil
|
||||
|
||||
DIFFBOT_API_URL = "http://www.diffbot.com/api/article".freeze
|
||||
# source https://tildegit.org/ben/tilde/src/branch/master/fulltext/fulltext.php
|
||||
API_URL = "https://tilde.team/~ben/fulltext/fulltext.php".freeze
|
||||
|
||||
def self.get_story_text(story)
|
||||
if !@@DIFFBOT_API_KEY
|
||||
return
|
||||
end
|
||||
|
||||
# XXX: diffbot tries to read pdfs as text, so disable for now
|
||||
if story.url.to_s.match(/\.pdf$/i)
|
||||
return nil
|
||||
end
|
||||
|
||||
db_url = "#{DIFFBOT_API_URL}?token=#{@@DIFFBOT_API_KEY}&url=#{CGI.escape(story.url)}"
|
||||
db_url = "#{API_URL}?url=#{CGI.escape(story.url)}"
|
||||
|
||||
begin
|
||||
s = Sponge.new
|
||||
|
@@ -24,16 +15,7 @@ class DiffBot
|
|||
s.timeout = 45
|
||||
res = s.fetch(db_url).body
|
||||
if res.present?
|
||||
j = JSON.parse(res)
|
||||
|
||||
# turn newlines into double newlines, so they become paragraphs
|
||||
j["text"] = j["text"].to_s.gsub("\n", "\n\n")
|
||||
|
||||
while j["text"].match("\n\n\n")
|
||||
j["text"].gsub!("\n\n\n", "\n\n")
|
||||
end
|
||||
|
||||
return j["text"]
|
||||
return res
|
||||
end
|
||||
|
||||
rescue => e
|
||||
|
|
Loading…
Reference in New Issue