Add youtube duplicate detection (#1174)
This commit is contained in:
parent
64ba31c4d0
commit
f3ab19ee1c
|
@ -237,6 +237,28 @@ class Story < ApplicationRecord
|
||||||
urls = urls2.uniq
|
urls = urls2.uniq
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# www.youtube.com
|
||||||
|
# m.youtube.com
|
||||||
|
# youtube.com redirects to www.youtube.com
|
||||||
|
# youtu.be redirects to www.youtube.com
|
||||||
|
# www.m.youtube.com doesn't work
|
||||||
|
# www.youtu.be doesn't exist
|
||||||
|
# m.youtu.be doesn't exist
|
||||||
|
# Treat every spelling of the same YouTube video as one story: pull the
# video id out of each candidate URL and add all equivalent host/path
# forms to urls2 so the later lookup matches any of them.
if /^https?:\/\/((?:www\d*|m)\.)?(youtube\.com|youtu\.be)/i.match?(url)
  urls.each do |u|
    video = /^https?:\/\/(?:(?:m|www)\.)?(?:youtube\.com\/watch\?v=|youtu\.be\/)([A-Za-z0-9\-_]+)/i
      .match(u)
    # The check above accepts any YouTube URL (channels, playlists,
    # www2.* hosts, ...), but only watch/short links carry a video id.
    # Skip the rest instead of indexing into a nil match.
    next unless video

    id = video[1]

    urls2.push "https://www.youtube.com/watch?v=#{id}"
    # In theory YouTube redirects https://youtube.com to
    # https://www.youtube.com, but check the bare host as well in case a
    # story was stored with it.
    urls2.push "https://youtube.com/watch?v=#{id}"
    urls2.push "https://youtu.be/#{id}"
    urls2.push "https://m.youtube.com/watch?v=#{id}"
  end
  urls = urls2.uniq
end
|
||||||
|
|
||||||
# https
|
# https
|
||||||
urls.each do |u|
|
urls.each do |u|
|
||||||
urls2.push u.gsub(/^http:\/\//i, "https://")
|
urls2.push u.gsub(/^http:\/\//i, "https://")
|
||||||
|
|
|
@ -327,6 +327,28 @@ describe Story do
|
||||||
expect(s1.similar_stories).to eq([s2])
|
expect(s1.similar_stories).to eq([s2])
|
||||||
expect(s2.similar_stories).to eq([s1])
|
expect(s2.similar_stories).to eq([s1])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A youtu.be short link and a www.youtube.com watch link for the same video
# id must each list the other as a similar story, even when the watch link
# was created more than Story::RECENT_DAYS ago.
it "finds similar www.youtube and youtu.be URLs" do
  submitted_at = (Story::RECENT_DAYS + 1).days.ago
  watch_link = create(:story, url: 'https://www.youtube.com/watch?v=7Pq-S557XQU', created_at: submitted_at)
  short_link = create(:story, url: 'https://youtu.be/7Pq-S557XQU')

  expect(watch_link.similar_stories).to eq([short_link])
  expect(short_link.similar_stories).to eq([watch_link])
end
|
||||||
|
|
||||||
|
# The mobile host (m.youtube.com) and the desktop host (www.youtube.com)
# with the same video id must each list the other as a similar story, even
# when the desktop link was created more than Story::RECENT_DAYS ago.
it "finds similar www.youtube and m.youtube URLs" do
  submitted_at = (Story::RECENT_DAYS + 1).days.ago
  desktop_link = create(:story, url: 'https://www.youtube.com/watch?v=7Pq-S557XQU', created_at: submitted_at)
  mobile_link = create(:story, url: 'https://m.youtube.com/watch?v=7Pq-S557XQU')

  expect(desktop_link.similar_stories).to eq([mobile_link])
  expect(mobile_link.similar_stories).to eq([desktop_link])
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "#calculated_hotness" do
|
describe "#calculated_hotness" do
|
||||||
|
|
Loading…
Reference in New Issue