From f3ab19ee1ce5067b3264dc1dd46225a092784e3a Mon Sep 17 00:00:00 2001
From: Patryk
Date: Wed, 26 Apr 2023 15:21:05 +0200
Subject: [PATCH] Add youtube duplicate detection (#1174)

---
 app/models/story.rb       | 25 +++++++++++++++++++++++++
 spec/models/story_spec.rb | 28 ++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/app/models/story.rb b/app/models/story.rb
index c4ad8c5a..7d4f3f15 100644
--- a/app/models/story.rb
+++ b/app/models/story.rb
@@ -237,6 +237,31 @@ class Story < ApplicationRecord
       urls = urls2.uniq
     end
 
+    # www.youtube.com
+    # m.youtube.com
+    # youtube.com redirects to www.youtube.com
+    # youtu.be redirects to www.youtube.com
+    # www.m.youtube.com doesn't work
+    # www.youtu.be doesn't exist
+    # m.youtu.be doesn't exist
+    if /^https?:\/\/((?:www\d*|m)\.)?(youtube\.com|youtu\.be)/i.match(url)
+      urls.each do |u|
+        # Only watch and youtu.be links carry a video id. Other URLs on these
+        # hosts (channels, playlists, ...) don't match the pattern, so skip
+        # them instead of calling [1] on a nil match.
+        id = u[/^https?:\/\/(?:(?:m|www)\.)?(?:youtube\.com\/watch\?v=|youtu\.be\/)([A-Za-z0-9\-_]+)/i, 1]
+        next unless id
+
+        urls2.push "https://www.youtube.com/watch?v=#{id}"
+        # In theory, youtube redirects https://youtube.com to https://www.youtube.com
+        # let's check it just in case
+        urls2.push "https://youtube.com/watch?v=#{id}"
+        urls2.push "https://youtu.be/#{id}"
+        urls2.push "https://m.youtube.com/watch?v=#{id}"
+      end
+      urls = urls2.uniq
+    end
+
     # https
     urls.each do |u|
       urls2.push u.gsub(/^http:\/\//i, "https://")
diff --git a/spec/models/story_spec.rb b/spec/models/story_spec.rb
index 8e8f2702..071c74f5 100644
--- a/spec/models/story_spec.rb
+++ b/spec/models/story_spec.rb
@@ -327,6 +327,34 @@ describe Story do
       expect(s1.similar_stories).to eq([s2])
       expect(s2.similar_stories).to eq([s1])
     end
+
+    it "finds similar www.youtube and youtu.be URLs" do
+      s1 = create(:story,
+                  url: 'https://www.youtube.com/watch?v=7Pq-S557XQU',
+                  created_at: (Story::RECENT_DAYS + 1).days.ago)
+
+      s2 = create(:story, url: 'https://youtu.be/7Pq-S557XQU')
+
+      expect(s1.similar_stories).to eq([s2])
+      expect(s2.similar_stories).to eq([s1])
+    end
+
+    it "finds similar www.youtube and m.youtube URLs" do
+      s1 = create(:story,
+                  url: 'https://www.youtube.com/watch?v=7Pq-S557XQU',
+                  created_at: (Story::RECENT_DAYS + 1).days.ago)
+
+      s2 = create(:story, url: 'https://m.youtube.com/watch?v=7Pq-S557XQU')
+
+      expect(s1.similar_stories).to eq([s2])
+      expect(s2.similar_stories).to eq([s1])
+    end
+
+    it "ignores youtube URLs that have no video id" do
+      s = create(:story, url: 'https://www.youtube.com/playlist?list=PL0123456789')
+
+      expect(s.similar_stories).to eq([])
+    end
   end
 
   describe "#calculated_hotness" do