From 3327c4be9b2f0e560945427ae3b77657ac646d8f Mon Sep 17 00:00:00 2001 From: Micah Magruder Date: Fri, 1 Dec 2023 16:39:51 -0500 Subject: [PATCH 1/2] Normalize rfc-editor.org links --- extras/utils.rb | 5 +++++ spec/extras/normalize_url_spec.rb | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/extras/utils.rb b/extras/utils.rb index 63eeee16..c3377a7e 100644 --- a/extras/utils.rb +++ b/extras/utils.rb @@ -25,6 +25,11 @@ class Utils # unify arxiv page and pdf based on their identifier https://arxiv.org/help/arxiv_identifier url = url.sub %r{^arxiv\.org/(?:abs|pdf)/(?<id>\d{4}\.\d{4,5})(?:\.pdf)?}, 'arxiv.org/abs/\k<id>' + # unify rfc-editor.org pages based on their URL structures: + # https://www.rfc-editor.org/rfc/rfc9338.html + # https://www.rfc-editor.org/info/rfc9338 + url = url.sub %r{rfc-editor\.org/(?:rfc|info)/rfc(\d+)[^/]*$}, 'rfc-editor.org/rfc/\1' + url = url.sub %r{^m\.youtube\.com/}, "youtube.com/" url = url.sub %r{^youtu\.be/}, "youtube.com/watch?v=" url = url.sub %r{^youtube\.com/.*v=(?<video_id>[A-Za-z0-9\-_]+).*}, 'youtube.com/watch?v=\k<video_id>' diff --git a/spec/extras/normalize_url_spec.rb b/spec/extras/normalize_url_spec.rb index efdc17a7..67a5d430 100644 --- a/spec/extras/normalize_url_spec.rb +++ b/spec/extras/normalize_url_spec.rb @@ -27,6 +27,17 @@ describe "normalize_url" do "https://arxiv.org/pdf/1234.12345" => "arxiv.org/abs/1234.12345", "https://arxiv.org/abs/1234.12345.pdf" => "arxiv.org/abs/1234.12345", + "https://www.rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338", + "https://www.rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338", + "https://www.rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338", + "https://www.rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338", + "https://www.rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338", + "https://rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338", + "https://rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338", + 
"https://rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338", + "https://rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338", + "https://rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338", + "https://youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf", "https://youtube.com/watch?v=asdf_123" => "youtube.com/watch?v=asdf_123", "https://www.youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf", From 348d7544287f29beba0ae43ad80b19d25c1ae61c Mon Sep 17 00:00:00 2001 From: Micah Magruder Date: Fri, 1 Dec 2023 16:51:06 -0500 Subject: [PATCH 2/2] Add rfc-editor info links to spec test --- spec/extras/normalize_url_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/extras/normalize_url_spec.rb b/spec/extras/normalize_url_spec.rb index 67a5d430..23b03dd6 100644 --- a/spec/extras/normalize_url_spec.rb +++ b/spec/extras/normalize_url_spec.rb @@ -32,11 +32,13 @@ describe "normalize_url" do "https://www.rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338", "https://www.rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338", "https://www.rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338", + "https://www.rfc-editor.org/info/rfc9338" => "rfc-editor.org/rfc/9338", "https://rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338", "https://rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338", "https://rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338", "https://rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338", "https://rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338", + "https://rfc-editor.org/info/rfc9338" => "rfc-editor.org/rfc/9338", "https://youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf", "https://youtube.com/watch?v=asdf_123" => "youtube.com/watch?v=asdf_123",