Normalize rfc-editor.org links #1225 #1223

This commit is contained in:
Peter Bhat Harkins 2023-12-06 09:26:28 -06:00 committed by GitHub
commit 9500e23ef1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 0 deletions

View File

@@ -25,6 +25,11 @@ class Utils
# unify arxiv page and pdf based on their identifier https://arxiv.org/help/arxiv_identifier
url = url.sub %r{^arxiv\.org/(?:abs|pdf)/(?<id>\d{4}\.\d{4,5})(?:\.pdf)?}, 'arxiv.org/abs/\k<id>'
# unify rfc-editor.org pages based on their URL structures:
# https://www.rfc-editor.org/rfc/rfc9338.html
# https://www.rfc-editor.org/info/rfc9338
url = url.sub %r{rfc-editor\.org/(?:rfc|info)/rfc(\d+)[^/]*$}, 'rfc-editor.org/rfc/\1'
url = url.sub %r{^m\.youtube\.com/}, "youtube.com/"
url = url.sub %r{^youtu\.be/}, "youtube.com/watch?v="
url = url.sub %r{^youtube\.com/.*v=(?<id>[A-Za-z0-9\-_]+).*}, 'youtube.com/watch?v=\k<id>'

View File

@@ -27,6 +27,19 @@ describe "normalize_url" do
"https://arxiv.org/pdf/1234.12345" => "arxiv.org/abs/1234.12345",
"https://arxiv.org/abs/1234.12345.pdf" => "arxiv.org/abs/1234.12345",
"https://www.rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/info/rfc9338" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/info/rfc9338" => "rfc-editor.org/rfc/9338",
"https://youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf",
"https://youtube.com/watch?v=asdf_123" => "youtube.com/watch?v=asdf_123",
"https://www.youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf",