Normalize rfc-editor.org links

This commit is contained in:
Micah Magruder 2023-12-01 16:39:51 -05:00
parent 8722f4229c
commit 3327c4be9b
2 changed files with 16 additions and 0 deletions

View File

@ -25,6 +25,11 @@ class Utils
# unify arxiv page and pdf based on their identifier https://arxiv.org/help/arxiv_identifier
url = url.sub %r{^arxiv\.org/(?:abs|pdf)/(?<id>\d{4}\.\d{4,5})(?:\.pdf)?}, 'arxiv.org/abs/\k<id>'
# unify rfc-editor.org document pages (with or without a format suffix such as
# .html) and info pages under one canonical path keyed by the RFC number, e.g.:
# https://www.rfc-editor.org/rfc/rfc9338.html
# https://www.rfc-editor.org/info/rfc9338
url = url.sub %r{rfc-editor\.org/(?:rfc|info)/rfc(\d+)[^/]*$}, 'rfc-editor.org/rfc/\1'
url = url.sub %r{^m\.youtube\.com/}, "youtube.com/"
url = url.sub %r{^youtu\.be/}, "youtube.com/watch?v="
url = url.sub %r{^youtube\.com/.*v=(?<id>[A-Za-z0-9\-_]+).*}, 'youtube.com/watch?v=\k<id>'

View File

@ -27,6 +27,17 @@ describe "normalize_url" do
"https://arxiv.org/pdf/1234.12345" => "arxiv.org/abs/1234.12345",
"https://arxiv.org/abs/1234.12345.pdf" => "arxiv.org/abs/1234.12345",
"https://www.rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338",
"https://www.rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338",
"https://rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338",
"https://youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf",
"https://youtube.com/watch?v=asdf_123" => "youtube.com/watch?v=asdf_123",
"https://www.youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf",