tilde.news/spec/extras/normalize_url_spec.rb

64 lines
3.4 KiB
Ruby

# typed: false
require "rails_helper"
# Table-driven spec for Utils.normalize_url. Each hash entry maps an input URL
# to the exact string normalize_url is expected to return for it; one example
# is generated per entry.
describe "normalize_url" do
  {
    # generic rules: scheme, www prefixes, trailing slash, anchors, index pages
    "https://example.com" => "example.com", # basic
    "http://www.e.com" => "e.com", # http + https same
    "https://e.com" => "e.com", # short domain, for easier examples
    "https://e.com/" => "e.com", # trailing slash
    "https://e.com#foo" => "e.com", # anchor
    "https://e.com/#foo" => "e.com", # trailing slash and anchor
    "https://www.e.com" => "e.com", # remove www.
    "https://www4.e.com" => "e.com", # remove www4.
    "https://web.e.com" => "web.e.com", # keep web.
    "https://foo.e.com" => "foo.e.com", # keep subdomains
    "https://e.co.uk" => "e.co.uk", # keep other TLDs
    "https://e.com/index.html" => "e.com", # remove index.html
    "https://e.com/asdf.html" => "e.com/asdf.html", # end .html ok
    "https://e.com/asdf.htm" => "e.com/asdf.html", # .htm -> .html
    "https://e.com?b=2&a=1" => "e.com?a=1&b=2", # sort query args
    "https://e.com?a=1?b=2" => "e.com?a=1&b=2", # normalize ? to &
    "https://e.com?c=3&a=1?b=2&" => "e.com?a=1&b=2&c=3", # combined; trailing &

    # arxiv.org: pdf/ paths and a .pdf suffix both map to the abs/ page
    "https://www.arxiv.org" => "arxiv.org",
    "https://arxiv.org/abs/1234.12345" => "arxiv.org/abs/1234.12345",
    "https://arxiv.org/pdf/1234.12345" => "arxiv.org/abs/1234.12345",
    "https://arxiv.org/abs/1234.12345.pdf" => "arxiv.org/abs/1234.12345",

    # rfc-editor.org: every format, and the info/ page, map to rfc/<number>
    "https://www.rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338",
    "https://www.rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338",
    "https://www.rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338",
    "https://www.rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338",
    "https://www.rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338",
    "https://www.rfc-editor.org/info/rfc9338" => "rfc-editor.org/rfc/9338",
    "https://rfc-editor.org/rfc/rfc9338.html" => "rfc-editor.org/rfc/9338",
    "https://rfc-editor.org/rfc/rfc9338.txt" => "rfc-editor.org/rfc/9338",
    "https://rfc-editor.org/rfc/rfc9338.pdf" => "rfc-editor.org/rfc/9338",
    "https://rfc-editor.org/rfc/rfc9338.xml" => "rfc-editor.org/rfc/9338",
    "https://rfc-editor.org/rfc/rfc9338" => "rfc-editor.org/rfc/9338",
    "https://rfc-editor.org/info/rfc9338" => "rfc-editor.org/rfc/9338",

    # youtube: www./m./youtu.be map to youtube.com/watch; list= is dropped from
    # watch URLs and index= is dropped from playlist URLs
    "https://youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf",
    "https://youtube.com/watch?v=asdf_123" => "youtube.com/watch?v=asdf_123",
    "https://www.youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf",
    "https://m.youtube.com/watch?v=asdf" => "youtube.com/watch?v=asdf",
    "https://youtu.be/asdf" => "youtube.com/watch?v=asdf",
    "https://youtube.com/watch?v=asdf&list=foo" => "youtube.com/watch?v=asdf",
    "https://youtube.com/playlist?list=foo" => "youtube.com/playlist?list=foo",
    "https://youtube.com/playlist?list=foo&index=1" => "youtube.com/playlist?list=foo",
    "https://youtube.com/playlist?index=1&list=foo" => "youtube.com/playlist?list=foo",

    # no exceptions on real URLs we've seen (output not particularly important)
    "http://aaonline.fr/search.php?search&criteria[title-contains]=debian" =>
      "aaonline.fr/search.php?criteria[title-contains]=debian&search",
    "https://wiki.freebsd.org/VCSWhy (" => "wiki.freebsd.org/VCSWhy ("
  }.each do |input, output|
    # The input is part of the description so each generated example has a
    # unique name in the test output and can be selected individually by name.
    it "normalizes #{input}" do
      ret = Utils.normalize_url(input)
      expect(ret).to eq(output), "normalize_url(#{input}) expected #{output} but got #{ret}"
    end
  end
end