# Stateless helpers: URL canonicalisation, random alphanumeric strings and
# temporary silencing of IO streams.
class Utils
  # Reduces a URL to a canonical key so that trivially different spellings of
  # the same address compare equal. URI.parse is not lenient enough for the
  # input this has to accept, so the work is done with plain string edits.
  #
  # The result is a comparison key, not a fetchable URL: the scheme and any
  # leading "www"/"wwwN" label are removed.
  #
  # url - String (may be frozen; it is never mutated) or nil.
  #
  # Returns "" for "", nil for nil, otherwise a new normalized String.
  def self.normalize_url(url)
    return "" if url == ""
    return nil if url.nil?

    url = url.dup # copy in case frozen
    url.slice! %r{#.*$} # remove anchor
    url.slice! %r{/$} # remove trailing slash
    url.slice! %r{/index\.html$} # remove index.html (dot escaped so only a literal "." matches)
    url = url.sub %r{\.htm$}, ".html" # fix microsoft naming
    url.slice! %r{https?://} # consider http and https the same
    url.slice! %r{^(www\d*\.)} # remove www\d* from domain

    # Split the path from its query arguments. Both "?" and "&" are treated as
    # separators (trivia: ?a=1?c=2 is a valid uri).
    base, *args = url.split(/[&\?]/)
    # split returns [] when nothing is left (e.g. the input was only "#anchor"),
    # which would leave base nil; fall back to an empty string.
    url = base.to_s
    if args.any?
      # Sort the key/value pairs so argument order does not affect the result.
      url += "?"
      url += args.map { |arg| arg.split("=") }.sort.map { |pair| pair.join("=") }.join("&")
    end

    # unify arxiv page and pdf based on their identifier
    url = url.sub %r{^arxiv\.org/(?:abs|pdf)/(?<id>\d{4}\.\d{4,5})(?:\.pdf)?}, 'arxiv.org/abs/\k<id>'

    # unify the mobile host, the youtu.be shortener and watch URLs carrying
    # extra arguments into youtube.com/watch?v=<id>
    url = url.sub %r{^m\.youtube\.com/}, "youtube.com/"
    url = url.sub %r{^youtu\.be/}, "youtube.com/watch?v="
    url = url.sub %r{^youtube\.com/.*v=(?<id>[A-Za-z0-9\-_]+).*}, 'youtube.com/watch?v=\k<id>'

    # playlists are keyed by their list id only
    url.sub(
      %r{^youtube\.com/playlist\?.*list=(?<id>[A-Za-z0-9\-_]+).*},
      'youtube.com/playlist?list=\k<id>'
    )
  end

  # Builds a random string made only of the characters 0-9, A-Z and a-z.
  #
  # Bytes are drawn one at a time from OpenSSL::Random and any byte outside
  # those three ranges is discarded, so each accepted character is equally
  # likely.
  #
  # len - Integer number of characters wanted; zero or less yields "".
  #
  # Returns a String of exactly len characters (for len > 0).
  def self.random_str(len)
    str = ""
    while str.length < len
      chr = OpenSSL::Random.random_bytes(1)
      ord = chr.unpack1("C")
      #        0   9                   A   Z                   a    z
      if ord.between?(48, 57) || ord.between?(65, 90) || ord.between?(97, 122)
        str += chr
      end
    end
    str
  end

  # Runs the given block while every stream in +streams+ is redirected to the
  # null device, then points each stream back at its original target.
  #
  # streams - IO objects to silence (e.g. $stdout, $stderr).
  #
  # Returns the value of the block. The streams are restored even when the
  # block raises.
  def self.silence_stream(*streams)
    # Duplicate first, outside the protected region, so the ensure clause
    # always has a saved handle for every stream it tries to restore.
    on_hold = streams.map(&:dup)
    begin
      streams.each do |stream|
        stream.reopen(File::NULL) # platform-independent null device
        stream.sync = true
      end
      yield
    ensure
      streams.zip(on_hold) do |stream, saved|
        stream.reopen(saved)
        saved.close # release the duplicated descriptor
      end
    end
  end
end