From 872aeffe77f3352603007fee9c3904d826d51eda Mon Sep 17 00:00:00 2001 From: Solene Rapenne Date: Sun, 21 Aug 2022 13:16:17 +0200 Subject: [PATCH] use Network.URI instead of a regex --- app/Vger.hs | 3 +- flake.nix | 3 ++ lib/Gemini.hs | 145 +++++++++++++++++++++++++++----------------------- tests/Main.hs | 57 +------------------- vger.cabal | 4 +- 5 files changed, 85 insertions(+), 127 deletions(-) diff --git a/app/Vger.hs b/app/Vger.hs index 4875cb2..b6d0c77 100644 --- a/app/Vger.hs +++ b/app/Vger.hs @@ -35,8 +35,7 @@ opts = info (sample <**> helper) main :: IO () main = do options <- execParser opts - url <- get_request - let request = parse_to_gemini (parse_url url) + request <- get_request answer <- create_answer request (language options) (baseDir options) (virtualhost options) putStr (make_reply answer) putStr (content answer) diff --git a/flake.nix b/flake.nix index 01a3231..921a969 100644 --- a/flake.nix +++ b/flake.nix @@ -21,6 +21,7 @@ "vger" "base" "optparse-applicative" + "network-uri" ]; }; @@ -37,6 +38,7 @@ "utf8-string" "directory" "text" + "network-uri" ]; }; @@ -50,6 +52,7 @@ "vger" "base" "HUnit" + "network-uri" ]; }; }; diff --git a/lib/Gemini.hs b/lib/Gemini.hs index 0b261fc..eed5d67 100644 --- a/lib/Gemini.hs +++ b/lib/Gemini.hs @@ -4,6 +4,7 @@ import Control.Exception import Data.ByteString.UTF8 import Data.Text import Network.Mime +import Network.URI import System.Directory (doesDirectoryExist, doesFileExist) import Text.Regex import Text.Regex.PCRE @@ -19,17 +20,15 @@ getFile s = do Right ex -> return (Just ex) False -> return Nothing --- return components of the url --- gemini:// | hostname | uri | ? | query -parse_url :: String -> [[String]] -parse_url url = - (sanitize_uri url) =~ "^(gemini:\\/\\/)([^\\/]*)\\/?([^\\?]*)(\\?)?(.*)?(\\r\\n)$" - -- remove any .. in the uri that could escape the location sanitize_uri :: String -> String sanitize_uri path = subRegex (mkRegex "\\.\\.\\/") path "" +remove_crnlf :: String -> String +remove_crnlf uri = + subRegex (mkRegex "\r\n$") uri "" + -- return a file as a string read_file :: String -> IO String read_file path = do @@ -42,12 +41,6 @@ get_request = do stdin <- getContents pure stdin -data Gemini = MkGemini - { domain :: String, - file :: String, - query :: String - } - data Answer = MkAnswer { code :: Int, lang :: String, @@ -59,71 +52,87 @@ get_mime :: String -> String get_mime filename | (fileNameExtensions (pack filename)) == [(pack "gmi" :: Extension)] = "text/gemini" | (fileNameExtensions (pack filename)) == [(pack "gemini" :: Extension)] = "text/gemini" + | (fileNameExtensions (pack filename)) == [(pack "md" :: Extension)] = "text/markdown" | otherwise = (toString (defaultMimeLookup (pack filename))) -parse_to_gemini :: [[String]] -> Gemini -parse_to_gemini tab = - MkGemini - { domain = tab !! 0 !! 2, - file = tab !! 0 !! 3, - query = tab !! 0 !! 5 +geminiInvalidURI :: Answer +geminiInvalidURI = + MkAnswer + { code = 50, + lang = "", + mime = "", + content = "" } -create_answer :: Gemini -> String -> String -> Bool -> IO Answer -create_answer (MkGemini domain file query) language baseDir vhost = do - let baseDir' = - if vhost - then domain ++ "/" ++ baseDir - else baseDir - - content <- getFile $ baseDir' ++ file - case content of - Just x -> - return - ( MkAnswer - { code = 20, - lang = language, - mime = get_mime file, - content = x - } - ) - Nothing -> do - content' <- getFile $ baseDir' ++ file ++ "index.gmi" - case content' of - Just y -> - return - ( MkAnswer - { code = 20, - lang = language, - mime = get_mime $ file ++ "index.gmi", - content = y - } - ) - Nothing -> do - isdir <- doesDirectoryExist $ baseDir' ++ file - if isdir - then +create_answer :: String -> String -> String -> Bool -> IO Answer +create_answer request language baseDir vhost = do + -- parse the URI + case parseURI (remove_crnlf (sanitize_uri request)) of + Nothing -> return geminiInvalidURI + Just uri -> do + -- look for authority info + case uriAuthority uri of + Nothing -> pure geminiInvalidURI + -- request is valid from here + Just auth -> do + let domain = uriRegName auth + let file = uriPath uri + let baseDir' = + if vhost + then baseDir ++ "/" ++ domain + else baseDir + content <- getFile $ baseDir' ++ file + case content of + -- file has been found + Just x -> return ( MkAnswer - { code = 31, - lang = "", - mime = "/" ++ file ++ "/", - content = "" - } - ) - else - return - ( MkAnswer - { code = 50, + { code = 20, lang = language, - mime = "", - content = "error ici " ++ baseDir' ++ file ++ "/index.gmi" + mime = get_mime file, + content = x } ) + Nothing -> do + -- no file + -- try /index.gmi + content' <- getFile $ baseDir' ++ file ++ "index.gmi" + case content' of + Just y -> + return + ( MkAnswer + { code = 20, + lang = language, + mime = get_mime $ file ++ "index.gmi", + content = y + } + ) + Nothing -> do + isdir <- doesDirectoryExist $ baseDir' ++ file + if isdir + then + return + ( MkAnswer + { code = 31, + lang = "", + mime = file ++ "/", + content = "" + } + ) + else + return + ( MkAnswer + { code = 52, + lang = language, + mime = "", + content = "can't find " ++ baseDir' ++ file + } + ) make_reply :: Answer -> String make_reply (MkAnswer code lang content mime) - | lang /= "" = (show code) ++ " " ++ mime ++ "; lang=" ++ lang ++ " \r\n" - | code == 20 && lang == "" = (show code) ++ " " ++ mime ++ "; \r\n" - | code == 31 = "31 " ++ mime ++ "\r\n" - | otherwise = "50\r\n" + | code == 20 && mime == "text/gemini" && lang /= "" = "20 " ++ mime ++ "; lang=" ++ lang ++ " \r\n" + | code == 20 && mime == "text/gemini" && lang == "" = "20 " ++ mime ++ "; \r\n" + | code == 20 && mime /= "text/gemini" = "20 " ++ mime ++ "\r\n" + | code == 31 = "31 " ++ mime ++ "\r\n" + | otherwise = "55\r\n" diff --git a/tests/Main.hs b/tests/Main.hs index 0487c04..8af3e26 100644 --- a/tests/Main.hs +++ b/tests/Main.hs @@ -3,52 +3,6 @@ module Main where import Test.HUnit import Gemini - -regex_1 = TestCase (assertEqual - "single file request" - [["gemini://perso.pw/index.gmi\r\n", "gemini://", "perso.pw", "index.gmi", "", "", "\r\n"]] - (parse_url "gemini://perso.pw/index.gmi\r\n")) - -regex_2 = TestCase (assertEqual - "full request with file and query" - [["gemini://perso.pw/index.gmi?query=value\r\n", "gemini://", "perso.pw", "index.gmi", "?", "query=value", "\r\n"]] - (parse_url "gemini://perso.pw/index.gmi?query=value\r\n")) - -regex_3 = TestCase (assertEqual - "missing newline return" - [] - (parse_url "gemini://perso.pw/")) - -regex_4 = TestCase (assertEqual - "query without a file" - [["gemini://perso.pw/?query=value\r\n", "gemini://", "perso.pw", "", "?", "query=value", "\r\n"]] - (parse_url "gemini://perso.pw/?query=value\r\n")) - -regex_5 = TestCase (assertEqual - "domain only" - [["gemini://perso.pw\r\n", "gemini://", "perso.pw", "", "", "", "\r\n"]] - (parse_url "gemini://perso.pw\r\n")) - -regex_6 = TestCase (assertEqual - "directory requested" - [["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]] - (parse_url "gemini://perso.pw/directory/\r\n")) - -regex_7 = TestCase (assertEqual - "path traversal attempt" - [["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]] - (parse_url "gemini://perso.pw/../../directory/\r\n")) - -regex_8 = TestCase (assertEqual - "directory requested with a query" - [["gemini://perso.pw/directory/?query=value\r\n", "gemini://", "perso.pw", "directory/", "?", "query=value", "\r\n"]] - (parse_url "gemini://perso.pw/directory/?query=value\r\n")) - -regex_9 = TestCase (assertEqual - "directory requested with a query without a trailing slash" - [["gemini://perso.pw/directory?query=value\r\n", "gemini://", "perso.pw", "directory", "?", "query=value", "\r\n"]] - (parse_url "gemini://perso.pw/directory?query=value\r\n")) - regex_10 = TestCase (assertEqual "ensure sanitization works" "gemini://perso.pw/main.gmi/passwd" @@ -75,16 +29,7 @@ mime_4 = TestCase (assertEqual (get_mime "picture.png")) tests = TestList - [ regex_1 - , regex_2 - , regex_3 - , regex_4 - , regex_5 - , regex_6 - , regex_7 - , regex_8 - , regex_9 - , regex_10 + [ regex_10 , mime_1 , mime_2 , mime_3 diff --git a/vger.cabal b/vger.cabal index ffbf494..df6ab83 100644 --- a/vger.cabal +++ b/vger.cabal @@ -22,6 +22,7 @@ library base , directory , mime-types + , network-uri , regex-compat , regex-pcre , text @@ -37,8 +38,8 @@ executable vger Paths_vger build-depends: base + , network-uri , optparse-applicative - , text , vger default-language: Haskell2010 @@ -53,5 +54,6 @@ test-suite test build-depends: HUnit , base + , network-uri , vger default-language: Haskell2010