use Network.URI instead of a regex

This commit is contained in:
Solene Rapenne 2022-08-21 13:16:17 +02:00
parent 9144140aff
commit 872aeffe77
5 changed files with 85 additions and 127 deletions

View File

@ -35,8 +35,7 @@ opts = info (sample <**> helper)
main :: IO () main :: IO ()
main = do main = do
options <- execParser opts options <- execParser opts
url <- get_request request <- get_request
let request = parse_to_gemini (parse_url url)
answer <- create_answer request (language options) (baseDir options) (virtualhost options) answer <- create_answer request (language options) (baseDir options) (virtualhost options)
putStr (make_reply answer) putStr (make_reply answer)
putStr (content answer) putStr (content answer)

View File

@ -21,6 +21,7 @@
"vger" "vger"
"base" "base"
"optparse-applicative" "optparse-applicative"
"network-uri"
]; ];
}; };
@ -37,6 +38,7 @@
"utf8-string" "utf8-string"
"directory" "directory"
"text" "text"
"network-uri"
]; ];
}; };
@ -50,6 +52,7 @@
"vger" "vger"
"base" "base"
"HUnit" "HUnit"
"network-uri"
]; ];
}; };
}; };

View File

@ -4,6 +4,7 @@ import Control.Exception
import Data.ByteString.UTF8 import Data.ByteString.UTF8
import Data.Text import Data.Text
import Network.Mime import Network.Mime
import Network.URI
import System.Directory (doesDirectoryExist, doesFileExist) import System.Directory (doesDirectoryExist, doesFileExist)
import Text.Regex import Text.Regex
import Text.Regex.PCRE import Text.Regex.PCRE
@ -19,17 +20,15 @@ getFile s = do
Right ex -> return (Just ex) Right ex -> return (Just ex)
False -> return Nothing False -> return Nothing
-- return components of the url
-- gemini:// | hostname | uri | ? | query
parse_url :: String -> [[String]]
parse_url url =
(sanitize_uri url) =~ "^(gemini:\\/\\/)([^\\/]*)\\/?([^\\?]*)(\\?)?(.*)?(\\r\\n)$"
-- remove any .. in the uri that could escape the location -- remove any .. in the uri that could escape the location
sanitize_uri :: String -> String sanitize_uri :: String -> String
sanitize_uri path = sanitize_uri path =
subRegex (mkRegex "\\.\\.\\/") path "" subRegex (mkRegex "\\.\\.\\/") path ""
remove_crnlf :: String -> String
remove_crnlf uri =
subRegex (mkRegex "\r\n$") uri ""
-- return a file as a string -- return a file as a string
read_file :: String -> IO String read_file :: String -> IO String
read_file path = do read_file path = do
@ -42,12 +41,6 @@ get_request = do
stdin <- getContents stdin <- getContents
pure stdin pure stdin
data Gemini = MkGemini
{ domain :: String,
file :: String,
query :: String
}
data Answer = MkAnswer data Answer = MkAnswer
{ code :: Int, { code :: Int,
lang :: String, lang :: String,
@ -59,71 +52,87 @@ get_mime :: String -> String
get_mime filename get_mime filename
| (fileNameExtensions (pack filename)) == [(pack "gmi" :: Extension)] = "text/gemini" | (fileNameExtensions (pack filename)) == [(pack "gmi" :: Extension)] = "text/gemini"
| (fileNameExtensions (pack filename)) == [(pack "gemini" :: Extension)] = "text/gemini" | (fileNameExtensions (pack filename)) == [(pack "gemini" :: Extension)] = "text/gemini"
| (fileNameExtensions (pack filename)) == [(pack "md" :: Extension)] = "text/markdown"
| otherwise = (toString (defaultMimeLookup (pack filename))) | otherwise = (toString (defaultMimeLookup (pack filename)))
parse_to_gemini :: [[String]] -> Gemini geminiInvalidURI :: Answer
parse_to_gemini tab = geminiInvalidURI =
MkGemini MkAnswer
{ domain = tab !! 0 !! 2, { code = 50,
file = tab !! 0 !! 3, lang = "",
query = tab !! 0 !! 5 mime = "",
content = ""
} }
create_answer :: Gemini -> String -> String -> Bool -> IO Answer create_answer :: String -> String -> String -> Bool -> IO Answer
create_answer (MkGemini domain file query) language baseDir vhost = do create_answer request language baseDir vhost = do
let baseDir' = -- parse the URI
if vhost case parseURI (remove_crnlf (sanitize_uri request)) of
then domain ++ "/" ++ baseDir Nothing -> return geminiInvalidURI
else baseDir Just uri -> do
-- look for authority info
content <- getFile $ baseDir' ++ file case uriAuthority uri of
case content of Nothing -> pure geminiInvalidURI
Just x -> -- request is valid from here
return Just auth -> do
( MkAnswer let domain = uriRegName auth
{ code = 20, let file = uriPath uri
lang = language, let baseDir' =
mime = get_mime file, if vhost
content = x then baseDir ++ "/" ++ domain
} else baseDir
) content <- getFile $ baseDir' ++ file
Nothing -> do case content of
content' <- getFile $ baseDir' ++ file ++ "index.gmi" -- file has been found
case content' of Just x ->
Just y ->
return
( MkAnswer
{ code = 20,
lang = language,
mime = get_mime $ file ++ "index.gmi",
content = y
}
)
Nothing -> do
isdir <- doesDirectoryExist $ baseDir' ++ file
if isdir
then
return return
( MkAnswer ( MkAnswer
{ code = 31, { code = 20,
lang = "",
mime = "/" ++ file ++ "/",
content = ""
}
)
else
return
( MkAnswer
{ code = 50,
lang = language, lang = language,
mime = "", mime = get_mime file,
content = "error ici " ++ baseDir' ++ file ++ "/index.gmi" content = x
} }
) )
Nothing -> do
-- no file
-- try /index.gmi
content' <- getFile $ baseDir' ++ file ++ "index.gmi"
case content' of
Just y ->
return
( MkAnswer
{ code = 20,
lang = language,
mime = get_mime $ file ++ "index.gmi",
content = y
}
)
Nothing -> do
isdir <- doesDirectoryExist $ baseDir' ++ file
if isdir
then
return
( MkAnswer
{ code = 31,
lang = "",
mime = file ++ "/",
content = ""
}
)
else
return
( MkAnswer
{ code = 52,
lang = language,
mime = "",
content = "can't find " ++ baseDir' ++ file
}
)
make_reply :: Answer -> String make_reply :: Answer -> String
make_reply (MkAnswer code lang content mime) make_reply (MkAnswer code lang content mime)
| lang /= "" = (show code) ++ " " ++ mime ++ "; lang=" ++ lang ++ " \r\n" | code == 20 && mime == "text/gemini" && lang /= "" = "20 " ++ mime ++ "; lang=" ++ lang ++ " \r\n"
| code == 20 && lang == "" = (show code) ++ " " ++ mime ++ "; \r\n" | code == 20 && mime == "text/gemini" && lang == "" = "20 " ++ mime ++ "; \r\n"
| code == 31 = "31 " ++ mime ++ "\r\n" | code == 20 && mime /= "text/gemini" = "20 " ++ mime ++ "\r\n"
| otherwise = "50\r\n" | code == 31 = "31 " ++ mime ++ "\r\n"
| otherwise = "55\r\n"

View File

@ -3,52 +3,6 @@ module Main where
import Test.HUnit import Test.HUnit
import Gemini import Gemini
regex_1 = TestCase (assertEqual
"single file request"
[["gemini://perso.pw/index.gmi\r\n", "gemini://", "perso.pw", "index.gmi", "", "", "\r\n"]]
(parse_url "gemini://perso.pw/index.gmi\r\n"))
regex_2 = TestCase (assertEqual
"full request with file and query"
[["gemini://perso.pw/index.gmi?query=value\r\n", "gemini://", "perso.pw", "index.gmi", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/index.gmi?query=value\r\n"))
regex_3 = TestCase (assertEqual
"missing newline return"
[]
(parse_url "gemini://perso.pw/"))
regex_4 = TestCase (assertEqual
"query without a file"
[["gemini://perso.pw/?query=value\r\n", "gemini://", "perso.pw", "", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/?query=value\r\n"))
regex_5 = TestCase (assertEqual
"domain only"
[["gemini://perso.pw\r\n", "gemini://", "perso.pw", "", "", "", "\r\n"]]
(parse_url "gemini://perso.pw\r\n"))
regex_6 = TestCase (assertEqual
"directory requested"
[["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]]
(parse_url "gemini://perso.pw/directory/\r\n"))
regex_7 = TestCase (assertEqual
"path traversal attempt"
[["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]]
(parse_url "gemini://perso.pw/../../directory/\r\n"))
regex_8 = TestCase (assertEqual
"directory requested with a query"
[["gemini://perso.pw/directory/?query=value\r\n", "gemini://", "perso.pw", "directory/", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/directory/?query=value\r\n"))
regex_9 = TestCase (assertEqual
"directory requested with a query without a trailing slash"
[["gemini://perso.pw/directory?query=value\r\n", "gemini://", "perso.pw", "directory", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/directory?query=value\r\n"))
regex_10 = TestCase (assertEqual regex_10 = TestCase (assertEqual
"ensure sanitization works" "ensure sanitization works"
"gemini://perso.pw/main.gmi/passwd" "gemini://perso.pw/main.gmi/passwd"
@ -75,16 +29,7 @@ mime_4 = TestCase (assertEqual
(get_mime "picture.png")) (get_mime "picture.png"))
tests = TestList tests = TestList
[ regex_1 [ regex_10
, regex_2
, regex_3
, regex_4
, regex_5
, regex_6
, regex_7
, regex_8
, regex_9
, regex_10
, mime_1 , mime_1
, mime_2 , mime_2
, mime_3 , mime_3

View File

@ -22,6 +22,7 @@ library
base base
, directory , directory
, mime-types , mime-types
, network-uri
, regex-compat , regex-compat
, regex-pcre , regex-pcre
, text , text
@ -37,8 +38,8 @@ executable vger
Paths_vger Paths_vger
build-depends: build-depends:
base base
, network-uri
, optparse-applicative , optparse-applicative
, text
, vger , vger
default-language: Haskell2010 default-language: Haskell2010
@ -53,5 +54,6 @@ test-suite test
build-depends: build-depends:
HUnit HUnit
, base , base
, network-uri
, vger , vger
default-language: Haskell2010 default-language: Haskell2010