use Network.URI instead of a regex

This commit is contained in:
Solene Rapenne 2022-08-21 13:16:17 +02:00
parent 9144140aff
commit 872aeffe77
5 changed files with 85 additions and 127 deletions

View File

@ -35,8 +35,7 @@ opts = info (sample <**> helper)
main :: IO ()
main = do
options <- execParser opts
url <- get_request
let request = parse_to_gemini (parse_url url)
request <- get_request
answer <- create_answer request (language options) (baseDir options) (virtualhost options)
putStr (make_reply answer)
putStr (content answer)

View File

@ -21,6 +21,7 @@
"vger"
"base"
"optparse-applicative"
"network-uri"
];
};
@ -37,6 +38,7 @@
"utf8-string"
"directory"
"text"
"network-uri"
];
};
@ -50,6 +52,7 @@
"vger"
"base"
"HUnit"
"network-uri"
];
};
};

View File

@ -4,6 +4,7 @@ import Control.Exception
import Data.ByteString.UTF8
import Data.Text
import Network.Mime
import Network.URI
import System.Directory (doesDirectoryExist, doesFileExist)
import Text.Regex
import Text.Regex.PCRE
@ -19,17 +20,15 @@ getFile s = do
Right ex -> return (Just ex)
False -> return Nothing
-- return components of the url
-- gemini:// | hostname | uri | ? | query
parse_url :: String -> [[String]]
parse_url url =
(sanitize_uri url) =~ "^(gemini:\\/\\/)([^\\/]*)\\/?([^\\?]*)(\\?)?(.*)?(\\r\\n)$"
-- remove any .. in the uri that could escape the location
sanitize_uri :: String -> String
sanitize_uri path =
subRegex (mkRegex "\\.\\.\\/") path ""
remove_crnlf :: String -> String
remove_crnlf uri =
subRegex (mkRegex "\r\n$") uri ""
-- return a file as a string
read_file :: String -> IO String
read_file path = do
@ -42,12 +41,6 @@ get_request = do
stdin <- getContents
pure stdin
data Gemini = MkGemini
{ domain :: String,
file :: String,
query :: String
}
data Answer = MkAnswer
{ code :: Int,
lang :: String,
@ -59,71 +52,87 @@ get_mime :: String -> String
get_mime filename
| (fileNameExtensions (pack filename)) == [(pack "gmi" :: Extension)] = "text/gemini"
| (fileNameExtensions (pack filename)) == [(pack "gemini" :: Extension)] = "text/gemini"
| (fileNameExtensions (pack filename)) == [(pack "md" :: Extension)] = "text/markdown"
| otherwise = (toString (defaultMimeLookup (pack filename)))
parse_to_gemini :: [[String]] -> Gemini
parse_to_gemini tab =
MkGemini
{ domain = tab !! 0 !! 2,
file = tab !! 0 !! 3,
query = tab !! 0 !! 5
geminiInvalidURI :: Answer
geminiInvalidURI =
MkAnswer
{ code = 50,
lang = "",
mime = "",
content = ""
}
create_answer :: Gemini -> String -> String -> Bool -> IO Answer
create_answer (MkGemini domain file query) language baseDir vhost = do
let baseDir' =
if vhost
then domain ++ "/" ++ baseDir
else baseDir
content <- getFile $ baseDir' ++ file
case content of
Just x ->
return
( MkAnswer
{ code = 20,
lang = language,
mime = get_mime file,
content = x
}
)
Nothing -> do
content' <- getFile $ baseDir' ++ file ++ "index.gmi"
case content' of
Just y ->
return
( MkAnswer
{ code = 20,
lang = language,
mime = get_mime $ file ++ "index.gmi",
content = y
}
)
Nothing -> do
isdir <- doesDirectoryExist $ baseDir' ++ file
if isdir
then
create_answer :: String -> String -> String -> Bool -> IO Answer
create_answer request language baseDir vhost = do
-- parse the URI
case parseURI (remove_crnlf (sanitize_uri request)) of
Nothing -> return geminiInvalidURI
Just uri -> do
-- look for authority info
case uriAuthority uri of
Nothing -> pure geminiInvalidURI
-- request is valid from here
Just auth -> do
let domain = uriRegName auth
let file = uriPath uri
let baseDir' =
if vhost
then baseDir ++ "/" ++ domain
else baseDir
content <- getFile $ baseDir' ++ file
case content of
-- file has been found
Just x ->
return
( MkAnswer
{ code = 31,
lang = "",
mime = "/" ++ file ++ "/",
content = ""
}
)
else
return
( MkAnswer
{ code = 50,
{ code = 20,
lang = language,
mime = "",
content = "error ici " ++ baseDir' ++ file ++ "/index.gmi"
mime = get_mime file,
content = x
}
)
Nothing -> do
-- no file
-- try /index.gmi
content' <- getFile $ baseDir' ++ file ++ "index.gmi"
case content' of
Just y ->
return
( MkAnswer
{ code = 20,
lang = language,
mime = get_mime $ file ++ "index.gmi",
content = y
}
)
Nothing -> do
isdir <- doesDirectoryExist $ baseDir' ++ file
if isdir
then
return
( MkAnswer
{ code = 31,
lang = "",
mime = file ++ "/",
content = ""
}
)
else
return
( MkAnswer
{ code = 52,
lang = language,
mime = "",
content = "can't find " ++ baseDir' ++ file
}
)
make_reply :: Answer -> String
make_reply (MkAnswer code lang content mime)
| lang /= "" = (show code) ++ " " ++ mime ++ "; lang=" ++ lang ++ " \r\n"
| code == 20 && lang == "" = (show code) ++ " " ++ mime ++ "; \r\n"
| code == 31 = "31 " ++ mime ++ "\r\n"
| otherwise = "50\r\n"
| code == 20 && mime == "text/gemini" && lang /= "" = "20 " ++ mime ++ "; lang=" ++ lang ++ " \r\n"
| code == 20 && mime == "text/gemini" && lang == "" = "20 " ++ mime ++ "; \r\n"
| code == 20 && mime /= "text/gemini" = "20 " ++ mime ++ "\r\n"
| code == 31 = "31 " ++ mime ++ "\r\n"
| otherwise = "55\r\n"

View File

@ -3,52 +3,6 @@ module Main where
import Test.HUnit
import Gemini
regex_1 = TestCase (assertEqual
"single file request"
[["gemini://perso.pw/index.gmi\r\n", "gemini://", "perso.pw", "index.gmi", "", "", "\r\n"]]
(parse_url "gemini://perso.pw/index.gmi\r\n"))
regex_2 = TestCase (assertEqual
"full request with file and query"
[["gemini://perso.pw/index.gmi?query=value\r\n", "gemini://", "perso.pw", "index.gmi", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/index.gmi?query=value\r\n"))
regex_3 = TestCase (assertEqual
"missing newline return"
[]
(parse_url "gemini://perso.pw/"))
regex_4 = TestCase (assertEqual
"query without a file"
[["gemini://perso.pw/?query=value\r\n", "gemini://", "perso.pw", "", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/?query=value\r\n"))
regex_5 = TestCase (assertEqual
"domain only"
[["gemini://perso.pw\r\n", "gemini://", "perso.pw", "", "", "", "\r\n"]]
(parse_url "gemini://perso.pw\r\n"))
regex_6 = TestCase (assertEqual
"directory requested"
[["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]]
(parse_url "gemini://perso.pw/directory/\r\n"))
regex_7 = TestCase (assertEqual
"path traversal attempt"
[["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]]
(parse_url "gemini://perso.pw/../../directory/\r\n"))
regex_8 = TestCase (assertEqual
"directory requested with a query"
[["gemini://perso.pw/directory/?query=value\r\n", "gemini://", "perso.pw", "directory/", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/directory/?query=value\r\n"))
regex_9 = TestCase (assertEqual
"directory requested with a query without a trailing slash"
[["gemini://perso.pw/directory?query=value\r\n", "gemini://", "perso.pw", "directory", "?", "query=value", "\r\n"]]
(parse_url "gemini://perso.pw/directory?query=value\r\n"))
regex_10 = TestCase (assertEqual
"ensure sanitization works"
"gemini://perso.pw/main.gmi/passwd"
@ -75,16 +29,7 @@ mime_4 = TestCase (assertEqual
(get_mime "picture.png"))
tests = TestList
[ regex_1
, regex_2
, regex_3
, regex_4
, regex_5
, regex_6
, regex_7
, regex_8
, regex_9
, regex_10
[ regex_10
, mime_1
, mime_2
, mime_3

View File

@ -22,6 +22,7 @@ library
base
, directory
, mime-types
, network-uri
, regex-compat
, regex-pcre
, text
@ -37,8 +38,8 @@ executable vger
Paths_vger
build-depends:
base
, network-uri
, optparse-applicative
, text
, vger
default-language: Haskell2010
@ -53,5 +54,6 @@ test-suite test
build-depends:
HUnit
, base
, network-uri
, vger
default-language: Haskell2010