use Network.URI instead of a regex
This commit is contained in:
parent
9144140aff
commit
872aeffe77
|
@ -35,8 +35,7 @@ opts = info (sample <**> helper)
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
options <- execParser opts
|
options <- execParser opts
|
||||||
url <- get_request
|
request <- get_request
|
||||||
let request = parse_to_gemini (parse_url url)
|
|
||||||
answer <- create_answer request (language options) (baseDir options) (virtualhost options)
|
answer <- create_answer request (language options) (baseDir options) (virtualhost options)
|
||||||
putStr (make_reply answer)
|
putStr (make_reply answer)
|
||||||
putStr (content answer)
|
putStr (content answer)
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
"vger"
|
"vger"
|
||||||
"base"
|
"base"
|
||||||
"optparse-applicative"
|
"optparse-applicative"
|
||||||
|
"network-uri"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -37,6 +38,7 @@
|
||||||
"utf8-string"
|
"utf8-string"
|
||||||
"directory"
|
"directory"
|
||||||
"text"
|
"text"
|
||||||
|
"network-uri"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -50,6 +52,7 @@
|
||||||
"vger"
|
"vger"
|
||||||
"base"
|
"base"
|
||||||
"HUnit"
|
"HUnit"
|
||||||
|
"network-uri"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
145
lib/Gemini.hs
145
lib/Gemini.hs
|
@ -4,6 +4,7 @@ import Control.Exception
|
||||||
import Data.ByteString.UTF8
|
import Data.ByteString.UTF8
|
||||||
import Data.Text
|
import Data.Text
|
||||||
import Network.Mime
|
import Network.Mime
|
||||||
|
import Network.URI
|
||||||
import System.Directory (doesDirectoryExist, doesFileExist)
|
import System.Directory (doesDirectoryExist, doesFileExist)
|
||||||
import Text.Regex
|
import Text.Regex
|
||||||
import Text.Regex.PCRE
|
import Text.Regex.PCRE
|
||||||
|
@ -19,17 +20,15 @@ getFile s = do
|
||||||
Right ex -> return (Just ex)
|
Right ex -> return (Just ex)
|
||||||
False -> return Nothing
|
False -> return Nothing
|
||||||
|
|
||||||
-- return components of the url
|
|
||||||
-- gemini:// | hostname | uri | ? | query
|
|
||||||
parse_url :: String -> [[String]]
|
|
||||||
parse_url url =
|
|
||||||
(sanitize_uri url) =~ "^(gemini:\\/\\/)([^\\/]*)\\/?([^\\?]*)(\\?)?(.*)?(\\r\\n)$"
|
|
||||||
|
|
||||||
-- remove any .. in the uri that could escape the location
|
-- remove any .. in the uri that could escape the location
|
||||||
sanitize_uri :: String -> String
|
sanitize_uri :: String -> String
|
||||||
sanitize_uri path =
|
sanitize_uri path =
|
||||||
subRegex (mkRegex "\\.\\.\\/") path ""
|
subRegex (mkRegex "\\.\\.\\/") path ""
|
||||||
|
|
||||||
|
remove_crnlf :: String -> String
|
||||||
|
remove_crnlf uri =
|
||||||
|
subRegex (mkRegex "\r\n$") uri ""
|
||||||
|
|
||||||
-- return a file as a string
|
-- return a file as a string
|
||||||
read_file :: String -> IO String
|
read_file :: String -> IO String
|
||||||
read_file path = do
|
read_file path = do
|
||||||
|
@ -42,12 +41,6 @@ get_request = do
|
||||||
stdin <- getContents
|
stdin <- getContents
|
||||||
pure stdin
|
pure stdin
|
||||||
|
|
||||||
data Gemini = MkGemini
|
|
||||||
{ domain :: String,
|
|
||||||
file :: String,
|
|
||||||
query :: String
|
|
||||||
}
|
|
||||||
|
|
||||||
data Answer = MkAnswer
|
data Answer = MkAnswer
|
||||||
{ code :: Int,
|
{ code :: Int,
|
||||||
lang :: String,
|
lang :: String,
|
||||||
|
@ -59,71 +52,87 @@ get_mime :: String -> String
|
||||||
get_mime filename
|
get_mime filename
|
||||||
| (fileNameExtensions (pack filename)) == [(pack "gmi" :: Extension)] = "text/gemini"
|
| (fileNameExtensions (pack filename)) == [(pack "gmi" :: Extension)] = "text/gemini"
|
||||||
| (fileNameExtensions (pack filename)) == [(pack "gemini" :: Extension)] = "text/gemini"
|
| (fileNameExtensions (pack filename)) == [(pack "gemini" :: Extension)] = "text/gemini"
|
||||||
|
| (fileNameExtensions (pack filename)) == [(pack "md" :: Extension)] = "text/markdown"
|
||||||
| otherwise = (toString (defaultMimeLookup (pack filename)))
|
| otherwise = (toString (defaultMimeLookup (pack filename)))
|
||||||
|
|
||||||
parse_to_gemini :: [[String]] -> Gemini
|
geminiInvalidURI :: Answer
|
||||||
parse_to_gemini tab =
|
geminiInvalidURI =
|
||||||
MkGemini
|
MkAnswer
|
||||||
{ domain = tab !! 0 !! 2,
|
{ code = 50,
|
||||||
file = tab !! 0 !! 3,
|
lang = "",
|
||||||
query = tab !! 0 !! 5
|
mime = "",
|
||||||
|
content = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
create_answer :: Gemini -> String -> String -> Bool -> IO Answer
|
create_answer :: String -> String -> String -> Bool -> IO Answer
|
||||||
create_answer (MkGemini domain file query) language baseDir vhost = do
|
create_answer request language baseDir vhost = do
|
||||||
let baseDir' =
|
-- parse the URI
|
||||||
if vhost
|
case parseURI (remove_crnlf (sanitize_uri request)) of
|
||||||
then domain ++ "/" ++ baseDir
|
Nothing -> return geminiInvalidURI
|
||||||
else baseDir
|
Just uri -> do
|
||||||
|
-- look for authority info
|
||||||
content <- getFile $ baseDir' ++ file
|
case uriAuthority uri of
|
||||||
case content of
|
Nothing -> pure geminiInvalidURI
|
||||||
Just x ->
|
-- request is valid from here
|
||||||
return
|
Just auth -> do
|
||||||
( MkAnswer
|
let domain = uriRegName auth
|
||||||
{ code = 20,
|
let file = uriPath uri
|
||||||
lang = language,
|
let baseDir' =
|
||||||
mime = get_mime file,
|
if vhost
|
||||||
content = x
|
then baseDir ++ "/" ++ domain
|
||||||
}
|
else baseDir
|
||||||
)
|
content <- getFile $ baseDir' ++ file
|
||||||
Nothing -> do
|
case content of
|
||||||
content' <- getFile $ baseDir' ++ file ++ "index.gmi"
|
-- file has been found
|
||||||
case content' of
|
Just x ->
|
||||||
Just y ->
|
|
||||||
return
|
|
||||||
( MkAnswer
|
|
||||||
{ code = 20,
|
|
||||||
lang = language,
|
|
||||||
mime = get_mime $ file ++ "index.gmi",
|
|
||||||
content = y
|
|
||||||
}
|
|
||||||
)
|
|
||||||
Nothing -> do
|
|
||||||
isdir <- doesDirectoryExist $ baseDir' ++ file
|
|
||||||
if isdir
|
|
||||||
then
|
|
||||||
return
|
return
|
||||||
( MkAnswer
|
( MkAnswer
|
||||||
{ code = 31,
|
{ code = 20,
|
||||||
lang = "",
|
|
||||||
mime = "/" ++ file ++ "/",
|
|
||||||
content = ""
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else
|
|
||||||
return
|
|
||||||
( MkAnswer
|
|
||||||
{ code = 50,
|
|
||||||
lang = language,
|
lang = language,
|
||||||
mime = "",
|
mime = get_mime file,
|
||||||
content = "error ici " ++ baseDir' ++ file ++ "/index.gmi"
|
content = x
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
Nothing -> do
|
||||||
|
-- no file
|
||||||
|
-- try /index.gmi
|
||||||
|
content' <- getFile $ baseDir' ++ file ++ "index.gmi"
|
||||||
|
case content' of
|
||||||
|
Just y ->
|
||||||
|
return
|
||||||
|
( MkAnswer
|
||||||
|
{ code = 20,
|
||||||
|
lang = language,
|
||||||
|
mime = get_mime $ file ++ "index.gmi",
|
||||||
|
content = y
|
||||||
|
}
|
||||||
|
)
|
||||||
|
Nothing -> do
|
||||||
|
isdir <- doesDirectoryExist $ baseDir' ++ file
|
||||||
|
if isdir
|
||||||
|
then
|
||||||
|
return
|
||||||
|
( MkAnswer
|
||||||
|
{ code = 31,
|
||||||
|
lang = "",
|
||||||
|
mime = file ++ "/",
|
||||||
|
content = ""
|
||||||
|
}
|
||||||
|
)
|
||||||
|
else
|
||||||
|
return
|
||||||
|
( MkAnswer
|
||||||
|
{ code = 52,
|
||||||
|
lang = language,
|
||||||
|
mime = "",
|
||||||
|
content = "can't find " ++ baseDir' ++ file
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
make_reply :: Answer -> String
|
make_reply :: Answer -> String
|
||||||
make_reply (MkAnswer code lang content mime)
|
make_reply (MkAnswer code lang content mime)
|
||||||
| lang /= "" = (show code) ++ " " ++ mime ++ "; lang=" ++ lang ++ " \r\n"
|
| code == 20 && mime == "text/gemini" && lang /= "" = "20 " ++ mime ++ "; lang=" ++ lang ++ " \r\n"
|
||||||
| code == 20 && lang == "" = (show code) ++ " " ++ mime ++ "; \r\n"
|
| code == 20 && mime == "text/gemini" && lang == "" = "20 " ++ mime ++ "; \r\n"
|
||||||
| code == 31 = "31 " ++ mime ++ "\r\n"
|
| code == 20 && mime /= "text/gemini" = "20 " ++ mime ++ "\r\n"
|
||||||
| otherwise = "50\r\n"
|
| code == 31 = "31 " ++ mime ++ "\r\n"
|
||||||
|
| otherwise = "55\r\n"
|
||||||
|
|
|
@ -3,52 +3,6 @@ module Main where
|
||||||
import Test.HUnit
|
import Test.HUnit
|
||||||
import Gemini
|
import Gemini
|
||||||
|
|
||||||
|
|
||||||
regex_1 = TestCase (assertEqual
|
|
||||||
"single file request"
|
|
||||||
[["gemini://perso.pw/index.gmi\r\n", "gemini://", "perso.pw", "index.gmi", "", "", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw/index.gmi\r\n"))
|
|
||||||
|
|
||||||
regex_2 = TestCase (assertEqual
|
|
||||||
"full request with file and query"
|
|
||||||
[["gemini://perso.pw/index.gmi?query=value\r\n", "gemini://", "perso.pw", "index.gmi", "?", "query=value", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw/index.gmi?query=value\r\n"))
|
|
||||||
|
|
||||||
regex_3 = TestCase (assertEqual
|
|
||||||
"missing newline return"
|
|
||||||
[]
|
|
||||||
(parse_url "gemini://perso.pw/"))
|
|
||||||
|
|
||||||
regex_4 = TestCase (assertEqual
|
|
||||||
"query without a file"
|
|
||||||
[["gemini://perso.pw/?query=value\r\n", "gemini://", "perso.pw", "", "?", "query=value", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw/?query=value\r\n"))
|
|
||||||
|
|
||||||
regex_5 = TestCase (assertEqual
|
|
||||||
"domain only"
|
|
||||||
[["gemini://perso.pw\r\n", "gemini://", "perso.pw", "", "", "", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw\r\n"))
|
|
||||||
|
|
||||||
regex_6 = TestCase (assertEqual
|
|
||||||
"directory requested"
|
|
||||||
[["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw/directory/\r\n"))
|
|
||||||
|
|
||||||
regex_7 = TestCase (assertEqual
|
|
||||||
"path traversal attempt"
|
|
||||||
[["gemini://perso.pw/directory/\r\n", "gemini://", "perso.pw", "directory/", "", "", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw/../../directory/\r\n"))
|
|
||||||
|
|
||||||
regex_8 = TestCase (assertEqual
|
|
||||||
"directory requested with a query"
|
|
||||||
[["gemini://perso.pw/directory/?query=value\r\n", "gemini://", "perso.pw", "directory/", "?", "query=value", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw/directory/?query=value\r\n"))
|
|
||||||
|
|
||||||
regex_9 = TestCase (assertEqual
|
|
||||||
"directory requested with a query without a trailing slash"
|
|
||||||
[["gemini://perso.pw/directory?query=value\r\n", "gemini://", "perso.pw", "directory", "?", "query=value", "\r\n"]]
|
|
||||||
(parse_url "gemini://perso.pw/directory?query=value\r\n"))
|
|
||||||
|
|
||||||
regex_10 = TestCase (assertEqual
|
regex_10 = TestCase (assertEqual
|
||||||
"ensure sanitization works"
|
"ensure sanitization works"
|
||||||
"gemini://perso.pw/main.gmi/passwd"
|
"gemini://perso.pw/main.gmi/passwd"
|
||||||
|
@ -75,16 +29,7 @@ mime_4 = TestCase (assertEqual
|
||||||
(get_mime "picture.png"))
|
(get_mime "picture.png"))
|
||||||
|
|
||||||
tests = TestList
|
tests = TestList
|
||||||
[ regex_1
|
[ regex_10
|
||||||
, regex_2
|
|
||||||
, regex_3
|
|
||||||
, regex_4
|
|
||||||
, regex_5
|
|
||||||
, regex_6
|
|
||||||
, regex_7
|
|
||||||
, regex_8
|
|
||||||
, regex_9
|
|
||||||
, regex_10
|
|
||||||
, mime_1
|
, mime_1
|
||||||
, mime_2
|
, mime_2
|
||||||
, mime_3
|
, mime_3
|
||||||
|
|
|
@ -22,6 +22,7 @@ library
|
||||||
base
|
base
|
||||||
, directory
|
, directory
|
||||||
, mime-types
|
, mime-types
|
||||||
|
, network-uri
|
||||||
, regex-compat
|
, regex-compat
|
||||||
, regex-pcre
|
, regex-pcre
|
||||||
, text
|
, text
|
||||||
|
@ -37,8 +38,8 @@ executable vger
|
||||||
Paths_vger
|
Paths_vger
|
||||||
build-depends:
|
build-depends:
|
||||||
base
|
base
|
||||||
|
, network-uri
|
||||||
, optparse-applicative
|
, optparse-applicative
|
||||||
, text
|
|
||||||
, vger
|
, vger
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
@ -53,5 +54,6 @@ test-suite test
|
||||||
build-depends:
|
build-depends:
|
||||||
HUnit
|
HUnit
|
||||||
, base
|
, base
|
||||||
|
, network-uri
|
||||||
, vger
|
, vger
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
Loading…
Reference in New Issue