96 lines
2.0 KiB
Haskell
96 lines
2.0 KiB
Haskell
-- {-# LANGUAGE UnicodeSyntax #-}
|
||
|
||
-- Brzozowski derivatives
|
||
|
||
-- | Type of regexes
|
||
data Re a
|
||
= Nil
|
||
| Eps
|
||
| Chr a
|
||
| Cat (Re a) (Re a)
|
||
| Alt (Re a) (Re a)
|
||
| Star (Re a)
|
||
|
||
instance Show a => Show (Re a) where
|
||
show r =
|
||
case r of
|
||
Nil -> "Nil"
|
||
Eps -> "Eps"
|
||
Chr c -> "C " ++ (show c)
|
||
Cat r1 r2 -> (show r1) ++ "; " ++ (show r2)
|
||
Alt r1 r2 -> (show r1) ++ "| " ++ (show r2)
|
||
Star r -> (show r) ++ "* "
|
||
|
||
-- | Check if ε can be derived from a regex
|
||
εderiv :: Eq a => Re a -> Bool
|
||
εderiv r =
|
||
case r of
|
||
Nil -> False
|
||
Eps -> True
|
||
Chr _ -> False
|
||
Cat r1 r2 -> (εderiv r1) && (εderiv r2)
|
||
Alt r1 r2 -> (εderiv r1) || (εderiv r2)
|
||
Star _ -> True
|
||
|
||
-- | Find Brzozowski derivative of a regex with respect to a letter
|
||
deriv :: Eq a => a -> Re a -> Re a
|
||
deriv a r =
|
||
case r of
|
||
Nil -> Nil
|
||
Eps -> Nil
|
||
Chr c ->
|
||
if c==a then Eps
|
||
else Nil
|
||
Cat r1 r2 ->
|
||
if εderiv r1 then deriv a r2
|
||
else Cat (deriv a r1) r2
|
||
Alt r1 r2 -> Alt (deriv a r1) (deriv a r2)
|
||
Star rr -> Cat (deriv a rr) (Star rr)
|
||
|
||
|
||
-- | See if a string matches a regex
|
||
-- exact match only
|
||
match :: Eq a => [a] -> Re a -> Bool
|
||
match [] r =
|
||
if εderiv r then True
|
||
else False
|
||
match (x:xs) r =
|
||
let newr = deriv x r in
|
||
case newr of
|
||
Nil -> False
|
||
_ -> match xs newr
|
||
-- λ> match "" (Star (Chr 'a'))
|
||
-- True
|
||
-- λ> match "aaaaa" (Star (Chr 'a'))
|
||
-- True
|
||
-- λ> match "aaaaab" (Star (Chr 'a'))
|
||
-- False
|
||
|
||
-- Couldn't figure out how to make unicode work
|
||
(/\) = Cat
|
||
(\/) = Alt
|
||
-- (@) = Chr
|
||
|
||
(|=) :: Eq a => [a] -> Re a -> Bool
|
||
(|=) = match
|
||
|
||
-- lower precedence for smaller numbers
|
||
-- infixr 7 @
|
||
infixr 6 /\
|
||
infixr 5 \/
|
||
infixr 4 |=
|
||
|
||
-- ↑0;(↑1│↑0∗);↑1
|
||
-- egr1 = Cat (Chr 0) (Cat (Alt (Chr 1) (Chr 2)) (Chr 3))
|
||
egr1 = (Chr 0) /\ ((Chr 1) \/ (Chr 2)) /\ (Chr 3)
|
||
-- egr1 = (@ 0) /\ ((Chr 1) \/ (Chr 2)) /\ (Chr 3)
|
||
-- λ> match [0,2,3] egr1
|
||
-- True
|
||
-- λ> match [0,1,3] egr1
|
||
-- True
|
||
-- λ> match [0,1,2,3] egr1
|
||
-- False
|
||
--
|
||
-- λ> [0,2,3] |= egr1
|
||
-- True
|