playground/haskell/brzozowksi.hs

96 lines
2.0 KiB
Haskell
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- {-# LANGUAGE UnicodeSyntax #-}
-- Brzozowski derivatives
-- | Type of regexes
data Re a
= Nil
| Eps
| Chr a
| Cat (Re a) (Re a)
| Alt (Re a) (Re a)
| Star (Re a)
instance Show a => Show (Re a) where
show r =
case r of
Nil -> "Nil"
Eps -> "Eps"
Chr c -> "C " ++ (show c)
Cat r1 r2 -> (show r1) ++ "; " ++ (show r2)
Alt r1 r2 -> (show r1) ++ "| " ++ (show r2)
Star r -> (show r) ++ "* "
-- | Check if ε can be derived from a regex
εderiv :: Eq a => Re a -> Bool
εderiv r =
case r of
Nil -> False
Eps -> True
Chr _ -> False
Cat r1 r2 -> (εderiv r1) && (εderiv r2)
Alt r1 r2 -> (εderiv r1) || (εderiv r2)
Star _ -> True
-- | Find Brzozowski derivative of a regex with respect to a letter
deriv :: Eq a => a -> Re a -> Re a
deriv a r =
case r of
Nil -> Nil
Eps -> Nil
Chr c ->
if c==a then Eps
else Nil
Cat r1 r2 ->
if εderiv r1 then deriv a r2
else Cat (deriv a r1) r2
Alt r1 r2 -> Alt (deriv a r1) (deriv a r2)
Star rr -> Cat (deriv a rr) (Star rr)
-- | See if a string matches a regex
-- exact match only
match :: Eq a => [a] -> Re a -> Bool
match [] r =
if εderiv r then True
else False
match (x:xs) r =
let newr = deriv x r in
case newr of
Nil -> False
_ -> match xs newr
-- λ> match "" (Star (Chr 'a'))
-- True
-- λ> match "aaaaa" (Star (Chr 'a'))
-- True
-- λ> match "aaaaab" (Star (Chr 'a'))
-- False
-- Couldn't figure out how to make unicode work
(/\) = Cat
(\/) = Alt
-- (@) = Chr
(|=) :: Eq a => [a] -> Re a -> Bool
(|=) = match
-- lower precedence for smaller numbers
-- infixr 7 @
infixr 6 /\
infixr 5 \/
infixr 4 |=
-- ↑0;(↑1│↑0);↑1
-- egr1 = Cat (Chr 0) (Cat (Alt (Chr 1) (Chr 2)) (Chr 3))
egr1 = (Chr 0) /\ ((Chr 1) \/ (Chr 2)) /\ (Chr 3)
-- egr1 = (@ 0) /\ ((Chr 1) \/ (Chr 2)) /\ (Chr 3)
-- λ> match [0,2,3] egr1
-- True
-- λ> match [0,1,3] egr1
-- True
-- λ> match [0,1,2,3] egr1
-- False
--
-- λ> [0,2,3] |= egr1
-- True