playground/sml/thompson-nfa.sml

265 lines
7.0 KiB
Standard ML

structure Re : sig
(* Type of regular expressions. Int is used as type for now *)
datatype re
= Null
| Eps
| Chr of int
| Cat of re * re
| Alt of re * re
| Star of re
(* eqtype re *)
(* https://github.com/smlnj/smlnj/blob/4fd2ef86aa1341e9c43e118ab675738cd3e77135/system/Basis/Implementation/string.sig#L12 *)
val /\ : re * re -> re
val \/ : re * re -> re
end = struct
datatype re
= Null
| Eps
| Chr of int
| Cat of re * re
| Alt of re * re
| Star of re
(* Some 'notations' *)
fun op /\ (x, y) = Cat (x, y)
fun op \/ (x, y) = Alt (x, y)
infixr 8 /\
infixr 9 \/
end
structure Instr :
sig
(* Type of instructions of VM *)
datatype instr
= Char of int
| Halt
| Fork of int
| Jump of int
val toString: instr -> string
end = struct
datatype instr
= Char of int
| Halt
| Fork of int
| Jump of int
fun toString (Char c) = "Char " ^ (Int.toString c)
| toString Halt = "Halt"
| toString (Fork ind) = "Fork " ^ (Int.toString ind)
| toString (Jump ind) = "Jump " ^ (Int.toString ind)
end
structure Prog : sig
(* Type of programs used by VM *)
type prog = Instr.instr list
(* Convert regex to program *)
val compile: Re.re -> prog
val toString: prog -> string
val halted: IntListSet.set -> Instr.instr list -> bool
val ecloseOne_aux: IntListSet.set -> int -> Instr.instr list -> IntListSet.set
val ecloseOne: int -> Instr.instr list -> IntListSet.set
val eclose: IntListSet.set -> Instr.instr list -> IntListSet.set
val one: int -> Instr.instr list -> int -> IntListSet.set
val onea: IntListSet.set -> Instr.instr list -> int -> IntListSet.set
val run_aux: IntListSet.set -> int list -> Instr.instr list -> bool
(* VM simulation *)
val run: prog -> int list -> bool
end = struct
open Instr
open Re
type prog = instr list
fun compile_aux curr Null = [Halt]
| compile_aux curr Eps = []
| compile_aux curr (Chr c) = [Char c]
| compile_aux curr (Cat (r1,r2)) =
let
val p1 = compile_aux curr r1
val n1 = length p1
in
p1 @ (compile_aux n1 r2)
end
| compile_aux curr (Alt (r1,r2)) =
let
val p1 = compile_aux (curr+1) r1
val n1 = length p1
val p2 = compile_aux (curr+1+n1+1) r2
val n2 = length p2
val offset1 = curr+n1+2 (* +2 for jump and next *)
val offset2 = curr+n1+2+n2
in
[Fork offset1] @ p1 @ [Jump offset2] @ p2
end
| compile_aux curr (Star r) =
let
val p = compile_aux (curr+1) r
val n = length p
val offset = curr+1+n+1
in
[Fork offset] @ p @ [Jump curr]
end
fun compile r = (compile_aux 0 r) @ [Halt]
fun toString p =
let
val inds = List.tabulate (length p, fn x=>Int.toString x)
val aug = ListPair.zip (inds, p)
in
foldl
(fn ((ind,i), acc) => acc ^ ind ^ "| " ^ (Instr.toString i) ^ "\n")
"" aug
end
(* halted: {int} -> prog -> bool *)
(* Check whether one of current instructions is a [Halt]. *)
fun halted inds p = IntListSet.exists
(fn ind =>
if ind >= length p then false
else
case (List.nth (p, ind)) of
Halt => true
| _ => false) inds
fun ecloseOne_aux acc ind prog =
if ind >= length prog then IntListSet.empty
else if IntListSet.member (acc, ind) then IntListSet.empty
else
case (List.nth (prog, ind)) of
Char _ => IntListSet.singleton ind
| Fork k =>
let val acc' = IntListSet.add (acc, ind) in
IntListSet.union (ecloseOne_aux acc' (ind+1) prog, ecloseOne_aux acc' k prog)
end
| Jump k =>
let val acc' = IntListSet.add (acc, ind) in
ecloseOne_aux acc' k prog
end
| Halt => IntListSet.singleton ind
(* ecloseOne: int -> prog -> {int} *)
(* Take epsilon-closure of one instruction. *)
fun ecloseOne ind prog = ecloseOne_aux IntListSet.empty ind prog
(* eclose: {int} -> prog -> {int} *)
(* Take epsilon-closure of a set of instructions. *)
fun eclose inds prog = IntListSet.foldl
(fn (ind,acc) => IntListSet.union (acc, ecloseOne ind prog))
IntListSet.empty
inds
(* one: int -> prog -> a -> {int} *)
(* VM simulation for handling a single input symbol for a single instruction/pc. *)
fun one i p a =
if i >= length p then IntListSet.empty
else
case (List.nth (p, i)) of
Char c =>
if a=c then IntListSet.singleton (i+1)
else IntListSet.empty
| _ => IntListSet.empty (* This branch shouldn't be possible *)
(* cis is closure-i's *)
(* onea: {int} -> prog -> int -> {int} *)
(* VM simulation for handling a single input symbol for multiple instructions/pc. *)
fun onea cis p a = IntListSet.foldl
(fn (i,acc) => IntListSet.union (acc, one i p a))
IntListSet.empty cis
(* run_aux: inds -> inp -> prog -> res_bool *)
fun run_aux cis [] p =
if halted cis p then true
else false
| run_aux cis (a::aa) p =
if IntListSet.isEmpty cis then false
else
let
val inds = onea cis p a
in
if halted inds p then true
else
run_aux (eclose inds p) aa p
end
(* Entire VM simulation. *)
fun run p inp = run_aux (ecloseOne 0 p) inp p
end
(*************** Examples *******************)
open Re
open Instr
open Prog
val eg1 = Cat (Chr 1, Chr 2)
(* val eg1 = (Chr 1) /\ (Chr 2) *)
(* compile eg1; *)
(* val it = [Char 1,Char 2,Halt true] : instr list *)
val eg2 = Star (Chr 1)
(* compile eg2; *)
(* val it = [Fork 3,Char 1,Jump 0,Halt true] : instr list *)
val eg3 = Star (Star (Chr 1))
(* compile eg3; *)
(* val it = [Fork 5,Fork 3,Char 1,Jump 0,Jump 0,Halt true] : instr list *)
(* print (Prog.toString (compile eg3)); *)
(* 0| Fork 5 *)
(* 1| Fork 4 *)
(* 2| Char 1 *)
(* 3| Jump 1 *)
(* 4| Jump 0 *)
(* 5| Halt true *)
val eg4 = Cat (Chr 0, Alt (Chr 1, Star (Chr 2)))
(* val eg4 = (Chr 0) /\ ((Chr 1) \/ (Star (Chr 2))) *)
(* compile eg4; *)
(* val it = [Char 0,Fork 3,Char 1,Jump 5,Fork 3,Char 2,Jump 0,Halt true] *)
(* : instr list *)
(* *)
(* print (Prog.toString (compile eg4)); *)
(* 0| Char 0 *)
(* 1| Fork 4 *)
(* 2| Char 1 *)
(* 3| Jump 7 *)
(* 4| Fork 7 *)
(* 5| Char 2 *)
(* 6| Jump 4 *)
(* 7| Halt true *)
val eg5 = Alt (Chr 0, Chr 1)
(* val eg5 = (Chr 0) \/ (Chr 1) *)
(* print (Prog.toString (compile eg5)); *)
(* 0| Fork 3 *)
(* 1| Char 0 *)
(* 2| Jump 4 *)
(* 3| Char 1 *)
(* 4| Halt true *)
val prog1 = compile eg1
val prog2 = compile eg2
val prog3 = compile eg3
val prog4 = compile eg4
val prog5 = compile eg5
(*
- run prog5 [1];
val it = true : bool
- run prog5 [2];
val it = false : bool
- run prog5 [0];
val it = true : bool
- run prog3 [1,1,1,1,1,1,1];
val it = true : bool
- run prog3 [1,1,1,1,1,1,1,2];
val it = false : bool
- run prog3 [];
val it = true : bool
*)