265 lines
7.0 KiB
Standard ML
265 lines
7.0 KiB
Standard ML
structure Re : sig
|
|
(* Type of regular expressions. Int is used as type for now *)
|
|
datatype re
|
|
= Null
|
|
| Eps
|
|
| Chr of int
|
|
| Cat of re * re
|
|
| Alt of re * re
|
|
| Star of re
|
|
(* eqtype re *)
|
|
|
|
(* https://github.com/smlnj/smlnj/blob/4fd2ef86aa1341e9c43e118ab675738cd3e77135/system/Basis/Implementation/string.sig#L12 *)
|
|
val /\ : re * re -> re
|
|
val \/ : re * re -> re
|
|
end = struct
|
|
datatype re
|
|
= Null
|
|
| Eps
|
|
| Chr of int
|
|
| Cat of re * re
|
|
| Alt of re * re
|
|
| Star of re
|
|
|
|
(* Some 'notations' *)
|
|
fun op /\ (x, y) = Cat (x, y)
|
|
fun op \/ (x, y) = Alt (x, y)
|
|
infixr 8 /\
|
|
infixr 9 \/
|
|
end
|
|
|
|
structure Instr :
|
|
sig
|
|
(* Type of instructions of VM *)
|
|
datatype instr
|
|
= Char of int
|
|
| Halt
|
|
| Fork of int
|
|
| Jump of int
|
|
val toString: instr -> string
|
|
end = struct
|
|
datatype instr
|
|
= Char of int
|
|
| Halt
|
|
| Fork of int
|
|
| Jump of int
|
|
|
|
fun toString (Char c) = "Char " ^ (Int.toString c)
|
|
| toString Halt = "Halt"
|
|
| toString (Fork ind) = "Fork " ^ (Int.toString ind)
|
|
| toString (Jump ind) = "Jump " ^ (Int.toString ind)
|
|
end
|
|
|
|
structure Prog : sig
|
|
(* Type of programs used by VM *)
|
|
type prog = Instr.instr list
|
|
|
|
(* Convert regex to program *)
|
|
val compile: Re.re -> prog
|
|
|
|
val toString: prog -> string
|
|
|
|
val halted: IntListSet.set -> Instr.instr list -> bool
|
|
val ecloseOne_aux: IntListSet.set -> int -> Instr.instr list -> IntListSet.set
|
|
val ecloseOne: int -> Instr.instr list -> IntListSet.set
|
|
val eclose: IntListSet.set -> Instr.instr list -> IntListSet.set
|
|
val one: int -> Instr.instr list -> int -> IntListSet.set
|
|
val onea: IntListSet.set -> Instr.instr list -> int -> IntListSet.set
|
|
val run_aux: IntListSet.set -> int list -> Instr.instr list -> bool
|
|
|
|
|
|
(* VM simulation *)
|
|
val run: prog -> int list -> bool
|
|
|
|
end = struct
|
|
|
|
open Instr
|
|
open Re
|
|
type prog = instr list
|
|
|
|
fun compile_aux curr Null = [Halt]
|
|
| compile_aux curr Eps = []
|
|
| compile_aux curr (Chr c) = [Char c]
|
|
| compile_aux curr (Cat (r1,r2)) =
|
|
let
|
|
val p1 = compile_aux curr r1
|
|
val n1 = length p1
|
|
in
|
|
p1 @ (compile_aux n1 r2)
|
|
end
|
|
| compile_aux curr (Alt (r1,r2)) =
|
|
let
|
|
val p1 = compile_aux (curr+1) r1
|
|
val n1 = length p1
|
|
val p2 = compile_aux (curr+1+n1+1) r2
|
|
val n2 = length p2
|
|
val offset1 = curr+n1+2 (* +2 for jump and next *)
|
|
val offset2 = curr+n1+2+n2
|
|
in
|
|
[Fork offset1] @ p1 @ [Jump offset2] @ p2
|
|
end
|
|
| compile_aux curr (Star r) =
|
|
let
|
|
val p = compile_aux (curr+1) r
|
|
val n = length p
|
|
val offset = curr+1+n+1
|
|
in
|
|
[Fork offset] @ p @ [Jump curr]
|
|
end
|
|
fun compile r = (compile_aux 0 r) @ [Halt]
|
|
|
|
fun toString p =
|
|
let
|
|
val inds = List.tabulate (length p, fn x=>Int.toString x)
|
|
val aug = ListPair.zip (inds, p)
|
|
in
|
|
foldl
|
|
(fn ((ind,i), acc) => acc ^ ind ^ "| " ^ (Instr.toString i) ^ "\n")
|
|
"" aug
|
|
end
|
|
|
|
(* halted: {int} -> prog -> bool *)
|
|
(* Check whether one of current instructions is a [Halt]. *)
|
|
fun halted inds p = IntListSet.exists
|
|
(fn ind =>
|
|
if ind >= length p then false
|
|
else
|
|
case (List.nth (p, ind)) of
|
|
Halt => true
|
|
| _ => false) inds
|
|
|
|
fun ecloseOne_aux acc ind prog =
|
|
if ind >= length prog then IntListSet.empty
|
|
else if IntListSet.member (acc, ind) then IntListSet.empty
|
|
else
|
|
case (List.nth (prog, ind)) of
|
|
Char _ => IntListSet.singleton ind
|
|
| Fork k =>
|
|
let val acc' = IntListSet.add (acc, ind) in
|
|
IntListSet.union (ecloseOne_aux acc' (ind+1) prog, ecloseOne_aux acc' k prog)
|
|
end
|
|
| Jump k =>
|
|
let val acc' = IntListSet.add (acc, ind) in
|
|
ecloseOne_aux acc' k prog
|
|
end
|
|
| Halt => IntListSet.singleton ind
|
|
(* ecloseOne: int -> prog -> {int} *)
|
|
(* Take epsilon-closure of one instruction. *)
|
|
fun ecloseOne ind prog = ecloseOne_aux IntListSet.empty ind prog
|
|
|
|
(* eclose: {int} -> prog -> {int} *)
|
|
(* Take epsilon-closure of a set of instructions. *)
|
|
fun eclose inds prog = IntListSet.foldl
|
|
(fn (ind,acc) => IntListSet.union (acc, ecloseOne ind prog))
|
|
IntListSet.empty
|
|
inds
|
|
|
|
(* one: int -> prog -> a -> {int} *)
|
|
(* VM simulation for handling a single input symbol for a single instruction/pc. *)
|
|
fun one i p a =
|
|
if i >= length p then IntListSet.empty
|
|
else
|
|
case (List.nth (p, i)) of
|
|
Char c =>
|
|
if a=c then IntListSet.singleton (i+1)
|
|
else IntListSet.empty
|
|
| _ => IntListSet.empty (* This branch shouldn't be possible *)
|
|
|
|
(* cis is closure-i's *)
|
|
(* onea: {int} -> prog -> int -> {int} *)
|
|
(* VM simulation for handling a single input symbol for multiple instructions/pc. *)
|
|
fun onea cis p a = IntListSet.foldl
|
|
(fn (i,acc) => IntListSet.union (acc, one i p a))
|
|
IntListSet.empty cis
|
|
|
|
|
|
(* run_aux: inds -> inp -> prog -> res_bool *)
|
|
fun run_aux cis [] p =
|
|
if halted cis p then true
|
|
else false
|
|
| run_aux cis (a::aa) p =
|
|
if IntListSet.isEmpty cis then false
|
|
else
|
|
let
|
|
val inds = onea cis p a
|
|
in
|
|
if halted inds p then true
|
|
else
|
|
run_aux (eclose inds p) aa p
|
|
end
|
|
(* Entire VM simulation. *)
|
|
fun run p inp = run_aux (ecloseOne 0 p) inp p
|
|
end
|
|
|
|
(*************** Examples *******************)
|
|
|
|
open Re
|
|
open Instr
|
|
open Prog
|
|
|
|
|
|
val eg1 = Cat (Chr 1, Chr 2)
|
|
(* val eg1 = (Chr 1) /\ (Chr 2) *)
|
|
(* compile eg1; *)
|
|
(* val it = [Char 1,Char 2,Halt true] : instr list *)
|
|
val eg2 = Star (Chr 1)
|
|
(* compile eg2; *)
|
|
(* val it = [Fork 3,Char 1,Jump 0,Halt true] : instr list *)
|
|
val eg3 = Star (Star (Chr 1))
|
|
(* compile eg3; *)
|
|
(* val it = [Fork 5,Fork 3,Char 1,Jump 0,Jump 0,Halt true] : instr list *)
|
|
(* print (Prog.toString (compile eg3)); *)
|
|
(* 0| Fork 5 *)
|
|
(* 1| Fork 4 *)
|
|
(* 2| Char 1 *)
|
|
(* 3| Jump 1 *)
|
|
(* 4| Jump 0 *)
|
|
(* 5| Halt true *)
|
|
val eg4 = Cat (Chr 0, Alt (Chr 1, Star (Chr 2)))
|
|
(* val eg4 = (Chr 0) /\ ((Chr 1) \/ (Star (Chr 2))) *)
|
|
(* compile eg4; *)
|
|
(* val it = [Char 0,Fork 3,Char 1,Jump 5,Fork 3,Char 2,Jump 0,Halt true] *)
|
|
(* : instr list *)
|
|
(* *)
|
|
(* print (Prog.toString (compile eg4)); *)
|
|
(* 0| Char 0 *)
|
|
(* 1| Fork 4 *)
|
|
(* 2| Char 1 *)
|
|
(* 3| Jump 7 *)
|
|
(* 4| Fork 7 *)
|
|
(* 5| Char 2 *)
|
|
(* 6| Jump 4 *)
|
|
(* 7| Halt true *)
|
|
val eg5 = Alt (Chr 0, Chr 1)
|
|
(* val eg5 = (Chr 0) \/ (Chr 1) *)
|
|
(* print (Prog.toString (compile eg5)); *)
|
|
(* 0| Fork 3 *)
|
|
(* 1| Char 0 *)
|
|
(* 2| Jump 4 *)
|
|
(* 3| Char 1 *)
|
|
(* 4| Halt true *)
|
|
|
|
|
|
val prog1 = compile eg1
|
|
val prog2 = compile eg2
|
|
val prog3 = compile eg3
|
|
val prog4 = compile eg4
|
|
val prog5 = compile eg5
|
|
|
|
(*
|
|
- run prog5 [1];
|
|
val it = true : bool
|
|
- run prog5 [2];
|
|
val it = false : bool
|
|
- run prog5 [0];
|
|
val it = true : bool
|
|
- run prog3 [1,1,1,1,1,1,1];
|
|
val it = true : bool
|
|
- run prog3 [1,1,1,1,1,1,1,2];
|
|
val it = false : bool
|
|
- run prog3 [];
|
|
val it = true : bool
|
|
*)
|
|
|
|
|