200 lines
4.7 KiB
Standard ML
200 lines
4.7 KiB
Standard ML
structure Re : sig
|
|
(* Type of regular expressions. Int is used as type for now *)
|
|
datatype re
|
|
= Null
|
|
| Eps
|
|
| Chr of int
|
|
| Cat of re * re
|
|
| Alt of re * re
|
|
| Star of re
|
|
(* eqtype re *)
|
|
|
|
(* https://github.com/smlnj/smlnj/blob/4fd2ef86aa1341e9c43e118ab675738cd3e77135/system/Basis/Implementation/string.sig#L12 *)
|
|
val /\ : (re * re) -> re
|
|
val \/ : (re * re) -> re
|
|
end = struct
|
|
datatype re
|
|
= Null
|
|
| Eps
|
|
| Chr of int
|
|
| Cat of re * re
|
|
| Alt of re * re
|
|
| Star of re
|
|
|
|
(* Some 'notations' *)
|
|
fun op /\ (x, y) = Cat (x, y)
|
|
fun op \/ (x, y) = Alt (x, y)
|
|
infixr 8 /\
|
|
infixr 9 \/
|
|
end
|
|
|
|
structure Instr :
|
|
sig
|
|
(* Type of instructions of VM *)
|
|
datatype instr
|
|
= Char of int
|
|
| Halt
|
|
| Fork of int
|
|
| Jump of int
|
|
val toString: instr -> string
|
|
end = struct
|
|
datatype instr
|
|
= Char of int
|
|
| Halt
|
|
| Fork of int
|
|
| Jump of int
|
|
|
|
fun toString (Char c) = "Char " ^ (Int.toString c)
|
|
| toString Halt = "Halt"
|
|
| toString (Fork ind) = "Fork " ^ (Int.toString ind)
|
|
| toString (Jump ind) = "Jump " ^ (Int.toString ind)
|
|
end
|
|
|
|
structure Prog : sig
|
|
type prog = Instr.instr list
|
|
val compile: Re.re -> prog
|
|
val toString: prog -> string
|
|
val ecloseOne: int -> prog -> IntListSet.set
|
|
end = struct
|
|
open Instr
|
|
open Re
|
|
type prog = instr list
|
|
|
|
fun compile_aux curr Null = [Halt]
|
|
| compile_aux curr Eps = []
|
|
| compile_aux curr (Chr c) = [Char c]
|
|
| compile_aux curr (Cat (r1,r2)) =
|
|
let
|
|
val p1 = compile_aux curr r1
|
|
val n1 = length p1
|
|
in
|
|
p1 @ (compile_aux n1 r2)
|
|
end
|
|
| compile_aux curr (Alt (r1,r2)) =
|
|
let
|
|
val p1 = compile_aux (curr+1) r1
|
|
val n1 = length p1
|
|
val p2 = compile_aux (curr+1+n1+1) r2
|
|
val n2 = length p2
|
|
val offset1 = curr+n1+2 (* +2 for jump and next *)
|
|
val offset2 = curr+n1+2+n2
|
|
in
|
|
[Fork offset1] @ p1 @ [Jump offset2] @ p2
|
|
end
|
|
| compile_aux curr (Star r) =
|
|
let
|
|
val p = compile_aux (curr+1) r
|
|
val n = length p
|
|
val offset = curr+1+n+1
|
|
in
|
|
[Fork offset] @ p @ [Jump curr]
|
|
end
|
|
fun compile r = (compile_aux 0 r) @ [Halt]
|
|
|
|
fun toString p =
|
|
let
|
|
val inds = List.tabulate (length p, fn x=>Int.toString x)
|
|
val aug = ListPair.zip (inds, p)
|
|
in
|
|
foldl
|
|
(fn ((ind,i), acc) => acc ^ ind ^ "| " ^ (Instr.toString i) ^ "\n")
|
|
"" aug
|
|
end
|
|
|
|
|
|
fun ecloseOne_aux acc ind prog =
|
|
if ind >= length prog then IntListSet.empty
|
|
else if IntListSet.member (acc, ind) then IntListSet.empty
|
|
else
|
|
case (List.nth (prog, ind)) of
|
|
Char _ => IntListSet.singleton ind
|
|
| Fork k =>
|
|
let val acc' = IntListSet.add (acc, ind) in
|
|
IntListSet.union (ecloseOne_aux acc' (ind+1) prog, ecloseOne_aux acc' k prog)
|
|
end
|
|
| Jump k =>
|
|
let val acc' = IntListSet.add (acc, ind) in
|
|
ecloseOne_aux acc' k prog
|
|
end
|
|
| Halt => IntListSet.singleton ind
|
|
(* ecloseOne: int -> prog -> {int} *)
|
|
(* Take epsilon-closure of one instruction. *)
|
|
fun ecloseOne ind prog = ecloseOne_aux IntListSet.empty ind prog
|
|
end
|
|
|
|
open Re
|
|
open Instr
|
|
open Prog
|
|
|
|
fun getChars_aux ((ind, Char _)::p) = ind::(getChars_aux p)
|
|
| getChars_aux (_::p) = getChars_aux p
|
|
| getChars_aux [] = []
|
|
(* getChars: prog -> [int] *)
|
|
|
|
|
|
fun getChars' p =
|
|
let
|
|
val inds = List.tabulate (length p, fn x=>x)
|
|
val aug = ListPair.zip (inds, p)
|
|
in
|
|
getChars_aux aug
|
|
end
|
|
|
|
(* Find location of all [Char] *)
|
|
fun getChars p =
|
|
let
|
|
val chrs = getChars' p
|
|
val indsc = List.tabulate (length chrs, fn x=>x+1)
|
|
in
|
|
ListPair.zip (chrs, indsc)
|
|
end
|
|
(* Find location of all [Halt] *)
|
|
fun getHalts p =
|
|
let
|
|
val temp = foldl (fn (x,(acc, ctr)) =>
|
|
case x of
|
|
Halt => (ctr::acc, ctr+1)
|
|
| _ => (acc, ctr+1)
|
|
) ([],0) p
|
|
in
|
|
map (fn x => (x,0)) (#1 temp)
|
|
end
|
|
|
|
fun getTrTb p = (getHalts p) @ (getChars p)
|
|
|
|
|
|
val eg1 = Cat (Chr 1, Chr 2)
|
|
val eg2 = Star (Chr 1)
|
|
val eg3 = Star (Star (Chr 1))
|
|
val eg4 = Cat (Chr 0, Alt (Chr 1, Star (Chr 2)))
|
|
val eg5 = Alt (Chr 0, Chr 1)
|
|
|
|
val prog1 = compile eg1
|
|
val prog2 = compile eg2
|
|
val prog3 = compile eg3
|
|
val prog4 = compile eg4
|
|
(* 0| Char 0 *)
|
|
(* 1| Fork 4 *)
|
|
(* 2| Char 1 *)
|
|
(* 3| Jump 7 *)
|
|
(* 4| Fork 7 *)
|
|
(* 5| Char 2 *)
|
|
(* 6| Jump 4 *)
|
|
(* 7| Halt *)
|
|
|
|
val prog5 = compile eg5
|
|
(* 0| Fork 3 *)
|
|
(* 1| Char 0 *)
|
|
(* 2| Jump 4 *)
|
|
(* 3| Char 1 *)
|
|
(* 4| Halt *)
|
|
|
|
(* val chrs4 = getChars prog4 *)
|
|
(* val halt4 = getHalts prog4 *)
|
|
val trtb4 = getTrTb prog4
|
|
(* val trtb4' = map (fn (a,b) => (b,a)) trtb4; *)
|
|
val charIdxs = getChars' prog4
|
|
val t1 = ecloseOne x prog4
|
|
|
|
(* fun foo ind prog *)
|