diff --git a/pvs/factorial.pvs b/pvs/factorial.pvs new file mode 100644 index 0000000..96727bd --- /dev/null +++ b/pvs/factorial.pvs @@ -0,0 +1,9 @@ +THEORY factorial +BEGIN + n: VAR nat + + factorial(n): RECURSIVE posnat = + (IF n = 0 THEN 1 ELSE n * factorial(n-1) ENDIF) + MEASURE n + +END factorial diff --git a/sml/parser-combinator.sml b/sml/parser-combinator.sml new file mode 100644 index 0000000..185f1e1 --- /dev/null +++ b/sml/parser-combinator.sml @@ -0,0 +1,50 @@ +(* +https://www.smlnj.org/doc/smlnj-lib/Util/str-ParserComb.html +https://github.com/smlnj/smlnj/blob/main/smlnj-lib/Util/parser-comb.sml +*) + +datatype 'a re + = Null + | Eps + | Chr of 'a + | Cat of 'a re * 'a re + | Alt of 'a re * 'a re + | Star of 'a re + +fun op /\ (x, y) = Cat (x, y) +fun op \/ (x, y) = Alt (x, y) +infixr 8 /\ +infixr 9 \/ + +(* type 'a result = ('a * string) option *) +(* type 'a parser = string -> 'a result *) + +(* fun chr c [] = NONE *) +(* | chr c (x::xs) = SOME (x, xs) *) +(* chr c : char list -> char result *) +fun chr c = + fn [] => NONE + | (x::xs) => SOME (x, xs) +(* chr #"a" (explode "abcd");; *) +(* val it = SOME (#"a",[#"b",#"c",#"d"]) : (char * char list) option *) + + + +(* chr: char -> char parser *) +(* fun chr (c:char) : char parser = *) +(* let *) +(* val pf: string -> char result = *) +(* fn [] => NONE *) +(* | x::xs => SOME (x, xs) *) +(* in {runP=pf} *) +(* end *) + +(* (#runP (chr #"a")) "abc" *) + +(* fun chr (c:char) = *) +(* let *) +(* val (pf:string->char result) = *) +(* fn [] => NONE *) +(* | x::(xs:string) => SOME (x, xs) *) +(* in {runP=pf} *) +(* end *) diff --git a/sml/thompson-nfa-vec.sml b/sml/thompson-nfa-vec.sml new file mode 100644 index 0000000..a3e9927 --- /dev/null +++ b/sml/thompson-nfa-vec.sml @@ -0,0 +1,199 @@ +structure Re : sig + (* Type of regular expressions. Int is used as type for now *) + datatype re + = Null + | Eps + | Chr of int + | Cat of re * re + | Alt of re * re + | Star of re + (* eqtype re *) + + (* https://github.com/smlnj/smlnj/blob/4fd2ef86aa1341e9c43e118ab675738cd3e77135/system/Basis/Implementation/string.sig#L12 *) + val /\ : (re * re) -> re + val \/ : (re * re) -> re +end = struct + datatype re + = Null + | Eps + | Chr of int + | Cat of re * re + | Alt of re * re + | Star of re + + (* Some 'notations' *) + fun op /\ (x, y) = Cat (x, y) + fun op \/ (x, y) = Alt (x, y) + infixr 8 /\ + infixr 9 \/ +end + +structure Instr : +sig + (* Type of instructions of VM *) + datatype instr + = Char of int + | Halt + | Fork of int + | Jump of int + val toString: instr -> string +end = struct + datatype instr + = Char of int + | Halt + | Fork of int + | Jump of int + + fun toString (Char c) = "Char " ^ (Int.toString c) + | toString Halt = "Halt" + | toString (Fork ind) = "Fork " ^ (Int.toString ind) + | toString (Jump ind) = "Jump " ^ (Int.toString ind) +end + +structure Prog : sig + type prog = Instr.instr list + val compile: Re.re -> prog + val toString: prog -> string + val ecloseOne: int -> prog -> IntListSet.set +end = struct + open Instr + open Re + type prog = instr list + + fun compile_aux curr Null = [Halt] + | compile_aux curr Eps = [] + | compile_aux curr (Chr c) = [Char c] + | compile_aux curr (Cat (r1,r2)) = + let + val p1 = compile_aux curr r1 + val n1 = length p1 + in + p1 @ (compile_aux n1 r2) + end + | compile_aux curr (Alt (r1,r2)) = + let + val p1 = compile_aux (curr+1) r1 + val n1 = length p1 + val p2 = compile_aux (curr+1+n1+1) r2 + val n2 = length p2 + val offset1 = curr+n1+2 (* +2 for jump and next *) + val offset2 = curr+n1+2+n2 + in + [Fork offset1] @ p1 @ [Jump offset2] @ p2 + end + | compile_aux curr (Star r) = + let + val p = compile_aux (curr+1) r + val n = length p + val offset = curr+1+n+1 + in + [Fork offset] @ p @ [Jump curr] + end + fun compile r = (compile_aux 0 r) @ [Halt] + + fun toString p = + let + val inds = List.tabulate (length p, fn x=>Int.toString x) + val aug = ListPair.zip (inds, p) + in + foldl + (fn ((ind,i), acc) => acc ^ ind ^ "| " ^ (Instr.toString i) ^ "\n") + "" aug + end + + + fun ecloseOne_aux acc ind prog = + if ind >= length prog then IntListSet.empty + else if IntListSet.member (acc, ind) then IntListSet.empty + else + case (List.nth (prog, ind)) of + Char _ => IntListSet.singleton ind + | Fork k => + let val acc' = IntListSet.add (acc, ind) in + IntListSet.union (ecloseOne_aux acc' (ind+1) prog, ecloseOne_aux acc' k prog) + end + | Jump k => + let val acc' = IntListSet.add (acc, ind) in + ecloseOne_aux acc' k prog + end + | Halt => IntListSet.singleton ind + (* ecloseOne: int -> prog -> {int} *) + (* Take epsilon-closure of one instruction. *) + fun ecloseOne ind prog = ecloseOne_aux IntListSet.empty ind prog +end + +open Re +open Instr +open Prog + +fun getChars_aux ((ind, Char _)::p) = ind::(getChars_aux p) + | getChars_aux (_::p) = getChars_aux p + | getChars_aux [] = [] +(* getChars: prog -> [int] *) + + +fun getChars' p = + let + val inds = List.tabulate (length p, fn x=>x) + val aug = ListPair.zip (inds, p) + in + getChars_aux aug + end + +(* Find location of all [Char] *) +fun getChars p = + let + val chrs = getChars' p + val indsc = List.tabulate (length chrs, fn x=>x+1) + in + ListPair.zip (chrs, indsc) + end +(* Find location of all [Halt] *) +fun getHalts p = + let + val temp = foldl (fn (x,(acc, ctr)) => + case x of + Halt => (ctr::acc, ctr+1) + | _ => (acc, ctr+1) + ) ([],0) p + in + map (fn x => (x,0)) (#1 temp) + end + +fun getTrTb p = (getHalts p) @ (getChars p) + + +val eg1 = Cat (Chr 1, Chr 2) +val eg2 = Star (Chr 1) +val eg3 = Star (Star (Chr 1)) +val eg4 = Cat (Chr 0, Alt (Chr 1, Star (Chr 2))) +val eg5 = Alt (Chr 0, Chr 1) + +val prog1 = compile eg1 +val prog2 = compile eg2 +val prog3 = compile eg3 +val prog4 = compile eg4 +(* 0| Char 0 *) +(* 1| Fork 4 *) +(* 2| Char 1 *) +(* 3| Jump 7 *) +(* 4| Fork 7 *) +(* 5| Char 2 *) +(* 6| Jump 4 *) +(* 7| Halt *) + +val prog5 = compile eg5 +(* 0| Fork 3 *) +(* 1| Char 0 *) +(* 2| Jump 4 *) +(* 3| Char 1 *) +(* 4| Halt *) + +(* val chrs4 = getChars prog4 *) +(* val halt4 = getHalts prog4 *) +val trtb4 = getTrTb prog4 +(* val trtb4' = map (fn (a,b) => (b,a)) trtb4; *) +val charIdxs = getChars' prog4 +(* val t1 = ecloseOne x prog4 *) + +(* fun foo ind prog *) diff --git a/sml/thompson-nfa.sml b/sml/thompson-nfa.sml new file mode 100644 index 0000000..bba6209 --- /dev/null +++ b/sml/thompson-nfa.sml @@ -0,0 +1,264 @@ +structure Re : sig + (* Type of regular expressions. Int is used as type for now *) + datatype re + = Null + | Eps + | Chr of int + | Cat of re * re + | Alt of re * re + | Star of re + (* eqtype re *) + + (* https://github.com/smlnj/smlnj/blob/4fd2ef86aa1341e9c43e118ab675738cd3e77135/system/Basis/Implementation/string.sig#L12 *) + val /\ : re * re -> re + val \/ : re * re -> re +end = struct + datatype re + = Null + | Eps + | Chr of int + | Cat of re * re + | Alt of re * re + | Star of re + + (* Some 'notations' *) + fun op /\ (x, y) = Cat (x, y) + fun op \/ (x, y) = Alt (x, y) + infixr 8 /\ + infixr 9 \/ +end + +structure Instr : +sig + (* Type of instructions of VM *) + datatype instr + = Char of int + | Halt + | Fork of int + | Jump of int + val toString: instr -> string +end = struct + datatype instr + = Char of int + | Halt + | Fork of int + | Jump of int + + fun toString (Char c) = "Char " ^ (Int.toString c) + | toString Halt = "Halt" + | toString (Fork ind) = "Fork " ^ (Int.toString ind) + | toString (Jump ind) = "Jump " ^ (Int.toString ind) +end + +structure Prog : sig + (* Type of programs used by VM *) + type prog = Instr.instr list + + (* Convert regex to program *) + val compile: Re.re -> prog + + val toString: prog -> string + + val halted: IntListSet.set -> Instr.instr list -> bool + val ecloseOne_aux: IntListSet.set -> int -> Instr.instr list -> IntListSet.set + val ecloseOne: int -> Instr.instr list -> IntListSet.set + val eclose: IntListSet.set -> Instr.instr list -> IntListSet.set + val one: int -> Instr.instr list -> int -> IntListSet.set + val onea: IntListSet.set -> Instr.instr list -> int -> IntListSet.set + val run_aux: IntListSet.set -> int list -> Instr.instr list -> bool + + + (* VM simulation *) + val run: prog -> int list -> bool + +end = struct + + open Instr + open Re + type prog = instr list + + fun compile_aux curr Null = [Halt] + | compile_aux curr Eps = [] + | compile_aux curr (Chr c) = [Char c] + | compile_aux curr (Cat (r1,r2)) = + let + val p1 = compile_aux curr r1 + val n1 = length p1 + in + p1 @ (compile_aux n1 r2) + end + | compile_aux curr (Alt (r1,r2)) = + let + val p1 = compile_aux (curr+1) r1 + val n1 = length p1 + val p2 = compile_aux (curr+1+n1+1) r2 + val n2 = length p2 + val offset1 = curr+n1+2 (* +2 for jump and next *) + val offset2 = curr+n1+2+n2 + in + [Fork offset1] @ p1 @ [Jump offset2] @ p2 + end + | compile_aux curr (Star r) = + let + val p = compile_aux (curr+1) r + val n = length p + val offset = curr+1+n+1 + in + [Fork offset] @ p @ [Jump curr] + end + fun compile r = (compile_aux 0 r) @ [Halt] + + fun toString p = + let + val inds = List.tabulate (length p, fn x=>Int.toString x) + val aug = ListPair.zip (inds, p) + in + foldl + (fn ((ind,i), acc) => acc ^ ind ^ "| " ^ (Instr.toString i) ^ "\n") + "" aug + end + + (* halted: {int} -> prog -> bool *) + (* Check whether one of current instructions is a [Halt]. *) + fun halted inds p = IntListSet.exists + (fn ind => + if ind >= length p then false + else + case (List.nth (p, ind)) of + Halt => true + | _ => false) inds + + fun ecloseOne_aux acc ind prog = + if ind >= length prog then IntListSet.empty + else if IntListSet.member (acc, ind) then IntListSet.empty + else + case (List.nth (prog, ind)) of + Char _ => IntListSet.singleton ind + | Fork k => + let val acc' = IntListSet.add (acc, ind) in + IntListSet.union (ecloseOne_aux acc' (ind+1) prog, ecloseOne_aux acc' k prog) + end + | Jump k => + let val acc' = IntListSet.add (acc, ind) in + ecloseOne_aux acc' k prog + end + | Halt => IntListSet.singleton ind + (* ecloseOne: int -> prog -> {int} *) + (* Take epsilon-closure of one instruction. *) + fun ecloseOne ind prog = ecloseOne_aux IntListSet.empty ind prog + + (* eclose: {int} -> prog -> {int} *) + (* Take epsilon-closure of a set of instructions. *) + fun eclose inds prog = IntListSet.foldl + (fn (ind,acc) => IntListSet.union (acc, ecloseOne ind prog)) + IntListSet.empty + inds + + (* one: int -> prog -> a -> {int} *) + (* VM simulation for handling a single input symbol for a single instruction/pc. *) + fun one i p a = + if i >= length p then IntListSet.empty + else + case (List.nth (p, i)) of + Char c => + if a=c then IntListSet.singleton (i+1) + else IntListSet.empty + | _ => IntListSet.empty (* This branch shouldn't be possible *) + + (* cis is closure-i's *) + (* onea: {int} -> prog -> int -> {int} *) + (* VM simulation for handling a single input symbol for multiple instructions/pc. *) + fun onea cis p a = IntListSet.foldl + (fn (i,acc) => IntListSet.union (acc, one i p a)) + IntListSet.empty cis + + + (* run_aux: inds -> inp -> prog -> res_bool *) + fun run_aux cis [] p = + if halted cis p then true + else false + | run_aux cis (a::aa) p = + if IntListSet.isEmpty cis then false + else + let + val inds = onea cis p a + in + if halted inds p then true + else + run_aux (eclose inds p) aa p + end + (* Entire VM simulation. *) + fun run p inp = run_aux (ecloseOne 0 p) inp p +end + +(*************** Examples *******************) + +open Re +open Instr +open Prog + + +val eg1 = Cat (Chr 1, Chr 2) +(* val eg1 = (Chr 1) /\ (Chr 2) *) +(* compile eg1; *) +(* val it = [Char 1,Char 2,Halt true] : instr list *) +val eg2 = Star (Chr 1) +(* compile eg2; *) +(* val it = [Fork 3,Char 1,Jump 0,Halt true] : instr list *) +val eg3 = Star (Star (Chr 1)) +(* compile eg3; *) +(* val it = [Fork 5,Fork 3,Char 1,Jump 0,Jump 0,Halt true] : instr list *) +(* print (Prog.toString (compile eg3)); *) +(* 0| Fork 5 *) +(* 1| Fork 4 *) +(* 2| Char 1 *) +(* 3| Jump 1 *) +(* 4| Jump 0 *) +(* 5| Halt true *) +val eg4 = Cat (Chr 0, Alt (Chr 1, Star (Chr 2))) +(* val eg4 = (Chr 0) /\ ((Chr 1) \/ (Star (Chr 2))) *) +(* compile eg4; *) +(* val it = [Char 0,Fork 3,Char 1,Jump 5,Fork 3,Char 2,Jump 0,Halt true] *) +(* : instr list *) +(* *) +(* print (Prog.toString (compile eg4)); *) +(* 0| Char 0 *) +(* 1| Fork 4 *) +(* 2| Char 1 *) +(* 3| Jump 7 *) +(* 4| Fork 7 *) +(* 5| Char 2 *) +(* 6| Jump 4 *) +(* 7| Halt true *) +val eg5 = Alt (Chr 0, Chr 1) +(* val eg5 = (Chr 0) \/ (Chr 1) *) +(* print (Prog.toString (compile eg5)); *) +(* 0| Fork 3 *) +(* 1| Char 0 *) +(* 2| Jump 4 *) +(* 3| Char 1 *) +(* 4| Halt true *) + + +val prog1 = compile eg1 +val prog2 = compile eg2 +val prog3 = compile eg3 +val prog4 = compile eg4 +val prog5 = compile eg5 + +(* + - run prog5 [1]; +val it = true : bool + - run prog5 [2]; +val it = false : bool + - run prog5 [0]; +val it = true : bool + - run prog3 [1,1,1,1,1,1,1]; +val it = true : bool + - run prog3 [1,1,1,1,1,1,1,2]; +val it = false : bool + - run prog3 []; +val it = true : bool +*) + +