442 - string 'split'

This commit is contained in:
Kartik K. Agaram 2014-12-19 22:18:41 -08:00
parent f45ea0cb5c
commit 4630b4aee8
2 changed files with 158 additions and 0 deletions

72
mu.arc
View File

@ -97,6 +97,9 @@
byte-address (obj size 1 address t elem '(byte))
string (obj array t elem '(byte)) ; inspired by Go
string-address (obj size 1 address t elem '(string))
string-address-address (obj size 1 address t elem '(string-address))
string-address-array (obj array t elem '(string-address))
string-address-array-address (obj size 1 address t elem '(string-address-array))
character (obj size 1) ; int32 like a Go rune
character-address (obj size 1 address t elem '(character))
; isolating function calls
@ -1472,6 +1475,75 @@
}
(reply idx:integer))
; split: cut string 's' at every occurrence of character 'delim'.
;   inputs:  s (string-address), delim (character)
;   returns: address of a newly allocated string-address-array with one entry
;            per piece
;   - an empty 's' yields an array of length 0
;   - n delimiters always yield n+1 pieces; leading, adjacent or trailing
;     delimiters produce empty (length-0) pieces
(init-fn split  ; string, character -> string-address-array-address
  (default-scope:scope-address <- new scope:literal 30:literal)
  (s:string-address <- next-input)
  (delim:character <- next-input)  ; todo: unicode chars
  ; empty string? return empty array
  (len:integer <- length s:string-address/deref)
  { begin
    (empty?:boolean <- equal len:integer 0:literal)
    (break-unless empty?:boolean)
    (result:string-address-array-address <- new string-address-array:literal 0:literal)
    (reply result:string-address-array-address)
  }
  ; count #pieces we need room for
  (count:integer <- copy 1:literal)  ; n delimiters = n+1 pieces
  (idx:integer <- copy 0:literal)
  { begin
    (idx:integer <- find-next s:string-address delim:character idx:integer)
    ; an index at or past the end of 's' means no further delimiter
    (done?:boolean <- greater-or-equal idx:integer len:integer)
    (break-if done?:boolean)
    ; step past the delimiter just found before searching again
    (idx:integer <- add idx:integer 1:literal)
    (count:integer <- add count:integer 1:literal)
    (loop)
  }
  ; allocate space
;?   (print-primitive (("alloc: " literal)))
;?   (print-primitive count:integer)
;?   (print-primitive (("\n" literal)))
  (result:string-address-array-address <- new string-address-array:literal count:integer)
  ; repeatedly copy slices (start..end) until delimiter into result[curr-result]
  (curr-result:integer <- copy 0:literal)
  (start:integer <- copy 0:literal)
  { begin
    ; while there is still text left to slice
    (done?:boolean <- greater-or-equal start:integer len:integer)
    (break-if done?:boolean)
    (end:integer <- find-next s:string-address delim:character start:integer)
;?     (print-primitive (("i: " literal)))
;?     (print-primitive start:integer)
;?     (print-primitive (("-" literal)))
;?     (print-primitive end:integer)
;?     (print-primitive ((" => " literal)))
;?     (print-primitive curr-result:integer)
;?     (print-primitive (("\n" literal)))
    ; compute length of slice
    (slice-len:integer <- subtract end:integer start:integer)
    ; allocate result[curr-result]
    (dest:string-address-address <- index-address result:string-address-array-address/deref curr-result:integer)
    (dest:string-address-address/deref <- new string:literal slice-len:integer)
    ; copy start..end into result[curr-result]
    (src-idx:integer <- copy start:integer)
    (dest-idx:integer <- copy 0:literal)
    { begin
      (end-copy?:boolean <- greater-or-equal src-idx:integer end:integer)
      (break-if end-copy?:boolean)
      (src:character <- index s:string-address/deref src-idx:integer)
      (tmp:character-address <- index-address dest:string-address-address/deref/deref dest-idx:integer)
      (tmp:character-address/deref <- copy src:character)
      (src-idx:integer <- add src-idx:integer 1:literal)
      (dest-idx:integer <- add dest-idx:integer 1:literal)
      (loop)
    }
    ; slide over to next slice
    (start:integer <- add end:integer 1:literal)
    (curr-result:integer <- add curr-result:integer 1:literal)
    (loop)
  }
  ; When 's' ends in a delimiter, 'start' moves past the end of 's' before the
  ; final (empty) piece is reached, so the loop above fills only count-1 slots.
  ; Give the last slot an empty string instead of leaving it unassigned.
  { begin
    (done?:boolean <- greater-or-equal curr-result:integer count:integer)
    (break-if done?:boolean)
    (dest:string-address-address <- index-address result:string-address-array-address/deref curr-result:integer)
    (dest:string-address-address/deref <- new string:literal 0:literal)
  }
  (reply result:string-address-array-address)
)
) ; section 100 for system software
;; load all provided files and start at 'main'

View File

@ -3409,6 +3409,92 @@
(if (~is memory*.2 4)
(prn "F - 'find-next' finds second of multiple options"))
(reset)
(new-trace "string-split")
; One delimiter in the middle: expect the two pieces on either side of it.
(add-code '((function main [
  (1:string-address <- new "a/b")
  (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
 ])))
;? (set dump-trace*)
(run 'main)
; report any error recorded on a completed routine
(each r completed-routines*
  (whenlet problem ((rep r) 'error)
    (prn "error - " problem)))
; location 2 holds the address of the result array; the check below reads the
; piece count at that address and one string address in each following slot
(let pieces (memory* 2)
;?   (prn pieces " " (memory* pieces))
  (unless (and (is (memory* pieces) 2)
               (memory-contains-array (memory* (+ pieces 1)) "a")
               (memory-contains-array (memory* (+ pieces 2)) "b"))
    (prn "F - 'split' cuts string at delimiter")))
(reset)
(new-trace "string-split2")
; Two delimiters: expect three pieces, in order.
(add-code '((function main [
  (1:string-address <- new "a/b/c")
  (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
 ])))
;? (set dump-trace*)
(run 'main)
; report any error recorded on a completed routine
(each r completed-routines*
  (whenlet problem ((rep r) 'error)
    (prn "error - " problem)))
; location 2 holds the address of the result array; the check below reads the
; piece count at that address and one string address in each following slot
(let pieces (memory* 2)
;?   (prn pieces " " (memory* pieces))
  (unless (and (is (memory* pieces) 3)
               (memory-contains-array (memory* (+ pieces 1)) "a")
               (memory-contains-array (memory* (+ pieces 2)) "b")
               (memory-contains-array (memory* (+ pieces 3)) "c"))
    (prn "F - 'split' cuts string at two delimiters")))
(reset)
(new-trace "string-split-missing")
; No delimiter anywhere: expect a single piece equal to the whole string.
(add-code '((function main [
  (1:string-address <- new "abc")
  (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
 ])))
(run 'main)
; report any error recorded on a completed routine
(each r completed-routines*
  (whenlet problem ((rep r) 'error)
    (prn "error - " problem)))
; location 2 holds the address of the result array; the check below reads the
; piece count at that address and the lone string address in the next slot
(let pieces (memory* 2)
  (unless (and (is (memory* pieces) 1)
               (memory-contains-array (memory* (+ pieces 1)) "abc"))
    (prn "F - 'split' handles missing delimiter")))
(reset)
(new-trace "string-split-empty")
; Empty input: expect an array with zero pieces.
(add-code '((function main [
  (1:string-address <- new "")
  (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
 ])))
;? (= dump-trace* (obj whitelist '("run")))
(run 'main)
; report any error recorded on a completed routine
(each r completed-routines*
  (whenlet problem ((rep r) 'error)
    (prn "error - " problem)))
; location 2 holds the address of the result array; only the piece count
; stored at that address is checked
(let pieces (memory* 2)
;?   (prn pieces " " (memory* pieces))
  (unless (is (memory* pieces) 0)
    (prn "F - 'split' handles empty string")))
(reset)
(new-trace "string-split-empty-piece")
; Two adjacent delimiters: the empty piece between them must be kept, giving
; four pieces for three delimiters.
(add-code '((function main [
  (1:string-address <- new "a/b//c")
  (2:string-address-array-address <- split 1:string-address ((#\/ literal)))
 ])))
(run 'main)
; report any error recorded on a completed routine
(each routine completed-routines*
  (aif rep.routine!error (prn "error - " it)))
; location 2 holds the address of the result array; the check below reads the
; piece count at that address and one string address in each following slot
(let base memory*.2
  (if (or (~is memory*.base 4)
          (~memory-contains-array (memory* (+ base 1)) "a")
          (~memory-contains-array (memory* (+ base 2)) "b")
          (~memory-contains-array (memory* (+ base 3)) "")
          (~memory-contains-array (memory* (+ base 4)) "c"))
    ; message is specific to this test so a failure can be told apart from
    ; the "string-split2" test
    (prn "F - 'split' preserves empty piece between adjacent delimiters")))
) ; section 100 for string utilities
(reset)