mu/061text.mu

1428 lines
30 KiB
Plaintext
Raw Normal View History

# Some useful helpers for dealing with text (arrays of characters)
2015-04-17 18:22:59 +00:00
2016-09-17 17:30:24 +00:00
# Compare two texts by contents.
# Two inputs with the same address (including two nulls) are equal; a null
# never equals a non-null text.
def equal a:text, b:text -> result:bool [
  local-scope
  load-inputs
  # fast path: both inputs are the very same address (this also covers null == null)
  addr-a:num, addr-b:num <- deaddress a, b
  same-address?:bool <- equal addr-a, addr-b
  return-if same-address?, true
  # at most one side can still be null here; either way the texts differ
  return-unless a, false
  return-unless b, false
  len-a:num <- length *a
  len-b:num <- length *b
  # texts of different lengths can never match
  trace 99, [text-equal], [comparing lengths]
  same-length?:bool <- equal len-a, len-b
  return-unless same-length?, false
  # walk both texts in lockstep, bailing out at the first mismatch
  trace 99, [text-equal], [comparing characters]
  pos:num <- copy 0
  {
    more?:bool <- lesser-than pos, len-a
    break-unless more?
    from-a:char <- index *a, pos
    from-b:char <- index *b, pos
    same-char?:bool <- equal from-a, from-b
    return-unless same-char?, false
    pos <- add pos, 1
    loop
  }
  return true
]
scenario text-equal-reflexive [
  local-scope
  s:text <- new [abc]
  run [
    10:bool/raw <- equal s, s
  ]
  # comparing a text against itself must succeed
  memory-should-contain [
    10 <- 1
  ]
]
scenario text-equal-identical [
  local-scope
  # two separately allocated texts holding the same characters
  left:text <- new [abc]
  right:text <- new [abc]
  run [
    10:bool/raw <- equal left, right
  ]
  memory-should-contain [
    10 <- 1  # abc == abc
  ]
]
scenario text-equal-distinct-lengths [
  local-scope
  shorter:text <- new [abc]
  longer:text <- new [abcd]
  run [
    10:bool/raw <- equal shorter, longer
  ]
  memory-should-contain [
    10 <- 0  # abc != abcd
  ]
  # the length comparison alone must decide; characters are never compared
  trace-should-contain [
    text-equal: comparing lengths
  ]
  trace-should-not-contain [
    text-equal: comparing characters
  ]
]
scenario text-equal-with-empty [
  local-scope
  empty:text <- new []
  full:text <- new [abcd]
  run [
    10:bool/raw <- equal empty, full
  ]
  memory-should-contain [
    10 <- 0  # the empty text differs from abcd
  ]
]
2016-11-23 22:01:07 +00:00
scenario text-equal-with-null [
  local-scope
  full:text <- new [abcd]
  nothing:text <- copy null
  run [
    # null given as a literal, on either side
    10:bool/raw <- equal full, null
    11:bool/raw <- equal null, full
    # null held in a variable, on either side and on both
    12:bool/raw <- equal full, nothing
    13:bool/raw <- equal nothing, full
    14:bool/raw <- equal nothing, nothing
  ]
  memory-should-contain [
    10 <- 0
    11 <- 0
    12 <- 0
    13 <- 0
    14 <- 1
  ]
  # none of the comparisons above may raise an error
  check-trace-count-for-label 0, [error]
]
scenario text-equal-common-lengths-but-distinct [
  local-scope
  # same length, different final character
  left:text <- new [abc]
  right:text <- new [abd]
  run [
    10:bool/raw <- equal left, right
  ]
  memory-should-contain [
    10 <- 0  # abc != abd
  ]
]
2015-04-18 03:36:25 +00:00
# A new type to help incrementally construct texts.
container buffer:_elem [
  # number of elements stored so far
  length:num
  # backing array; its own length is compared against 'length' to detect a full buffer
  data:&:@:_elem
]
# Allocate an empty buffer whose backing array has room for 'capacity' elements.
def new-buffer capacity:num -> result:&:buffer:_elem [
  local-scope
  load-inputs
  # a missing (zero) capacity falls back to 10 elements
  {
    break-if capacity
    capacity <- copy 10
  }
  result <- new {(buffer _elem): type}
  *result <- put *result, length:offset, 0
  storage:&:@:_elem <- new _elem:type, capacity
  *result <- put *result, data:offset, storage
]
2015-04-18 04:51:13 +00:00
# Replace the buffer's backing array with one twice as long, preserving contents.
def grow-buffer buf:&:buffer:_elem -> buf:&:buffer:_elem [
  local-scope
  load-inputs
  # allocate the replacement array and install it in the buffer
  old:&:@:_elem <- get *buf, data:offset
  capacity:num <- length *old
  doubled:num <- multiply capacity, 2
  fresh:&:@:_elem <- new _elem:type, doubled
  *buf <- put *buf, data:offset, fresh
  # carry every slot of the old array over to the same position
  pos:num <- copy 0
  {
    more?:bool <- lesser-than pos, capacity
    break-unless more?
    elem:_elem <- index *old, pos
    *fresh <- put-index *fresh, pos, elem
    pos <- add pos, 1
    loop
  }
]
# Report whether the buffer's element count has reached the size of its backing array.
def buffer-full? in:&:buffer:_elem -> result:bool [
  local-scope
  load-inputs
  storage:&:@:_elem <- get *in, data:offset
  room:num <- length *storage
  used:num <- get *in, length:offset
  # full once 'used' is no smaller than 'room'
  result <- lesser-or-equal room, used
]
# most broadly applicable definition of append to a buffer
# Store x in the first unused slot of buf, growing the buffer first if it is full.
def append buf:&:buffer:_elem, x:_elem -> buf:&:buffer:_elem [
  local-scope
  load-inputs
  # make room before touching the data array
  {
    no-room?:bool <- buffer-full? buf
    break-unless no-room?
    buf <- grow-buffer buf
  }
  # write x at index 'length', then bump 'length'
  pos:num <- get *buf, length:offset
  storage:&:@:_elem <- get *buf, data:offset
  *storage <- put-index *storage, pos, x
  pos <- add pos, 1
  *buf <- put *buf, length:offset, pos
]
# most broadly applicable definition of append to a buffer of characters: just
# call to-text
# Convert x with to-text and append the resulting text to the character buffer.
def append buf:&:buffer:char, x:_elem -> buf:&:buffer:char [
  local-scope
  load-inputs
  rendered:text <- to-text x
  buf <- append buf, rendered
]
# specialization for characters that is backspace-aware
# Append one character to a character buffer.
# Character 8 (backspace) is not stored; it removes the last character instead.
def append buf:&:buffer:char, c:char -> buf:&:buffer:char [
  local-scope
  load-inputs
  count:num <- get *buf, length:offset
  {
    erase?:bool <- equal c, 8/backspace
    break-unless erase?
    # shrink by one character, unless the buffer is already empty
    {
      has-chars?:bool <- greater-than count, 0
      break-unless has-chars?
      count <- subtract count, 1
      *buf <- put *buf, length:offset, count
    }
    return
  }
  # ordinary character: make room if needed
  {
    no-room?:bool <- buffer-full? buf
    break-unless no-room?
    buf <- grow-buffer buf
  }
  # write c at index 'length', then bump 'length'
  storage:text <- get *buf, data:offset
  *storage <- put-index *storage, count, c
  count <- add count, 1
  *buf <- put *buf, length:offset, count
]
# Append every element of array t to buf, one element at a time, in order.
def append buf:&:buffer:_elem, t:&:@:_elem -> buf:&:buffer:_elem [
  local-scope
  load-inputs
  total:num <- length *t
  pos:num <- copy 0
  {
    more?:bool <- lesser-than pos, total
    break-unless more?
    elem:_elem <- index *t, pos
    buf <- append buf, elem
    pos <- add pos, 1
    loop
  }
]
scenario append-to-empty-buffer [
  local-scope
  buf:&:buffer:char <- new-buffer
  run [
    letter:char <- copy 97/a
    buf <- append buf, letter
    10:num/raw <- get *buf, length:offset
    contents:text <- get *buf, data:offset
    11:char/raw <- index *contents, 0
    12:char/raw <- index *contents, 1
  ]
  memory-should-contain [
    10 <- 1  # one character stored
    11 <- 97  # a
    12 <- 0  # the slot after it is still zero
  ]
]
scenario append-to-buffer [
  local-scope
  # start from a buffer that already holds 'a'
  buf:&:buffer:char <- new-buffer
  letter:char <- copy 97/a
  buf <- append buf, letter
  run [
    letter <- copy 98/b
    buf <- append buf, letter
    10:num/raw <- get *buf, length:offset
    contents:text <- get *buf, data:offset
    11:char/raw <- index *contents, 0
    12:char/raw <- index *contents, 1
    13:char/raw <- index *contents, 2
  ]
  memory-should-contain [
    10 <- 2  # two characters stored
    11 <- 97  # a
    12 <- 98  # b
    13 <- 0  # the slot after them is still zero
  ]
]
scenario append-grows-buffer [
  local-scope
  buf:&:buffer:char <- new-buffer 3
  data-at-start:text <- get *buf, data:offset
  buf <- append buf, [abc]  # fills all 3 slots
  data-when-full:text <- get *buf, data:offset
  run [
    10:bool/raw <- equal data-at-start, data-when-full
    11:@:char/raw <- copy *data-when-full
    +buffer-filled
    extra:char <- copy 100/d
    buf <- append buf, extra
    data-after-growth:text <- get *buf, data:offset
    20:bool/raw <- equal data-at-start, data-after-growth
    21:num/raw <- get *buf, length:offset
    30:@:char/raw <- copy *data-after-growth
  ]
  memory-should-contain [
    # state captured before +buffer-filled
    10 <- 1  # filling to capacity left the data array as it was
    11 <- 3  # size of the data array
    12 <- 97  # a
    13 <- 98  # b
    14 <- 99  # c
    # state captured after the fourth append
    20 <- 0  # the data array is no longer equal to the original one
    21 <- 4  # buffer length
    30 <- 6  # the data array is now twice as long
    31 <- 97  # a
    32 <- 98  # b
    33 <- 99  # c
    34 <- 100  # d
    35 <- 0
    36 <- 0
  ]
]
scenario buffer-append-handles-backspace [
  local-scope
  buf:&:buffer:char <- new-buffer
  buf <- append buf, [ab]
  run [
    # appending a backspace removes the trailing 'b'
    erase:char <- copy 8/backspace
    buf <- append buf, erase
    contents:text <- buffer-to-array buf
    10:@:char/raw <- copy *contents
  ]
  memory-should-contain [
    10 <- 1  # one character left
    11 <- 97  # a
    12 <- 0
  ]
]
scenario append-to-buffer-of-non-characters [
  local-scope
  # creating a buffer whose elements are texts rather than characters must not raise errors
  texts:&:buffer:text <- new-buffer 1/capacity
]
# Copy the contents of a buffer into a freshly allocated array of exactly 'length' elements.
def buffer-to-array in:&:buffer:_elem -> result:&:@:_elem [
  local-scope
  load-inputs
  # a null buffer yields a null array
  return-unless in, null
  used:num <- get *in, length:offset
  storage:&:@:_elem <- get *in, data:offset
  # the backing array is usually longer than the contents, so only the
  # first 'used' elements are copied into a right-sized array
  result <- new _elem:type, used
  pos:num <- copy 0
  {
    more?:bool <- lesser-than pos, used
    break-unless more?
    elem:_elem <- index *storage, pos
    *result <- put-index *result, pos, elem
    pos <- add pos, 1
    loop
  }
]
2017-11-16 00:39:15 +00:00
# True for a null array and for an array of length 0.
def blank? x:&:@:_elem -> result:bool [
  local-scope
  load-inputs
  {
    break-if x
    # null counts as blank
    return true
  }
  size:num <- length *x
  result <- equal size, 0
]
# Append any number of texts together.
# A later layer also translates calls to this to implicitly call to-text, so
# append to string becomes effectively dynamically typed.
#
# Beware though: this hack restricts how much 'append' can be overridden. Any
# new variants that match:
# append _:text, ___
# will never ever get used.
# Concatenate 'first' and every further input into one new text; null inputs are skipped.
def append first:text -> result:text [
  local-scope
  load-inputs
  acc:&:buffer:char <- new-buffer 30
  # the declared input
  {
    break-unless first
    acc <- append acc, first
  }
  # any number of additional inputs
  {
    piece:text, more?:bool <- next-input
    break-unless more?
    {
      # a null piece contributes nothing
      break-unless piece
      acc <- append acc, piece
    }
    loop
  }
  result <- buffer-to-array acc
]
scenario text-append-1 [
  local-scope
  head:text <- new [hello,]
  tail:text <- new [ world!]
  run [
    joined:text <- append head, tail
    10:@:char/raw <- copy *joined
  ]
  # the two texts are concatenated in order
  memory-should-contain [
    10:array:character <- [hello, world!]
  ]
]
2015-04-20 17:25:02 +00:00
scenario text-append-null [
  local-scope
  nothing:text <- copy null
  tail:text <- new [ world!]
  run [
    joined:text <- append nothing, tail
    10:@:char/raw <- copy *joined
  ]
  # a null first input contributes nothing
  memory-should-contain [
    10:array:character <- [ world!]
  ]
]
scenario text-append-null-2 [
  local-scope
  head:text <- new [hello,]
  nothing:text <- copy null
  run [
    joined:text <- append head, nothing
    10:@:char/raw <- copy *joined
  ]
  # a null second input contributes nothing
  memory-should-contain [
    10:array:character <- [hello,]
  ]
]
2016-09-15 16:55:14 +00:00
scenario text-append-multiary [
  local-scope
  greeting:text <- new [hello, ]
  name:text <- new [world]
  mark:text <- new [!]
  run [
    # three inputs in one call; the result overwrites the third variable
    mark:text <- append greeting, name, mark
    10:@:char/raw <- copy *mark
  ]
  memory-should-contain [
    10:array:character <- [hello, world!]
  ]
]
scenario replace-character-in-text [
  local-scope
  s:text <- new [abc]
  run [
    s <- replace s, 98/b, 122/z
    10:@:char/raw <- copy *s
  ]
  # the middle character is swapped in place
  memory-should-contain [
    10:array:character <- [azc]
  ]
]
2016-09-17 17:28:25 +00:00
# Overwrite, in place, every occurrence of oldc at or after index 'from' with newc.
def replace s:text, oldc:char, newc:char, from:num/optional -> s:text [
  local-scope
  load-inputs
  len:num <- length *s
  pos:num <- copy from
  {
    # jump to the next occurrence; an index at or past len means none is left
    pos <- find-next s, oldc, pos
    exhausted?:bool <- greater-or-equal pos, len
    break-if exhausted?
    *s <- put-index *s, pos, newc
    # resume the search just after the character that was replaced
    pos <- add pos, 1
    loop
  }
]
scenario replace-character-at-start [
  local-scope
  s:text <- new [abc]
  run [
    s <- replace s, 97/a, 122/z
    10:@:char/raw <- copy *s
  ]
  # the very first character is replaced
  memory-should-contain [
    10:array:character <- [zbc]
  ]
]
scenario replace-character-at-end [
  local-scope
  s:text <- new [abc]
  run [
    s <- replace s, 99/c, 122/z
    10:@:char/raw <- copy *s
  ]
  # the very last character is replaced
  memory-should-contain [
    10:array:character <- [abz]
  ]
]
scenario replace-character-missing [
  local-scope
  s:text <- new [abc]
  run [
    s <- replace s, 100/d, 122/z
    10:@:char/raw <- copy *s
  ]
  # 'd' does not occur, so the text is unchanged
  memory-should-contain [
    10:array:character <- [abc]
  ]
]
scenario replace-all-characters [
  local-scope
  s:text <- new [banana]
  run [
    s <- replace s, 97/a, 122/z
    10:@:char/raw <- copy *s
  ]
  # every 'a' is replaced, not just the first
  memory-should-contain [
    10:array:character <- [bznznz]
  ]
]
2015-04-20 17:25:02 +00:00
# replace underscores in first with remaining args
# Build a new text from 'template', substituting the remaining inputs, in
# order, for successive '_' characters (code 95).
# Works in two passes over the inputs: the first computes the size of the
# result, the second fills it in.
def interpolate template:text -> result:text [
  local-scope
  load-inputs  # consume just the template
  # pass 1: compute result-len, space to allocate for result
  tem-len:num <- length *template
  result-len:num <- copy tem-len
  {
    # while inputs remain
    a:text, arg-received?:bool <- next-input
    break-unless arg-received?
    # result-len = result-len + arg.length - 1 (for the 'underscore' being replaced)
    a-len:num <- length *a
    result-len <- add result-len, a-len
    result-len <- subtract result-len, 1
    loop
  }
  rewind-inputs
  _ <- next-input  # skip template
  result <- new character:type, result-len
  # pass 2: repeatedly copy sections of template and 'holes' into result
  result-idx:num <- copy 0
  i:num <- copy 0
  {
    # while arg received
    a:text, arg-received?:bool <- next-input
    break-unless arg-received?
    # a-len must describe the argument being copied now; left over from pass 1
    # it would hold the length of the *last* argument, truncating longer
    # arguments and indexing past the end of shorter ones
    a-len <- length *a
    # copy template into result until '_'
    {
      # while i < template.length
      tem-done?:bool <- greater-or-equal i, tem-len
      # template exhausted before a hole was found: stop substituting altogether
      break-if tem-done?, +done
      # while template[i] != '_'
      in:char <- index *template, i
      underscore?:bool <- equal in, 95/_
      break-if underscore?
      # result[result-idx] = template[i]
      *result <- put-index *result, result-idx, in
      i <- add i, 1
      result-idx <- add result-idx, 1
      loop
    }
    # copy 'a' into result
    j:num <- copy 0
    {
      # while j < a.length
      arg-done?:bool <- greater-or-equal j, a-len
      break-if arg-done?
      # result[result-idx] = a[j]
      in:char <- index *a, j
      *result <- put-index *result, result-idx, in
      j <- add j, 1
      result-idx <- add result-idx, 1
      loop
    }
    # skip '_' in template
    i <- add i, 1
    loop  # interpolate next arg
  }
  +done
  # done with holes; copy rest of template directly into result
  {
    # while i < template.length
    tem-done?:bool <- greater-or-equal i, tem-len
    break-if tem-done?
    # result[result-idx] = template[i]
    in:char <- index *template, i
    *result <- put-index *result, result-idx, in
    i <- add i, 1
    result-idx <- add result-idx, 1
    loop
  }
]
scenario interpolate-works [
  local-scope
  template:text <- new [abc_ghi]
  filler:text <- new [def]
  run [
    filled:text <- interpolate template, filler
    10:@:char/raw <- copy *filled
  ]
  # the underscore in the middle is replaced by the second input
  memory-should-contain [
    10:array:character <- [abcdefghi]
  ]
]
scenario interpolate-at-start [
  local-scope
  template:text <- new [_, hello!]
  filler:text <- new [abc]
  run [
    filled:text <- interpolate template, filler
    10:@:char/raw <- copy *filled
  ]
  memory-should-contain [
    10:array:character <- [abc, hello!]
    22 <- 0  # first location past the result
  ]
]
scenario interpolate-at-end [
  local-scope
  template:text <- new [hello, _]
  filler:text <- new [abc]
  run [
    filled:text <- interpolate template, filler
    10:@:char/raw <- copy *filled
  ]
  # the hole is the template's final character
  memory-should-contain [
    10:array:character <- [hello, abc]
  ]
]
2015-05-23 18:37:44 +00:00
2016-09-17 17:30:24 +00:00
# result:bool <- space? c:char
# True when c equals one of the whitespace code points listed below.
def space? c:char -> result:bool [
  local-scope
  load-inputs
  # the four most common cases come first
  hit?:bool <- equal c, 32/space
  return-if hit?, true
  hit? <- equal c, 10/newline
  return-if hit?, true
  hit? <- equal c, 9/tab
  return-if hit?, true
  hit? <- equal c, 13/carriage-return
  return-if hit?, true
  # remaining uncommon cases in sorted order
  # http://unicode.org code-points in unicode-set Z and Pattern_White_Space
  hit? <- equal c, 11/ctrl-k
  return-if hit?, true
  hit? <- equal c, 12/ctrl-l
  return-if hit?, true
  hit? <- equal c, 133/ctrl-0085
  return-if hit?, true
  hit? <- equal c, 160/no-break-space
  return-if hit?, true
  hit? <- equal c, 5760/ogham-space-mark
  return-if hit?, true
  hit? <- equal c, 8192/en-quad
  return-if hit?, true
  hit? <- equal c, 8193/em-quad
  return-if hit?, true
  hit? <- equal c, 8194/en-space
  return-if hit?, true
  hit? <- equal c, 8195/em-space
  return-if hit?, true
  hit? <- equal c, 8196/three-per-em-space
  return-if hit?, true
  hit? <- equal c, 8197/four-per-em-space
  return-if hit?, true
  hit? <- equal c, 8198/six-per-em-space
  return-if hit?, true
  hit? <- equal c, 8199/figure-space
  return-if hit?, true
  hit? <- equal c, 8200/punctuation-space
  return-if hit?, true
  hit? <- equal c, 8201/thin-space
  return-if hit?, true
  hit? <- equal c, 8202/hair-space
  return-if hit?, true
  hit? <- equal c, 8206/left-to-right
  return-if hit?, true
  hit? <- equal c, 8207/right-to-left
  return-if hit?, true
  hit? <- equal c, 8232/line-separator
  return-if hit?, true
  hit? <- equal c, 8233/paragraph-separator
  return-if hit?, true
  hit? <- equal c, 8239/narrow-no-break-space
  return-if hit?, true
  hit? <- equal c, 8287/medium-mathematical-space
  return-if hit?, true
  # last candidate: its comparison is the answer either way
  result <- equal c, 12288/ideographic-space
]
# Return a new text holding s without its leading and trailing whitespace
# (as decided by space?). A text with no non-whitespace character yields an
# empty text.
def trim s:text -> result:text [
  local-scope
  load-inputs
  len:num <- length *s
  # left trim: compute start, the index of the first non-whitespace character
  start:num <- copy 0
  {
    {
      at-end?:bool <- greater-or-equal start, len
      break-unless at-end?
      # ran off the end without finding a non-whitespace character
      result <- new character:type, 0
      return
    }
    curr:char <- index *s, start
    whitespace?:bool <- space? curr
    break-unless whitespace?
    start <- add start, 1
    loop
  }
  # right trim: compute end, the index of the last non-whitespace character
  end:num <- subtract len, 1
  {
    # s[start] is known to be non-whitespace, so the scan stops at start at
    # the latest. end == start is legitimate (exactly one character
    # survives, e.g. trimming [a]); only end < start would be a logic error.
    not-before-start?:bool <- greater-or-equal end, start
    assert not-before-start?, [end ran up against start]
    curr:char <- index *s, end
    whitespace?:bool <- space? curr
    break-unless whitespace?
    end <- subtract end, 1
    loop
  }
  # result = new character[end+1 - start]
  new-len:num <- subtract end, start, -1
  result:text <- new character:type, new-len
  # copy the untrimmed parts between start and end
  i:num <- copy start
  j:num <- copy 0
  {
    # while i <= end
    done?:bool <- greater-than i, end
    break-if done?
    # result[j] = s[i]
    src:char <- index *s, i
    *result <- put-index *result, j, src
    i <- add i, 1
    j <- add j, 1
    loop
  }
]
scenario trim-unmodified [
  local-scope
  plain:text <- new [abc]
  run [
    trimmed:text <- trim plain
    1:@:char/raw <- copy *trimmed
  ]
  # nothing to strip on either side
  memory-should-contain [
    1:array:character <- [abc]
  ]
]
scenario trim-left [
  local-scope
  padded:text <- new [ abc]
  run [
    trimmed:text <- trim padded
    1:@:char/raw <- copy *trimmed
  ]
  # leading whitespace is dropped
  memory-should-contain [
    1:array:character <- [abc]
  ]
]
scenario trim-right [
  local-scope
  padded:text <- new [abc ]
  run [
    trimmed:text <- trim padded
    1:@:char/raw <- copy *trimmed
  ]
  # trailing whitespace is dropped
  memory-should-contain [
    1:array:character <- [abc]
  ]
]
scenario trim-left-right [
  local-scope
  padded:text <- new [ abc ]
  run [
    trimmed:text <- trim padded
    1:@:char/raw <- copy *trimmed
  ]
  # whitespace is dropped on both sides
  memory-should-contain [
    1:array:character <- [abc]
  ]
]
2015-05-23 19:30:58 +00:00
scenario trim-newline-tab [
  local-scope
  # the literal deliberately runs onto the next line so that it ends in a newline
  padded:text <- new [ abc
]
  run [
    trimmed:text <- trim padded
    1:@:char/raw <- copy *trimmed
  ]
  memory-should-contain [
    1:array:character <- [abc]
  ]
]
2016-09-17 17:28:25 +00:00
# Index of the first occurrence of character 'pattern' in 'text' at or after idx.
# When there is none, the result is at or past the length of text; an idx that
# already starts past the end is returned unchanged.
def find-next text:text, pattern:char, idx:num -> next-index:num [
  local-scope
  load-inputs
  limit:num <- length *text
  {
    in-range?:bool <- lesser-than idx, limit
    break-unless in-range?
    here:char <- index *text, idx
    hit?:bool <- equal here, pattern
    break-if hit?
    idx <- add idx, 1
    loop
  }
  return idx
]
scenario text-find-next [
  local-scope
  haystack:text <- new [a/b]
  run [
    10:num/raw <- find-next haystack, 47/slash, 0/start-index
  ]
  memory-should-contain [
    10 <- 1  # position of the slash
  ]
]
scenario text-find-next-empty [
  local-scope
  haystack:text <- new []
  run [
    10:num/raw <- find-next haystack, 47/slash, 0/start-index
  ]
  memory-should-contain [
    10 <- 0  # nothing to search; the start index comes back
  ]
]
scenario text-find-next-initial [
  local-scope
  haystack:text <- new [/abc]
  run [
    10:num/raw <- find-next haystack, 47/slash, 0/start-index
  ]
  memory-should-contain [
    10 <- 0  # match at the very first character
  ]
]
scenario text-find-next-final [
  local-scope
  haystack:text <- new [abc/]
  run [
    10:num/raw <- find-next haystack, 47/slash, 0/start-index
  ]
  memory-should-contain [
    10 <- 3  # match at the very last character
  ]
]
scenario text-find-next-missing [
  local-scope
  haystack:text <- new [abcd]
  run [
    10:num/raw <- find-next haystack, 47/slash, 0/start-index
  ]
  memory-should-contain [
    10 <- 4  # no slash: the result is the length of the text
  ]
]
scenario text-find-next-invalid-index [
  local-scope
  haystack:text <- new [abc]
  run [
    10:num/raw <- find-next haystack, 47/slash, 4/start-index
  ]
  memory-should-contain [
    10 <- 4  # a start index past the end comes back unchanged
  ]
]
scenario text-find-next-first [
  local-scope
  haystack:text <- new [ab/c/]
  run [
    10:num/raw <- find-next haystack, 47/slash, 0/start-index
  ]
  memory-should-contain [
    10 <- 2  # the earlier of the two slashes
  ]
]
scenario text-find-next-second [
  local-scope
  haystack:text <- new [ab/c/]
  run [
    10:num/raw <- find-next haystack, 47/slash, 3/start-index
  ]
  memory-should-contain [
    10 <- 4  # starting after the first slash finds the second
  ]
]
# search for a pattern of multiple characters
# fairly dumb algorithm
2016-09-17 17:28:25 +00:00
# Index of the first position at or after idx where the text 'pattern' occurs
# in 'text'. When there is no such position the result is at or past the
# length of text.
def find-next text:text, pattern:text, idx:num -> next-index:num [
  local-scope
  load-inputs
  # an empty pattern has no first character to index; hand back the starting
  # index, which is what the search loop below would settle on anyway
  pattern-len:num <- length *pattern
  empty-pattern?:bool <- equal pattern-len, 0
  return-if empty-pattern?, idx
  first:char <- index *pattern, 0
  # repeatedly check for match at current idx
  len:num <- length *text
  {
    # does some unnecessary work checking even when there isn't enough of text left
    done?:bool <- greater-or-equal idx, len
    break-if done?
    found?:bool <- match-at text, pattern, idx
    break-if found?
    idx <- add idx, 1
    # optimization: skip past indices that definitely won't match
    idx <- find-next text, first, idx
    loop
  }
  return idx
]
scenario find-next-text-1 [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new [bc]
  run [
    10:num/raw <- find-next haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 1  # bc starts at index 1
  ]
]
scenario find-next-text-2 [
  local-scope
  haystack:text <- new [abcd]
  needle:text <- new [bc]
  run [
    10:num/raw <- find-next haystack, needle, 1
  ]
  memory-should-contain [
    10 <- 1  # the match begins exactly at the start index
  ]
]
scenario find-next-no-match [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new [bd]
  run [
    10:num/raw <- find-next haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 3  # not found: the result is the length of the text
  ]
]
scenario find-next-suffix-match [
  local-scope
  haystack:text <- new [abcd]
  needle:text <- new [cd]
  run [
    10:num/raw <- find-next haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 2  # the pattern ends exactly where the text ends
  ]
]
scenario find-next-suffix-match-2 [
  local-scope
  haystack:text <- new [abcd]
  needle:text <- new [cde]
  run [
    10:num/raw <- find-next haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 4  # only a prefix of the pattern fits at the end, so not found
  ]
]
# checks if pattern matches at index 'idx'
2016-09-17 17:30:24 +00:00
# True when every character of 'pattern' equals the character of 'text' at the
# corresponding position, starting from index idx of text.
def match-at text:text, pattern:text, idx:num -> result:bool [
  local-scope
  load-inputs
  pattern-len:num <- length *pattern
  # the pattern only fits if it starts no later than text.length - pattern.length
  last-start:num <- length *text
  last-start <- subtract last-start, pattern-len
  too-late?:bool <- greater-than idx, last-start
  return-if too-late?, false/not-found
  # compare character by character, giving up at the first difference
  k:num <- copy 0
  {
    more?:bool <- lesser-than k, pattern-len
    break-unless more?
    actual:char <- index *text, idx
    wanted:char <- index *pattern, k
    same?:bool <- equal actual, wanted
    return-unless same?, false/not-found
    idx <- add idx, 1
    k <- add k, 1
    loop
  }
  return true/found
]
scenario match-at-checks-pattern-at-index [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new [ab]
  run [
    10:bool/raw <- match-at haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 1  # ab occurs at index 0
  ]
]
scenario match-at-reflexive [
  local-scope
  s:text <- new [abc]
  run [
    10:bool/raw <- match-at s, s, 0
  ]
  memory-should-contain [
    10 <- 1  # a text matches itself at index 0
  ]
]
scenario match-at-outside-bounds [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new [a]
  run [
    10:bool/raw <- match-at haystack, needle, 4
  ]
  memory-should-contain [
    10 <- 0  # index 4 lies past the end of the text
  ]
]
scenario match-at-empty-pattern [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new []
  run [
    10:bool/raw <- match-at haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 1  # the empty pattern matches at any valid index
  ]
]
scenario match-at-empty-pattern-outside-bound [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new []
  run [
    10:bool/raw <- match-at haystack, needle, 4
  ]
  memory-should-contain [
    10 <- 0  # even the empty pattern cannot match past the end
  ]
]
scenario match-at-empty-text [
  local-scope
  haystack:text <- new []
  needle:text <- new [abc]
  run [
    10:bool/raw <- match-at haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 0  # a non-empty pattern cannot fit in an empty text
  ]
]
scenario match-at-empty-against-empty [
  local-scope
  s:text <- new []
  run [
    10:bool/raw <- match-at s, s, 0
  ]
  memory-should-contain [
    10 <- 1  # an empty pattern matches an empty text at index 0
  ]
]
scenario match-at-inside-bounds [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new [bc]
  run [
    10:bool/raw <- match-at haystack, needle, 1
  ]
  memory-should-contain [
    10 <- 1  # bc occurs at index 1
  ]
]
scenario match-at-inside-bounds-2 [
  local-scope
  haystack:text <- new [abc]
  needle:text <- new [bc]
  run [
    10:bool/raw <- match-at haystack, needle, 0
  ]
  memory-should-contain [
    10 <- 0  # bc fits at index 0 but the characters differ
  ]
]
2016-09-17 20:00:39 +00:00
# Split s at every occurrence of delim into an array of newly allocated texts.
# An empty s yields an array of length 0; otherwise n delimiters yield n+1
# pieces, any of which may be empty.
def split s:text, delim:char -> result:&:@:text [
  local-scope
  load-inputs
  # empty text? return empty array
  len:num <- length *s
  {
    empty?:bool <- equal len, 0
    break-unless empty?
    result <- new {(address array character): type}, 0
    return
  }
  # count #pieces we need room for
  count:num <- copy 1  # n delimiters = n+1 pieces
  idx:num <- copy 0
  {
    idx <- find-next s, delim, idx
    done?:bool <- greater-or-equal idx, len
    break-if done?
    idx <- add idx, 1
    count <- add count, 1
    loop
  }
  # allocate space
  result <- new {(address array character): type}, count
  # repeatedly copy slices start..end until delimiter into result[curr-result]
  curr-result:num <- copy 0
  start:num <- copy 0
  {
    # while slots remain. Looping on the slot count rather than on
    # 'start < len' guarantees that a text ending in the delimiter gets an
    # empty text in its last slot instead of leaving that slot null.
    done?:bool <- greater-or-equal curr-result, count
    break-if done?
    end:num <- find-next s, delim, start
    # copy start..end into result[curr-result]
    dest:text <- copy-range s, start, end
    *result <- put-index *result, curr-result, dest
    # slide over to next slice
    start <- add end, 1
    curr-result <- add curr-result, 1
    loop
  }
]
scenario text-split-1 [
  local-scope
  input:text <- new [a/b]
  run [
    pieces:&:@:text <- split input, 47/slash
    10:num/raw <- length *pieces
    p0:text <- index *pieces, 0
    p1:text <- index *pieces, 1
    20:@:char/raw <- copy *p0
    30:@:char/raw <- copy *p1
  ]
  memory-should-contain [
    10 <- 2  # one delimiter, two pieces
    20:array:character <- [a]
    30:array:character <- [b]
  ]
]
scenario text-split-2 [
  local-scope
  input:text <- new [a/b/c]
  run [
    pieces:&:@:text <- split input, 47/slash
    10:num/raw <- length *pieces
    p0:text <- index *pieces, 0
    p1:text <- index *pieces, 1
    p2:text <- index *pieces, 2
    20:@:char/raw <- copy *p0
    30:@:char/raw <- copy *p1
    40:@:char/raw <- copy *p2
  ]
  memory-should-contain [
    10 <- 3  # two delimiters, three pieces
    20:array:character <- [a]
    30:array:character <- [b]
    40:array:character <- [c]
  ]
]
scenario text-split-missing [
  local-scope
  input:text <- new [abc]
  run [
    pieces:&:@:text <- split input, 47/slash
    10:num/raw <- length *pieces
    p0:text <- index *pieces, 0
    20:@:char/raw <- copy *p0
  ]
  memory-should-contain [
    10 <- 1  # no delimiter: the whole text is the only piece
    20:array:character <- [abc]
  ]
]
scenario text-split-empty [
  local-scope
  input:text <- new []
  run [
    pieces:&:@:text <- split input, 47/slash
    10:num/raw <- length *pieces
  ]
  memory-should-contain [
    10 <- 0  # an empty text splits into no pieces at all
  ]
]
scenario text-split-empty-piece [
  local-scope
  input:text <- new [a/b//c]
  run [
    pieces:&:@:text <- split input, 47/slash
    10:num/raw <- length *pieces
    p0:text <- index *pieces, 0
    p1:text <- index *pieces, 1
    p2:text <- index *pieces, 2
    p3:text <- index *pieces, 3
    20:@:char/raw <- copy *p0
    30:@:char/raw <- copy *p1
    40:@:char/raw <- copy *p2
    50:@:char/raw <- copy *p3
  ]
  memory-should-contain [
    10 <- 4  # three delimiters, four pieces
    20:array:character <- [a]
    30:array:character <- [b]
    40:array:character <- []
    50:array:character <- [c]
  ]
]
# Split 'text' in two around the position that find-next reports for 'delim'.
#   x: the characters before that position
#   y: the characters after it (the character at that position is dropped)
# An empty 'text' yields two empty texts.
# NOTE(review): presumably find-next returns the length of 'text' when 'delim'
# is absent, in which case x is a copy of the whole text and y is empty --
# confirm against find-next.
def split-first text:text, delim:char -> x:text, y:text [
  local-scope
  load-inputs
  # empty text? return empty texts
  len:num <- length *text
  {
    empty?:bool <- equal len, 0
    break-unless empty?
    x:text <- new []
    y:text <- new []
    return
  }
  idx:num <- find-next text, delim, 0
  x:text <- copy-range text, 0, idx
  # step over the delimiter, but never past the end of 'text': without the
  # clamp, an idx at or beyond the last character would ask copy-range for a
  # range that starts after it ends
  idx <- add idx, 1
  idx <- min idx, len
  y:text <- copy-range text, idx, len
]
# split-first returns the text before and after the first delimiter.
scenario text-split-first [
  local-scope
  s:text <- new [a/b]
  run [
    before:text, after:text <- split-first s, 47/slash
    # copy both halves to fixed raw locations so they can be checked below
    10:@:char/raw <- copy *before
    20:@:char/raw <- copy *after
  ]
  memory-should-contain [
    10:array:character <- [a]
    20:array:character <- [b]
  ]
]
2016-09-17 17:28:25 +00:00
# Return a newly allocated text holding the characters of 'buf' at indices
# start, start+1, ..., end-1.
# An 'end' beyond the length of 'buf' is trimmed to the length.
# A 'start' beyond the (trimmed) 'end' is trimmed to 'end', giving an empty
# result.
def copy-range buf:text, start:num, end:num -> result:text [
  local-scope
  load-inputs
  # if end is out of bounds, trim it
  len:num <- length *buf
  end:num <- min len, end
  # if start lies past end, trim it as well; otherwise the length computed
  # below would be negative
  {
    inverted?:bool <- greater-than start, end
    break-unless inverted?
    start <- copy end
  }
  # allocate space for result
  len <- subtract end, start
  result:text <- new character:type, len
  # copy buf[start..end) into result[0..len)
  src-idx:num <- copy start
  dest-idx:num <- copy 0
  {
    done?:bool <- greater-or-equal src-idx, end
    break-if done?
    src:char <- index *buf, src-idx
    *result <- put-index *result, dest-idx, src
    src-idx <- add src-idx, 1
    dest-idx <- add dest-idx, 1
    loop
  }
]
# copy-range with both bounds inside the text copies exactly that slice.
scenario copy-range-works [
  local-scope
  s:text <- new [abc]
  run [
    slice:text <- copy-range s, 1, 3
    1:@:char/raw <- copy *slice
  ]
  memory-should-contain [
    1:array:character <- [bc]
  ]
]
# An end index past the end of the text is trimmed to the text's length.
scenario copy-range-out-of-bounds [
  local-scope
  s:text <- new [abc]
  run [
    slice:text <- copy-range s, 2, 4
    1:@:char/raw <- copy *slice
  ]
  memory-should-contain [
    1:array:character <- [c]
  ]
]
# A range that starts and ends at the text's length yields an empty text.
scenario copy-range-out-of-bounds-2 [
  local-scope
  s:text <- new [abc]
  run [
    slice:text <- copy-range s, 3, 3
    1:@:char/raw <- copy *slice
  ]
  memory-should-contain [
    1:array:character <- []
  ]
]
# Convert a text of decimal digits into a number.
#   out: the parsed value, or 0 if some character was rejected
#   error?: set from character-code-to-digit for each character; parsing stops
#     at the first character it rejects
def parse-whole-number in:text -> out:num, error?:bool [
  local-scope
  load-inputs
  out <- copy 0
  # accumulate separately so 'out' is still 0 if we bail out on an error
  total:num <- copy 0
  n:num <- length *in
  pos:num <- copy 0
  {
    more?:bool <- lesser-than pos, n
    break-unless more?
    ch:char <- index *in, pos
    code:num <- character-to-code ch
    d:num, error?:bool <- character-code-to-digit code
    return-if error?
    # shift the digits seen so far one decimal place left and append the new one
    total <- multiply total, 10
    total <- add total, d
    pos <- add pos, 1
    loop
  }
  # every character was a valid digit
  out <- copy total
]
# (contributed by Ella Couch)
2016-11-13 17:28:00 +00:00
# Map the character code of a decimal digit to its numeric value.
#   result: 0..9 for codes 48 ('0') through 57 ('9'); 0 otherwise
#   error?: true when character-code lies outside 48..57
recipe character-code-to-digit character-code:number -> result:number, error?:boolean [
  local-scope
  load-inputs
  result <- copy 0
  {
    # below '0'?
    error? <- lesser-than character-code, 48
    break-if error?
    # above '9'?
    error? <- greater-than character-code, 57
    break-if error?
    # in range: the offset from '0' is the digit's value
    result <- subtract character-code, 48
  }
]
# The lowest valid code, '0', converts to digit 0 without an error.
scenario character-code-to-digit-contain-only-digit [
  local-scope
  code:num <- copy 48  # '0'
  run [
    10:num/raw, 11:bool/raw <- character-code-to-digit code
  ]
  memory-should-contain [
    10 <- 0  # digit
    11 <- 0  # no error
  ]
]
# The highest valid code, '9', converts to digit 9 without an error.
scenario character-code-to-digit-contain-only-digit-2 [
  local-scope
  code:num <- copy 57  # '9'
  run [
    1:num/raw, 2:bool/raw <- character-code-to-digit code
  ]
  memory-should-contain [
    1 <- 9  # digit
    2 <- 0  # no error
  ]
]
# A code just below '0' is rejected: the result is 0 and the error flag is set.
scenario character-code-to-digit-handles-codes-lower-than-zero [
  local-scope
  code:num <- copy 47  # one less than '0'
  run [
    10:num/raw, 11:bool/raw <- character-code-to-digit code
  ]
  memory-should-contain [
    10 <- 0
    11 <- 1  # error
  ]
]
# A code just above '9' is rejected: the result is 0 and the error flag is set.
scenario character-code-to-digit-handles-codes-larger-than-nine [
  local-scope
  code:num <- copy 58  # one more than '9'
  run [
    10:num/raw, 11:bool/raw <- character-code-to-digit code
  ]
  memory-should-contain [
    10 <- 0
    11 <- 1  # error
  ]
]