/*
Copyright (C) 2023 Brian Evans (aka sloum). All rights reserved.

This source code is available under the terms of the ffsl, or,
Floodgap Free Software License. A copy of the license has been
provided as the file 'LICENSE' in the same folder as this source
code file. If for some reason it is not present, you can find the
terms of version 1 of the FFSL at the following URL:

https://www.floodgap.com/software/ffsl/license.html
*/
|
|
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
func lex(data string, f string) ([]token, error) {
|
|
var reader *strings.Reader
|
|
if f == "" {
|
|
f = "REPL"
|
|
}
|
|
reader = strings.NewReader(data)
|
|
tokens := make([]token, 0, 25)
|
|
fileLine := 1
|
|
|
|
TokenizationLoop:
|
|
for reader.Len() > 0 {
|
|
c, _, err := reader.ReadRune()
|
|
if err != nil {
|
|
break TokenizationLoop
|
|
}
|
|
if unicode.IsSpace(c) {
|
|
reader.UnreadRune()
|
|
eatWhitespace(reader, &fileLine)
|
|
continue
|
|
} else if c == '#' {
|
|
eatComment(reader, &fileLine)
|
|
} else if c == '"' {
|
|
tok, err := eatDelimited(reader, &fileLine, c, f)
|
|
if err != nil {
|
|
return []token{}, err
|
|
}
|
|
tok.val = unescapeString(tok.val.(string))
|
|
tokens = append(tokens, tok)
|
|
} else if c == '`' || c == '|' {
|
|
tok, err := eatDelimited(reader, &fileLine, c, f)
|
|
if err != nil {
|
|
return []token{}, err
|
|
}
|
|
tokens = append(tokens, tok)
|
|
} else if c == '.' {
|
|
tok := token{END, false, fileLine, f}
|
|
tokens = append(tokens, tok)
|
|
} else if c == '[' {
|
|
tokens = append(tokens, token{LBRACKET, false, fileLine, f})
|
|
} else if c == ']' {
|
|
tokens = append(tokens, token{RBRACKET, false, fileLine, f})
|
|
} else if c == '{' {
|
|
tokens = append(tokens, token{LCURLY, false, fileLine, f})
|
|
} else if c == '}' {
|
|
tokens = append(tokens, token{RCURLY, false, fileLine, f})
|
|
} else {
|
|
reader.UnreadRune()
|
|
wn := eatWordNumber(reader, &fileLine, f)
|
|
if wn.kind == SYMBOL {
|
|
// TODO this may need to get reworked
|
|
// to not use this function and can just
|
|
// default to an isKeyword function, which
|
|
// will end up being a check for presence
|
|
// in a map of built-ins
|
|
if isKeyword(wn.val.(string)) {
|
|
wn.kind = KEYWORD
|
|
} else if wn.val.(string) == "end" {
|
|
wn = token{END, false, fileLine, f}
|
|
}
|
|
}
|
|
tokens = append(tokens, wn)
|
|
}
|
|
}
|
|
return tokens, nil
|
|
}
|
|
|
|
// eatWhitespace advances r past a run of whitespace runes.
//
// Every '\n' consumed increments *currentLine. The first non-whitespace
// rune is pushed back onto r so the caller reads it next. The function
// returns when such a rune is found, when r is exhausted, or when a read
// fails.
func eatWhitespace(r *strings.Reader, currentLine *int) {
	for r.Len() > 0 {
		ch, _, readErr := r.ReadRune()
		switch {
		case readErr != nil:
			return
		case ch == '\n':
			*currentLine++
		case !unicode.IsSpace(ch):
			// Not ours to consume: hand it back and stop.
			r.UnreadRune()
			return
		}
	}
}
|
|
|
|
// eatComment discards the remainder of the current line from r.
//
// Runes are consumed up to and including the next '\n', at which point
// *currentLine is incremented and the function returns. If r runs out (or a
// read fails) before a newline is seen, *currentLine is left unchanged.
func eatComment(r *strings.Reader, currentLine *int) {
	for r.Len() > 0 {
		ch, _, readErr := r.ReadRune()
		if readErr != nil {
			return
		}
		if ch != '\n' {
			continue
		}
		*currentLine++
		return
	}
}
|
|
|
|
func eatDelimited(r *strings.Reader, currentLine *int, delim rune, f string) (token, error) {
|
|
var buf strings.Builder
|
|
escapeCount := 0
|
|
endLine := *currentLine
|
|
for {
|
|
c, _, err := r.ReadRune()
|
|
if err != nil {
|
|
return token{}, fmt.Errorf("Lex reached EOF before encountering closing `%c` (Line %d of %s)", delim, *currentLine, f)
|
|
}
|
|
if c == delim && (escapeCount == 0 || escapeCount%2 == 0) {
|
|
break
|
|
} else {
|
|
buf.WriteRune(c)
|
|
if c == '\n' {
|
|
if delim == '"' {
|
|
return token{}, fmt.Errorf("Double quoted string spans more than one line (Line %d of %s)", *currentLine, f)
|
|
}
|
|
endLine++
|
|
}
|
|
}
|
|
if c == '\\' {
|
|
escapeCount++
|
|
} else {
|
|
escapeCount = 0
|
|
}
|
|
}
|
|
|
|
k := STRING
|
|
s := buf.String()
|
|
|
|
if delim == '|' {
|
|
k = DOCSTRING
|
|
s = strings.TrimSpace(s)
|
|
}
|
|
|
|
out := token{
|
|
k,
|
|
s,
|
|
*currentLine,
|
|
f,
|
|
}
|
|
*currentLine = endLine
|
|
return out, nil
|
|
}
|
|
|
|
func eatWordNumber(r *strings.Reader, currentLine *int, f string) token {
|
|
var buf strings.Builder
|
|
for r.Len() > 0 {
|
|
c, _, err := r.ReadRune()
|
|
if err != nil {
|
|
panic("Error parsing word/number")
|
|
return token{}
|
|
}
|
|
if unicode.IsSpace(c) || c == ']' || c == '}' {
|
|
r.UnreadRune()
|
|
break
|
|
}
|
|
buf.WriteRune(c)
|
|
}
|
|
tokenString := buf.String()
|
|
|
|
if strings.HasSuffix(tokenString, "]") && !strings.ContainsRune(tokenString, '[') {
|
|
tokenString = tokenString[:len(tokenString)-1]
|
|
r.UnreadRune()
|
|
}
|
|
|
|
out := token{
|
|
SYMBOL,
|
|
tokenString,
|
|
*currentLine,
|
|
f,
|
|
}
|
|
|
|
if tokenString == "false" {
|
|
out.val = false
|
|
out.kind = BOOL
|
|
return out
|
|
}
|
|
|
|
if tokenString == "true" {
|
|
out.val = true
|
|
out.kind = BOOL
|
|
return out
|
|
}
|
|
|
|
// Check for hex string
|
|
if strings.HasPrefix(tokenString, "0x") {
|
|
i, err := strconv.ParseInt(tokenString[2:], 16, 0)
|
|
if err == nil {
|
|
out.val = int(i)
|
|
out.kind = INT
|
|
return out
|
|
}
|
|
}
|
|
|
|
// Check for octal string
|
|
if strings.HasPrefix(tokenString, "0o") {
|
|
i, err := strconv.ParseInt(tokenString[2:], 8, 0)
|
|
if err == nil {
|
|
out.val = int(i)
|
|
out.kind = INT
|
|
return out
|
|
}
|
|
}
|
|
|
|
// Check for binary string
|
|
if strings.HasPrefix(tokenString, "0b") {
|
|
i, err := strconv.ParseInt(tokenString[2:], 2, 0)
|
|
if err == nil {
|
|
out.val = int(i)
|
|
out.kind = INT
|
|
return out
|
|
}
|
|
}
|
|
|
|
if strings.Contains(tokenString, ".") {
|
|
f, err := strconv.ParseFloat(tokenString, 64)
|
|
if err == nil {
|
|
out.val = float64(f)
|
|
out.kind = FLOAT
|
|
}
|
|
}
|
|
|
|
i, err := strconv.ParseInt(tokenString, 10, 0)
|
|
if err == nil {
|
|
out.val = int(i)
|
|
out.kind = INT
|
|
return out
|
|
}
|
|
|
|
if validType, kind := isType(tokenString); validType {
|
|
out.val = kind
|
|
out.kind = TYPE
|
|
}
|
|
|
|
return out
|
|
}
|