// felise/lexer.go

/*
Copyright (C) 2023 Brian Evans (aka sloum). All rights reserved.
This source code is available under the terms of the ffsl, or,
Floodgap Free Software License. A copy of the license has been
provided as the file 'LICENSE' in the same folder as this source
code file. If for some reason it is not present, you can find the
terms of version 1 of the FFSL at the following URL:
https://www.floodgap.com/software/ffsl/license.html
*/
package main
import (
"fmt"
"strconv"
"strings"
"unicode"
)
// lex tokenizes data, the full source text of the file named f, and
// returns the resulting token stream. An empty f is reported as "REPL"
// in tokens and error messages. Lexing stops with an error only for an
// unterminated delimited literal (see eatDelimited); everything else is
// classified by eatWordNumber.
func lex(data string, f string) ([]token, error) {
	if f == "" {
		f = "REPL"
	}
	reader := strings.NewReader(data)
	tokens := make([]token, 0, 25)
	fileLine := 1
	for reader.Len() > 0 {
		c, _, err := reader.ReadRune()
		if err != nil {
			// ReadRune only fails at EOF; we are done.
			break
		}
		switch {
		case unicode.IsSpace(c):
			// Push the rune back so eatWhitespace can count newlines.
			reader.UnreadRune()
			eatWhitespace(reader, &fileLine)
		case c == '#':
			// Comments run to end of line.
			eatComment(reader, &fileLine)
		case c == '"':
			tok, err := eatDelimited(reader, &fileLine, c, f)
			if err != nil {
				return []token{}, err
			}
			// Only double-quoted strings get escape-sequence processing.
			tok.val = unescapeString(tok.val.(string))
			tokens = append(tokens, tok)
		case c == '`' || c == '|':
			// Raw string (backtick) or docstring (pipe); no unescaping.
			tok, err := eatDelimited(reader, &fileLine, c, f)
			if err != nil {
				return []token{}, err
			}
			tokens = append(tokens, tok)
		case c == '.':
			tokens = append(tokens, token{END, false, fileLine, f})
		case c == '[':
			tokens = append(tokens, token{LBRACKET, false, fileLine, f})
		case c == ']':
			tokens = append(tokens, token{RBRACKET, false, fileLine, f})
		case c == '{':
			tokens = append(tokens, token{LCURLY, false, fileLine, f})
		case c == '}':
			tokens = append(tokens, token{RCURLY, false, fileLine, f})
		default:
			// Anything else starts a word/number; hand the rune back.
			reader.UnreadRune()
			wn := eatWordNumber(reader, &fileLine, f)
			if wn.kind == SYMBOL {
				// TODO this may need to get reworked
				// to not use this function and can just
				// default to an isKeyword function, which
				// will end up being a check for presence
				// in a map of built-ins
				if isKeyword(wn.val.(string)) {
					wn.kind = KEYWORD
				} else if wn.val.(string) == "end" {
					wn = token{END, false, fileLine, f}
				}
			}
			tokens = append(tokens, wn)
		}
	}
	return tokens, nil
}
func eatWhitespace(r *strings.Reader, currentLine *int) {
for r.Len() > 0 {
c, _, err := r.ReadRune()
if err != nil {
return
}
if c == '\n' {
*currentLine++
} else if !unicode.IsSpace(c) {
r.UnreadRune()
break
}
}
}
func eatComment(r *strings.Reader, currentLine *int) {
for r.Len() > 0 {
c, _, err := r.ReadRune()
if err != nil {
return
}
if c == '\n' {
*currentLine++
return
}
}
}
// eatDelimited reads runes from r until it finds an unescaped closing
// delim, returning the collected contents as a STRING token (or a
// trimmed DOCSTRING token when delim is '|'). The returned token is
// tagged with the literal's opening line; *currentLine is then advanced
// past any newlines the literal spanned. Errors are returned for EOF
// before the closing delimiter, and for a newline inside a
// double-quoted string (only backtick and pipe literals may span lines).
func eatDelimited(r *strings.Reader, currentLine *int, delim rune, f string) (token, error) {
	var buf strings.Builder
	// Length of the run of consecutive backslashes immediately before
	// the current rune; an even count means the rune is not escaped.
	escapeCount := 0
	endLine := *currentLine
	for {
		c, _, err := r.ReadRune()
		if err != nil {
			return token{}, fmt.Errorf("Lex reached EOF before encountering closing `%c` (Line %d of %s)", delim, *currentLine, f)
		}
		if c == delim && escapeCount%2 == 0 {
			// Unescaped closing delimiter (escapeCount == 0 is just the
			// even case, so no separate check is needed).
			break
		}
		buf.WriteRune(c)
		if c == '\n' {
			if delim == '"' {
				return token{}, fmt.Errorf("Double quoted string spans more than one line (Line %d of %s)", *currentLine, f)
			}
			endLine++
		}
		if c == '\\' {
			escapeCount++
		} else {
			escapeCount = 0
		}
	}
	k := STRING
	s := buf.String()
	if delim == '|' {
		// Docstrings are stored with surrounding whitespace trimmed.
		k = DOCSTRING
		s = strings.TrimSpace(s)
	}
	out := token{
		k,
		s,
		*currentLine,
		f,
	}
	*currentLine = endLine
	return out, nil
}
// eatWordNumber consumes a maximal run of runes up to whitespace, ']',
// or '}' and classifies the word it forms: a BOOL ("true"/"false"), an
// INT (decimal, or 0x/0o/0b prefixed), a FLOAT (contains '.'), a TYPE
// name, or — by default — a SYMBOL. The stopping rune, if any, is
// pushed back for the caller. The token is tagged with *currentLine
// and the file name f.
func eatWordNumber(r *strings.Reader, currentLine *int, f string) token {
	var buf strings.Builder
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			// ReadRune only fails at EOF; the word simply ends here.
			// (Previously this panicked, followed by unreachable code.)
			break
		}
		if unicode.IsSpace(c) || c == ']' || c == '}' {
			r.UnreadRune()
			break
		}
		buf.WriteRune(c)
	}
	tokenString := buf.String()
	// NOTE(review): the loop above unreads ']' before it is ever written
	// to buf, so this trim appears unreachable; kept unchanged pending
	// confirmation against callers.
	if strings.HasSuffix(tokenString, "]") && !strings.ContainsRune(tokenString, '[') {
		tokenString = tokenString[:len(tokenString)-1]
		r.UnreadRune()
	}
	out := token{
		SYMBOL,
		tokenString,
		*currentLine,
		f,
	}
	switch tokenString {
	case "false":
		out.val = false
		out.kind = BOOL
		return out
	case "true":
		out.val = true
		out.kind = BOOL
		return out
	}
	// Prefixed integer literals: hex, octal, binary.
	for _, p := range []struct {
		prefix string
		base   int
	}{{"0x", 16}, {"0o", 8}, {"0b", 2}} {
		if strings.HasPrefix(tokenString, p.prefix) {
			if i, err := strconv.ParseInt(tokenString[2:], p.base, 0); err == nil {
				out.val = int(i)
				out.kind = INT
				return out
			}
		}
	}
	if strings.Contains(tokenString, ".") {
		// fv avoids shadowing the file-name parameter f.
		if fv, err := strconv.ParseFloat(tokenString, 64); err == nil {
			out.val = fv
			out.kind = FLOAT
			// Return immediately, matching the other branches; the
			// original fell through, letting a later isType match
			// clobber the float classification.
			return out
		}
	}
	if i, err := strconv.ParseInt(tokenString, 10, 0); err == nil {
		out.val = int(i)
		out.kind = INT
		return out
	}
	if validType, kind := isType(tokenString); validType {
		out.val = kind
		out.kind = TYPE
	}
	return out
}