slope/lexer.go

// lexer.go implements the tokenizer for slope: it converts source text into
// a flat slice of string tokens, expanding quote marks into explicit
// (quote ...) forms and square-bracket lists into (list ...) calls.
package main

import (
	"fmt"
	"io"
	"strings"
	"unicode"
)
// lexLine tracks the current source line for error reporting.
var lexLine int = 1

// parens tracks the balance of open and close parentheses.
var parens int = 0
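
// eatSingleLineComment consumes input through the end of the current line.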
func eatSingleLineComment(r *strings.Reader) {
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic(fmt.Sprintf("Lexing error while eating comment on line %d", lexLine))
		}
		if c == '\n' {
			lexLine++
			break
		}
	}
}
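
// eatWhiteSpace consumes a run of whitespace, counting newlines, and leaves
// the first non-space rune unread for the caller.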
func eatWhiteSpace(r *strings.Reader) {
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic("Lexing error while eating whitespace")
		}
		if c == '\n' {
			lexLine++
		}
		if !unicode.IsSpace(c) {
			r.UnreadRune()
			break
		}
	}
}
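
// eatString consumes a string literal, delimiters included. Double-quoted
// strings must close before the end of the line; raw (backtick) strings may
// span lines. A delimiter preceded by an odd number of backslashes is
// treated as escaped and does not close the string.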
func eatString(r *strings.Reader, delim rune) string {
	var buf strings.Builder
	buf.WriteRune(delim)
	escapeCount := 0
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic("Lexing error while eating string")
		}
		if delim == '`' && c == '\n' {
			// Raw strings may span lines; keep the line count accurate
			lexLine++
		}
		if delim == '`' && c == delim && (escapeCount > 0 && escapeCount%2 != 0) {
			// Escaped backtick in a raw string: drop the buffered
			// backslash and keep the backtick itself
			out := buf.String()
			buf.Reset()
			buf.WriteString(out[:len(out)-1])
			buf.WriteRune(c)
		} else {
			buf.WriteRune(c)
		}
		if c == delim && (escapeCount == 0 || escapeCount%2 == 0) {
			// Unescaped delimiter closes the string
			break
		} else if (delim == '"' && c == '\n') || (c != delim && r.Len() == 0) {
			panic(fmt.Sprintf("Parse error: Unclosed string on line %d", lexLine))
		}
		if c == '\\' {
			escapeCount++
		} else {
			escapeCount = 0
		}
	}
	return buf.String()
}
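
// eatSymbol consumes a single symbol token. It ends at unescaped whitespace
// or at a closing bracket, which is left unread for the caller.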
func eatSymbol(r *strings.Reader) string {
	var buf strings.Builder
	var previous rune
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic("Lexing error while eating symbol")
		}
		if c == ')' || c == ']' {
			r.UnreadRune()
			return buf.String()
		} else if c == '\n' {
			lexLine++
		}
		if unicode.IsSpace(c) && previous != '\\' {
			break
		}
		buf.WriteRune(c)
		previous = c
	}
	return buf.String()
}
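
// eatQuote expands the ' reader shorthand into explicit "(" "quote" ... ")"
// tokens. It consumes exactly one datum: a single atom, or one balanced
// ()/[] list, recursing for nested quotes.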
func eatQuote(r *strings.Reader) []string {
	tokens := make([]string, 0, 10)
	tokens = append(tokens, "(", "quote")
	parens++
	list := false
	depth := 0
	var currentString string
Loop:
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			break Loop
		}
		if c == '\n' {
			lexLine++
		}
		if unicode.IsSpace(c) && !list {
			panic(fmt.Sprintf("Parse error: Invalid construct \"' \" on line %d", lexLine))
		}
		switch c {
		case '(':
			list = true
			depth++
			parens++
			tokens = append(tokens, string(c))
		case '[':
			// Square brackets are sugar for a list call
			list = true
			depth++
			parens++
			tokens = append(tokens, "(", "list")
		case ')':
			depth--
			parens--
			tokens = append(tokens, string(c))
			if depth == 0 {
				break Loop
			}
		case ']':
			depth--
			parens--
			tokens = append(tokens, ")")
			if depth == 0 {
				break Loop
			}
		case '\'':
			// Nested quote: expand recursively
			t := eatQuote(r)
			tokens = append(tokens, t...)
		case '"', '`':
			currentString = eatString(r, c)
			tokens = append(tokens, currentString)
			if !list {
				// A quoted atom is a complete datum
				break Loop
			}
		default:
			r.UnreadRune()
			currentString = eatSymbol(r)
			if currentString != "" {
				tokens = append(tokens, currentString)
			}
			if !list {
				break Loop
			}
		}
	}
	tokens = append(tokens, ")")
	parens--
	return tokens
}
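
// eatShebang discards the remainder of a "#!" line at the start of a script.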
func eatShebang(r *strings.Reader) {
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic(fmt.Sprintf("Lexing error while eating shebang on line %d", lexLine))
		}
		if c == '\n' {
			lexLine++
			break
		}
	}
}
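
// Tokenize converts slope source text into a flat slice of string tokens,
// expanding quote and square-bracket sugar. It panics on unbalanced
// parentheses and other malformed input.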
func Tokenize(s string) []string {
	lexLine = 1
	parens = 0
	reader := strings.NewReader(s)
	tokens := make([]string, 0)
	var currentString string
	// Handle a shebang ("#!...") line at the start of the input
	if reader.Len() > 2 {
		c, _, _ := reader.ReadRune()
		if c == '#' {
			c, _, _ := reader.ReadRune()
			if c == '!' {
				eatShebang(reader)
			} else {
				// UnreadRune can only back up one rune, so rewind to
				// the start of the input to restore both characters
				reader.Seek(0, io.SeekStart)
			}
		} else {
			reader.UnreadRune()
		}
	}
TokenizationLoop:
	for reader.Len() > 0 {
		c, _, err := reader.ReadRune()
		if err != nil {
			break TokenizationLoop
		}
		if unicode.IsSpace(c) {
			if c == '\n' {
				lexLine++
			}
			eatWhiteSpace(reader)
			continue
		}
		switch c {
		case '(':
			tokens = append(tokens, string(c))
			parens++
		case ')':
			tokens = append(tokens, string(c))
			parens--
		case ']':
			tokens = append(tokens, ")")
			parens--
		case '[':
			tokens = append(tokens, "(", "list")
			parens++
		case '\'':
			t := eatQuote(reader)
			tokens = append(tokens, t...)
		case '"', '`':
			currentString = eatString(reader, c)
			tokens = append(tokens, currentString)
		case ';':
			eatSingleLineComment(reader)
		case '#':
			// Hash tokens (such as the booleans #t and #f) are two runes
			c, _, err := reader.ReadRune()
			if err != nil {
				break TokenizationLoop
			}
			tokens = append(tokens, fmt.Sprintf("#%c", c))
		default:
			reader.UnreadRune()
			currentString = eatSymbol(reader)
			if currentString != "" {
				tokens = append(tokens, currentString)
			}
		}
		if parens < 0 {
			panic(fmt.Sprintf("Parse error: too many closing parentheses near line %d", lexLine))
		}
	}
	if parens > 0 {
		panic("Parse error: parse ended without enough closing parentheses")
	}
	return tokens
}
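
// A minimal usage sketch (illustrative, not part of the original file):
// the quote and bracket sugar expand in the token stream like so.
//
//	Tokenize("(display '(1 2))")
//	// => ["(" "display" "(" "quote" "(" "1" "2" ")" ")" ")"]
//
//	Tokenize("[1 2]")
//	// => ["(" "list" "1" "2" ")"]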