package main

import (
	"fmt"
	"strings"
	"unicode"
)

// lexLine tracks the current input line, for error messages.
var lexLine = 1

// parens tracks the running balance of open parentheses.
var parens = 0

// eatSingleLineComment consumes input up to and including the next newline.
func eatSingleLineComment(r *strings.Reader) {
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic(fmt.Sprintf("Lexing error while eating comment on line %d", lexLine))
		}
		if c == '\n' {
			lexLine++
			break
		}
	}
}

// eatWhiteSpace consumes a run of whitespace, counting newlines, and pushes
// the first non-space rune back onto the reader.
func eatWhiteSpace(r *strings.Reader) {
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic("Lexing error while eating whitespace")
		}
		if c == '\n' {
			lexLine++
		}
		if !unicode.IsSpace(c) {
			r.UnreadRune()
			break
		}
	}
}

// eatString consumes a string literal; the opening '"' has already been read
// by the caller. The returned token includes both surrounding quotes. A
// backslash escapes the rune that follows it, so an escaped '"' does not
// terminate the string.
func eatString(r *strings.Reader) string {
	var buf strings.Builder
	buf.WriteRune('"')
	escaped := false
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic("Lexing error while eating string")
		}
		buf.WriteRune(c)
		if c == '"' && !escaped {
			return buf.String()
		}
		if c == '\n' {
			break // string literals may not span lines
		}
		// A backslash escapes the next rune unless it is itself escaped,
		// so "\\\\" leaves the following rune unescaped.
		escaped = c == '\\' && !escaped
	}
	panic(fmt.Sprintf("Parse error: Unclosed string on line %d", lexLine))
}
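
// exampleEatString is a hypothetical demo (the name is ours, not part of the
// original lexer): the caller has already consumed the opening '"', and
// escaped quotes stay inside the token.
func exampleEatString() {
	r := strings.NewReader(`say \"hi\"" trailing`)
	fmt.Println(eatString(r)) // prints: "say \"hi\""
}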

// eatSymbol consumes a symbol token. A symbol ends at whitespace or just
// before a closing ')' or ']', which is pushed back for the caller. A
// backslash lets a symbol contain an escaped space.
func eatSymbol(r *strings.Reader) string {
	var buf strings.Builder
	var previous rune

	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic("Lexing error while eating symbol")
		}
		if c == ')' || c == ']' {
			r.UnreadRune()
			return buf.String()
		} else if c == '\n' {
			lexLine++
		}
		if unicode.IsSpace(c) && previous != '\\' {
			break
		}
		buf.WriteRune(c)
		previous = c
	}
	return buf.String()
}
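
// exampleEatSymbol is a hypothetical demo (the name is ours): symbols stop at
// whitespace, and a closing ')' or ']' is pushed back for the caller.
func exampleEatSymbol() {
	r := strings.NewReader("foo)")
	fmt.Println(eatSymbol(r)) // prints: foo
}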

// eatQuote expands the quote shorthand; the caller has already consumed the
// leading '\''. A quoted atom becomes ( quote atom ), a quoted list becomes
// ( quote ( ... ) ), and square brackets are rewritten to ( list ... ).
func eatQuote(r *strings.Reader) []string {
	tokens := make([]string, 0, 10)
	tokens = append(tokens, "(", "quote")
	parens++
	list := false
	depth := 0
	var currentString string

Loop:
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			break Loop
		}
		if c == '\n' {
			lexLine++
		}

		if unicode.IsSpace(c) && !list {
			panic(fmt.Sprintf("Parse error: Invalid construct \"' \" on line %d", lexLine))
		}

		switch c {
		case '(':
			list = true
			depth++
			parens++
			tokens = append(tokens, string(c))
		case '[':
			list = true
			depth++
			parens++
			tokens = append(tokens, "(", "list")
		case ')':
			depth--
			parens--
			tokens = append(tokens, string(c))
			if depth == 0 {
				break Loop
			}
		case ']':
			depth--
			parens--
			tokens = append(tokens, ")")
			if depth == 0 {
				break Loop
			}
		case '\'':
			t := eatQuote(r)
			tokens = append(tokens, t...)
		case '"':
			currentString = eatString(r)
			tokens = append(tokens, currentString)
			if !list {
				break Loop
			}
		default:
			r.UnreadRune()
			currentString = eatSymbol(r)
			if currentString != "" {
				tokens = append(tokens, currentString)
			}
			if !list {
				break Loop
			}
		}
	}
	tokens = append(tokens, ")")
	parens--
	return tokens
}
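
// exampleEatQuote is a hypothetical demo (the name is ours): the caller has
// already consumed the leading '\'' before handing the reader over.
func exampleEatQuote() {
	r := strings.NewReader("foo")
	fmt.Println(eatQuote(r)) // prints: [( quote foo )]
}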

// eatShebang consumes the rest of a shebang line, including the newline.
func eatShebang(r *strings.Reader) {
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			panic(fmt.Sprintf("Lexing error while eating shebang on line %d", lexLine))
		}
		if c == '\n' {
			lexLine++
			break
		}
	}
}

// Tokenize splits a source string into a flat list of tokens, expanding the
// quote and bracket shorthands and checking that parentheses stay balanced.
func Tokenize(s string) []string {
	lexLine = 1
	parens = 0
	reader := strings.NewReader(s)
	tokens := make([]string, 0)
	var currentString string

	// Handle an optional shebang line. strings.Reader.UnreadRune can only
	// back up a single rune, so test the prefix instead of reading ahead
	// and trying to unread twice.
	if strings.HasPrefix(s, "#!") {
		reader.ReadRune() // '#'
		reader.ReadRune() // '!'
		eatShebang(reader)
	}

TokenizationLoop:
	for reader.Len() > 0 {
		c, _, err := reader.ReadRune()
		if err != nil {
			break TokenizationLoop
		}

		if unicode.IsSpace(c) {
			if c == '\n' {
				lexLine++
			}
			eatWhiteSpace(reader)
			continue
		}

		switch c {
		case '(':
			tokens = append(tokens, string(c))
			parens++
		case ')':
			tokens = append(tokens, string(c))
			parens--
		case ']':
			tokens = append(tokens, ")")
			parens--
		case '[':
			tokens = append(tokens, "(", "list")
			parens++
		case '\'':
			t := eatQuote(reader)
			tokens = append(tokens, t...)
		case '"':
			currentString = eatString(reader)
			tokens = append(tokens, currentString)
		case ';':
			eatSingleLineComment(reader)
		case '#':
			c, _, err := reader.ReadRune()
			if err != nil {
				break TokenizationLoop
			}
			tokens = append(tokens, fmt.Sprintf("#%c", c))
		default:
			reader.UnreadRune()
			currentString = eatSymbol(reader)
			if currentString != "" {
				tokens = append(tokens, currentString)
			}
		}

		if parens < 0 {
			panic(fmt.Sprintf("Parse error: too many closing parentheses near line %d", lexLine))
		}
	}
	if parens > 0 {
		panic("Parse error: input ended without enough closing parentheses")
	}

	return tokens
}
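
// exampleTokenize is a hypothetical end-to-end demo (the name is ours): quote
// and bracket shorthand expand into plain prefix lists.
func exampleTokenize() {
	fmt.Println(Tokenize("(+ 1 '(2 3))")) // prints: [( + 1 ( quote ( 2 3 ) ) )]
}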