bird-asm/lex.go

186 lines
4.3 KiB
Go

package main
import (
"fmt"
"os"
"strings"
"unicode"
)
// lexLine is the line number the lexer is currently on. The helpers in this
// file increment it every time they consume a '\n', and copy it into emitted
// tokens and diagnostics. As a uint16 it wraps around past 65535.
// NOTE(review): where it is initialised/reset is not visible in this file chunk.
var lexLine uint16
// lexString lexes the body of a string literal up to the closing strDelim and
// returns the tokens that push it onto the stack.
//
// The reader is expected to be positioned just after the opening delimiter
// (the callers visible here consume it before calling). Runes above 255 are
// replaced by '?', and a terminating null byte is appended. The bytes are
// emitted as PSH pairs in reverse order (terminator first, first character
// last), followed by a PSH2 carrying the 16-bit length, high byte first. The
// length includes the terminator.
//
// Every token carries the line on which the string ends. If the input ends
// before the closing delimiter, an error naming the line on which the string
// started is printed and the process exits with lexError.
func lexString(r *strings.Reader) []token {
	start := lexLine
	b := make([]byte, 0)
	for {
		c, _, err := r.ReadRune()
		if err != nil {
			fmt.Fprintf(os.Stderr, errTmplt, start, "syntax", "unclosed string")
			os.Exit(lexError)
		}
		if c == strDelim {
			break
		}
		if c == '\n' {
			lexLine++
		}
		if c > 255 {
			// Only single-byte values can be pushed
			c = '?'
		}
		b = append(b, byte(c))
	}
	b = append(b, 0) // add a null for the end of the string

	// Two tokens per byte plus three for the trailing length push.
	t := make([]token, 0, 2*len(b)+3)
	for i := len(b) - 1; i >= 0; i-- {
		t = append(t, token{"PSH", lexLine}, token{fmt.Sprintf("%02x", b[i]), lexLine})
	}

	// Add the length: high byte is bits 8-15, low byte is bits 0-7.
	l := uint16(len(b))
	t = append(t,
		token{"PSH2", lexLine},
		token{fmt.Sprintf("%02x", byte(l>>8)), lexLine},
		token{fmt.Sprintf("%02x", byte(l)), lexLine},
	)
	return t
}
// eatMacro lexes one macro definition and returns its name together with the
// tokens that make up its body.
//
// The name is the first word read with eatText; the body runs until a word
// consisting of ";" is found. Inside the body:
//   - '[', ']', '{' and '}' are skipped,
//   - strings, '#' literals and 'x character literals are expanded to their
//     push tokens,
//   - "//" and "/* */" comments are discarded,
//   - any other word becomes a token as-is.
//
// A nested '%', a reference to the macro's own name, a '/' that does not start
// a comment, an incomplete character literal, or reaching the end of input
// before ";" prints a diagnostic and exits the process with lexError.
// NOTE(review): presumably called right after the caller consumed the '%'
// that introduces a macro - confirm against the caller.
func eatMacro(r *strings.Reader) (string, []token) {
	startLine := lexLine
	name := eatText(r)
	data := make([]token, 0)
	for r.Len() > 0 {
		eatWhiteSpace(r)
		c, _, err := r.ReadRune()
		if err != nil {
			break
		}
		switch c {
		case '[', '{', ']', '}':
			// Allow these chars, but assign no meaning to them
			continue
		case strDelim:
			data = append(data, lexString(r)...)
		case '/':
			// The rune after the slash decides which kind of comment this is.
			// A read failure (end of input) is treated like any other illegal
			// follower.
			next, _, err := r.ReadRune()
			switch {
			case err == nil && next == '/':
				eatSingleLineComment(r)
			case err == nil && next == '*':
				eatMultiLineComment(r)
			default:
				fmt.Fprintf(os.Stderr, "Illegal char found following '/' on line %d,\nexpected '/' or '*'\n", lexLine)
				os.Exit(lexError)
			}
		case '\'':
			lit, _, err := r.ReadRune()
			if err != nil {
				fmt.Fprintf(os.Stderr, "Incomplete char literal on line %d\n", lexLine)
				os.Exit(lexError)
			}
			data = append(data, token{"PSH", lexLine}, token{fmt.Sprintf("%02x", lit), lexLine})
		case '#':
			data = append(data, eatLiteral(r)...)
		case '%':
			fmt.Fprintf(os.Stderr, "Macro definition inside of macro definition starting on line %d\n", startLine)
			os.Exit(lexError)
		default:
			// Not a special character: put it back and read the whole word.
			r.UnreadRune()
			dataWord := eatText(r)
			if dataWord == ";" {
				return name, data
			}
			if dataWord == name {
				fmt.Fprintf(os.Stderr, "Macro recursively referencing itself on line %d\n", lexLine)
				os.Exit(lexError)
			}
			data = append(data, token{dataWord, lexLine})
		}
	}
	fmt.Fprintf(os.Stderr, "Unclosed macro starting on line %d\n", startLine)
	os.Exit(lexError)
	return name, data // unreachable, but required by the compiler
}
// eatLiteral lexes the hex digits of a '#' literal and returns the matching
// push tokens: two digits become PSH <byte>, four digits become
// PSH2 <first two digits> <last two digits>.
//
// A word containing a byte rejected by isHexValue, or a word of any other
// length, prints a diagnostic and exits the process with lexError.
func eatLiteral(r *strings.Reader) []token {
	digits := eatText(r)
	for i := 0; i < len(digits); i++ {
		if isHexValue(digits[i]) {
			continue
		}
		fmt.Fprintf(os.Stderr, "Invalid literal '#' value on line %d\n", lexLine)
		os.Exit(lexError)
	}

	switch len(digits) {
	case 2:
		return []token{{"PSH", lexLine}, {digits, lexLine}}
	case 4:
		hi, lo := digits[:2], digits[2:]
		return []token{{"PSH2", lexLine}, {hi, lexLine}, {lo, lexLine}}
	}

	fmt.Fprintf(os.Stderr, "Invalid PSH/# value on line %d\n", lexLine)
	os.Exit(lexError)
	return []token{} // will never happen, but makes the compiler happy
}
// eatText reads one word from r and returns it. A word ends at the first
// whitespace rune, at one of '[', ']', '{', '}', or at the end of the input.
//
// The terminating rune is left unread so the caller sees it next. When the
// word runs to the end of the input nothing is pushed back: unreading there
// would hand the word's own last character (or, for an empty remainder, the
// rune the caller consumed before calling) back to the caller a second time.
func eatText(r *strings.Reader) string {
	var buf strings.Builder
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			break
		}
		if unicode.IsSpace(c) || c == '[' || c == ']' || c == '{' || c == '}' {
			// Delimiter: give it back and stop
			r.UnreadRune()
			break
		}
		buf.WriteRune(c)
	}
	return buf.String()
}
// eatWhiteSpace advances r past any run of whitespace, incrementing lexLine
// for each newline it passes. The first non-whitespace rune is put back so
// the caller reads it next.
func eatWhiteSpace(r *strings.Reader) {
	for r.Len() > 0 {
		ch, _, err := r.ReadRune()
		switch {
		case err != nil:
			return
		case ch == '\n':
			// A newline is whitespace too, so keep going after counting it
			lexLine++
		case !unicode.IsSpace(ch):
			r.UnreadRune()
			return
		}
	}
}
// eatSingleLineComment discards the rest of the current line, including the
// newline that ends it, and increments lexLine for that newline. It stops
// quietly if the input ends first.
func eatSingleLineComment(r *strings.Reader) {
	for r.Len() > 0 {
		ch, _, err := r.ReadRune()
		if err != nil {
			return
		}
		if ch != '\n' {
			continue
		}
		lexLine++
		return
	}
}
// eatMultiLineComment discards input up to and including the "*/" that closes
// a block comment, incrementing lexLine for every newline inside it.
//
// The comment closes only when '/' comes directly after '*'; any rune in
// between, including a newline, cancels a pending '*'. If the input ends
// before the comment is closed, a diagnostic naming the line the comment
// started on is printed and the process exits with lexError.
func eatMultiLineComment(r *strings.Reader) {
	startLine := lexLine
	var prevStar bool // whether the rune just before the current one was '*'
	for r.Len() > 0 {
		c, _, err := r.ReadRune()
		if err != nil {
			break
		}
		if c == '/' && prevStar {
			return
		}
		if c == '\n' {
			lexLine++
		}
		prevStar = c == '*'
	}
	fmt.Fprintf(os.Stderr, "Unclosed multiline comment starting on line %d\n", startLine)
	os.Exit(lexError)
}