# dist/pigeon/lexer.rb — Pigeon bundle lexer.
module Pigeon
  # Lexer for Pigeon protocol message bundles.
  #
  # Walks a bundle string with StringScanner and emits a flat token stream,
  # e.g. [[:AUTHOR, "@...ed25519"], [:DEPTH, 1], ..., [:MESSAGE_END]].
  # A bundle is a sequence of messages; each message is a header, a body and
  # a footer, with blank-line separators driving the state transitions.
  class Lexer
    attr_reader :bundle_string, :scanner, :tokens

    # TODO: Change all the `{40,90}` values in ::Lexer to real values
    # TODO: Create regexes using string and Regexp.new() for cleaner regexes.
    FEED_VALUE = /@.{40,90}.ed25519/
    DEPTH_COUNT = /\d{1,7}/
    MESG_VALUE = /%.{40,90}.sha256/
    BLOB_VALUE = /&.{40,90}.sha256/
    NULL_VALUE = /NONE/
    STRG_VALUE = /".{1,64}"/
    ALPHANUMERICISH = /[a-zA-Z\d\._]{1,64}/

    # Alternation of every legal value form; interpolating each Regexp via
    # #source keeps the flags out of the combined pattern.
    ALL_VALUES = [
      FEED_VALUE,
      MESG_VALUE,
      NULL_VALUE,
      STRG_VALUE,
      BLOB_VALUE,
    ].map(&:source).join("|").freeze
    ANY_VALUE = Regexp.new(ALL_VALUES)
    # NOTE(review): "SEPERATOR" is a typo for "separator"; the name is kept
    # for backward compatibility with any external references.
    SEPERATOR = /\n/
    AUTHOR = /author #{FEED_VALUE}\n/
    DEPTH = /depth #{DEPTH_COUNT}\n/
    PREV = /prev (#{MESG_VALUE}|#{NULL_VALUE})\n/
    KIND = /kind #{ALPHANUMERICISH}\n/

    BODY_ENTRY = /#{ALPHANUMERICISH}:#{ANY_VALUE}\n/
    # Was /signature .*{87,88}\.sig\.ed25519\n?/ — the `{87,88}` nested on
    # top of `*` is a redundant quantifier (Ruby warns about it) that
    # effectively matched any length; `.{87,88}` is the evident intent.
    FOOTER_ENTRY = /signature .{87,88}\.sig\.ed25519\n?/

    LEXER_STATES = [HEADER = :header, BODY = :body, FOOTER = :footer].freeze

    # Convenience entry point: lex +bundle_string+ in one call.
    # @return [Array<Array>] the token stream.
    def self.tokenize(bundle_string)
      # TODO: Maybe move #tokenize into constructor.
      new(bundle_string).tokenize
    end

    # Drive the state machine until the scanner is exhausted, then ensure
    # the stream terminates with a :MESSAGE_END token.
    # @return [Array<Array>] the accumulated tokens.
    def tokenize
      until scanner.eos?
        case @state
        when HEADER then do_header
        when BODY then do_body
        when FOOTER then do_footer
        end
      end
      maybe_end_message!
      tokens
    end

    private

    def initialize(bundle_string)
      @bundle_string = bundle_string
      @scanner = StringScanner.new(bundle_string)
      @tokens = []
      @state = HEADER
    end

    # Abort lexing with the current scanner position.
    def flunk!
      raise "Syntax error at #{scanner.pos}"
    end

    # Append a :MESSAGE_END token unless the previous token already ends a
    # message. Safe navigation guards the empty-stream case — the original
    # `tokens.last.last` raised NoMethodError when no tokens existed.
    # This might be a mistake or unnecessary. NN 20 MAR 2020
    def maybe_end_message!
      @tokens << [:MESSAGE_END] unless tokens.last&.last == :MESSAGE_END
    end

    # Consume one header line (author/depth/prev/kind) or the blank line
    # that ends the header; anything else is a syntax error.
    def do_header
      if scanner.scan(AUTHOR)
        author = scanner.matched.chomp.gsub("author ", "")
        @tokens << [:AUTHOR, author]
        return
      end

      if scanner.scan(DEPTH)
        depth = scanner.matched.chomp.gsub("depth ", "").to_i
        @tokens << [:DEPTH, depth]
        return
      end

      if scanner.scan(PREV)
        prev = scanner.matched.chomp.gsub("prev ", "")
        @tokens << [:PREV, prev]
        return
      end

      if scanner.scan(KIND)
        kind = scanner.matched.chomp.gsub("kind ", "")
        @tokens << [:KIND, kind]
        return
      end

      if scanner.scan(SEPERATOR)
        @state = BODY
        @tokens << [:HEADER_END]
        return
      end

      flunk!
    end

    # Consume one `key:value` body line, or the blank line that moves the
    # lexer into the footer state.
    def do_body
      if scanner.scan(BODY_ENTRY)
        key, value = scanner.matched.chomp.split(":")
        @tokens << [:BODY_ENTRY, key, value]
        return
      end

      if scanner.scan(SEPERATOR)
        @state = FOOTER
        @tokens << [:BODY_END]
        return
      end

      flunk!
    end

    # Consume the signature line, or the blank line that ends the message
    # and resets the lexer to ingest the next entry.
    # If scanner.eos? == true, the main loop just terminates.
    def do_footer
      if scanner.scan(FOOTER_ENTRY)
        sig = scanner.matched.strip.gsub("signature ", "")
        @tokens << [:SIGNATURE, sig]
        return
      end

      if scanner.scan(SEPERATOR)
        @state = HEADER
        maybe_end_message!
        return
      end

      raise "Parse error at #{scanner.pos}. Double carriage return not found."
    end
  end
end