From 11b2e8c747a454ba3702ce2ea41ccc212241ee5b Mon Sep 17 00:00:00 2001 From: Rick Carlino Date: Wed, 13 May 2020 07:30:11 -0500 Subject: [PATCH] Problems with new Pigeon::StringScanner --- README.md | 16 ++++++---------- lib/pigeon.rb | 1 + lib/pigeon/lexer.rb | 14 +++++++++++++- lib/pigeon/string_scanner.rb | 28 ++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 11 deletions(-) create mode 100644 lib/pigeon/string_scanner.rb diff --git a/README.md b/README.md index 9191f4a..d92ea76 100644 --- a/README.md +++ b/README.md @@ -46,18 +46,14 @@ See `kitchen_sink.sh` examples. # Current Status - - [X] Create a contact email for project outsiders (and maybe a developer email list?) - - [X] Update README.md - - [X] Update Ruby API docs - - [X] Update blobs spec to clear out blob folder every time it runs. - - [ ] Oops, `lipmaa` field needs to be a hash, not an integer! - - [ ] Change `@`, `%`, `&` to `feed.`, `mesg.`, `blob.`, respectively. Better readability, easier onboarding, URL friendly. - - [ ] Change draft and message templates to render headers in this order: `author`, `prev`, `lipmaa`, `depth`, `kind`. - - [ ] Make location of blob folder configurable? - - [ ] Update Dev docs in protocol spec to reflect changes to `lipmaa` header. - - [ ] Update spec document CLI usage examples to reflect API changes in 2020. + - [X] Oops, `lipmaa` field needs to be a hash, not an integer! - [ ] BUG: Keys that start with a carriage return (`\n`) freeze tokenizer. - [ ] Convert literals to constants, remove unused locals, reduce duplication, run linter. + - [ ] Change message templates to render headers in this order: `author`, `prev`, `lipmaa`, `depth`, `kind`. + - [ ] Make location of blob folder configurable? + - [ ] Change `@`, `%`, `&` to `feed.`, `mesg.`, `blob.`, respectively. Better readability, easier onboarding, URL friendly. + - [ ] Update Dev docs in protocol spec to reflect changes to `lipmaa` header. 
+ - [ ] Update spec document CLI usage examples to reflect API changes in 2020. - [ ] 100% class / module documentation - [ ] Run a [terminology extraction tool](https://www.visualthesaurus.com/vocabgrabber/#) on the documentation and write a glossary of terms. - [ ] Publish to RubyGems diff --git a/lib/pigeon.rb b/lib/pigeon.rb index 2a052a2..2111af6 100644 --- a/lib/pigeon.rb +++ b/lib/pigeon.rb @@ -283,6 +283,7 @@ require_relative File.join("pigeon", "message_serializer.rb") require_relative File.join("pigeon", "draft_serializer.rb") require_relative File.join("pigeon", "message.rb") require_relative File.join("pigeon", "draft.rb") +require_relative File.join("pigeon", "string_scanner.rb") require_relative File.join("pigeon", "lexer.rb") require_relative File.join("pigeon", "parser.rb") require_relative File.join("pigeon", "database.rb") diff --git a/lib/pigeon/lexer.rb b/lib/pigeon/lexer.rb index d8001b9..9784dd2 100644 --- a/lib/pigeon/lexer.rb +++ b/lib/pigeon/lexer.rb @@ -9,15 +9,17 @@ module Pigeon end def initialize(bundle_string) - @bundle_string = bundle_string + @bundle_string = bundle_string + "\n" @scanner = StringScanner.new(bundle_string) @tokens = [] @state = HEADER @last_good = :START + @loops = 0 end def tokenize until scanner.eos? + safety_check case @state when HEADER then do_header when BODY then do_body @@ -30,6 +32,7 @@ module Pigeon def tokenize_unsigned(signature) until scanner.eos? + safety_check case @state when HEADER then do_header when BODY then do_body @@ -42,6 +45,14 @@ module Pigeon private + def safety_check + if @loops > 1000 + raise "RUNAWAY LOOP DETECTED" + else + @loops += 1 + end + end + attr_reader :bundle_string, :scanner, :tokens # TODO: Change all the `{40,90}` values in ::Lexer to real values # TODO: Create regexes using string and Regexp.new() for cleaner regexes. @@ -167,6 +178,7 @@ module Pigeon @state = HEADER maybe_end_message! 
module Pigeon
  # Minimal forward-only scanner over an immutable string.
  #
  # It keeps a cursor (+pos+) into the string and consumes text from the
  # cursor with #scan. Unlike a plain Regexp match, a single #scan call
  # never examines more than MAX_TOKEN_SIZE characters, which bounds the
  # work done per token.
  class StringScanner
    # Maximum number of characters a single #scan call may look at, and
    # therefore the maximum length of one matched token.
    MAX_TOKEN_SIZE = 500

    # pos     - Integer offset of the cursor into +string+.
    # matched - text consumed by the most recent successful #scan
    #           ("" before the first one; left unchanged by a failed scan).
    # string  - the frozen text being scanned.
    attr_reader :pos, :matched, :string

    # @param string [String] text to scan. A frozen private copy is kept so
    #   that the caller's own object is not frozen as a side effect.
    def initialize(string)
      @string = string.dup.freeze
      @pos = 0
      @matched = ""
    end

    # @return [Boolean] true once the cursor has consumed the whole string
    #   (always true for an empty string).
    def eos?
      @pos >= @string.length
    end

    # Tries to match +regex+ exactly at the cursor. On success the cursor
    # moves past the match and the matched text is remembered in +matched+.
    #
    # @param regex [Regexp] pattern to try at the current position.
    # @return [String, nil] the matched text, or nil when the pattern does
    #   not match at the cursor (the cursor is not moved in that case).
    def scan(regex)
      # (start, length) slice: a window of MAX_TOKEN_SIZE characters that
      # begins at the cursor, not a range ending at absolute index 500.
      window = @string[@pos, MAX_TOKEN_SIZE]
      match = regex.match(window)

      # Regexp#match returns the leftmost match, so a match that does not
      # begin at offset 0 means nothing matches at the cursor itself.
      return unless match && match.begin(0).zero?

      @pos += match.end(0)
      @matched = match[0]
    end
  end
end