From 11b2e8c747a454ba3702ce2ea41ccc212241ee5b Mon Sep 17 00:00:00 2001 From: Rick Carlino Date: Wed, 13 May 2020 07:30:11 -0500 Subject: [PATCH 1/3] Problems with new Pigeon::StringScanner --- README.md | 16 ++++++---------- lib/pigeon.rb | 1 + lib/pigeon/lexer.rb | 14 +++++++++++++- lib/pigeon/string_scanner.rb | 28 ++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 11 deletions(-) create mode 100644 lib/pigeon/string_scanner.rb diff --git a/README.md b/README.md index 9191f4a..d92ea76 100644 --- a/README.md +++ b/README.md @@ -46,18 +46,14 @@ See `kitchen_sink.sh` examples. # Current Status - - [X] Create a contact email for project outsiders (and maybe a developer email list?) - - [X] Update README.md - - [X] Update Ruby API docs - - [X] Update blobs spec to clear out blob folder every time it runs. - - [ ] Oops, `lipmaa` field needs to be a hash, not an integer! - - [ ] Change `@`, `%`, `&` to `feed.`, `mesg.`, `blob.`, respectively. Better readability, easier onboarding, URL friendly. - - [ ] Change draft and message templates to render headers in this order: `author`, `prev`, `lipmaa`, `depth`, `kind`. - - [ ] Make location of blob folder configurable? - - [ ] Update Dev docs in protocol spec to reflect changes to `lipmaa` header. - - [ ] Update spec document CLI usage examples to reflect API changes in 2020. + - [X] Oops, `lipmaa` field needs to be a hash, not an integer! - [ ] BUG: Keys that start with a carriage return (`\n`) freeze tokenizer. - [ ] Convert literals to constants, remove unused locals, reduce duplication, run linter. + - [ ] Change message templates to render headers in this order: `author`, `prev`, `lipmaa`, `depth`, `kind`. + - [ ] Make location of blob folder configurable? + - [ ] Change `@`, `%`, `&` to `feed.`, `mesg.`, `blob.`, respectively. Better readability, easier onboarding, URL friendly. + - [ ] Update Dev docs in protocol spec to reflect changes to `lipmaa` header. + - [ ] Update spec document CLI usage examples to reflect API changes in 2020. - [ ] 100% class / module documentation - [ ] Run a [terminology extraction tool](https://www.visualthesaurus.com/vocabgrabber/#) on the documentation and write a glossary of terms. - [ ] Publish to RubyGems diff --git a/lib/pigeon.rb b/lib/pigeon.rb index 2a052a2..2111af6 100644 --- a/lib/pigeon.rb +++ b/lib/pigeon.rb @@ -283,6 +283,7 @@ require_relative File.join("pigeon", "message_serializer.rb") require_relative File.join("pigeon", "draft_serializer.rb") require_relative File.join("pigeon", "message.rb") require_relative File.join("pigeon", "draft.rb") +require_relative File.join("pigeon", "string_scanner.rb") require_relative File.join("pigeon", "lexer.rb") require_relative File.join("pigeon", "parser.rb") require_relative File.join("pigeon", "database.rb") diff --git a/lib/pigeon/lexer.rb b/lib/pigeon/lexer.rb index d8001b9..9784dd2 100644 --- a/lib/pigeon/lexer.rb +++ b/lib/pigeon/lexer.rb @@ -9,15 +9,17 @@ module Pigeon end def initialize(bundle_string) - @bundle_string = bundle_string + @bundle_string = bundle_string + "\n" @scanner = StringScanner.new(bundle_string) @tokens = [] @state = HEADER @last_good = :START + @loops = 0 end def tokenize until scanner.eos? + safety_check case @state when HEADER then do_header when BODY then do_body @@ -30,6 +32,7 @@ module Pigeon def tokenize_unsigned(signature) until scanner.eos? + safety_check case @state when HEADER then do_header when BODY then do_body @@ -42,6 +45,14 @@ module Pigeon private + def safety_check + if @loops > 1000 + raise "RUNAWAY LOOP DETECTED" + else + @loops += 1 + end + end + attr_reader :bundle_string, :scanner, :tokens # TODO: Change all the `{40,90}` values in ::Lexer to real values # TODO: Create regexes using string and Regexp.new() for cleaner regexes. @@ -167,6 +178,7 @@ module Pigeon @state = HEADER maybe_end_message! @last_good = :FOOTER_SEPERATOR + @loops = 0 return end diff --git a/lib/pigeon/string_scanner.rb b/lib/pigeon/string_scanner.rb new file mode 100644 index 0000000..d9db7f3 --- /dev/null +++ b/lib/pigeon/string_scanner.rb @@ -0,0 +1,28 @@ +module Pigeon + class StringScanner + attr_reader :pos, :matched, :string + MAX_TOKEN_SIZE = 500 + + def initialize(string) + @string = string.freeze + @pos = 0 + @matched = "" + end + + def eos? + result = @pos == @string.length - 1 + puts result ? "is eos" : "not eos" + end + + def scan(regex) + puts "Scanning #{regex}" + @last = regex + match = regex.match(@string[@pos...MAX_TOKEN_SIZE]) + if match + length = match.end(0) + @pos += length + @matched = match.values_at(0).first + end + end + end +end From 50728795b5e1d6b0d5a3933491ed7ab1663b10be Mon Sep 17 00:00:00 2001 From: Rick Carlino Date: Thu, 14 May 2020 08:02:41 -0500 Subject: [PATCH 2/3] WIP --- lib/pigeon/lexer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pigeon/lexer.rb b/lib/pigeon/lexer.rb index 9784dd2..7753ff9 100644 --- a/lib/pigeon/lexer.rb +++ b/lib/pigeon/lexer.rb @@ -9,7 +9,7 @@ module Pigeon end def initialize(bundle_string) - @bundle_string = bundle_string + "\n" + @bundle_string = bundle_string @scanner = StringScanner.new(bundle_string) @tokens = [] @state = HEADER From 7d05f28f5ded5ae07121a7f47bca8e831e8d13d3 Mon Sep 17 00:00:00 2001 From: Rick Carlino Date: Thu, 14 May 2020 08:07:33 -0500 Subject: [PATCH 3/3] Sorta kinda fix the frozen lexer issue --- lib/pigeon.rb | 1 - lib/pigeon/string_scanner.rb | 28 ---------------------------- spec/pigeon/message_spec.rb | 3 +-- 3 files changed, 1 insertion(+), 31 deletions(-) delete mode 100644 lib/pigeon/string_scanner.rb diff --git a/lib/pigeon.rb b/lib/pigeon.rb index 4012e8f..0236a37 100644 --- a/lib/pigeon.rb +++ b/lib/pigeon.rb @@ -277,7 +277,6 @@ require_relative File.join("pigeon", "message_serializer.rb") require_relative File.join("pigeon", "draft_serializer.rb") require_relative File.join("pigeon", "message.rb") require_relative File.join("pigeon", "draft.rb") -require_relative File.join("pigeon", "string_scanner.rb") require_relative File.join("pigeon", "lexer.rb") require_relative File.join("pigeon", "parser.rb") require_relative File.join("pigeon", "database.rb") diff --git a/lib/pigeon/string_scanner.rb b/lib/pigeon/string_scanner.rb deleted file mode 100644 index d9db7f3..0000000 --- a/lib/pigeon/string_scanner.rb +++ /dev/null @@ -1,28 +0,0 @@ -module Pigeon - class StringScanner - attr_reader :pos, :matched, :string - MAX_TOKEN_SIZE = 500 - - def initialize(string) - @string = string.freeze - @pos = 0 - @matched = "" - end - - def eos? - result = @pos == @string.length - 1 - puts result ? "is eos" : "not eos" - end - - def scan(regex) - puts "Scanning #{regex}" - @last = regex - match = regex.match(@string[@pos...MAX_TOKEN_SIZE]) - if match - length = match.end(0) - @pos += length - @matched = match.values_at(0).first - end - end - end -end diff --git a/spec/pigeon/message_spec.rb b/spec/pigeon/message_spec.rb index 392c135..bf03cfd 100644 --- a/spec/pigeon/message_spec.rb +++ b/spec/pigeon/message_spec.rb @@ -167,14 +167,13 @@ RSpec.describe Pigeon::Message do # This was originally a bug nooted during development # That caused a runaway loop in the tokenizer. it "handles this key: '\\nVUx0hC3'" do - pending("Known bug- will fix after writing docs.") db.delete_current_draft db.new_draft(kind: "unit_test") db.update_draft("\nVUx0hC3", "n") db.update_draft("n", "\nVUx0hC3") Timeout::timeout(0.5) do boom = -> { Pigeon::Lexer.tokenize(db.publish_draft.render) } - expect(boom).to raise_error(Pigeon::Lexer::LexError) + expect(boom).to raise_error("RUNAWAY LOOP DETECTED") end end