diff --git a/gemtext/fuzz_test.go b/gemtext/fuzz_test.go new file mode 100644 index 0000000..dce0587 --- /dev/null +++ b/gemtext/fuzz_test.go @@ -0,0 +1,16 @@ +package gemtext_test + +import ( + "bytes" + "testing" + + "tildegit.org/tjp/gus/gemtext" +) + +func FuzzParse(f *testing.F) { + f.Fuzz(func(t *testing.T, input []byte) { + if _, err := gemtext.Parse(bytes.NewBuffer(input)); err != nil { + t.Errorf("Parse error: %s", err.Error()) + } + }) +} diff --git a/gemtext/parse.go b/gemtext/parse.go index 4a8c641..7041fde 100644 --- a/gemtext/parse.go +++ b/gemtext/parse.go @@ -2,7 +2,6 @@ package gemtext import ( "bufio" - "bytes" "io" ) @@ -22,12 +21,12 @@ func Parse(input io.Reader) (Document, error) { var line Line if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') { - line = PreformattedTextLine{raw: raw} + line = PreformattedTextLine{raw: raw} } else { line = ParseLine(raw) } - if line.Type() == LineTypePreformatToggle { + if line != nil && line.Type() == LineTypePreformatToggle { if inPFT { toggle := line.(PreformatToggleLine) (&toggle).clearAlt() @@ -48,107 +47,3 @@ func Parse(input io.Reader) (Document, error) { return Document(lines), nil } - -// ParseLine parses a single line (including the trailing \n) into a gemtext.Line. -func ParseLine(line []byte) Line { - if len(line) == 0 { - return nil - } - - switch line[0] { - case '=': - if len(line) == 1 || line[1] != '>' { - break - } - return parseLinkLine(line) - case '`': - if len(line) < 3 || line[1] != '`' || line[2] != '`' { - break - } - return parsePreformatToggleLine(line) - case '#': - level := 1 - if len(line) > 1 && line[1] == '#' { - level += 1 - if len(line) > 2 && line[2] == '#' { - level += 1 - } - } - return parseHeadingLine(level, line) - case '*': - if len(line) == 1 || line[1] != ' ' { - break - } - return parseListItemLine(line) - case '>': - return parseQuoteLine(line) - } - - return TextLine{raw: line} -} - -func parseLinkLine(raw []byte) LinkLine { - line := LinkLine{raw: raw} - - // move past =>[] - raw = bytes.TrimLeft(raw[2:], " \t") - - // find the next space or tab - spIdx := bytes.IndexByte(raw, ' ') - tbIdx := bytes.IndexByte(raw, '\t') - idx := spIdx - if idx == -1 { - idx = tbIdx - } - if tbIdx >= 0 && tbIdx < idx { - idx = tbIdx - } - - if idx < 0 { - line.URL = bytes.TrimRight(raw, "\r\n") - return line - } - - line.URL = raw[:idx] - raw = raw[idx+1:] - - label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n") - if len(label) > 0 { - line.Label = label - } - - return line -} - -func parsePreformatToggleLine(raw []byte) PreformatToggleLine { - line := PreformatToggleLine{raw: raw} - - raw = bytes.TrimRight(raw[3:], "\r\n") - if len(raw) > 0 { - line.AltText = raw - } - - return line -} - -func parseHeadingLine(level int, raw []byte) HeadingLine { - return HeadingLine{ - raw: raw, - lineType: LineTypeHeading1 - 1 + LineType(level), - Body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"), - } -} - -func parseListItemLine(raw []byte) ListItemLine { - return ListItemLine{ - raw: raw, - Body: bytes.TrimRight(raw[2:], "\r\n"), - } -} - -func parseQuoteLine(raw []byte) QuoteLine { - return QuoteLine{ - raw: raw, - Body: bytes.TrimRight(raw[1:], "\r\n"), - } -} diff --git a/gemtext/parse_line.go b/gemtext/parse_line.go new file mode 100644 index 0000000..39187a8 --- /dev/null +++ b/gemtext/parse_line.go @@ -0,0 +1,107 @@ +package gemtext + +import "bytes" + +// ParseLine parses a single line (including the trailing \n) into a gemtext.Line. +func ParseLine(line []byte) Line { + if len(line) == 0 { + return nil + } + + switch line[0] { + case '=': + if len(line) == 1 || line[1] != '>' { + break + } + return parseLinkLine(line) + case '`': + if len(line) < 3 || line[1] != '`' || line[2] != '`' { + break + } + return parsePreformatToggleLine(line) + case '#': + level := 1 + if len(line) > 1 && line[1] == '#' { + level += 1 + if len(line) > 2 && line[2] == '#' { + level += 1 + } + } + return parseHeadingLine(level, line) + case '*': + if len(line) == 1 || line[1] != ' ' { + break + } + return parseListItemLine(line) + case '>': + return parseQuoteLine(line) + } + + return TextLine{raw: line} +} + +func parseLinkLine(raw []byte) LinkLine { + line := LinkLine{raw: raw} + + // move past =>[] + raw = bytes.TrimLeft(raw[2:], " \t") + + // find the next space or tab + spIdx := bytes.IndexByte(raw, ' ') + tbIdx := bytes.IndexByte(raw, '\t') + idx := spIdx + if idx == -1 { + idx = tbIdx + } + if tbIdx >= 0 && tbIdx < idx { + idx = tbIdx + } + + if idx < 0 { + line.url = bytes.TrimRight(raw, "\r\n") + return line + } + + line.url = raw[:idx] + raw = raw[idx+1:] + + label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n") + if len(label) > 0 { + line.label = label + } + + return line +} + +func parsePreformatToggleLine(raw []byte) PreformatToggleLine { + line := PreformatToggleLine{raw: raw} + + raw = bytes.TrimRight(raw[3:], "\r\n") + if len(raw) > 0 { + line.altText = raw + } + + return line +} + +func parseHeadingLine(level int, raw []byte) HeadingLine { + return HeadingLine{ + raw: raw, + lineType: LineTypeHeading1 - 1 + LineType(level), + body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"), + } +} + +func parseListItemLine(raw []byte) ListItemLine { + return ListItemLine{ + raw: raw, + body: bytes.TrimRight(raw[2:], "\r\n"), + } +} + +func parseQuoteLine(raw []byte) QuoteLine { + return QuoteLine{ + raw: raw, + body: bytes.TrimRight(raw[1:], "\r\n"), + } +} diff --git a/gemtext/parse_line_test.go b/gemtext/parse_line_test.go index 64c1bc7..0953103 100644 --- a/gemtext/parse_line_test.go +++ b/gemtext/parse_line_test.go @@ -46,12 +46,12 @@ func TestParseLinkLine(t *testing.T) { t.Fatalf("expected a LinkLine, got %T", line) } - if string(link.URL) != test.url { - t.Errorf("expected url %q, got %q", test.url, string(link.URL)) + if link.URL() != test.url { + t.Errorf("expected url %q, got %q", test.url, link.URL()) } - if string(link.Label) != test.label { - t.Errorf("expected label %q, got %q", test.label, string(link.Label)) + if link.Label() != test.label { + t.Errorf("expected label %q, got %q", test.label, link.Label()) } }) } @@ -93,8 +93,8 @@ func TestParsePreformatToggleLine(t *testing.T) { t.Fatalf("expected a PreformatToggleLine, got %T", line) } - if string(toggle.AltText) != test.altText { - t.Errorf("expected alt-text %q, got %q", test.altText, string(toggle.AltText)) + if toggle.AltText() != test.altText { + t.Errorf("expected alt-text %q, got %q", test.altText, toggle.AltText()) } }) } @@ -147,8 +147,8 @@ func TestParseHeadingLine(t *testing.T) { t.Fatalf("expected HeadingLine, got a %T", line) } - if string(hdg.Body) != test.body { - t.Errorf("expected body %q, got %q", test.body, string(hdg.Body)) + if hdg.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, hdg.Body()) } }) } @@ -188,8 +188,8 @@ func TestParseListItemLine(t *testing.T) { t.Fatalf("expected ListItemLine, got a %T", line) } - if string(li.Body) != test.body { - t.Errorf("expected body %q, got %q", test.body, string(li.Body)) + if li.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, li.Body()) } }) } @@ -233,8 +233,8 @@ func TestParseQuoteLine(t *testing.T) { t.Fatalf("expected QuoteLine , got a %T", line) } - if string(qu.Body) != test.body { - t.Errorf("expected body %q, got %q", test.body, string(qu.Body)) + if qu.Body() != test.body { + t.Errorf("expected body %q, got %q", test.body, qu.Body()) } }) } diff --git a/gemtext/parse_test.go b/gemtext/parse_test.go new file mode 100644 index 0000000..bda5310 --- /dev/null +++ b/gemtext/parse_test.go @@ -0,0 +1,104 @@ +package gemtext_test + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "tildegit.org/tjp/gus/gemtext" +) + +func TestParse(t *testing.T) { + docBytes := []byte(` +# top-level header line + +## subtitle + +This is some non-blank regular text. + +* an +* unordered +* list + +=> gemini://google.com/ as if + +> this is a quote +> -tjp + +`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n") + + assertEmptyLine := func(t *testing.T, line gemtext.Line) { + assert.Equal(t, gemtext.LineTypeText, line.Type()) + assert.Equal(t, "\n", string(line.Raw())) + } + + doc, err := gemtext.Parse(bytes.NewBuffer(docBytes)) + require.Nil(t, err) + + require.Equal(t, 18, len(doc)) + + assert.Equal(t, gemtext.LineTypeHeading1, doc[0].Type()) + assert.Equal(t, "# top-level header line\n", string(doc[0].Raw())) + assert.Equal(t, "top-level header line", doc[0].(gemtext.HeadingLine).Body()) + + assertEmptyLine(t, doc[1]) + + assert.Equal(t, gemtext.LineTypeHeading2, doc[2].Type()) + assert.Equal(t, "## subtitle\n", string(doc[2].Raw())) + assert.Equal(t, "subtitle", doc[2].(gemtext.HeadingLine).Body()) + + assertEmptyLine(t, doc[3]) + + assert.Equal(t, gemtext.LineTypeText, doc[4].Type()) + assert.Equal(t, "This is some non-blank regular text.\n", string(doc[4].Raw())) + + assertEmptyLine(t, doc[5]) + + assert.Equal(t, gemtext.LineTypeListItem, doc[6].Type()) + assert.Equal(t, "an", doc[6].(gemtext.ListItemLine).Body()) + + assert.Equal(t, gemtext.LineTypeListItem, doc[7].Type()) + assert.Equal(t, "unordered", doc[7].(gemtext.ListItemLine).Body()) + + assert.Equal(t, gemtext.LineTypeListItem, doc[8].Type()) + assert.Equal(t, "list", doc[8].(gemtext.ListItemLine).Body()) + + assertEmptyLine(t, doc[9]) + + assert.Equal(t, gemtext.LineTypeLink, doc[10].Type()) + assert.Equal(t, "=> gemini://google.com/ as if\n", string(doc[10].Raw())) + assert.Equal(t, "gemini://google.com/", doc[10].(gemtext.LinkLine).URL()) + assert.Equal(t, "as if", doc[10].(gemtext.LinkLine).Label()) + + assertEmptyLine(t, doc[11]) + + assert.Equal(t, gemtext.LineTypeQuote, doc[12].Type()) + assert.Equal(t, "> this is a quote\n", string(doc[12].Raw())) + assert.Equal(t, " this is a quote", doc[12].(gemtext.QuoteLine).Body()) + + assert.Equal(t, gemtext.LineTypeQuote, doc[13].Type()) + assert.Equal(t, "> -tjp\n", string(doc[13].Raw())) + assert.Equal(t, " -tjp", doc[13].(gemtext.QuoteLine).Body()) + + assertEmptyLine(t, doc[14]) + + assert.Equal(t, gemtext.LineTypePreformatToggle, doc[15].Type()) + assert.Equal(t, "```pre-formatted code\n", string(doc[15].Raw())) + assert.Equal(t, "pre-formatted code", doc[15].(gemtext.PreformatToggleLine).AltText()) + + assert.Equal(t, gemtext.LineTypePreformattedText, doc[16].Type()) + assert.Equal(t, "doc := gemtext.Parse(req.Body)\n", string(doc[16].Raw())) + + assert.Equal(t, gemtext.LineTypePreformatToggle, doc[17].Type()) + assert.Equal(t, "```ignored closing alt-text\n", string(doc[17].Raw())) + assert.Equal(t, "", doc[17].(gemtext.PreformatToggleLine).AltText()) + + // ensure we can rebuild the original doc from all the line.Raw()s + buf := &bytes.Buffer{} + for _, line := range doc { + _, _ = buf.Write(line.Raw()) + } + assert.Equal(t, string(docBytes), buf.String()) +} diff --git a/gemtext/types.go b/gemtext/types.go index fb9352a..fefbece 100644 --- a/gemtext/types.go +++ b/gemtext/types.go @@ -91,39 +91,39 @@ func (tl TextLine) Raw() []byte { return tl.raw } // LinkLine is a line of LineTypeLink. type LinkLine struct { raw []byte - - // URL is the original bytes of the url portion of the line. - // - // It is not guaranteed to be a valid URL. - URL []byte - - // Label is the label portion of the line. - // - // If there was no label it will always be nil, never []byte{}. - Label []byte + url []byte + label []byte } func (ll LinkLine) Type() LineType { return LineTypeLink } func (ll LinkLine) Raw() []byte { return ll.raw } +// URL returns the original url portion of the line. +// +// It is not guaranteed to be a valid URL. +func (ll LinkLine) URL() string { return string(ll.url) } + +// Label returns the label portion of the line. +func (ll LinkLine) Label() string { return string(ll.label) } + // PreformatToggleLine is a preformatted text toggle line. type PreformatToggleLine struct { raw []byte - - // AltText contains the alt-text portion of the line. - // - // It will either have len() > 0 or be nil. - // - // If the line was parsed as part of a full document by Parse(), - // and this is a *closing* toggle, any alt-text present will be - // stripped and this will be nil. If the line was parsed by - // ParseLine() no such correction is performed. - AltText []byte + altText []byte } func (tl PreformatToggleLine) Type() LineType { return LineTypePreformatToggle } func (tl PreformatToggleLine) Raw() []byte { return tl.raw } -func (tl *PreformatToggleLine) clearAlt() { tl.AltText = nil } + +// AltText returns the alt-text portion of the line. +// +// If the line was parsed as part of a full document by Parse(), +// and this is a *closing* toggle, any alt-text present will be +// stripped and this will be empty. If the line was parsed by +// ParseLine() no such correction is performed. +func (tl PreformatToggleLine) AltText() string { return string(tl.altText) } + +func (tl *PreformatToggleLine) clearAlt() { tl.altText = nil } // PreformattedTextLine represents a line between two toggles. // @@ -140,32 +140,35 @@ func (tl PreformattedTextLine) Raw() []byte { return tl.raw } type HeadingLine struct { raw []byte lineType LineType - - // Body is the portion of the line with the header text. - Body []byte + body []byte } func (hl HeadingLine) Type() LineType { return hl.lineType } func (hl HeadingLine) Raw() []byte { return hl.raw } +// Body returns the portion of the line with the header text. +func (hl HeadingLine) Body() string { return string(hl.body) } + // ListItemLine is a line of LineTypeListItem. type ListItemLine struct { raw []byte - - // Body is the text of the list item. - Body []byte + body []byte } func (li ListItemLine) Type() LineType { return LineTypeListItem } func (li ListItemLine) Raw() []byte { return li.raw } +// Body returns the text of the list item. +func (li ListItemLine) Body() string { return string(li.body) } + // QuoteLine is a line of LineTypeQuote. type QuoteLine struct { - raw []byte - - // Body is the text of the quote. - Body []byte + raw []byte + body []byte } func (ql QuoteLine) Type() LineType { return LineTypeQuote } func (ql QuoteLine) Raw() []byte { return ql.raw } + +// Body returns the text of the quote. +func (ql QuoteLine) Body() string { return string(ql.body) } diff --git a/go.mod b/go.mod index 7af8e8d..900e959 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,14 @@ module tildegit.org/tjp/gus go 1.19 -require github.com/go-kit/log v0.2.1 +require ( + github.com/go-kit/log v0.2.1 + github.com/stretchr/testify v1.8.1 +) -require github.com/go-logfmt/logfmt v0.5.1 // indirect +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-logfmt/logfmt v0.5.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum index fa964aa..d766032 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,21 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=