Completed gemtext package.
continuous-integration/drone/push Build is passing Details

resolves #2

- fuzz testing
- split out line parsing into a separate file
- changed line type-specific public []byte fields to string accessor methods
- added document parsing test for a stress test doc
- added dependency on stretchr/testify
This commit is contained in:
tjpcc 2023-01-14 09:57:16 -07:00
parent aa23984bc2
commit 0d904f9f10
8 changed files with 302 additions and 152 deletions

16
gemtext/fuzz_test.go Normal file
View File

@ -0,0 +1,16 @@
package gemtext_test
import (
"bytes"
"testing"
"tildegit.org/tjp/gus/gemtext"
)
// FuzzParse feeds arbitrary byte sequences to gemtext.Parse and reports a
// test failure whenever Parse returns a non-nil error.
//
// A small seed corpus is registered so that the target is also exercised by a
// plain `go test` run (which only replays seed inputs) and so the fuzzer
// starts from inputs that touch each gemtext line type.
func FuzzParse(f *testing.F) {
	seeds := []string{
		"",
		"\n",
		"plain text\n",
		"no trailing newline",
		"# heading\n## subtitle\n### sub-subtitle\n",
		"* list item\n",
		"=> gemini://example.org/ a label\n",
		"=>\n",
		"> quote\n",
		"```alt-text\npreformatted\n```\n",
		"```never closed\n",
	}
	for _, seed := range seeds {
		f.Add([]byte(seed))
	}

	f.Fuzz(func(t *testing.T, input []byte) {
		if _, err := gemtext.Parse(bytes.NewBuffer(input)); err != nil {
			t.Errorf("Parse error: %s", err.Error())
		}
	})
}

View File

@ -2,7 +2,6 @@ package gemtext
import (
"bufio"
"bytes"
"io"
)
@ -22,12 +21,12 @@ func Parse(input io.Reader) (Document, error) {
var line Line
if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') {
line = PreformattedTextLine{raw: raw}
line = PreformattedTextLine{raw: raw}
} else {
line = ParseLine(raw)
}
if line.Type() == LineTypePreformatToggle {
if line != nil && line.Type() == LineTypePreformatToggle {
if inPFT {
toggle := line.(PreformatToggleLine)
(&toggle).clearAlt()
@ -48,107 +47,3 @@ func Parse(input io.Reader) (Document, error) {
return Document(lines), nil
}
// ParseLine classifies one line of gemtext (trailing \n included) and returns
// the matching gemtext.Line implementation.
//
// An empty input yields nil. A line whose leading marker is incomplete
// ("=" without ">", fewer than three backticks, "*" without a following
// space) is returned as a TextLine.
func ParseLine(line []byte) Line {
	n := len(line)
	if n == 0 {
		return nil
	}

	lead := line[0]
	switch {
	case lead == '=' && n > 1 && line[1] == '>':
		return parseLinkLine(line)
	case lead == '`' && n >= 3 && line[1] == '`' && line[2] == '`':
		return parsePreformatToggleLine(line)
	case lead == '#':
		// Count leading '#' characters, capped at three heading levels.
		level := 1
		for level < 3 && level < n && line[level] == '#' {
			level++
		}
		return parseHeadingLine(level, line)
	case lead == '*' && n > 1 && line[1] == ' ':
		return parseListItemLine(line)
	case lead == '>':
		return parseQuoteLine(line)
	}

	return TextLine{raw: line}
}
// parseLinkLine builds a LinkLine from a line that begins with "=>".
//
// The URL is the first run of non-space, non-tab bytes after the marker; the
// remainder, trimmed of surrounding whitespace and line ending, becomes the
// Label. Label is left nil when nothing remains.
func parseLinkLine(raw []byte) LinkLine {
	link := LinkLine{raw: raw}

	// Skip the "=>" marker plus any whitespace in front of the URL.
	rest := bytes.TrimLeft(raw[2:], " \t")

	// Locate the first space or tab, which terminates the URL.
	sep := -1
	for i, c := range rest {
		if c == ' ' || c == '\t' {
			sep = i
			break
		}
	}

	if sep < 0 {
		// No separator: the whole remainder (minus line ending) is the URL.
		link.URL = bytes.TrimRight(rest, "\r\n")
		return link
	}

	link.URL = rest[:sep]
	if label := bytes.TrimRight(bytes.TrimLeft(rest[sep+1:], " \t"), "\r\n"); len(label) > 0 {
		link.Label = label
	}
	return link
}
// parsePreformatToggleLine builds a PreformatToggleLine from a line that
// begins with three backticks. Whatever follows the backticks, minus the line
// ending, is stored as AltText; AltText stays nil when nothing follows.
func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
	alt := bytes.TrimRight(raw[3:], "\r\n")
	if len(alt) == 0 {
		return PreformatToggleLine{raw: raw}
	}
	return PreformatToggleLine{raw: raw, AltText: alt}
}
// parseHeadingLine builds a HeadingLine from raw. level is the number of
// leading '#' characters (ParseLine passes 1, 2 or 3) and selects the line
// type relative to LineTypeHeading1. Body is the text after the markers with
// leading spaces/tabs and the trailing line ending removed.
func parseHeadingLine(level int, raw []byte) HeadingLine {
	text := bytes.TrimLeft(raw[level:], " \t")
	text = bytes.TrimRight(text, "\r\n")

	heading := HeadingLine{raw: raw, Body: text}
	heading.lineType = LineTypeHeading1 - 1 + LineType(level)
	return heading
}
// parseListItemLine builds a ListItemLine from a line that begins with "* ".
// Body is everything after that two-byte marker, minus the line ending.
func parseListItemLine(raw []byte) ListItemLine {
	item := ListItemLine{raw: raw}
	item.Body = bytes.TrimRight(raw[2:], "\r\n")
	return item
}
// parseQuoteLine builds a QuoteLine from a line that begins with '>'.
// Body is everything after the marker, minus the line ending; whitespace
// directly after the '>' is kept.
func parseQuoteLine(raw []byte) QuoteLine {
	quote := QuoteLine{raw: raw}
	quote.Body = bytes.TrimRight(raw[1:], "\r\n")
	return quote
}

107
gemtext/parse_line.go Normal file
View File

@ -0,0 +1,107 @@
package gemtext
import "bytes"
// ParseLine turns one line of gemtext (trailing \n included) into the
// corresponding gemtext.Line value.
//
// It returns nil for an empty slice. Lines that start with a marker character
// but do not complete the marker ("=" without ">", fewer than three
// backticks, "*" without a following space) fall through to TextLine.
func ParseLine(line []byte) Line {
	if len(line) == 0 {
		return nil
	}

	switch {
	case bytes.HasPrefix(line, []byte("=>")):
		return parseLinkLine(line)
	case bytes.HasPrefix(line, []byte("```")):
		return parsePreformatToggleLine(line)
	case line[0] == '#':
		// At most three heading levels are recognised.
		level := 1
		switch {
		case bytes.HasPrefix(line, []byte("###")):
			level = 3
		case bytes.HasPrefix(line, []byte("##")):
			level = 2
		}
		return parseHeadingLine(level, line)
	case bytes.HasPrefix(line, []byte("* ")):
		return parseListItemLine(line)
	case line[0] == '>':
		return parseQuoteLine(line)
	default:
		return TextLine{raw: line}
	}
}
// parseLinkLine builds a LinkLine from a line that begins with "=>".
//
// After the marker and optional whitespace, the URL extends to the first
// space or tab. Anything after that, trimmed of leading whitespace and the
// trailing line ending, is the label; the label is left nil when empty.
func parseLinkLine(raw []byte) LinkLine {
	parsed := LinkLine{raw: raw}

	// Step over "=>" and the optional whitespace that follows it.
	remainder := bytes.TrimLeft(raw[2:], " \t")

	// The first space or tab, if any, ends the URL.
	cut := bytes.IndexAny(remainder, " \t")
	if cut < 0 {
		parsed.url = bytes.TrimRight(remainder, "\r\n")
		return parsed
	}
	parsed.url = remainder[:cut]

	text := bytes.TrimLeft(remainder[cut+1:], " \t")
	text = bytes.TrimRight(text, "\r\n")
	if len(text) > 0 {
		parsed.label = text
	}
	return parsed
}
// parsePreformatToggleLine builds a PreformatToggleLine from a line that
// begins with three backticks. The bytes after the backticks, minus the line
// ending, become the alt-text; the field stays nil when there are none.
func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
	toggle := PreformatToggleLine{raw: raw}
	if alt := bytes.TrimRight(raw[3:], "\r\n"); len(alt) > 0 {
		toggle.altText = alt
	}
	return toggle
}
// parseHeadingLine builds a HeadingLine from raw. level is the count of
// leading '#' characters (ParseLine passes 1, 2 or 3) and picks the line type
// as an offset from LineTypeHeading1. The body is the text after the markers
// with leading spaces/tabs and the trailing line ending stripped.
func parseHeadingLine(level int, raw []byte) HeadingLine {
	kind := LineTypeHeading1 - 1 + LineType(level)

	text := raw[level:]
	text = bytes.TrimLeft(text, " \t")
	text = bytes.TrimRight(text, "\r\n")

	return HeadingLine{raw: raw, lineType: kind, body: text}
}
// parseListItemLine builds a ListItemLine from a line that begins with "* ".
// The body is whatever follows that two-byte marker, minus the line ending.
func parseListItemLine(raw []byte) ListItemLine {
	text := bytes.TrimRight(raw[2:], "\r\n")
	return ListItemLine{raw: raw, body: text}
}
// parseQuoteLine builds a QuoteLine from a line that begins with '>'.
// The body is whatever follows the marker, minus the line ending; whitespace
// immediately after the '>' is preserved.
func parseQuoteLine(raw []byte) QuoteLine {
	text := bytes.TrimRight(raw[1:], "\r\n")
	return QuoteLine{raw: raw, body: text}
}

View File

@ -46,12 +46,12 @@ func TestParseLinkLine(t *testing.T) {
t.Fatalf("expected a LinkLine, got %T", line)
}
if string(link.URL) != test.url {
t.Errorf("expected url %q, got %q", test.url, string(link.URL))
if link.URL() != test.url {
t.Errorf("expected url %q, got %q", test.url, link.URL())
}
if string(link.Label) != test.label {
t.Errorf("expected label %q, got %q", test.label, string(link.Label))
if link.Label() != test.label {
t.Errorf("expected label %q, got %q", test.label, link.Label())
}
})
}
@ -93,8 +93,8 @@ func TestParsePreformatToggleLine(t *testing.T) {
t.Fatalf("expected a PreformatToggleLine, got %T", line)
}
if string(toggle.AltText) != test.altText {
t.Errorf("expected alt-text %q, got %q", test.altText, string(toggle.AltText))
if toggle.AltText() != test.altText {
t.Errorf("expected alt-text %q, got %q", test.altText, toggle.AltText())
}
})
}
@ -147,8 +147,8 @@ func TestParseHeadingLine(t *testing.T) {
t.Fatalf("expected HeadingLine, got a %T", line)
}
if string(hdg.Body) != test.body {
t.Errorf("expected body %q, got %q", test.body, string(hdg.Body))
if hdg.Body() != test.body {
t.Errorf("expected body %q, got %q", test.body, hdg.Body())
}
})
}
@ -188,8 +188,8 @@ func TestParseListItemLine(t *testing.T) {
t.Fatalf("expected ListItemLine, got a %T", line)
}
if string(li.Body) != test.body {
t.Errorf("expected body %q, got %q", test.body, string(li.Body))
if li.Body() != test.body {
t.Errorf("expected body %q, got %q", test.body, li.Body())
}
})
}
@ -233,8 +233,8 @@ func TestParseQuoteLine(t *testing.T) {
t.Fatalf("expected QuoteLine , got a %T", line)
}
if string(qu.Body) != test.body {
t.Errorf("expected body %q, got %q", test.body, string(qu.Body))
if qu.Body() != test.body {
t.Errorf("expected body %q, got %q", test.body, qu.Body())
}
})
}

104
gemtext/parse_test.go Normal file
View File

@ -0,0 +1,104 @@
package gemtext_test
import (
"bytes"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"tildegit.org/tjp/gus/gemtext"
)
// TestParse parses a document containing every gemtext line type and checks
// the type, raw bytes and parsed accessor of each resulting line, then
// verifies that concatenating every line's Raw() reproduces the input.
func TestParse(t *testing.T) {
	// The raw string starts with a newline for readability; [1:] drops it.
	// The blank lines are significant: the assertions below expect an empty
	// text line at indices 1, 3, 5, 9, 11 and 14 (18 lines in total).
	docBytes := []byte(`
# top-level header line

## subtitle

This is some non-blank regular text.

* an
* unordered
* list

=> gemini://google.com/ as if

> this is a quote
> -tjp

`[1:] + "```pre-formatted code\ndoc := gemtext.Parse(req.Body)\n```ignored closing alt-text\n")

	// assertEmptyLine checks that line is a text line holding only "\n".
	assertEmptyLine := func(t *testing.T, line gemtext.Line) {
		t.Helper()
		assert.Equal(t, gemtext.LineTypeText, line.Type())
		assert.Equal(t, "\n", string(line.Raw()))
	}

	doc, err := gemtext.Parse(bytes.NewBuffer(docBytes))
	require.NoError(t, err)
	require.Len(t, doc, 18)

	assert.Equal(t, gemtext.LineTypeHeading1, doc[0].Type())
	assert.Equal(t, "# top-level header line\n", string(doc[0].Raw()))
	assert.Equal(t, "top-level header line", doc[0].(gemtext.HeadingLine).Body())

	assertEmptyLine(t, doc[1])

	assert.Equal(t, gemtext.LineTypeHeading2, doc[2].Type())
	assert.Equal(t, "## subtitle\n", string(doc[2].Raw()))
	assert.Equal(t, "subtitle", doc[2].(gemtext.HeadingLine).Body())

	assertEmptyLine(t, doc[3])

	assert.Equal(t, gemtext.LineTypeText, doc[4].Type())
	assert.Equal(t, "This is some non-blank regular text.\n", string(doc[4].Raw()))

	assertEmptyLine(t, doc[5])

	assert.Equal(t, gemtext.LineTypeListItem, doc[6].Type())
	assert.Equal(t, "an", doc[6].(gemtext.ListItemLine).Body())

	assert.Equal(t, gemtext.LineTypeListItem, doc[7].Type())
	assert.Equal(t, "unordered", doc[7].(gemtext.ListItemLine).Body())

	assert.Equal(t, gemtext.LineTypeListItem, doc[8].Type())
	assert.Equal(t, "list", doc[8].(gemtext.ListItemLine).Body())

	assertEmptyLine(t, doc[9])

	assert.Equal(t, gemtext.LineTypeLink, doc[10].Type())
	assert.Equal(t, "=> gemini://google.com/ as if\n", string(doc[10].Raw()))
	assert.Equal(t, "gemini://google.com/", doc[10].(gemtext.LinkLine).URL())
	assert.Equal(t, "as if", doc[10].(gemtext.LinkLine).Label())

	assertEmptyLine(t, doc[11])

	// Quote bodies keep the whitespace that follows the '>' marker.
	assert.Equal(t, gemtext.LineTypeQuote, doc[12].Type())
	assert.Equal(t, "> this is a quote\n", string(doc[12].Raw()))
	assert.Equal(t, " this is a quote", doc[12].(gemtext.QuoteLine).Body())

	assert.Equal(t, gemtext.LineTypeQuote, doc[13].Type())
	assert.Equal(t, "> -tjp\n", string(doc[13].Raw()))
	assert.Equal(t, " -tjp", doc[13].(gemtext.QuoteLine).Body())

	assertEmptyLine(t, doc[14])

	assert.Equal(t, gemtext.LineTypePreformatToggle, doc[15].Type())
	assert.Equal(t, "```pre-formatted code\n", string(doc[15].Raw()))
	assert.Equal(t, "pre-formatted code", doc[15].(gemtext.PreformatToggleLine).AltText())

	assert.Equal(t, gemtext.LineTypePreformattedText, doc[16].Type())
	assert.Equal(t, "doc := gemtext.Parse(req.Body)\n", string(doc[16].Raw()))

	// Alt-text on a closing toggle stays in Raw() but is cleared from AltText().
	assert.Equal(t, gemtext.LineTypePreformatToggle, doc[17].Type())
	assert.Equal(t, "```ignored closing alt-text\n", string(doc[17].Raw()))
	assert.Equal(t, "", doc[17].(gemtext.PreformatToggleLine).AltText())

	// ensure we can rebuild the original doc from all the line.Raw()s
	buf := &bytes.Buffer{}
	for _, line := range doc {
		_, _ = buf.Write(line.Raw())
	}
	assert.Equal(t, string(docBytes), buf.String())
}

View File

@ -91,39 +91,39 @@ func (tl TextLine) Raw() []byte { return tl.raw }
// LinkLine is a line of LineTypeLink.
type LinkLine struct {
raw []byte
// URL is the original bytes of the url portion of the line.
//
// It is not guaranteed to be a valid URL.
URL []byte
// Label is the label portion of the line.
//
// If there was no label it will always be nil, never []byte{}.
Label []byte
url []byte
label []byte
}
// Type always returns LineTypeLink.
func (ll LinkLine) Type() LineType { return LineTypeLink }
// Raw returns the raw bytes stored for this line.
func (ll LinkLine) Raw() []byte { return ll.raw }
// URL returns the original url portion of the line.
//
// It is not guaranteed to be a valid URL.
func (ll LinkLine) URL() string { return string(ll.url) }
// Label returns the label portion of the line.
//
// It is the empty string when the stored label is nil.
func (ll LinkLine) Label() string { return string(ll.label) }
// PreformatToggleLine is a preformatted text toggle line.
type PreformatToggleLine struct {
raw []byte
// AltText contains the alt-text portion of the line.
//
// It will either have len() > 0 or be nil.
//
// If the line was parsed as part of a full document by Parse(),
// and this is a *closing* toggle, any alt-text present will be
// stripped and this will be nil. If the line was parsed by
// ParseLine() no such correction is performed.
AltText []byte
altText []byte
}
// Type always returns LineTypePreformatToggle.
func (tl PreformatToggleLine) Type() LineType { return LineTypePreformatToggle }
// Raw returns the raw bytes stored for this line.
func (tl PreformatToggleLine) Raw() []byte { return tl.raw }
func (tl *PreformatToggleLine) clearAlt() { tl.AltText = nil }
// AltText returns the alt-text portion of the line.
//
// If the line was parsed as part of a full document by Parse(),
// and this is a *closing* toggle, any alt-text present will be
// stripped and this will be empty. If the line was parsed by
// ParseLine() no such correction is performed.
func (tl PreformatToggleLine) AltText() string { return string(tl.altText) }
// clearAlt discards the stored alt-text by setting it to nil; Raw() is left
// untouched. It needs a pointer receiver because it mutates the line.
func (tl *PreformatToggleLine) clearAlt() { tl.altText = nil }
// PreformattedTextLine represents a line between two toggles.
//
@ -140,32 +140,35 @@ func (tl PreformattedTextLine) Raw() []byte { return tl.raw }
type HeadingLine struct {
raw []byte
lineType LineType
// Body is the portion of the line with the header text.
Body []byte
body []byte
}
// Type returns the line type stored in the heading, which varies with the
// heading level.
func (hl HeadingLine) Type() LineType { return hl.lineType }
// Raw returns the raw bytes stored for this line.
func (hl HeadingLine) Raw() []byte { return hl.raw }
// Body returns the portion of the line with the header text.
func (hl HeadingLine) Body() string { return string(hl.body) }
// ListItemLine is a line of LineTypeListItem.
type ListItemLine struct {
raw []byte
// Body is the text of the list item.
Body []byte
body []byte
}
// Type always returns LineTypeListItem.
func (li ListItemLine) Type() LineType { return LineTypeListItem }
// Raw returns the raw bytes stored for this line.
func (li ListItemLine) Raw() []byte { return li.raw }
// Body returns the text of the list item.
func (li ListItemLine) Body() string { return string(li.body) }
// QuoteLine is a line of LineTypeQuote.
type QuoteLine struct {
raw []byte
// Body is the text of the quote.
Body []byte
raw []byte
body []byte
}
// Type always returns LineTypeQuote.
func (ql QuoteLine) Type() LineType { return LineTypeQuote }
// Raw returns the raw bytes stored for this line.
func (ql QuoteLine) Raw() []byte { return ql.raw }
// Body returns the text of the quote.
func (ql QuoteLine) Body() string { return string(ql.body) }

12
go.mod
View File

@ -2,6 +2,14 @@ module tildegit.org/tjp/gus
go 1.19
require github.com/go-kit/log v0.2.1
require (
github.com/go-kit/log v0.2.1
github.com/stretchr/testify v1.8.1
)
require github.com/go-logfmt/logfmt v0.5.1 // indirect
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-logfmt/logfmt v0.5.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

17
go.sum
View File

@ -1,4 +1,21 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU=
github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA=
github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=