Initial gemtext package.
continuous-integration/drone/push Build is passing Details

Contains:
- gemtext AST (Document and line types)
- Parse from an io.Reader
- ParseLine a []byte
- doc comments on everything
- ParseLine tests for every line type

Still needs tests for Parse & Document.
This commit is contained in:
tjpcc 2023-01-13 10:50:30 -07:00
parent 13f553c965
commit aa23984bc2
3 changed files with 596 additions and 0 deletions

154
gemtext/parse.go Normal file
View File

@ -0,0 +1,154 @@
package gemtext
import (
"bufio"
"bytes"
"io"
)
// Parse parses the full contents of an io.Reader into a gemtext.Document.
//
// The input is consumed one line at a time until EOF. While inside a
// preformatted block (after an opening toggle line and before the closing
// one) every line that does not start with "```" is recorded verbatim as a
// PreformattedTextLine; all other lines are classified by ParseLine. Alt-text
// found on a closing toggle line is cleared.
//
// Any read error other than io.EOF aborts parsing and is returned together
// with a nil Document.
func Parse(input io.Reader) (Document, error) {
	rdr := bufio.NewReader(input)

	var lines []Line
	inPFT := false

	for {
		raw, err := rdr.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}

		// At EOF ReadBytes hands back an empty slice when the input is empty
		// or ends with a newline. There is no line to record then, and
		// ParseLine would return a nil Line that must not be dereferenced.
		if len(raw) > 0 {
			var line Line
			if inPFT && !bytes.HasPrefix(raw, []byte("```")) {
				line = PreformattedTextLine{raw: raw}
			} else {
				line = ParseLine(raw)
			}

			if toggle, ok := line.(PreformatToggleLine); ok {
				if inPFT {
					// This toggle closes the block: drop its alt-text.
					toggle.clearAlt()
					line = toggle
				}
				inPFT = !inPFT
			}

			lines = append(lines, line)
		}

		if err == io.EOF {
			break
		}
	}

	return Document(lines), nil
}
// ParseLine parses a single line (including the trailing \n) into a gemtext.Line.
//
// Empty input yields a nil Line. A line that matches none of the link,
// preformat toggle, heading, list item or quote prefixes is returned as a
// TextLine.
func ParseLine(line []byte) Line {
	if len(line) == 0 {
		return nil
	}

	switch {
	case bytes.HasPrefix(line, []byte("=>")):
		return parseLinkLine(line)

	case bytes.HasPrefix(line, []byte("```")):
		return parsePreformatToggleLine(line)

	case line[0] == '#':
		// Count the leading '#' marks, capped at three; any further '#'
		// is left in place as part of the heading body.
		level := 1
		for level < 3 && level < len(line) && line[level] == '#' {
			level++
		}
		return parseHeadingLine(level, line)

	case bytes.HasPrefix(line, []byte("* ")):
		return parseListItemLine(line)

	case line[0] == '>':
		return parseQuoteLine(line)
	}

	return TextLine{raw: line}
}
// parseLinkLine builds a LinkLine from a line that starts with "=>".
//
// The URL is the first run of bytes after the marker (and any spaces or tabs
// following it) up to the next space or tab. Whatever follows, with leading
// spaces/tabs and the trailing line ending removed, becomes the label. Label
// is left nil when that remainder is empty.
func parseLinkLine(raw []byte) LinkLine {
	link := LinkLine{raw: raw}

	// Skip the "=>" marker and the optional whitespace before the URL.
	rest := bytes.TrimLeft(raw[2:], " \t")

	// The URL ends at the first space or tab, whichever comes first.
	end := bytes.IndexAny(rest, " \t")
	if end == -1 {
		// No separator: everything up to the line ending is the URL.
		link.URL = bytes.TrimRight(rest, "\r\n")
		return link
	}
	link.URL = rest[:end]

	label := bytes.TrimLeft(rest[end+1:], " \t")
	label = bytes.TrimRight(label, "\r\n")
	if len(label) != 0 {
		link.Label = label
	}

	return link
}
// parsePreformatToggleLine builds a PreformatToggleLine from a line that
// starts with "```".
//
// Everything after the three backticks, minus the trailing line ending, is
// the alt-text. AltText stays nil when nothing is left.
func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
	toggle := PreformatToggleLine{raw: raw}

	if alt := bytes.TrimRight(raw[3:], "\r\n"); len(alt) != 0 {
		toggle.AltText = alt
	}

	return toggle
}
// parseHeadingLine builds a HeadingLine of the given level (1, 2 or 3).
//
// raw must begin with at least level bytes of heading marker. The body is
// what follows the marker, with leading spaces/tabs and the trailing line
// ending removed.
func parseHeadingLine(level int, raw []byte) HeadingLine {
	body := bytes.TrimLeft(raw[level:], " \t")
	body = bytes.TrimRight(body, "\r\n")

	return HeadingLine{
		raw: raw,
		// The three heading types are consecutive, starting at Heading1.
		lineType: LineTypeHeading1 + LineType(level-1),
		Body:     body,
	}
}
// parseListItemLine builds a ListItemLine from a line that starts with "* ".
//
// The body is everything after the two-byte marker, minus the trailing line
// ending; additional leading whitespace is kept.
func parseListItemLine(raw []byte) ListItemLine {
	item := ListItemLine{raw: raw}
	item.Body = bytes.TrimRight(raw[2:], "\r\n")
	return item
}
// parseQuoteLine builds a QuoteLine from a line that starts with ">".
//
// The body is everything after the marker, minus the trailing line ending;
// leading whitespace is kept.
func parseQuoteLine(raw []byte) QuoteLine {
	quote := QuoteLine{raw: raw}
	quote.Body = bytes.TrimRight(raw[1:], "\r\n")
	return quote
}

271
gemtext/parse_line_test.go Normal file
View File

@ -0,0 +1,271 @@
package gemtext_test
import (
"testing"
"tildegit.org/tjp/gus/gemtext"
)
// TestParseLinkLine checks that ParseLine recognizes link lines and splits
// them into a URL and an optional label.
func TestParseLinkLine(t *testing.T) {
	type linkCase struct {
		input string
		url   string
		label string
	}

	cases := []linkCase{
		{
			input: "=> gemini.ctrl-c.club/~tjp/ home page\r\n",
			url:   "gemini.ctrl-c.club/~tjp/",
			label: "home page",
		},
		{
			input: "=> gemi.dev/\n",
			url:   "gemi.dev/",
		},
		{
			input: "=> /gemlog/foobar 2023-01-13 - Foo Bar\n",
			url:   "/gemlog/foobar",
			label: "2023-01-13 - Foo Bar",
		},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			parsed := gemtext.ParseLine([]byte(tc.input))
			if parsed == nil {
				t.Fatal("ParseLine() returned nil line")
			}

			if got := string(parsed.Raw()); got != tc.input {
				t.Error("Raw() does not match input")
			}

			if got := parsed.Type(); got != gemtext.LineTypeLink {
				t.Errorf("expected LineTypeLink, got %d", got)
			}

			link, isLink := parsed.(gemtext.LinkLine)
			if !isLink {
				t.Fatalf("expected a LinkLine, got %T", parsed)
			}

			if got := string(link.URL); got != tc.url {
				t.Errorf("expected url %q, got %q", tc.url, got)
			}

			if got := string(link.Label); got != tc.label {
				t.Errorf("expected label %q, got %q", tc.label, got)
			}
		})
	}
}
// TestParsePreformatToggleLine checks that ParseLine recognizes "```" lines
// and captures their alt-text, including any leading whitespace.
func TestParsePreformatToggleLine(t *testing.T) {
	type toggleCase struct {
		input   string
		altText string
	}

	cases := []toggleCase{
		{input: "```\n"},
		{
			input:   "```some alt-text\r\n",
			altText: "some alt-text",
		},
		{
			input:   "``` leading space preserved\n",
			altText: " leading space preserved",
		},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			parsed := gemtext.ParseLine([]byte(tc.input))
			if parsed == nil {
				t.Fatal("ParseLine() returned nil line")
			}

			if got := string(parsed.Raw()); got != tc.input {
				t.Error("Raw() does not match input")
			}

			if got := parsed.Type(); got != gemtext.LineTypePreformatToggle {
				t.Errorf("expected LineTypePreformatToggle, got %d", got)
			}

			toggle, isToggle := parsed.(gemtext.PreformatToggleLine)
			if !isToggle {
				t.Fatalf("expected a PreformatToggleLine, got %T", parsed)
			}

			if got := string(toggle.AltText); got != tc.altText {
				t.Errorf("expected alt-text %q, got %q", tc.altText, got)
			}
		})
	}
}
// TestParseHeadingLine checks heading detection for levels 1 through 3,
// including that a fourth '#' is kept as part of the body.
func TestParseHeadingLine(t *testing.T) {
	type headingCase struct {
		input    string
		lineType gemtext.LineType
		body     string
	}

	cases := []headingCase{
		{
			input:    "# this is an H1\n",
			lineType: gemtext.LineTypeHeading1,
			body:     "this is an H1",
		},
		{
			input:    "## extra leading spaces\r\n",
			lineType: gemtext.LineTypeHeading2,
			body:     "extra leading spaces",
		},
		{
			input:    "##no leading space\n",
			lineType: gemtext.LineTypeHeading2,
			body:     "no leading space",
		},
		{
			input:    "#### there is no h4\n",
			lineType: gemtext.LineTypeHeading3,
			body:     "# there is no h4",
		},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			parsed := gemtext.ParseLine([]byte(tc.input))
			if parsed == nil {
				t.Fatal("ParseLine() returned nil")
			}

			if got := parsed.Type(); got != tc.lineType {
				t.Errorf("expected line type %d, got %d", tc.lineType, got)
			}

			if got := string(parsed.Raw()); got != tc.input {
				t.Error("line.Raw() does not match input")
			}

			heading, isHeading := parsed.(gemtext.HeadingLine)
			if !isHeading {
				t.Fatalf("expected HeadingLine, got a %T", parsed)
			}

			if got := string(heading.Body); got != tc.body {
				t.Errorf("expected body %q, got %q", tc.body, got)
			}
		})
	}
}
// TestParseListItemLine checks that ParseLine recognizes "* " list items and
// that only the two-byte marker is stripped from the body.
func TestParseListItemLine(t *testing.T) {
	tests := []struct {
		input string
		body  string
	}{
		{
			input: "* this is a list item\r\n",
			body:  "this is a list item",
		},
		{
			// Two spaces after '*': the first belongs to the marker, the
			// second must survive as the leading byte of the body.
			input: "*  more leading spaces\n",
			body:  " more leading spaces",
		},
	}

	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			line := gemtext.ParseLine([]byte(test.input))
			if line == nil {
				t.Fatal("ParseLine() returned nil")
			}

			if line.Type() != gemtext.LineTypeListItem {
				t.Errorf("expected LineTypeListItem, got %d", line.Type())
			}

			if string(line.Raw()) != test.input {
				t.Error("line.Raw() does not match input")
			}

			li, ok := line.(gemtext.ListItemLine)
			if !ok {
				t.Fatalf("expected ListItemLine, got a %T", line)
			}

			if string(li.Body) != test.body {
				t.Errorf("expected body %q, got %q", test.body, string(li.Body))
			}
		})
	}
}
// TestParseQuoteLine checks that ParseLine recognizes ">" quote lines and
// keeps whitespace that follows the marker.
func TestParseQuoteLine(t *testing.T) {
	type quoteCase struct {
		input string
		body  string
	}

	cases := []quoteCase{
		{
			input: ">a quote line\r\n",
			body:  "a quote line",
		},
		{
			input: "> with a leading space\n",
			body:  " with a leading space",
		},
		{
			input: "> more leading spaces\n",
			body:  " more leading spaces",
		},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			parsed := gemtext.ParseLine([]byte(tc.input))
			if parsed == nil {
				t.Fatal("ParseLine() returned nil")
			}

			if got := parsed.Type(); got != gemtext.LineTypeQuote {
				t.Errorf("expected LineTypeQuote, got %d", got)
			}

			if got := string(parsed.Raw()); got != tc.input {
				t.Error("line.Raw() does not match input")
			}

			quote, isQuote := parsed.(gemtext.QuoteLine)
			if !isQuote {
				t.Fatalf("expected QuoteLine , got a %T", parsed)
			}

			if got := string(quote.Body); got != tc.body {
				t.Errorf("expected body %q, got %q", tc.body, got)
			}
		})
	}
}
// TestParseTextLine checks that lines matching no special prefix, including
// near-miss list items, come back as plain text lines.
func TestParseTextLine(t *testing.T) {
	inputs := []string{
		"\n",
		"simple text line\r\n",
		" * an invalid list item\n",
		"*another invalid list item\r\n",
	}

	for _, input := range inputs {
		t.Run(input, func(t *testing.T) {
			parsed := gemtext.ParseLine([]byte(input))
			if parsed == nil {
				t.Fatal("ParseLine() returned nil")
			}

			if got := parsed.Type(); got != gemtext.LineTypeText {
				t.Errorf("expected LineTypeText, got %d", got)
			}

			if got := string(parsed.Raw()); got != input {
				t.Error("line.Raw() does not match input")
			}

			if _, isText := parsed.(gemtext.TextLine); !isText {
				t.Fatalf("expected TextLine , got a %T", parsed)
			}
		})
	}
}

171
gemtext/types.go Normal file
View File

@ -0,0 +1,171 @@
package gemtext
// LineType represents the different types of lines in a gemtext document.
//
// The defined values start at 1, so the zero value of LineType does not
// correspond to any line type.
type LineType int
const (
// LineTypeText is the default case when nothing else matches.
//
// It indicates that the line object is a TextLine.
LineTypeText LineType = iota + 1
// LineTypeLink is a link line.
//
// =>[<ws>]<url>[<ws><label>][\r]\n
//
// The line object is a LinkLine.
LineTypeLink
// LineTypePreformatToggle switches the document into or out of
// pre-formatted text.
//
// ```[<alt-text>][\r]\n
//
// The line object is a PreformatToggleLine.
LineTypePreformatToggle
// LineTypePreformattedText is any line between two PreformatToggles.
//
// The line object is a PreformattedTextLine.
LineTypePreformattedText
// LineTypeHeading1 is a top-level heading.
//
// #[<ws>]<body>[\r]\n
//
// The line object is a HeadingLine.
LineTypeHeading1
// LineTypeHeading2 is a second-level heading.
//
// ##[<ws>]<body>[\r]\n
//
// The line object is a HeadingLine.
LineTypeHeading2
// LineTypeHeading3 is a third-level heading.
//
// ###[<ws>]<body>[\r]\n
//
// The line object is a HeadingLine.
LineTypeHeading3
// LineTypeListItem is an unordered list item.
//
// * <body>[\r]\n
//
// The line object is a ListItemLine.
LineTypeListItem
// LineTypeQuote is a quote line.
//
// ><body>[\r]\n
//
// The line object is a QuoteLine.
LineTypeQuote
)
// Line is the interface implemented by all specific line types.
//
// Many of the concrete implementation types have additional useful fields,
// so it can be a good idea to type-assert a Line to its concrete type, chosen
// by the return value of the Type() method.
type Line interface {
// Type returns the specific type of the gemtext line.
Type() LineType
// Raw reproduces the original bytes from the source reader.
Raw() []byte
}
// Document is the list of lines that make up a full text/gemini resource,
// held as a slice of Line values.
type Document []Line
// TextLine is a line of LineTypeText.
type TextLine struct {
	// raw holds the bytes of the line as handed to the parser.
	raw []byte
}

// Type returns LineTypeText.
func (t TextLine) Type() LineType {
	return LineTypeText
}

// Raw returns the stored bytes of the line.
func (t TextLine) Raw() []byte {
	return t.raw
}
// LinkLine is a line of LineTypeLink.
type LinkLine struct {
	// raw holds the bytes of the line as handed to the parser.
	raw []byte

	// URL is the original bytes of the url portion of the line.
	//
	// It is not guaranteed to be a valid URL.
	URL []byte

	// Label is the label portion of the line.
	//
	// If there was no label it will always be nil, never []byte{}.
	Label []byte
}

// Type returns LineTypeLink.
func (l LinkLine) Type() LineType {
	return LineTypeLink
}

// Raw returns the stored bytes of the line.
func (l LinkLine) Raw() []byte {
	return l.raw
}
// PreformatToggleLine is a preformatted text toggle line.
type PreformatToggleLine struct {
	// raw holds the bytes of the line as handed to the parser.
	raw []byte

	// AltText contains the alt-text portion of the line.
	//
	// It will either have len() > 0 or be nil.
	//
	// If the line was parsed as part of a full document by Parse(),
	// and this is a *closing* toggle, any alt-text present will be
	// stripped and this will be nil. If the line was parsed by
	// ParseLine() no such correction is performed.
	AltText []byte
}

// Type returns LineTypePreformatToggle.
func (pt PreformatToggleLine) Type() LineType {
	return LineTypePreformatToggle
}

// Raw returns the stored bytes of the line.
func (pt PreformatToggleLine) Raw() []byte {
	return pt.raw
}

// clearAlt resets AltText to nil. It needs a pointer receiver because it
// modifies the line in place.
func (pt *PreformatToggleLine) clearAlt() {
	pt.AltText = nil
}
// PreformattedTextLine represents a line between two toggles.
//
// It is never returned by ParseLine but can be part of a
// document parsed by Parse().
type PreformattedTextLine struct {
	// raw holds the bytes of the line as handed to the parser.
	raw []byte
}

// Type returns LineTypePreformattedText.
func (pl PreformattedTextLine) Type() LineType {
	return LineTypePreformattedText
}

// Raw returns the stored bytes of the line.
func (pl PreformattedTextLine) Raw() []byte {
	return pl.raw
}
// HeadingLine is a line of LineTypeHeading[1,2,3].
type HeadingLine struct {
	// raw holds the bytes of the line as handed to the parser.
	raw []byte

	// lineType records which heading type this line reports from Type().
	lineType LineType

	// Body is the portion of the line with the header text.
	Body []byte
}

// Type returns the heading type stored on the line.
func (h HeadingLine) Type() LineType {
	return h.lineType
}

// Raw returns the stored bytes of the line.
func (h HeadingLine) Raw() []byte {
	return h.raw
}
// ListItemLine is a line of LineTypeListItem.
type ListItemLine struct {
	// raw holds the bytes of the line as handed to the parser.
	raw []byte

	// Body is the text of the list item.
	Body []byte
}

// Type returns LineTypeListItem.
func (item ListItemLine) Type() LineType {
	return LineTypeListItem
}

// Raw returns the stored bytes of the line.
func (item ListItemLine) Raw() []byte {
	return item.raw
}
// QuoteLine is a line of LineTypeQuote.
type QuoteLine struct {
	// raw holds the bytes of the line as handed to the parser.
	raw []byte

	// Body is the text of the quote.
	Body []byte
}

// Type returns LineTypeQuote.
func (q QuoteLine) Type() LineType {
	return LineTypeQuote
}

// Raw returns the stored bytes of the line.
func (q QuoteLine) Raw() []byte {
	return q.raw
}