Initial gemtext package.
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
Contains: gemtext AST (Document and line types); Parse from an io.Reader; ParseLine from a []byte; doc comments on everything; ParseLine tests for every line type. Still needs tests for Parse & Document.
This commit is contained in:
parent
13f553c965
commit
aa23984bc2
|
@ -0,0 +1,154 @@
|
|||
package gemtext
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Parse parses the full contents of an io.Reader into a gemtext.Document.
|
||||
func Parse(input io.Reader) (Document, error) {
|
||||
rdr := bufio.NewReader(input)
|
||||
|
||||
var lines []Line
|
||||
inPFT := false
|
||||
|
||||
for {
|
||||
raw, err := rdr.ReadBytes('\n')
|
||||
if err != io.EOF && err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var line Line
|
||||
|
||||
if inPFT && (len(raw) < 3 || raw[0] != '`' || raw[1] != '`' || raw[2] != '`') {
|
||||
line = PreformattedTextLine{raw: raw}
|
||||
} else {
|
||||
line = ParseLine(raw)
|
||||
}
|
||||
|
||||
if line.Type() == LineTypePreformatToggle {
|
||||
if inPFT {
|
||||
toggle := line.(PreformatToggleLine)
|
||||
(&toggle).clearAlt()
|
||||
line = toggle
|
||||
}
|
||||
|
||||
inPFT = !inPFT
|
||||
}
|
||||
|
||||
if line != nil {
|
||||
lines = append(lines, line)
|
||||
}
|
||||
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return Document(lines), nil
|
||||
}
|
||||
|
||||
// ParseLine parses a single line (including the trailing \n) into a gemtext.Line.
|
||||
func ParseLine(line []byte) Line {
|
||||
if len(line) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch line[0] {
|
||||
case '=':
|
||||
if len(line) == 1 || line[1] != '>' {
|
||||
break
|
||||
}
|
||||
return parseLinkLine(line)
|
||||
case '`':
|
||||
if len(line) < 3 || line[1] != '`' || line[2] != '`' {
|
||||
break
|
||||
}
|
||||
return parsePreformatToggleLine(line)
|
||||
case '#':
|
||||
level := 1
|
||||
if len(line) > 1 && line[1] == '#' {
|
||||
level += 1
|
||||
if len(line) > 2 && line[2] == '#' {
|
||||
level += 1
|
||||
}
|
||||
}
|
||||
return parseHeadingLine(level, line)
|
||||
case '*':
|
||||
if len(line) == 1 || line[1] != ' ' {
|
||||
break
|
||||
}
|
||||
return parseListItemLine(line)
|
||||
case '>':
|
||||
return parseQuoteLine(line)
|
||||
}
|
||||
|
||||
return TextLine{raw: line}
|
||||
}
|
||||
|
||||
func parseLinkLine(raw []byte) LinkLine {
|
||||
line := LinkLine{raw: raw}
|
||||
|
||||
// move past =>[<whitespace>]
|
||||
raw = bytes.TrimLeft(raw[2:], " \t")
|
||||
|
||||
// find the next space or tab
|
||||
spIdx := bytes.IndexByte(raw, ' ')
|
||||
tbIdx := bytes.IndexByte(raw, '\t')
|
||||
idx := spIdx
|
||||
if idx == -1 {
|
||||
idx = tbIdx
|
||||
}
|
||||
if tbIdx >= 0 && tbIdx < idx {
|
||||
idx = tbIdx
|
||||
}
|
||||
|
||||
if idx < 0 {
|
||||
line.URL = bytes.TrimRight(raw, "\r\n")
|
||||
return line
|
||||
}
|
||||
|
||||
line.URL = raw[:idx]
|
||||
raw = raw[idx+1:]
|
||||
|
||||
label := bytes.TrimRight(bytes.TrimLeft(raw, " \t"), "\r\n")
|
||||
if len(label) > 0 {
|
||||
line.Label = label
|
||||
}
|
||||
|
||||
return line
|
||||
}
|
||||
|
||||
func parsePreformatToggleLine(raw []byte) PreformatToggleLine {
|
||||
line := PreformatToggleLine{raw: raw}
|
||||
|
||||
raw = bytes.TrimRight(raw[3:], "\r\n")
|
||||
if len(raw) > 0 {
|
||||
line.AltText = raw
|
||||
}
|
||||
|
||||
return line
|
||||
}
|
||||
|
||||
func parseHeadingLine(level int, raw []byte) HeadingLine {
|
||||
return HeadingLine{
|
||||
raw: raw,
|
||||
lineType: LineTypeHeading1 - 1 + LineType(level),
|
||||
Body: bytes.TrimRight(bytes.TrimLeft(raw[level:], " \t"), "\r\n"),
|
||||
}
|
||||
}
|
||||
|
||||
func parseListItemLine(raw []byte) ListItemLine {
|
||||
return ListItemLine{
|
||||
raw: raw,
|
||||
Body: bytes.TrimRight(raw[2:], "\r\n"),
|
||||
}
|
||||
}
|
||||
|
||||
func parseQuoteLine(raw []byte) QuoteLine {
|
||||
return QuoteLine{
|
||||
raw: raw,
|
||||
Body: bytes.TrimRight(raw[1:], "\r\n"),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,271 @@
|
|||
package gemtext_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"tildegit.org/tjp/gus/gemtext"
|
||||
)
|
||||
|
||||
func TestParseLinkLine(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
url string
|
||||
label string
|
||||
}{
|
||||
{
|
||||
input: "=> gemini.ctrl-c.club/~tjp/ home page\r\n",
|
||||
url: "gemini.ctrl-c.club/~tjp/",
|
||||
label: "home page",
|
||||
},
|
||||
{
|
||||
input: "=> gemi.dev/\n",
|
||||
url: "gemi.dev/",
|
||||
},
|
||||
{
|
||||
input: "=> /gemlog/foobar 2023-01-13 - Foo Bar\n",
|
||||
url: "/gemlog/foobar",
|
||||
label: "2023-01-13 - Foo Bar",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
line := gemtext.ParseLine([]byte(test.input))
|
||||
if line == nil {
|
||||
t.Fatal("ParseLine() returned nil line")
|
||||
}
|
||||
if string(line.Raw()) != string(test.input) {
|
||||
t.Error("Raw() does not match input")
|
||||
}
|
||||
|
||||
if line.Type() != gemtext.LineTypeLink {
|
||||
t.Errorf("expected LineTypeLink, got %d", line.Type())
|
||||
}
|
||||
link, ok := line.(gemtext.LinkLine)
|
||||
if !ok {
|
||||
t.Fatalf("expected a LinkLine, got %T", line)
|
||||
}
|
||||
|
||||
if string(link.URL) != test.url {
|
||||
t.Errorf("expected url %q, got %q", test.url, string(link.URL))
|
||||
}
|
||||
|
||||
if string(link.Label) != test.label {
|
||||
t.Errorf("expected label %q, got %q", test.label, string(link.Label))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePreformatToggleLine(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
altText string
|
||||
}{
|
||||
{
|
||||
input: "```\n",
|
||||
},
|
||||
{
|
||||
input: "```some alt-text\r\n",
|
||||
altText: "some alt-text",
|
||||
},
|
||||
{
|
||||
input: "``` leading space preserved\n",
|
||||
altText: " leading space preserved",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
line := gemtext.ParseLine([]byte(test.input))
|
||||
if line == nil {
|
||||
t.Fatal("ParseLine() returned nil line")
|
||||
}
|
||||
if string(line.Raw()) != string(test.input) {
|
||||
t.Error("Raw() does not match input")
|
||||
}
|
||||
|
||||
if line.Type() != gemtext.LineTypePreformatToggle {
|
||||
t.Errorf("expected LineTypePreformatToggle, got %d", line.Type())
|
||||
}
|
||||
toggle, ok := line.(gemtext.PreformatToggleLine)
|
||||
if !ok {
|
||||
t.Fatalf("expected a PreformatToggleLine, got %T", line)
|
||||
}
|
||||
|
||||
if string(toggle.AltText) != test.altText {
|
||||
t.Errorf("expected alt-text %q, got %q", test.altText, string(toggle.AltText))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHeadingLine(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
lineType gemtext.LineType
|
||||
body string
|
||||
}{
|
||||
{
|
||||
input: "# this is an H1\n",
|
||||
lineType: gemtext.LineTypeHeading1,
|
||||
body: "this is an H1",
|
||||
},
|
||||
{
|
||||
input: "## extra leading spaces\r\n",
|
||||
lineType: gemtext.LineTypeHeading2,
|
||||
body: "extra leading spaces",
|
||||
},
|
||||
{
|
||||
input: "##no leading space\n",
|
||||
lineType: gemtext.LineTypeHeading2,
|
||||
body: "no leading space",
|
||||
},
|
||||
{
|
||||
input: "#### there is no h4\n",
|
||||
lineType: gemtext.LineTypeHeading3,
|
||||
body: "# there is no h4",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
line := gemtext.ParseLine([]byte(test.input))
|
||||
if line == nil {
|
||||
t.Fatal("ParseLine() returned nil")
|
||||
}
|
||||
|
||||
if line.Type() != test.lineType {
|
||||
t.Errorf("expected line type %d, got %d", test.lineType, line.Type())
|
||||
}
|
||||
if string(line.Raw()) != test.input {
|
||||
t.Error("line.Raw() does not match input")
|
||||
}
|
||||
|
||||
hdg, ok := line.(gemtext.HeadingLine)
|
||||
if !ok {
|
||||
t.Fatalf("expected HeadingLine, got a %T", line)
|
||||
}
|
||||
|
||||
if string(hdg.Body) != test.body {
|
||||
t.Errorf("expected body %q, got %q", test.body, string(hdg.Body))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseListItemLine(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
body string
|
||||
}{
|
||||
{
|
||||
input: "* this is a list item\r\n",
|
||||
body: "this is a list item",
|
||||
},
|
||||
{
|
||||
input: "* more leading spaces\n",
|
||||
body: " more leading spaces",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
line := gemtext.ParseLine([]byte(test.input))
|
||||
if line == nil {
|
||||
t.Fatal("ParseLine() returned nil")
|
||||
}
|
||||
|
||||
if line.Type() != gemtext.LineTypeListItem {
|
||||
t.Errorf("expected LineTypeListItem, got %d", line.Type())
|
||||
}
|
||||
if string(line.Raw()) != test.input {
|
||||
t.Error("line.Raw() does not match input")
|
||||
}
|
||||
|
||||
li, ok := line.(gemtext.ListItemLine)
|
||||
if !ok {
|
||||
t.Fatalf("expected ListItemLine, got a %T", line)
|
||||
}
|
||||
|
||||
if string(li.Body) != test.body {
|
||||
t.Errorf("expected body %q, got %q", test.body, string(li.Body))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseQuoteLine(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
body string
|
||||
}{
|
||||
{
|
||||
input: ">a quote line\r\n",
|
||||
body: "a quote line",
|
||||
},
|
||||
{
|
||||
input: "> with a leading space\n",
|
||||
body: " with a leading space",
|
||||
},
|
||||
{
|
||||
input: "> more leading spaces\n",
|
||||
body: " more leading spaces",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
line := gemtext.ParseLine([]byte(test.input))
|
||||
if line == nil {
|
||||
t.Fatal("ParseLine() returned nil")
|
||||
}
|
||||
|
||||
if line.Type() != gemtext.LineTypeQuote {
|
||||
t.Errorf("expected LineTypeQuote, got %d", line.Type())
|
||||
}
|
||||
if string(line.Raw()) != test.input {
|
||||
t.Error("line.Raw() does not match input")
|
||||
}
|
||||
|
||||
qu, ok := line.(gemtext.QuoteLine)
|
||||
if !ok {
|
||||
t.Fatalf("expected QuoteLine , got a %T", line)
|
||||
}
|
||||
|
||||
if string(qu.Body) != test.body {
|
||||
t.Errorf("expected body %q, got %q", test.body, string(qu.Body))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTextLine(t *testing.T) {
|
||||
tests := []string {
|
||||
"\n",
|
||||
"simple text line\r\n",
|
||||
" * an invalid list item\n",
|
||||
"*another invalid list item\r\n",
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test, func(t *testing.T) {
|
||||
line := gemtext.ParseLine([]byte(test))
|
||||
if line == nil {
|
||||
t.Fatal("ParseLine() returned nil")
|
||||
}
|
||||
|
||||
if line.Type() != gemtext.LineTypeText {
|
||||
t.Errorf("expected LineTypeText, got %d", line.Type())
|
||||
}
|
||||
if string(line.Raw()) != test {
|
||||
t.Error("line.Raw() does not match input")
|
||||
}
|
||||
|
||||
_, ok := line.(gemtext.TextLine)
|
||||
if !ok {
|
||||
t.Fatalf("expected TextLine , got a %T", line)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
package gemtext
|
||||
|
||||
// LineType represents the different types of lines in a gemtext document.
//
// The constants below start at 1, so the zero value of LineType does not
// correspond to any of them.
type LineType int

const (
	// LineTypeText is the default case when nothing else matches.
	//
	// It indicates that the line object is a TextLine.
	LineTypeText LineType = iota + 1

	// LineTypeLink is a link line.
	//
	//     =>[<ws>]<url>[<ws><label>][\r]\n
	//
	// The line object is a LinkLine.
	LineTypeLink

	// LineTypePreformatToggle switches the document into or out of
	// pre-formatted text mode.
	//
	//     ```[<alt-text>][\r]\n
	//
	// The line object is a PreformatToggleLine.
	LineTypePreformatToggle

	// LineTypePreformattedText is any line between two PreformatToggles.
	//
	// The line object is a PreformattedTextLine.
	LineTypePreformattedText

	// LineTypeHeading1 is a top-level heading.
	//
	//     #[<ws>]<body>[\r]\n
	//
	// The line object is a HeadingLine.
	LineTypeHeading1

	// LineTypeHeading2 is a second-level heading.
	//
	//     ##[<ws>]<body>[\r]\n
	//
	// The line object is a HeadingLine.
	LineTypeHeading2

	// LineTypeHeading3 is a third-level heading.
	//
	//     ###[<ws>]<body>[\r]\n
	//
	// The line object is a HeadingLine.
	LineTypeHeading3

	// LineTypeListItem is an unordered list item.
	//
	//     * <body>[\r]\n
	//
	// The line object is a ListItemLine.
	LineTypeListItem

	// LineTypeQuote is a quote line.
	//
	//     ><body>[\r]\n
	//
	// The line object is a QuoteLine.
	LineTypeQuote
)
|
||||
|
||||
// Line is the interface implemented by all specific line types.
//
// Many of the concrete implementation types carry additional useful fields
// (URL, Label, AltText, Body), so it is often worth type-asserting a Line to
// its concrete type, chosen according to the return value of Type().
type Line interface {
	// Type returns the specific type of the gemtext line.
	Type() LineType

	// Raw reproduces the original bytes from the source reader, including
	// any line ending they contained.
	Raw() []byte
}
|
||||
|
||||
// Document is the ordered list of lines that make up a full text/gemini
// resource, as produced by Parse.
type Document []Line
|
||||
|
||||
// TextLine is a line of LineTypeText.
|
||||
type TextLine struct {
|
||||
raw []byte
|
||||
}
|
||||
|
||||
func (tl TextLine) Type() LineType { return LineTypeText }
|
||||
func (tl TextLine) Raw() []byte { return tl.raw }
|
||||
|
||||
// LinkLine is a line of LineTypeLink.
|
||||
type LinkLine struct {
|
||||
raw []byte
|
||||
|
||||
// URL is the original bytes of the url portion of the line.
|
||||
//
|
||||
// It is not guaranteed to be a valid URL.
|
||||
URL []byte
|
||||
|
||||
// Label is the label portion of the line.
|
||||
//
|
||||
// If there was no label it will always be nil, never []byte{}.
|
||||
Label []byte
|
||||
}
|
||||
|
||||
func (ll LinkLine) Type() LineType { return LineTypeLink }
|
||||
func (ll LinkLine) Raw() []byte { return ll.raw }
|
||||
|
||||
// PreformatToggleLine is a preformatted text toggle line.
|
||||
type PreformatToggleLine struct {
|
||||
raw []byte
|
||||
|
||||
// AltText contains the alt-text portion of the line.
|
||||
//
|
||||
// It will either have len() > 0 or be nil.
|
||||
//
|
||||
// If the line was parsed as part of a full document by Parse(),
|
||||
// and this is a *closing* toggle, any alt-text present will be
|
||||
// stripped and this will be nil. If the line was parsed by
|
||||
// ParseLine() no such correction is performed.
|
||||
AltText []byte
|
||||
}
|
||||
|
||||
func (tl PreformatToggleLine) Type() LineType { return LineTypePreformatToggle }
|
||||
func (tl PreformatToggleLine) Raw() []byte { return tl.raw }
|
||||
func (tl *PreformatToggleLine) clearAlt() { tl.AltText = nil }
|
||||
|
||||
// PreformattedTextLine represents a line between two toggles.
|
||||
//
|
||||
// It is never returned by ParseLine but can be part of a
|
||||
// document parsed by Parse().
|
||||
type PreformattedTextLine struct {
|
||||
raw []byte
|
||||
}
|
||||
|
||||
func (tl PreformattedTextLine) Type() LineType { return LineTypePreformattedText }
|
||||
func (tl PreformattedTextLine) Raw() []byte { return tl.raw }
|
||||
|
||||
// HeadingLine is a line of LineTypeHeading[1,2,3].
|
||||
type HeadingLine struct {
|
||||
raw []byte
|
||||
lineType LineType
|
||||
|
||||
// Body is the portion of the line with the header text.
|
||||
Body []byte
|
||||
}
|
||||
|
||||
func (hl HeadingLine) Type() LineType { return hl.lineType }
|
||||
func (hl HeadingLine) Raw() []byte { return hl.raw }
|
||||
|
||||
// ListItemLine is a line of LineTypeListItem.
|
||||
type ListItemLine struct {
|
||||
raw []byte
|
||||
|
||||
// Body is the text of the list item.
|
||||
Body []byte
|
||||
}
|
||||
|
||||
func (li ListItemLine) Type() LineType { return LineTypeListItem }
|
||||
func (li ListItemLine) Raw() []byte { return li.raw }
|
||||
|
||||
// QuoteLine is a line of LineTypeQuote.
|
||||
type QuoteLine struct {
|
||||
raw []byte
|
||||
|
||||
// Body is the text of the quote.
|
||||
Body []byte
|
||||
}
|
||||
|
||||
func (ql QuoteLine) Type() LineType { return LineTypeQuote }
|
||||
func (ql QuoteLine) Raw() []byte { return ql.raw }
|
Reference in New Issue