Adding ability to read from io.Reader

This allows for testing without relying on the file system.  The parsing algorithm was changed so that it does not read the entire file into memory.
This commit is contained in:
Noah Campbell 2013-08-05 07:53:58 -07:00
parent 274d324c8b
commit 085ce15f7c
3 changed files with 280 additions and 96 deletions

View File

@ -14,12 +14,15 @@
package hugolib
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
"github.com/BurntSushi/toml"
"github.com/theplant/blackfriday"
"html/template"
"io"
"io/ioutil"
"launchpad.net/goyaml"
"os"
@ -28,6 +31,7 @@ import (
"sort"
"strings"
"time"
"unicode"
)
var _ = filepath.Base("")
@ -126,6 +130,22 @@ func (page *Page) Layout(l ...string) string {
return strings.ToLower(page.Type()) + "/" + layout + ".html"
}
// ReadFrom creates a page called name from the content supplied by buf.
//
// The page is set up with initializePage, its front matter and body are
// parsed from buf, and analyzePage is run on the result. An empty name or a
// parse failure yields a nil page and a non-nil error.
func ReadFrom(buf io.Reader, name string) (page *Page, err error) {
	if name == "" {
		return nil, errors.New("Zero length page name")
	}

	created := initializePage(name)
	if err = created.parse(buf); err != nil {
		return nil, err
	}

	created.analyzePage()
	return &created, nil
}
// TODO should return errors as well
// TODO new page should return just a page
// TODO initialize separately... load from reader (file, or []byte)
@ -133,7 +153,6 @@ func NewPage(filename string) *Page {
p := initializePage(filename)
if err := p.buildPageFromFile(); err != nil {
fmt.Println(err)
os.Exit(1)
}
p.analyzePage()
@ -146,49 +165,6 @@ func (p *Page) analyzePage() {
p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
}
// TODO //rewrite to use byte methods instead
// parseYamlMetaData splits data on "---" delimiters, decodes the enclosed
// front matter with handleYamlMetaData and applies it through handleMetaData.
// The second value from splitPageContent (the lines outside the front matter)
// is always returned, together with the first error encountered.
func (page *Page) parseYamlMetaData(data []byte) ([]string, error) {
	frontMatter, body := splitPageContent(data, "---", "---")
	raw := []byte(strings.Join(frontMatter, "\n"))

	meta, parseErr := page.handleYamlMetaData(raw)
	if parseErr != nil {
		return body, parseErr
	}
	return body, page.handleMetaData(meta)
}
// parseTomlMetaData splits data on "+++" delimiters, decodes the enclosed
// front matter with handleTomlMetaData and applies it through handleMetaData.
// The second value from splitPageContent (the lines outside the front matter)
// is always returned, together with the first error encountered.
func (page *Page) parseTomlMetaData(data []byte) ([]string, error) {
	frontMatter, body := splitPageContent(data, "+++", "+++")
	raw := []byte(strings.Join(frontMatter, "\n"))

	meta, parseErr := page.handleTomlMetaData(raw)
	if parseErr != nil {
		return body, parseErr
	}
	return body, page.handleMetaData(meta)
}
// parseJsonMetaData splits data on "{" / "}" delimiters, decodes the enclosed
// front matter with handleJsonMetaData and applies it through handleMetaData.
// The second value from splitPageContent (the lines outside the front matter)
// is always returned, together with the first error encountered.
func (page *Page) parseJsonMetaData(data []byte) ([]string, error) {
	frontMatter, body := splitPageContent(data, "{", "}")
	raw := []byte(strings.Join(frontMatter, "\n"))

	meta, parseErr := page.handleJsonMetaData(raw)
	if parseErr != nil {
		return body, parseErr
	}
	return body, page.handleMetaData(meta)
}
func splitPageContent(data []byte, start string, end string) ([]string, []string) {
lines := strings.Split(string(data), "\n")
datum := lines[0:]
@ -211,18 +187,6 @@ func splitPageContent(data []byte, start string, end string) ([]string, []string
break
}
}
} else { // Start token & end token are the same
for i, line := range lines {
if found == 1 && strings.HasPrefix(line, end) {
datum = lines[1:i]
lines = lines[i+1:]
break
}
if found == 0 && strings.HasPrefix(line, start) {
found = 1
}
}
}
return datum, lines
}
@ -272,7 +236,7 @@ func (page *Page) handleJsonMetaData(datum []byte) (interface{}, error) {
return f, nil
}
func (page *Page) handleMetaData(f interface{}) error {
func (page *Page) update(f interface{}) error {
m := f.(map[string]interface{})
for k, v := range m {
@ -304,7 +268,6 @@ func (page *Page) handleMetaData(f interface{}) error {
page.Status = interfaceToString(v)
default:
// If not one of the explicit values, store in Params
//fmt.Println(strings.ToLower(k))
switch vv := v.(type) {
case string: // handle string values
page.Params[strings.ToLower(k)] = vv
@ -340,25 +303,106 @@ func (page *Page) GetParam(key string) interface{} {
return nil
}
func (page *Page) Err(message string) {
fmt.Println(page.FileName + " : " + message)
// parseFrontMatter reads the front matter block from the start of data and
// applies the decoded metadata to page via update.
//
// Leading whitespace is skipped, the first non-space rune selects the front
// matter format (see detectFrontMatter), and every line up to, but not
// including, the line equal to the closing delimiter is handed to that
// format's parser. On success data is positioned just after the closing
// delimiter line.
//
// An error is returned when data is empty, when the opening or closing
// delimiter cannot be matched, or when decoding or applying the metadata
// fails.
//
// NOTE(review): the delimiters themselves are not passed to the parser. For
// the '{' / '}' format, confirm handleJsonMetaData accepts a body without its
// enclosing braces.
func (page *Page) parseFrontMatter(data *bufio.Reader) (err error) {
	if err = checkEmpty(data); err != nil {
		return err
	}

	var mark rune
	if mark, err = chompWhitespace(data); err != nil {
		return err
	}

	f := page.detectFrontMatter(mark)
	if f == nil {
		return errors.New("unable to match beginning front matter delimiter")
	}

	// A read failure and a mismatch are deliberately reported the same way:
	// in both cases no opening delimiter was found.
	if found, err := beginFrontMatter(data, f); err != nil || !found {
		return errors.New("unable to match beginning front matter delimiter")
	}

	var frontmatter = new(bytes.Buffer)
	for {
		line, err := readFrontMatterLine(data)
		if err != nil {
			if err == io.EOF {
				// Input ended before a closing delimiter line was seen.
				return errors.New("unable to match ending front matter delimiter")
			}
			return err
		}

		if bytes.Equal(line, f.markend) {
			break
		}

		frontmatter.Write(line)
		frontmatter.WriteByte('\n')
	}

	metadata, err := f.parse(frontmatter.Bytes())
	if err != nil {
		return err
	}

	return page.update(metadata)
}

// readFrontMatterLine returns the next line of data without its line ending.
// bufio.Reader.ReadLine returns a line longer than the reader's buffer in
// several pieces; the pieces are joined here so a long line is never treated
// as multiple lines.
func readFrontMatterLine(data *bufio.Reader) ([]byte, error) {
	var line []byte
	for {
		chunk, isPrefix, err := data.ReadLine()
		if err != nil {
			return nil, err
		}
		line = append(line, chunk...)
		if !isPrefix {
			return line, nil
		}
	}
}
// TODO return error on last line instead of nil
func (page *Page) parseFileHeading(data []byte) ([]string, error) {
if len(data) == 0 {
page.Err("Empty File, skipping")
} else {
switch data[0] {
case '{':
return page.parseJsonMetaData(data)
case '-':
return page.parseYamlMetaData(data)
case '+':
return page.parseTomlMetaData(data)
}
// checkEmpty reports an error when nothing can be read from data. The rune
// read for the probe is pushed back, so on success the reader is left exactly
// where it started.
func checkEmpty(data *bufio.Reader) (err error) {
	_, _, readErr := data.ReadRune()
	if readErr != nil {
		return errors.New("unable to locate front matter")
	}

	if unreadErr := data.UnreadRune(); unreadErr != nil {
		return errors.New("unable to unread first charactor in page buffer.")
	}

	return nil
}
// frontmatterType describes one supported front matter format: the byte
// sequences that open and close the block, and the function that decodes the
// bytes found between them.
type frontmatterType struct {
	// markstart is the delimiter expected at the very start of the block.
	markstart []byte
	// markend is the delimiter that, on a line of its own, closes the block.
	markend []byte
	// parse decodes the raw front matter bytes into a generic value.
	parse func([]byte) (interface{}, error)
}
// detectFrontMatter maps the first non-space rune of a page to the matching
// front matter description: '-' selects "---" delimiters with the YAML
// handler, '+' selects "+++" delimiters with the TOML handler and '{' selects
// "{" / "}" delimiters with the JSON handler. Any other rune yields nil.
func (page *Page) detectFrontMatter(mark rune) (f *frontmatterType) {
	switch mark {
	case '-':
		f = &frontmatterType{
			markstart: []byte("---"),
			markend:   []byte("---"),
			parse:     page.handleYamlMetaData,
		}
	case '+':
		f = &frontmatterType{
			markstart: []byte("+++"),
			markend:   []byte("+++"),
			parse:     page.handleTomlMetaData,
		}
	case '{':
		f = &frontmatterType{
			markstart: []byte("{"),
			markend:   []byte("}"),
			parse:     page.handleJsonMetaData,
		}
	}
	return f
}
// beginFrontMatter consumes len(f.markstart) bytes from data and reports
// whether they equal the opening delimiter f.markstart.
//
// The read is sized to the delimiter so that single-byte marks such as '{'
// can match, and io.ReadFull is used because a single Read may legitimately
// return fewer bytes than requested. When the input is shorter than the
// delimiter the read error is returned with a false result.
func beginFrontMatter(data *bufio.Reader, f *frontmatterType) (bool, error) {
	peek := make([]byte, len(f.markstart))
	if _, err := io.ReadFull(data, peek); err != nil {
		return false, err
	}
	return bytes.Equal(peek, f.markstart), nil
}
// chompWhitespace discards leading Unicode whitespace from data and returns
// the first non-space rune. That rune is pushed back onto the reader, so the
// next read from data sees it again.
//
// If reading fails before a non-space rune is found (including reaching the
// end of the input), the read error is returned unchanged.
func chompWhitespace(data *bufio.Reader) (r rune, err error) {
	for {
		r, _, err = data.ReadRune()
		if err != nil {
			return r, err
		}

		if unicode.IsSpace(r) {
			continue
		}

		if err := data.UnreadRune(); err != nil {
			return r, errors.New("unable to unread first charactor in front matter.")
		}
		return r, nil
	}
}
func (p *Page) Render(layout ...string) template.HTML {
@ -378,46 +422,50 @@ func (p *Page) ExecuteTemplate(layout string) *bytes.Buffer {
return buffer
}
func (page *Page) readFile() []byte {
var data, err = ioutil.ReadFile(page.FileName)
func (page *Page) readFile() (data []byte, err error) {
data, err = ioutil.ReadFile(page.FileName)
if err != nil {
PrintErr("Error Reading: " + page.FileName)
return nil
return nil, err
}
return data
return data, nil
}
// buildPageFromFile opens the file named by page.FileName and parses it into
// page. It returns the error from opening the file or from parsing it.
func (page *Page) buildPageFromFile() error {
	f, err := os.Open(page.FileName)
	if err != nil {
		return err
	}
	// The file is only needed while parsing; release the handle afterwards.
	defer f.Close()

	return page.parse(bufio.NewReader(f))
}
content, err := page.parseFileHeading(data)
// parse reads a page from reader: the front matter is consumed first, then
// whatever remains in the same buffered reader is handed to the converter
// selected by page.Markup ("md" or "rst"). Any other markup value leaves the
// remaining content unread.
//
// Only a front matter error is returned; the converters report nothing.
func (page *Page) parse(reader io.Reader) error {
	data := bufio.NewReader(reader)

	if err := page.parseFrontMatter(data); err != nil {
		return err
	}

	switch page.Markup {
	case "md":
		page.convertMarkdown(data)
	case "rst":
		page.convertRestructuredText(data)
	}
	return nil
}
func (page *Page) convertMarkdown(lines []string) {
page.RawMarkdown = strings.Join(lines, "\n")
content := string(blackfriday.MarkdownCommon([]byte(page.RawMarkdown)))
func (page *Page) convertMarkdown(lines io.Reader) {
b := new(bytes.Buffer)
b.ReadFrom(lines)
content := string(blackfriday.MarkdownCommon(b.Bytes()))
page.Content = template.HTML(content)
page.Summary = template.HTML(TruncateWordsToWholeSentence(StripHTML(StripShortcodes(content)), summaryLength))
}
func (page *Page) convertRestructuredText(lines []string) {
page.RawMarkdown = strings.Join(lines, "\n")
func (page *Page) convertRestructuredText(lines io.Reader) {
cmd := exec.Command("rst2html.py")
cmd.Stdin = strings.NewReader(page.RawMarkdown)
cmd.Stdin = lines
var out bytes.Buffer
cmd.Stdout = &out
if err := cmd.Run(); err != nil {

124
hugolib/page_test.go Normal file
View File

@ -0,0 +1,124 @@
package hugolib
import (
"html/template"
"io"
"strings"
"testing"
)
// EMPTY_PAGE is a page with no content at all.
var EMPTY_PAGE = ""
// SIMPLE_PAGE is a well-formed page: "---" delimited front matter with a
// title, followed by a one-line body.
var SIMPLE_PAGE = `---
title: Simple
---
Simple Page
`
// INVALID_FRONT_MATTER_MISSING is a page that has body text but no front
// matter block.
var INVALID_FRONT_MATTER_MISSING = `This is a test`
// INVALID_FRONT_MATTER_SHORT_DELIM opens its front matter with "--" instead
// of "---".
var INVALID_FRONT_MATTER_SHORT_DELIM = `
--
title: Short delim start
---
Short Delim
`
// INVALID_FRONT_MATTER_SHORT_DELIM_ENDING closes its front matter with "--"
// instead of "---".
var INVALID_FRONT_MATTER_SHORT_DELIM_ENDING = `
---
title: Short delim ending
--
Short Delim
`
// INVALID_FRONT_MATTER_LEADING_WS has whitespace before the opening
// delimiter. Despite the name, the test that uses it expects this page to
// parse without error.
var INVALID_FRONT_MATTER_LEADING_WS = `
---
title: Leading WS
---
Leading
`
// checkError aborts the test when err is nil and records a failure when the
// error's message differs from expected.
func checkError(t *testing.T, err error, expected string) {
	if err == nil {
		t.Fatalf("err is nil")
	}

	if got := err.Error(); got != expected {
		t.Errorf("err.Error() returned: '%s'. Expected: '%s'", got, expected)
	}
}
// TestDegenerateEmptyPageZeroLengthName verifies that ReadFrom rejects an
// empty page name with the "Zero length page name" error.
func TestDegenerateEmptyPageZeroLengthName(t *testing.T) {
	const noName = ""

	_, readErr := ReadFrom(strings.NewReader(EMPTY_PAGE), noName)
	if readErr == nil {
		t.Fatalf("A zero length page name must return an error")
	}

	checkError(t, readErr, "Zero length page name")
}
func TestDegenerateEmptyPage(t *testing.T) {
_, err := ReadFrom(strings.NewReader(EMPTY_PAGE), "test")
if err == nil {
t.Fatalf("Expected ReadFrom to return an error when an empty buffer is passed.")
}
checkError(t, err, "unable to locate front matter")
}
// checkPageTitle aborts the test unless page.Title equals title.
func checkPageTitle(t *testing.T, page *Page, title string) {
	if got := page.Title; got != title {
		t.Fatalf("Page title is: %s. Expected %s", got, title)
	}
}
// checkPageContent aborts the test unless page.Content equals content
// interpreted as template.HTML.
func checkPageContent(t *testing.T, page *Page, content string) {
	want := template.HTML(content)
	if page.Content == want {
		return
	}
	t.Fatalf("Page content is: %s. Expected %s", page.Content, content)
}
// checkPageType aborts the test unless page.Type() equals pageType.
func checkPageType(t *testing.T, page *Page, pageType string) {
	if page.Type() == pageType {
		return
	}
	t.Fatalf("Page type is: %s. Expected: %s", page.Type(), pageType)
}
// checkPageLayout aborts the test unless page.Layout() equals layout.
func checkPageLayout(t *testing.T, page *Page, layout string) {
	if page.Layout() == layout {
		return
	}
	t.Fatalf("Page layout is: %s. Expected: %s", page.Layout(), layout)
}
func TestCreateNewPage(t *testing.T) {
p, err := ReadFrom(strings.NewReader(SIMPLE_PAGE), "simple")
if err != nil {
t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
}
checkPageTitle(t, p, "Simple")
checkPageContent(t, p, "<p>Simple Page</p>\n")
checkPageType(t, p, "page")
checkPageLayout(t, p, "page/single.html")
}
// TestDegenerateInvalidFrontMatterShortDelim feeds ReadFrom pages whose front
// matter delimiters are too short or missing and checks the error reported
// for each one.
func TestDegenerateInvalidFrontMatterShortDelim(t *testing.T) {
	cases := []struct {
		r   io.Reader
		err string
	}{
		{
			r:   strings.NewReader(INVALID_FRONT_MATTER_SHORT_DELIM),
			err: "unable to match beginning front matter delimiter",
		},
		{
			r:   strings.NewReader(INVALID_FRONT_MATTER_SHORT_DELIM_ENDING),
			err: "unable to match ending front matter delimiter",
		},
		{
			r:   strings.NewReader(INVALID_FRONT_MATTER_MISSING),
			err: "unable to match beginning front matter delimiter",
		},
	}

	for i := range cases {
		_, readErr := ReadFrom(cases[i].r, "invalid/front/matter/short/delim")
		checkError(t, readErr, cases[i].err)
	}
}
func TestDegenerateInvalidFrontMatterLeadingWhitespace(t *testing.T) {
_, err := ReadFrom(strings.NewReader(INVALID_FRONT_MATTER_LEADING_WS), "invalid/front/matter/leading/ws")
if err != nil {
t.Fatalf("Unable to parse front matter given leading whitespace: %s", err)
}
}

View File

@ -2,17 +2,27 @@ package hugolib
import (
"path/filepath"
"strings"
"testing"
)
// SIMPLE_PAGE_YAML is a minimal page with "---" delimited front matter that
// sets contenttype to the empty string, followed by a one-line body. The path
// tests read it in place of a file on disk.
var SIMPLE_PAGE_YAML = `---
contenttype: ""
---
Sample Text
`
func TestDegenerateMissingFolderInPageFilename(t *testing.T) {
p := NewPage(filepath.Join("foobar"))
p, err := ReadFrom(strings.NewReader(SIMPLE_PAGE_YAML), filepath.Join("foobar"))
if err != nil {
t.Fatalf("Error in ReadFrom")
}
if p.Section != "" {
t.Fatalf("No section should be set for a file path: foobar")
}
}
func TestCreateNewPage(t *testing.T) {
func TestNewPageWithFilePath(t *testing.T) {
toCheck := []map[string]string{
{"input": filepath.Join("sub", "foobar.html"), "expect": "sub"},
{"input": filepath.Join("content", "sub", "foobar.html"), "expect": "sub"},
@ -20,14 +30,16 @@ func TestCreateNewPage(t *testing.T) {
}
for _, el := range toCheck {
p := NewPage(el["input"])
p, err := ReadFrom(strings.NewReader(SIMPLE_PAGE_YAML), el["input"])
if err != nil {
t.Fatalf("Reading from SIMPLE_PAGE_YAML resulted in an error: %s", err)
}
if p.Section != el["expect"] {
t.Fatalf("Section not set to %s for page %s. Got: %s", el["expect"], el["input"], p.Section)
}
}
}
func TestSettingOutFileOnPageContainsCorrectSlashes(t *testing.T) {
s := NewSite(&Config{})
p := NewPage(filepath.Join("sub", "foobar"))