From ffcb4aeb8e392a80da7cad0f1e03a4102efb24ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Mon, 9 Mar 2020 12:04:33 +0100 Subject: [PATCH] Fix handling of HTML files without front matter This means that any HTML file inside /content will be treated as a regular file. If you want it processed with shortcodes and a layout, add front matter. The definition of an HTML file here is: * File with extension .htm or .html * With first non-whitespace character "<" that isn't the start of an HTML comment. This is in line with the documentation. Fixes #7030 Fixes #7028 See #6789 --- deps/deps.go | 6 +- deps/deps_test.go | 8 +- hugofs/files/classifier.go | 78 ++++++++++- hugofs/files/classifier_test.go | 12 ++ hugofs/filter_fs.go | 2 +- hugolib/content_map_test.go | 4 +- hugolib/content_render_hooks_test.go | 2 +- hugolib/page.go | 14 -- hugolib/page__content.go | 3 +- hugolib/page__per_output.go | 148 +++++++++------------ hugolib/shortcode.go | 2 +- hugolib/site_test.go | 67 ---------- hugolib/template_test.go | 81 ----------- parser/pageparser/item.go | 1 - parser/pageparser/pagelexer_intro.go | 11 +- parser/pageparser/pageparser_intro_test.go | 5 +- tpl/template.go | 1 - tpl/tplimpl/template.go | 32 +---- 18 files changed, 168 insertions(+), 309 deletions(-) diff --git a/deps/deps.go b/deps/deps.go index 8f3d8163..49b056a7 100644 --- a/deps/deps.go +++ b/deps/deps.go @@ -5,7 +5,6 @@ import ( "time" "github.com/pkg/errors" - "go.uber.org/atomic" "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/common/loggers" @@ -377,11 +376,8 @@ type DepsCfg struct { // BuildFlags are flags that may be turned on during a build. 
type BuildFlags struct { - HasLateTemplate atomic.Bool } func NewBuildFlags() BuildFlags { - return BuildFlags{ - //HasLateTemplate: atomic.NewBool(false), - } + return BuildFlags{} } diff --git a/deps/deps_test.go b/deps/deps_test.go index e2dca0ec..a7450a41 100644 --- a/deps/deps_test.go +++ b/deps/deps_test.go @@ -15,14 +15,8 @@ package deps import ( "testing" - - qt "github.com/frankban/quicktest" ) func TestBuildFlags(t *testing.T) { - c := qt.New(t) - var bf BuildFlags - c.Assert(bf.HasLateTemplate.Load(), qt.Equals, false) - bf.HasLateTemplate.Store(true) - c.Assert(bf.HasLateTemplate.Load(), qt.Equals, true) + } diff --git a/hugofs/files/classifier.go b/hugofs/files/classifier.go index e8f8241b..5e26bbac 100644 --- a/hugofs/files/classifier.go +++ b/hugofs/files/classifier.go @@ -14,10 +14,16 @@ package files import ( + "bufio" + "fmt" + "io" "os" "path/filepath" "sort" "strings" + "unicode" + + "github.com/spf13/afero" ) var ( @@ -32,6 +38,11 @@ var ( "pandoc", "pdc"} contentFileExtensionsSet map[string]bool + + htmlFileExtensions = []string{ + "html", "htm"} + + htmlFileExtensionsSet map[string]bool ) func init() { @@ -39,12 +50,20 @@ func init() { for _, ext := range contentFileExtensions { contentFileExtensionsSet[ext] = true } + htmlFileExtensionsSet = make(map[string]bool) + for _, ext := range htmlFileExtensions { + htmlFileExtensionsSet[ext] = true + } } func IsContentFile(filename string) bool { return contentFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] } +func IsHTMLFile(filename string) bool { + return htmlFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] +} + func IsContentExt(ext string) bool { return contentFileExtensionsSet[ext] } @@ -62,10 +81,33 @@ func (c ContentClass) IsBundle() bool { return c == ContentClassLeaf || c == ContentClassBranch } -func ClassifyContentFile(filename string) ContentClass { +func ClassifyContentFile(filename string, open func() (afero.File, error)) ContentClass { if 
!IsContentFile(filename) { return ContentClassFile } + + if IsHTMLFile(filename) { + // We need to look inside the file. If the first non-whitespace + // character is a "<", then we treat it as a regular file. + // Eearlier we created pages for these files, but that had all sorts + // of troubles, and isn't what it says in the documentation. + // See https://github.com/gohugoio/hugo/issues/7030 + if open == nil { + panic(fmt.Sprintf("no file opener provided for %q", filename)) + } + + f, err := open() + if err != nil { + return ContentClassFile + } + ishtml := isHTMLContent(f) + f.Close() + if ishtml { + return ContentClassFile + } + + } + if strings.HasPrefix(filename, "_index.") { return ContentClassBranch } @@ -77,6 +119,40 @@ func ClassifyContentFile(filename string) ContentClass { return ContentClassContent } +var htmlComment = []rune{'<', '!', '-', '-'} + +func isHTMLContent(r io.Reader) bool { + br := bufio.NewReader(r) + i := 0 + for { + c, _, err := br.ReadRune() + if err != nil { + break + } + + if i > 0 { + if i >= len(htmlComment) { + return false + } + + if c != htmlComment[i] { + return true + } + + i++ + continue + } + + if !unicode.IsSpace(c) { + if i == 0 && c != '<' { + return false + } + i++ + } + } + return true +} + const ( ComponentFolderArchetypes = "archetypes" ComponentFolderStatic = "static" diff --git a/hugofs/files/classifier_test.go b/hugofs/files/classifier_test.go index af188f34..0cd7e417 100644 --- a/hugofs/files/classifier_test.go +++ b/hugofs/files/classifier_test.go @@ -15,6 +15,7 @@ package files import ( "path/filepath" + "strings" "testing" qt "github.com/frankban/quicktest" @@ -30,6 +31,17 @@ func TestIsContentFile(t *testing.T) { c.Assert(IsContentExt("json"), qt.Equals, false) } +func TestIsHTMLContent(t *testing.T) { + c := qt.New(t) + + c.Assert(isHTMLContent(strings.NewReader(" ")), qt.Equals, true) + c.Assert(isHTMLContent(strings.NewReader(" , # more TypeFrontMatterYAML TypeFrontMatterTOML diff --git 
a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go index 56dd4224..539e6cfa 100644 --- a/parser/pageparser/pagelexer_intro.go +++ b/parser/pageparser/pagelexer_intro.go @@ -42,21 +42,14 @@ LOOP: if r == '<' { l.backup() if l.hasPrefix(htmlCommentStart) { - // This may be commented out front mattter, which should + // This may be commented out front matter, which should // still be read. l.consumeToNextLine() l.isInHTMLComment = true l.emit(TypeIgnore) continue LOOP } else { - if l.pos > l.start { - l.emit(tText) - } - l.next() - // This is the start of a plain HTML document with no - // front matter. I still can contain shortcodes, so we - // have to keep looking. - l.emit(TypeHTMLStart) + return l.errorf("plain HTML documents not supported") } } break LOOP diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go index 0f20ae5a..e776cb3e 100644 --- a/parser/pageparser/pageparser_intro_test.go +++ b/parser/pageparser/pageparser_intro_test.go @@ -38,7 +38,6 @@ var ( tstFrontMatterJSON = nti(TypeFrontMatterJSON, tstJSON+"\r\n") tstSomeText = nti(tText, "\nSome text.\n") tstSummaryDivider = nti(TypeLeadSummaryDivider, "\n") - tstHtmlStart = nti(TypeHTMLStart, "<") tstNewline = nti(tText, "\n") tstORG = ` @@ -55,8 +54,8 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$") var frontMatterTests = []lexerTest{ {"empty", "", []Item{tstEOF}}, {"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}}, - {"HTML Document", ` `, []Item{nti(tText, " "), tstHtmlStart, nti(tText, "html> "), tstEOF}}, - {"HTML Document with shortcode", `{{< sc1 >}}`, []Item{tstHtmlStart, nti(tText, "html>"), tstLeftNoMD, tstSC1, tstRightNoMD, nti(tText, ""), tstEOF}}, + {"HTML Document", ` `, []Item{nti(tError, "plain HTML documents not supported")}}, + {"HTML Document with shortcode", `{{< sc1 >}}`, []Item{nti(tError, "plain HTML documents not supported")}}, {"No front 
matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}}, {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}}, {"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}}, diff --git a/tpl/template.go b/tpl/template.go index b9b0749b..315004b6 100644 --- a/tpl/template.go +++ b/tpl/template.go @@ -29,7 +29,6 @@ type TemplateManager interface { TemplateHandler TemplateFuncGetter AddTemplate(name, tpl string) error - AddLateTemplate(name, tpl string) error MarkReady() error } diff --git a/tpl/tplimpl/template.go b/tpl/tplimpl/template.go index b6313e1e..c01b6213 100644 --- a/tpl/tplimpl/template.go +++ b/tpl/tplimpl/template.go @@ -251,21 +251,8 @@ func (t *templateExec) MarkReady() error { } }) - if err != nil { - return err - } + return err - if t.Deps.BuildFlags.HasLateTemplate.Load() { - // This costs memory, so try to avoid it if we don't have to. - // The late templates are used to handle HTML in files in /content - // without front matter. - t.readyLateInit.Do(func() { - t.late = t.main.Clone(true) - t.late.createPrototypes() - }) - } - - return nil } type templateHandler struct { @@ -273,10 +260,7 @@ type templateHandler struct { needsBaseof map[string]templateInfo baseof map[string]templateInfo - late *templateNamespace // Templates added after main has started executing. - - readyInit sync.Once - readyLateInit sync.Once + readyInit sync.Once // This is the filesystem to load the templates from. All the templates are // stored in the root of this filesystem. @@ -309,14 +293,6 @@ type templateHandler struct { templateInfo map[string]tpl.Info } -// AddLateTemplate is used to add a template after the -// regular templates have started its execution. -// These are currently "pure HTML content files". 
-func (t *templateHandler) AddLateTemplate(name, tpl string) error { - _, err := t.late.parse(t.newTemplateInfo(name, tpl)) - return err -} - // AddTemplate parses and adds a template to the collection. // Templates with name prefixed with "_text" will be handled as plain // text templates. @@ -334,10 +310,6 @@ func (t *templateHandler) Lookup(name string) (tpl.Template, bool) { return templ, true } - if t.late != nil { - return t.late.Lookup(name) - } - return nil, false }