From 3a62d54745e2cbfda6772390830042908d725c71 Mon Sep 17 00:00:00 2001
From: Jim McDonald <Jim@mcdee.net>
Date: Fri, 5 Apr 2019 18:11:04 +0100
Subject: [PATCH] hugolib: Consider summary in front matter for .Summary

Add the ability to have a `summary` page variable that overrides
the auto-generated summary.  Logic for obtaining summary becomes:

  * if summary divider is present in content, use the text above it
  * if a summary variable is present in page metadata, use that
  * auto-generate summary from first _x_ words of the content

Fixes #5800
---
 helpers/content.go          | 21 +++++++++++++++
 helpers/content_test.go     | 22 ++++++++++++++++
 hugolib/page__meta.go       |  5 ++++
 hugolib/page__per_output.go | 10 ++++++-
 hugolib/page_test.go        | 52 +++++++++++++++++++++++++++++++++++++
 hugolib/rss_test.go         |  3 +++
 hugolib/site_test.go        |  1 +
 tpl/transform/transform.go  | 16 ++----------
 8 files changed, 115 insertions(+), 15 deletions(-)
diff --git a/helpers/content.go b/helpers/content.go
index be5090c2..3892647b 100644
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -42,6 +42,12 @@ import (
 // SummaryDivider denotes where content summarization should end. The default is "<!--more-->".
 var SummaryDivider = []byte("<!--more-->")
 
+var (
+	openingPTag        = []byte("<p>")  // opening tag stripped by TrimShortHTML
+	closingPTag        = []byte("</p>") // closing tag stripped by TrimShortHTML
+	paragraphIndicator = []byte("<p")   // prefix TrimShortHTML searches for to detect paragraph tags
+)
+
 // ContentSpec provides functionality to render markdown content.
 type ContentSpec struct {
 	BlackFriday                *BlackFriday
@@ -580,6 +586,21 @@ func (c *ContentSpec) TruncateWordsToWholeSentence(s string) (string, bool) {
 	return strings.TrimSpace(s[:endIndex]), endIndex < len(s)
 }
 
+// TrimShortHTML removes the <p>/</p> tags from HTML input in the situation
+// where said tags are the only <p> tags in the input and enclose the content
+// of the input (whitespace excluded); otherwise no tag is removed.
+func (c *ContentSpec) TrimShortHTML(input []byte) []byte {
+	if bytes.Index(input, paragraphIndicator) != bytes.LastIndex(input, paragraphIndicator) {
+		return input
+	}
+	input = bytes.TrimSpace(input)
+	// Strip the tags only as a pair so the result is never left unbalanced.
+	if bytes.HasPrefix(input, openingPTag) && bytes.HasSuffix(input, closingPTag) {
+		input = input[len(openingPTag) : len(input)-len(closingPTag)]
+	}
+	return bytes.TrimSpace(input)
+}
+
 func isEndOfSentence(r rune) bool {
 	return r == '.' || r == '?' || r == '!' || r == '"' || r == '\n'
 }
diff --git a/helpers/content_test.go b/helpers/content_test.go
index 1dd4a2fb..709c8114 100644
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -29,6 +29,28 @@ import (
 
 const tstHTMLContent = "<!DOCTYPE html><html><head><script src=\"http://two/foobar.js\"></script></head><body><nav><ul><li hugo-nav=\"section_0\"></li><li hugo-nav=\"section_1\"></li></ul></nav><article>content <a href=\"http://two/foobar\">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>"
 
+// TestTrimShortHTML checks TrimShortHTML against plain, single- and multi-paragraph input.
+func TestTrimShortHTML(t *testing.T) {
+	tests := []struct {
+		input, output []byte
+	}{
+		{[]byte(""), []byte("")},
+		{[]byte("Plain text"), []byte("Plain text")},
+		{[]byte("  \t\n Whitespace text\n\n"), []byte("Whitespace text")},
+		{[]byte("<p>Simple paragraph</p>"), []byte("Simple paragraph")},
+		{[]byte("\n  \n \t  <p> \t Whitespace\nHTML  \n\t </p>\n\t"), []byte("Whitespace\nHTML")},
+		{[]byte("<p>Multiple</p><p>paragraphs</p>"), []byte("<p>Multiple</p><p>paragraphs</p>")},
+		{[]byte("<p>Nested<p>paragraphs</p></p>"), []byte("<p>Nested<p>paragraphs</p></p>")},
+	}
+
+	c := newTestContentSpec()
+	for i, test := range tests {
+		if output := c.TrimShortHTML(test.input); !bytes.Equal(test.output, output) {
+			t.Errorf("Test %d failed. Expected %q got %q", i, test.output, output)
+		}
+	}
+}
+
 func TestStripHTML(t *testing.T) {
 	type test struct {
 		input, expected string
diff --git a/hugolib/page__meta.go b/hugolib/page__meta.go
index 1e013db6..eefecbe4 100644
--- a/hugolib/page__meta.go
+++ b/hugolib/page__meta.go
@@ -64,6 +64,8 @@ type pageMeta struct {
 	title     string
 	linkTitle string
 
+	summary string
+
 	resourcePath string
 
 	weight int
@@ -361,6 +363,9 @@ func (pm *pageMeta) setMetadata(p *pageState, frontmatter map[string]interface{}
 		case "linktitle":
 			pm.linkTitle = cast.ToString(v)
 			pm.params[loki] = pm.linkTitle
+		case "summary":
+			pm.summary = cast.ToString(v)
+			pm.params[loki] = pm.summary
 		case "description":
 			pm.description = cast.ToString(v)
 			pm.params[loki] = pm.description
diff --git a/hugolib/page__per_output.go b/hugolib/page__per_output.go
index 05b35cc8..177e0420 100644
--- a/hugolib/page__per_output.go
+++ b/hugolib/page__per_output.go
@@ -128,6 +128,14 @@ func newPageContentOutput(p *pageState) func(f output.Format) (*pageContentOutpu
 							cp.summary = helpers.BytesToHTML(summary)
 						}
 					}
+				} else if cp.p.m.summary != "" {
+					html := cp.p.s.ContentSpec.RenderBytes(&helpers.RenderingContext{
+						Content: []byte(cp.p.m.summary), RenderTOC: false, PageFmt: cp.p.m.markup,
+						Cfg:        p.Language(),
+						DocumentID: p.File().UniqueID(), DocumentName: p.File().Path(),
+						Config: cp.p.getRenderingConfig()})
+					html = cp.p.s.ContentSpec.TrimShortHTML(html)
+					cp.summary = helpers.BytesToHTML(html)
 				}
 			}
 
@@ -271,7 +279,7 @@ func (p *pageContentOutput) WordCount() int {
 }
 
 func (p *pageContentOutput) setAutoSummary() error {
-	if p.p.source.hasSummaryDivider {
+	if p.p.source.hasSummaryDivider || p.p.m.summary != "" {
 		return nil
 	}
 
diff --git a/hugolib/page_test.go b/hugolib/page_test.go
index 6d9d337e..a3b86ef2 100644
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -45,6 +45,16 @@ const (
 
 	simplePageRFC3339Date = "---\ntitle: RFC3339 Date\ndate: \"2013-05-17T16:59:30Z\"\n---\nrfc3339 content"
 
+	simplePageWithoutSummaryDelimiter = `---
+title: SimpleWithoutSummaryDelimiter
+---
+[Lorem ipsum](https://lipsum.com/) dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+Additional text.
+
+Further text.
+`
+
 	simplePageWithSummaryDelimiter = `---
 title: Simple
 ---
@@ -52,6 +62,16 @@ Summary Next Line
 
 <!--more-->
 Some more text
+`
+
+	simplePageWithSummaryParameter = `---
+title: SimpleWithSummaryParameter
+summary: "Page with summary parameter and [a link](http://www.example.com/)"
+---
+
+Some text.
+
+Some more text.
 `
 
 	simplePageWithSummaryDelimiterAndMarkdownThatCrossesBorder = `---
@@ -519,6 +539,22 @@ func TestCreateNewPage(t *testing.T) {
 	testAllMarkdownEnginesForPages(t, assertFunc, settings, simplePage)
 }
 
+func TestPageSummary(t *testing.T) {
+	t.Parallel()
+	assertFunc := func(t *testing.T, ext string, pages page.Pages) {
+		p := pages[0]
+		checkPageTitle(t, p, "SimpleWithoutSummaryDelimiter")
+		// The source is Markdown and not Asciidoctor- or RST-compatible, so skip the content and summary checks for those formats.
+		if ext != "ad" && ext != "rst" {
+			checkPageContent(t, p, normalizeExpected(ext, "<p><a href=\"https://lipsum.com/\">Lorem ipsum</a> dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>\n\n<p>Additional text.</p>\n\n<p>Further text.</p>\n"), ext)
+			checkPageSummary(t, p, normalizeExpected(ext, "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Additional text."), ext)
+		}
+		checkPageType(t, p, "page")
+	}
+
+	testAllMarkdownEnginesForPages(t, assertFunc, nil, simplePageWithoutSummaryDelimiter)
+}
+
 func TestPageWithDelimiter(t *testing.T) {
 	t.Parallel()
 	assertFunc := func(t *testing.T, ext string, pages page.Pages) {
@@ -532,6 +568,22 @@ func TestPageWithDelimiter(t *testing.T) {
 	testAllMarkdownEnginesForPages(t, assertFunc, nil, simplePageWithSummaryDelimiter)
 }
 
+func TestPageWithSummaryParameter(t *testing.T) {
+	t.Parallel()
+	assertFunc := func(t *testing.T, ext string, pages page.Pages) {
+		p := pages[0]
+		checkPageTitle(t, p, "SimpleWithSummaryParameter")
+		checkPageContent(t, p, normalizeExpected(ext, "<p>Some text.</p>\n\n<p>Some more text.</p>\n"), ext)
+		// The summary uses a Markdown link that is not Asciidoctor- or RST-compatible, so skip the summary check for those formats.
+		if ext != "ad" && ext != "rst" {
+			checkPageSummary(t, p, normalizeExpected(ext, "Page with summary parameter and <a href=\"http://www.example.com/\">a link</a>"), ext)
+		}
+		checkPageType(t, p, "page")
+	}
+
+	testAllMarkdownEnginesForPages(t, assertFunc, nil, simplePageWithSummaryParameter)
+}
+
 // Issue #3854
 // Also see https://github.com/gohugoio/hugo/issues/3977
 func TestPageWithDateFields(t *testing.T) {
diff --git a/hugolib/rss_test.go b/hugolib/rss_test.go
index 683a737c..38f0f1ef 100644
--- a/hugolib/rss_test.go
+++ b/hugolib/rss_test.go
@@ -55,6 +55,9 @@ func TestRSSOutput(t *testing.T) {
 	if c != rssLimit {
 		t.Errorf("incorrect RSS item count: expected %d, got %d", rssLimit, c)
 	}
+
+	// Encoded summary
+	th.assertFileContent(filepath.Join("public", rssURI), "<?xml", "description", "A &lt;em&gt;custom&lt;/em&gt; summary")
 }
 
 // Before Hugo 0.49 we set the pseudo page kind RSS on the page when output to RSS.
diff --git a/hugolib/site_test.go b/hugolib/site_test.go
index 21575072..5912abbc 100644
--- a/hugolib/site_test.go
+++ b/hugolib/site_test.go
@@ -586,6 +586,7 @@ date = "2012-01-01"
 publishdate = "2012-01-01"
 my_param = "baz"
 my_date = 2010-05-27T07:32:00Z
+summary = "A _custom_ summary"
 categories = [ "hugo" ]
 +++
 Front Matter with Ordered Pages 4. This is longer content`
diff --git a/tpl/transform/transform.go b/tpl/transform/transform.go
index 42e36eb0..2aa0c195 100644
--- a/tpl/transform/transform.go
+++ b/tpl/transform/transform.go
@@ -15,7 +15,6 @@
 package transform
 
 import (
-	"bytes"
 	"html"
 	"html/template"
 
@@ -91,12 +90,6 @@ func (ns *Namespace) HTMLUnescape(s interface{}) (string, error) {
 	return html.UnescapeString(ss), nil
 }
 
-var (
-	markdownTrimPrefix         = []byte("<p>")
-	markdownTrimSuffix         = []byte("</p>\n")
-	markdownParagraphIndicator = []byte("<p")
-)
-
 // Markdownify renders a given input from Markdown to HTML.
 func (ns *Namespace) Markdownify(s interface{}) (template.HTML, error) {
 	ss, err := cast.ToStringE(s)
@@ -114,14 +107,9 @@ func (ns *Namespace) Markdownify(s interface{}) (template.HTML, error) {
 	)
 
 	// Strip if this is a short inline type of text.
-	first := bytes.Index(m, markdownParagraphIndicator)
-	last := bytes.LastIndex(m, markdownParagraphIndicator)
-	if first == last {
-		m = bytes.TrimPrefix(m, markdownTrimPrefix)
-		m = bytes.TrimSuffix(m, markdownTrimSuffix)
-	}
+	m = ns.deps.ContentSpec.TrimShortHTML(m)
 
-	return template.HTML(m), nil
+	return helpers.BytesToHTML(m), nil
 }
 
 // Plainify returns a copy of s with all HTML tags removed.