Make WordCount and Summary support UTF-8 strings

This commit is contained in:
coderzh 2015-09-03 18:22:20 +08:00 committed by Bjørn Erik Pedersen
parent c7521b3d67
commit 0e1fd78fb2
3 changed files with 63 additions and 14 deletions

View File

@ -19,6 +19,7 @@ package helpers
import (
"bytes"
"unicode/utf8"
"html/template"
"os/exec"
@ -386,21 +387,57 @@ func TruncateWords(s string, max int) string {
// TruncateWordsToWholeSentence takes a list of words and a limit, max,
// and returns the leading words joined by single spaces together with a
// flag telling whether anything from the input was left out.
//
// Length is measured in units: a word made only of single-byte characters
// counts as one unit, while a word containing any multi-byte rune counts
// one unit per rune. The first word is always examined, even when max is
// smaller than one.
//
// Once the limit is reached the result is extended up to the next word that
// ends a sentence (".", "?", "!" or `."`). The extension stops at the first
// word containing a multi-byte rune; if no sentence end is found the result
// ends right at the limit. A multi-byte word that straddles the limit is cut
// at the limit and the result ends there.
//
// The words slice passed in is never modified.
func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
	count := 0
	end := 0 // number of leading words consumed so far

	for i, word := range words {
		end = i + 1
		runes := utf8.RuneCountInString(word)

		switch {
		case runes == len(word):
			// Only single-byte characters: the whole word is one unit.
			count++
		case count+runes <= max:
			// Multi-byte word that still fits completely.
			count += runes
		default:
			// Multi-byte word crossing the limit: keep only the runes
			// that fit and stop here, so the output never skips text.
			offset := 0
			for ; count < max; count++ {
				_, width := utf8.DecodeRuneInString(word[offset:])
				offset += width
			}
			// The full slice expression caps the capacity, forcing append
			// to copy instead of overwriting the caller's backing array.
			kept := append(words[:i:i], word[:offset])
			return strings.Join(kept, " "), true
		}

		if count >= max {
			break
		}
	}

	// Every word was consumed (or there were none): nothing was truncated.
	if end == len(words) {
		return strings.Join(words, " "), false
	}

	// The limit was hit with words left over; try to finish the sentence.
	for counter, word := range words[end:] {
		if len(word) != utf8.RuneCountInString(word) {
			break
		}
		if strings.HasSuffix(word, ".") ||
			strings.HasSuffix(word, "?") ||
			strings.HasSuffix(word, ".\"") ||
			strings.HasSuffix(word, "!") {
			upper := end + counter + 1
			return strings.Join(words[:upper], " "), upper < len(words)
		}
	}

	return strings.Join(words[:end], " "), true
}
// GetAsciidocContent calls asciidoctor or asciidoc as an external helper

View File

@ -54,6 +54,8 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
{"a b c", "a b c", 12, false},
{"a b c", "a b c", 3, false},
{"a", "a", 1, false},
{"Hello 中国", "Hello 中", 2, true},
{"Hello 中国", "Hello 中国", 3, false},
{"This is a sentence.", "This is a sentence.", 5, false},
{"This is also a sentence!", "This is also a sentence!", 1, false},
{"To be. Or not to be. That's the question.", "To be.", 1, true},

View File

@ -31,6 +31,7 @@ import (
"strings"
"sync"
"time"
"unicode/utf8"
"github.com/spf13/cast"
bp "github.com/spf13/hugo/bufferpool"
@ -362,7 +363,16 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
}
// analyzePage fills in the page's word statistics from its plain words.
//
// WordCount adds one for every word made only of single-byte characters and
// one per rune for every word containing a multi-byte rune. FuzzyWordCount
// and ReadingTime are then derived from WordCount.
func (p *Page) analyzePage() {
	p.WordCount = 0
	for _, word := range p.PlainWords() {
		runeCount := utf8.RuneCountInString(word)
		if len(word) == runeCount {
			// Single-byte characters only: counts as a single word.
			p.WordCount++
		} else {
			// Contains multi-byte runes: every rune counts on its own.
			p.WordCount += runeCount
		}
	}

	// Next multiple of 100 strictly above WordCount (0 -> 100, 100 -> 200).
	p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
	// WordCount divided by 213, rounded up.
	// NOTE(review): 213 is presumably a words-per-minute reading speed — confirm.
	p.ReadingTime = int((p.WordCount + 212) / 213)
}