forked from sloum/bombadillo
139 lines
2.9 KiB
Go
139 lines
2.9 KiB
Go
package gemini
|
|
|
|
import (
|
|
"fmt"
|
|
"html"
|
|
"strings"
|
|
)
|
|
|
|
// tag groups the pieces of one parsed markup element.
//
// NOTE(review): nothing in the visible part of this file constructs or reads
// a tag; the field meanings below are inferred from their names and should
// be confirmed against whatever code uses the type.
type tag struct {
	// name is presumably the element name (for example "a" or "img").
	name string
	// attributes presumably maps attribute names to their values.
	attributes map[string]string
	// text is presumably the text content associated with the element.
	text string
}
|
|
|
|
// Working state shared by ParseHTML and its helpers. ParseHTML resets all of
// it at the start of every call, so the parser must not be used from more
// than one goroutine at a time.
var (
	// fields holds the whitespace-separated tokens of the document
	// currently being parsed.
	fields []string

	// out accumulates the rendered text.
	out strings.Builder

	// links collects link and image targets in the order they are found.
	links = make([]string, 0, 10)

	// inHead is cleared by ParseHTML; nothing in the visible part of this
	// file reads it.
	inHead = false
)
|
|
|
|
func ParseHTML(body, currentURL string) (string, []string) {
|
|
out.Reset()
|
|
links = make([]string, 0, 10)
|
|
inHead = false
|
|
|
|
body = strings.Replace(body, "<", " <", -1)
|
|
body = strings.Replace(body, ">", "> ", -1)
|
|
body = html.UnescapeString(body)
|
|
fields = strings.Fields(body)
|
|
|
|
var text string
|
|
for i := 0; i < len(fields); i++ {
|
|
if strings.HasPrefix(fields[i], "</") {
|
|
text = parseClose(i)
|
|
} else if fields[i][0] == '<' {
|
|
var newIndex int
|
|
text, newIndex = parseTag(i, currentURL)
|
|
i = newIndex
|
|
} else {
|
|
text = fields[i] + " "
|
|
}
|
|
if text != "" {
|
|
out.WriteString(text)
|
|
}
|
|
text = ""
|
|
}
|
|
|
|
return out.String(), links
|
|
}
|
|
|
|
func skipToClose(tag string, i int) int {
|
|
for ;i < len(fields) && fields[i] != tag; i++ {
|
|
continue
|
|
}
|
|
return i
|
|
}
|
|
|
|
func parseTag(i int, currentURL string) (string, int) {
|
|
out := ""
|
|
hitClose := false
|
|
tag := strings.Replace(fields[i], "<", "", 1)
|
|
if len(tag) < 1 {
|
|
i++
|
|
tag = fields[i]
|
|
}
|
|
if strings.HasSuffix(tag, ">") && len(tag) >= 2 {
|
|
tag = tag[:len(tag)-1]
|
|
hitClose = true
|
|
}
|
|
tag = strings.ToLower(tag)
|
|
MainSwitch:
|
|
switch tag {
|
|
case "head":
|
|
i = skipToClose("</head>", i)
|
|
case "script":
|
|
i = skipToClose("</script>", i)
|
|
case "h1", "h2", "h3":
|
|
out = "\n\033[1m"
|
|
case "li", "dt":
|
|
out = "* "
|
|
case "hr":
|
|
out = "\n\n____________________________\n\n"
|
|
case "blockquote":
|
|
out = "\n> "
|
|
case "p", "header", "nav", "footer", "aside", "div", "main", "article", "details", "summary", "ul", "ol", "dl", "h4", "h5", "h6":
|
|
out = "\n"
|
|
case "a", "img":
|
|
target := "href="
|
|
if tag == "img" {
|
|
target = "src="
|
|
}
|
|
for ;i < len(fields) && !strings.HasPrefix(fields[i], target) ; i++ {
|
|
if strings.HasSuffix(fields[i], ">") {
|
|
hitClose = true
|
|
break MainSwitch
|
|
}
|
|
}
|
|
fields[i] = strings.Replace(fields[i], target, "", 1)
|
|
fields[i] = strings.Replace(fields[i], "\"", "", -1)
|
|
fields[i] = strings.Replace(fields[i], "'", "", -1)
|
|
if strings.HasSuffix(fields[i], ">") {
|
|
fields[i] = fields[i][:len(fields[i])-1]
|
|
hitClose = true
|
|
}
|
|
link := fields[i]
|
|
if strings.Index(link, "://") < 0 {
|
|
link, _ = HandleRelativeUrl(link, currentURL)
|
|
}
|
|
|
|
links = append(links, link)
|
|
if tag == "img" {
|
|
out = fmt.Sprintf("\n[%d]IMG\n", len(links))
|
|
} else {
|
|
out = fmt.Sprintf("[%d]", len(links))
|
|
}
|
|
}
|
|
if !hitClose {
|
|
for ;!strings.HasSuffix(fields[i], ">"); i++ {
|
|
continue
|
|
}
|
|
}
|
|
|
|
return out, i
|
|
}
|
|
|
|
|
|
func parseClose(i int) string {
|
|
tag := strings.Trim(fields[i], " \n\r\t<>/")
|
|
switch tag {
|
|
case "p", "header", "nav", "footer", "aside", "div", "main", "article", "details", "summary", "ul", "ol", "dl", "li", "dd", "blockquote", "h4", "h5", "h6":
|
|
return "\n"
|
|
case "dt":
|
|
return " "
|
|
case "h1", "h2", "h3":
|
|
return "\033[0m\n"
|
|
default:
|
|
return ""
|
|
}
|
|
}
|