55 lines
1.5 KiB
Go
55 lines
1.5 KiB
Go
package main
|
|
|
|
import (
|
|
"golang.org/x/net/html"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
func ValidateHtml(config Config, n *html.Node, depth int, imgcount *int) (bool, string) {
|
|
if depth > config.MaxHtmlDepth {
|
|
return false, "HTML document nests elements too deeply. " + strconv.Itoa(config.MaxHtmlDepth) + " levels is the limit."
|
|
}
|
|
if n.Type == html.ElementNode {
|
|
_, ok := config.BadTagsMap[n.Data]
|
|
if ok {
|
|
return false, "HTML document contains forbiden tag: " + n.Data
|
|
}
|
|
if n.Data == "img" {
|
|
*imgcount += 1
|
|
}
|
|
if *imgcount > config.MaxImages {
|
|
return false, "HTML document contains more than " + strconv.Itoa(config.MaxImages) + " images."
|
|
}
|
|
for _, a := range n.Attr {
|
|
_, ok := config.BadAttrsMap[a.Key]
|
|
if ok {
|
|
return false, "HTML document contains <" + n.Data + "> tag with forbiden attribute: " + a.Key
|
|
}
|
|
if a.Key == "src" && strings.Contains(a.Val, "://") {
|
|
return false, "HTML document includes <" + n.Data + "> tag with a remote src"
|
|
}
|
|
if n.Data == "a" && a.Key == "href" {
|
|
u, err := url.Parse(a.Val)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
for _, bad_dom := range config.BadDomains {
|
|
if u.Host == bad_dom || strings.HasSuffix(u.Host, bad_dom) {
|
|
return false, "HTML document contains link to forbidden host: " + u.Host
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
err, msg := ValidateHtml(config, c, depth+1, imgcount)
|
|
if err == false {
|
|
return err, msg
|
|
}
|
|
}
|
|
return true, ""
|
|
|
|
}
|