// shizaru/htmlinspect.go
//
// 55 lines
// 1.5 KiB
// Go

package main
import (
"golang.org/x/net/html"
"net/url"
"strconv"
"strings"
)
// ValidateHtml recursively walks the HTML tree rooted at n and checks it
// against the limits and deny-lists carried by config.
//
// Parameters:
//   - config: supplies MaxHtmlDepth, MaxImages, BadTagsMap, BadAttrsMap and
//     BadDomains.
//   - n: the node to inspect; a nil node is treated as valid.
//   - depth: the nesting level of n. Each recursion into a child adds one.
//   - imgcount: running total of <img> elements seen so far, shared across
//     the whole walk. If nil, a private counter is used for this subtree.
//
// It returns (true, "") when nothing objectionable was found, otherwise
// (false, reason) describing the first violation encountered.
func ValidateHtml(config Config, n *html.Node, depth int, imgcount *int) (bool, string) {
	if n == nil {
		return true, ""
	}
	if imgcount == nil {
		imgcount = new(int)
	}

	if depth > config.MaxHtmlDepth {
		return false, "HTML document nests elements too deeply. " + strconv.Itoa(config.MaxHtmlDepth) + " levels is the limit."
	}

	if n.Type == html.ElementNode {
		if _, banned := config.BadTagsMap[n.Data]; banned {
			return false, "HTML document contains forbiden tag: " + n.Data
		}

		if n.Data == "img" {
			*imgcount++
		}
		if *imgcount > config.MaxImages {
			return false, "HTML document contains more than " + strconv.Itoa(config.MaxImages) + " images."
		}

		for _, a := range n.Attr {
			if _, banned := config.BadAttrsMap[a.Key]; banned {
				return false, "HTML document contains <" + n.Data + "> tag with forbiden attribute: " + a.Key
			}

			if a.Key == "src" && isRemoteSrc(a.Val) {
				return false, "HTML document includes <" + n.Data + "> tag with a remote src"
			}

			if n.Data == "a" && a.Key == "href" {
				u, err := url.Parse(a.Val)
				if err != nil {
					// Deliberately best-effort: an href that cannot be
					// parsed is not checked against the domain list.
					continue
				}
				for _, domain := range config.BadDomains {
					// An empty entry would otherwise match every host
					// (including the empty host of relative links).
					if domain == "" {
						continue
					}
					// u.Host == domain keeps exact "host:port" entries
					// working; hostInDomain handles everything else on the
					// port-less hostname.
					if u.Host == domain || hostInDomain(u.Hostname(), domain) {
						return false, "HTML document contains link to forbidden host: " + u.Host
					}
				}
			}
		}
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if ok, msg := ValidateHtml(config, c, depth+1, imgcount); !ok {
			return false, msg
		}
	}
	return true, ""
}

// isRemoteSrc reports whether a src attribute value points at another host.
//
// Besides an explicit "scheme://", it also catches protocol-relative
// references ("//host/path"). Tabs and newlines are dropped and backslashes
// are treated as slashes before checking, since browsers following the
// WHATWG URL rules normalise those away and would still fetch remotely.
func isRemoteSrc(val string) bool {
	normalized := strings.Map(func(r rune) rune {
		switch r {
		case '\t', '\n', '\r':
			return -1
		case '\\':
			return '/'
		}
		return r
	}, val)
	normalized = strings.TrimSpace(normalized)
	return strings.Contains(normalized, "://") || strings.HasPrefix(normalized, "//")
}

// hostInDomain reports whether host is domain itself or a subdomain of it.
//
// The comparison is case-insensitive and ignores leading/trailing dots, and
// it only matches on a label boundary, so "notevil.com" is not considered
// part of "evil.com". An empty domain never matches.
func hostInDomain(host, domain string) bool {
	domain = strings.ToLower(strings.Trim(domain, "."))
	if domain == "" {
		return false
	}
	host = strings.ToLower(strings.TrimSuffix(host, "."))
	return host == domain || strings.HasSuffix(host, "."+domain)
}