Compare commits
3 Commits
master
...
gemini-htm
Author | SHA1 | Date | |
---|---|---|---|
|
cb14d2ddfa | ||
|
9967d08e10 | ||
|
2d5a1949f9 |
@ -78,7 +78,7 @@ func (t *TofuDigest) Match(host, localCert string, cState *tls.ConnectionState)
|
||||
return fmt.Errorf("EXP")
|
||||
}
|
||||
|
||||
if err := cert.VerifyHostname(host); err != nil {
|
||||
if err := cert.VerifyHostname(host); err != nil && cert.Subject.CommonName != host {
|
||||
return fmt.Errorf("Certificate error: %s", err)
|
||||
}
|
||||
|
||||
@ -328,14 +328,18 @@ func Visit(host, port, resource string, td *TofuDigest) (Capsule, error) {
|
||||
}
|
||||
capsule.MimeMaj = minMajMime[0]
|
||||
capsule.MimeMin = minMajMime[1]
|
||||
|
||||
if len(resource) > 0 && resource[0] != '/' {
|
||||
resource = fmt.Sprintf("/%s", resource)
|
||||
} else if resource == "" {
|
||||
resource = "/"
|
||||
}
|
||||
currentUrl := fmt.Sprintf("gemini://%s:%s%s", host, port, resource)
|
||||
|
||||
if capsule.MimeMaj == "text" && capsule.MimeMin == "gemini" {
|
||||
if len(resource) > 0 && resource[0] != '/' {
|
||||
resource = fmt.Sprintf("/%s", resource)
|
||||
} else if resource == "" {
|
||||
resource = "/"
|
||||
}
|
||||
currentUrl := fmt.Sprintf("gemini://%s:%s%s", host, port, resource)
|
||||
capsule.Content, capsule.Links = parseGemini(body, currentUrl)
|
||||
} else if capsule.MimeMaj == "text" && capsule.MimeMin == "html" {
|
||||
capsule.Content, capsule.Links = ParseHTML(body, currentUrl)
|
||||
} else {
|
||||
capsule.Content = body
|
||||
}
|
||||
@ -408,6 +412,7 @@ func parseGemini(b, currentUrl string) (string, []string) {
|
||||
return strings.Join(splitContent[:outputIndex], "\n"), links
|
||||
}
|
||||
|
||||
|
||||
// HandleRelativeUrl provides link completion
|
||||
func HandleRelativeUrl(relLink, current string) (string, error) {
|
||||
base, err := url.Parse(current)
|
||||
|
138
gemini/html_parser.go
Normal file
138
gemini/html_parser.go
Normal file
@ -0,0 +1,138 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// tag groups the pieces of a single HTML element: its name, its attributes
// keyed by attribute name, and its text.
//
// NOTE(review): no function in this file, as shown, constructs or reads a
// tag value; confirm whether the type is still needed.
type tag struct {
	name       string
	attributes map[string]string
	text       string
}
|
||||
|
||||
// Parser working state. ParseHTML resets out, links and inHead and refills
// fields at the start of every call.
var (
	// fields holds the whitespace-separated tokens of the document being
	// parsed; the helper functions index into it.
	fields []string

	// out accumulates the rendered text.
	out strings.Builder

	// links collects link targets in the order they are found.
	links = make([]string, 0, 10)

	// inHead is cleared by ParseHTML.
	// NOTE(review): nothing in this file, as shown, reads it.
	inHead bool
)
|
||||
|
||||
func ParseHTML(body, currentURL string) (string, []string) {
|
||||
out.Reset()
|
||||
links = make([]string, 0, 10)
|
||||
inHead = false
|
||||
|
||||
body = strings.Replace(body, "<", " <", -1)
|
||||
body = strings.Replace(body, ">", "> ", -1)
|
||||
body = html.UnescapeString(body)
|
||||
fields = strings.Fields(body)
|
||||
|
||||
var text string
|
||||
for i := 0; i < len(fields); i++ {
|
||||
if strings.HasPrefix(fields[i], "</") {
|
||||
text = parseClose(i)
|
||||
} else if fields[i][0] == '<' {
|
||||
var newIndex int
|
||||
text, newIndex = parseTag(i, currentURL)
|
||||
i = newIndex
|
||||
} else {
|
||||
text = fields[i] + " "
|
||||
}
|
||||
if text != "" {
|
||||
out.WriteString(text)
|
||||
}
|
||||
text = ""
|
||||
}
|
||||
|
||||
return out.String(), links
|
||||
}
|
||||
|
||||
func skipToClose(tag string, i int) int {
|
||||
for ;i < len(fields) && fields[i] != tag; i++ {
|
||||
continue
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func parseTag(i int, currentURL string) (string, int) {
|
||||
out := ""
|
||||
hitClose := false
|
||||
tag := strings.Replace(fields[i], "<", "", 1)
|
||||
if len(tag) < 1 {
|
||||
i++
|
||||
tag = fields[i]
|
||||
}
|
||||
if strings.HasSuffix(tag, ">") && len(tag) >= 2 {
|
||||
tag = tag[:len(tag)-1]
|
||||
hitClose = true
|
||||
}
|
||||
tag = strings.ToLower(tag)
|
||||
MainSwitch:
|
||||
switch tag {
|
||||
case "head":
|
||||
i = skipToClose("</head>", i)
|
||||
case "script":
|
||||
i = skipToClose("</script>", i)
|
||||
case "h1", "h2", "h3":
|
||||
out = "\n\033[1m"
|
||||
case "li", "dt":
|
||||
out = "* "
|
||||
case "hr":
|
||||
out = "\n\n____________________________\n\n"
|
||||
case "blockquote":
|
||||
out = "\n> "
|
||||
case "p", "header", "nav", "footer", "aside", "div", "main", "article", "details", "summary", "ul", "ol", "dl", "h4", "h5", "h6":
|
||||
out = "\n"
|
||||
case "a", "img":
|
||||
target := "href="
|
||||
if tag == "img" {
|
||||
target = "src="
|
||||
}
|
||||
for ;i < len(fields) && !strings.HasPrefix(fields[i], target) ; i++ {
|
||||
if strings.HasSuffix(fields[i], ">") {
|
||||
hitClose = true
|
||||
break MainSwitch
|
||||
}
|
||||
}
|
||||
fields[i] = strings.Replace(fields[i], target, "", 1)
|
||||
fields[i] = strings.Replace(fields[i], "\"", "", -1)
|
||||
fields[i] = strings.Replace(fields[i], "'", "", -1)
|
||||
if strings.HasSuffix(fields[i], ">") {
|
||||
fields[i] = fields[i][:len(fields[i])-1]
|
||||
hitClose = true
|
||||
}
|
||||
link := fields[i]
|
||||
if strings.Index(link, "://") < 0 {
|
||||
link, _ = HandleRelativeUrl(link, currentURL)
|
||||
}
|
||||
|
||||
links = append(links, link)
|
||||
if tag == "img" {
|
||||
out = fmt.Sprintf("\n[%d]IMG\n", len(links))
|
||||
} else {
|
||||
out = fmt.Sprintf("[%d]", len(links))
|
||||
}
|
||||
}
|
||||
if !hitClose {
|
||||
for ;!strings.HasSuffix(fields[i], ">"); i++ {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return out, i
|
||||
}
|
||||
|
||||
|
||||
func parseClose(i int) string {
|
||||
tag := strings.Trim(fields[i], " \n\r\t<>/")
|
||||
switch tag {
|
||||
case "p", "header", "nav", "footer", "aside", "div", "main", "article", "details", "summary", "ul", "ol", "dl", "li", "dd", "blockquote", "h4", "h5", "h6":
|
||||
return "\n"
|
||||
case "dt":
|
||||
return " "
|
||||
case "h1", "h2", "h3":
|
||||
return "\033[0m\n"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user