2019-10-23 05:02:32 +00:00
|
|
|
package http
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2019-10-26 03:06:13 +00:00
|
|
|
"io/ioutil"
|
|
|
|
"net/http"
|
2019-10-23 05:02:32 +00:00
|
|
|
"os/exec"
|
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
|
|
|
// page is the result of splitting a lynx text dump (see parseLinks) into the
// page text and the link targets listed after it.
type page struct {
	// Content is the dump text that precedes the trailing "References"
	// heading, or the entire dump when no such heading is found.
	Content string
	// Links holds the link targets taken from the lines after the
	// "References" heading, in the order they were listed.
	Links []string
}
|
|
|
|
|
|
|
|
func Visit(url string, width int) (page, error) {
|
|
|
|
if width > 80 {
|
|
|
|
width = 80
|
|
|
|
}
|
|
|
|
w := fmt.Sprintf("-width=%d", width)
|
|
|
|
c, err := exec.Command("lynx", "-dump", w, url).Output()
|
|
|
|
if err != nil {
|
|
|
|
return page{}, err
|
|
|
|
}
|
|
|
|
return parseLinks(string(c)), nil
|
|
|
|
}
|
|
|
|
|
2019-10-26 03:06:13 +00:00
|
|
|
// IsTextFile reports whether url is served with a text media type.
//
// It runs `lynx -dump -head` to obtain the response headers and returns true
// when one of them is a Content-Type header naming a text/* media type. It
// returns false when url begins with "-", when lynx fails, or when no such
// header is present.
func IsTextFile(url string) bool {
	// An argument that starts with "-" is read by lynx as an option rather
	// than as the URL to load, so refuse it instead of running the command.
	if strings.HasPrefix(url, "-") {
		return false
	}

	out, err := exec.Command("lynx", "-dump", "-head", url).Output()
	if err != nil {
		return false
	}
	return hasTextContentType(string(out))
}

// hasTextContentType reports whether any line of headers is a Content-Type
// header whose media type starts with "text/". Matching is case-insensitive.
func hasTextContentType(headers string) bool {
	const name = "content-type:"

	for _, line := range strings.Split(headers, "\n") {
		// TrimSpace also removes the "\r" left behind by CRLF line endings.
		line = strings.ToLower(strings.TrimSpace(line))
		if !strings.HasPrefix(line, name) {
			continue
		}
		// Require the "text/" type prefix: a bare substring search would
		// also accept types such as application/vnd.oasis.opendocument.text.
		if strings.HasPrefix(strings.TrimSpace(line[len(name):]), "text/") {
			return true
		}
	}
	return false
}
|
|
|
|
|
2019-10-23 05:02:32 +00:00
|
|
|
func parseLinks(c string) page {
|
|
|
|
var out page
|
|
|
|
contentUntil := strings.LastIndex(c, "References")
|
|
|
|
if contentUntil >= 1 {
|
|
|
|
out.Content = c[:contentUntil]
|
|
|
|
} else {
|
|
|
|
out.Content = c
|
|
|
|
out.Links = make([]string, 0)
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
links := c[contentUntil+11:]
|
|
|
|
links = strings.TrimSpace(links)
|
|
|
|
linkSlice := strings.Split(links, "\n")
|
|
|
|
out.Links = make([]string, 0, len(linkSlice))
|
|
|
|
for _, link := range linkSlice {
|
|
|
|
ls := strings.SplitN(link, ".", 2)
|
|
|
|
if len(ls) < 2 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
out.Links = append(out.Links, strings.TrimSpace(ls[1]))
|
|
|
|
}
|
|
|
|
return out
|
|
|
|
|
|
|
|
}
|
2019-10-26 03:06:13 +00:00
|
|
|
|
|
|
|
// Fetch downloads url with an HTTP GET and returns the complete response
// body.
//
// It returns an error when the request fails, when the server answers with a
// status outside the 2xx range, or when the body cannot be read in full.
// Transport and read errors are returned unchanged. The returned slice is nil
// whenever the error is non-nil.
func Fetch(url string) ([]byte, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// An error page is not the requested resource; report the status instead
	// of handing the error body back as if it were the file.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("fetch %s: unexpected status %s", url, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return body, nil
}
|
|
|
|
|