130 lines
3.0 KiB
Go
130 lines
3.0 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
const gg string = "https://www.globalgreyebooks.com/category/ebooks/all-ebooks-page-%d.html"
|
|
|
|
func buildGGBookList() []string {
|
|
fmt.Println(" Gathering Global Grey book list")
|
|
list := make([]string, 0, 50)
|
|
for i := 1;;i++{
|
|
fmt.Print(" Page ", i, "\r")
|
|
body, end := getUrl(fmt.Sprintf(gg, i))
|
|
if end {
|
|
break
|
|
}
|
|
d, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
d.Find("aside.aside-books > a:first-child").Each(func(i int, s *goquery.Selection) {
|
|
href, exists := s.Attr("href")
|
|
if exists {
|
|
list = append(list, href)
|
|
}
|
|
})
|
|
|
|
var moreExists = false
|
|
|
|
d.Find(".page-nav .pagination > a").Each(func(i int, s *goquery.Selection) {
|
|
if s.Text() == "Last" {
|
|
moreExists = true
|
|
}
|
|
})
|
|
if !moreExists {
|
|
break
|
|
}
|
|
}
|
|
fmt.Println("")
|
|
return list
|
|
}
|
|
|
|
func getGGBookDoc(u string) doc {
|
|
body, fail := getUrl(u)
|
|
if fail {
|
|
return doc{}
|
|
}
|
|
d, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
var out doc
|
|
out.Subjects = make([]string, 0, 1)
|
|
out.Files = make([]fileSource, 0, 3)
|
|
|
|
out.Title = strings.TrimSpace(d.Find("h1").First().Text())
|
|
out.Author = strings.TrimSpace(d.Find("h1 + h2").First().Text())
|
|
d.Find("section.related > a > button").Each(func(i int, s *goquery.Selection) {
|
|
out.Subjects = append(out.Subjects, strings.ToLower(strings.TrimSpace(s.Text())))
|
|
})
|
|
var desc strings.Builder
|
|
d.Find("section.description > p").Each(func(i int, s *goquery.Selection) {
|
|
desc.WriteString(strings.TrimSpace(s.Text()))
|
|
desc.WriteString("\\n\\n")
|
|
})
|
|
out.Description = strings.TrimSpace(desc.String())
|
|
out.License = "Public Domain"
|
|
d.Find("section.downloads strong > a").Each(func(i int, s *goquery.Selection) {
|
|
var fs fileSource
|
|
href, exists := s.Attr("href")
|
|
if !exists {
|
|
return
|
|
}
|
|
if strings.Contains(href, "donate") {
|
|
return
|
|
}
|
|
fs.Url = href
|
|
ext := path.Ext(href)
|
|
if len(ext) > 1 {
|
|
ext = ext[1:]
|
|
}
|
|
fs.Format = strings.ToLower(ext)
|
|
|
|
out.Files = append(out.Files, fs)
|
|
})
|
|
return out
|
|
}
|
|
|
|
func globalGrey() {
|
|
fmt.Println("\033[1mGlobal Grey\033[0m")
|
|
t := time.Now().Format("2006/01/02 15:04PM")
|
|
bookList := buildGGBookList()
|
|
sd := sourceData{
|
|
"Global Grey",
|
|
t,
|
|
"https://www.globalgreyebooks.com",
|
|
make([]doc, 0, len(bookList)),
|
|
}
|
|
fmt.Println(" Gathering book data")
|
|
for i := range bookList {
|
|
fmt.Print( " ",i+1, " / ", len(bookList), "\r")
|
|
sd.Documents = append(sd.Documents, getGGBookDoc(bookList[i]))
|
|
}
|
|
fmt.Println("")
|
|
fmt.Println(" Marshaling JSON")
|
|
b, err := json.MarshalIndent(sd, "", " ")
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
fmt.Println(" Creating file")
|
|
f, err := os.Create("global-grey.json")
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
defer f.Close()
|
|
fmt.Println(" Writing file")
|
|
f.Write(b)
|
|
fmt.Println("Done.")
|
|
}
|
|
|