89 lines
2.2 KiB
Go
89 lines
2.2 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/csv"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Project Gutenberg endpoints and the author-name rewrite template.
const (
	// pgCsvURL is the address of the catalog CSV that retrieveCSV downloads.
	pgCsvURL string = "https://www.gutenberg.org/cache/epub/feeds/pg_catalog.csv"

	// pgRoot is the site root recorded alongside the generated source data.
	pgRoot string = "https://www.gutenberg.org"

	// pgDownload is a fmt layout for a download link; its three %s verbs are
	// filled with the book id, the book id again, and the file extension.
	pgDownload string = "https://www.gutenberg.org/cache/epub/%s/pg%s.%s"

	// pgReplacement is the regexp replacement template used with pgAuthorRE:
	// it emits capture group 2, a space, then capture group 1.
	pgReplacement string = `$2 $1`
)
|
|
|
|
var (
	// pgAuthorRE matches, per line, a leading "first-field, second-field"
	// pair separated by a comma and whitespace, capturing both fields and
	// swallowing whatever follows the second field on that line.
	pgAuthorRE = regexp.MustCompile(`(?m)^([^,]+),\s+([^,]+).*$`)

	// pgFormats lists the file extensions for which a download link is
	// generated per book.
	pgFormats = []string{"epub", "mobi", "txt", "html"}
)
|
|
|
|
|
|
func retrieveCSV() ([][]string, error) {
|
|
fmt.Println(" Gathering book cache")
|
|
body, err := getUrl(pgCsvURL)
|
|
if err {
|
|
return [][]string{}, fmt.Errorf("Error retrieving book cache from Project Gutenberg")
|
|
}
|
|
r := csv.NewReader(bytes.NewReader(body))
|
|
r.Read()
|
|
return r.ReadAll()
|
|
}
|
|
|
|
func projectGutenberg() {
|
|
fmt.Println("\033[1mStandard Ebooks\033[0m")
|
|
t := time.Now().Format("2006/01/02 15:04PM")
|
|
bookList, err := retrieveCSV()
|
|
if err != nil {
|
|
fmt.Fprintln(os.Stderr, err.Error())
|
|
return
|
|
}
|
|
sd := sourceData{
|
|
"Project Gutenberg",
|
|
t,
|
|
pgRoot,
|
|
make([]doc, 0, len(bookList)),
|
|
}
|
|
for i, ln := range bookList {
|
|
fmt.Printf("Book %d\r", i)
|
|
if len(ln) < 7 || ln[4] != "en" || ln[1] != "Text" {
|
|
continue
|
|
}
|
|
var b doc
|
|
b.Files = make([]fileSource, len(pgFormats))
|
|
b.Title = ln[3]
|
|
b.Author = pgAuthorRE.ReplaceAllString(ln[5], pgReplacement)
|
|
b.Subjects = strings.Split(ln[6], ";")
|
|
b.Description = "Not provided by source"
|
|
for i := range b.Subjects {
|
|
b.Subjects[i] = strings.TrimSpace(b.Subjects[i])
|
|
}
|
|
b.LastUpdate = ln[2]
|
|
for i := range b.Files {
|
|
var fs fileSource
|
|
fs.Format = pgFormats[i]
|
|
fs.Url = fmt.Sprintf(pgDownload, ln[0], ln[0], pgFormats[i])
|
|
b.Files[i] = fs
|
|
}
|
|
b.License = "Public Domain (USA)"
|
|
sd.Documents = append(sd.Documents, b)
|
|
}
|
|
|
|
fmt.Println("")
|
|
fmt.Println(" Marshaling JSON")
|
|
b, err := json.MarshalIndent(sd, "", " ")
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
fmt.Println(" Creating file")
|
|
f, err := os.Create("project-gutenberg.json")
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
defer f.Close()
|
|
fmt.Println(" Writing file")
|
|
f.Write(b)
|
|
fmt.Println("Done.")
|
|
}
|