Working gopher and http(s). Gemini to be added.
This commit is contained in:
commit
57c1ad018e
|
@ -0,0 +1 @@
|
|||
ncg
|
|
@ -0,0 +1,19 @@
|
|||
# ncg
|
||||
|
||||
Pass a URL, get a text rendering back. Supports http(s), gopher, and gemini.
|
||||
|
||||
## Run
|
||||
|
||||
```
|
||||
ncg http://some-address.tld
|
||||
```
|
||||
|
||||
## Build
|
||||
|
||||
```
|
||||
go build
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
FFSL
|
|
@ -0,0 +1,11 @@
|
|||
module git.rawtext.club/sloum/ncg
|
||||
|
||||
go 1.20
|
||||
|
||||
require (
|
||||
github.com/olekukonko/tablewriter v0.0.5
|
||||
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf
|
||||
golang.org/x/net v0.11.0
|
||||
)
|
||||
|
||||
require github.com/mattn/go-runewidth v0.0.9 // indirect
|
|
@ -0,0 +1,8 @@
|
|||
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
|
||||
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
|
||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
|
||||
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
|
||||
golang.org/x/net v0.11.0 h1:Gi2tvZIJyBtO9SDr1q9h5hEQCp/4L2RQ+ar0qjx2oNU=
|
||||
golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ=
|
|
@ -0,0 +1,658 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/olekukonko/tablewriter"
|
||||
"github.com/ssor/bom"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
const (
	// linkFormat matches an anchor element whose content is plain text (no
	// nested tags). Capture group 1 is the href value and capture group 2 is
	// the link text.
	linkFormat string = `<a[^>]*href="([^"]*)"[^>]*>([^<]*)<\/a>`

	// commentFormat matches one HTML comment; (?s) lets it span lines. The
	// body is matched lazily (.*?) so each comment is matched on its own. A
	// greedy match would run from the first "<!--" to the last "-->" and
	// swallow all page content in between.
	commentFormat string = `(?s)<!--.*?-->`
)
|
||||
|
||||
// Options provide toggles and overrides to control specific rendering behaviors.
|
||||
type Options struct {
|
||||
PrettyTables bool // Turns on pretty ASCII rendering for table elements.
|
||||
PrettyTablesOptions *PrettyTablesOptions // Configures pretty ASCII rendering for table elements.
|
||||
OmitLinks bool // Turns on omitting links
|
||||
TextOnly bool // Returns only plain text
|
||||
}
|
||||
|
||||
// PrettyTablesOptions overrides tablewriter behaviors
|
||||
type PrettyTablesOptions struct {
|
||||
AutoFormatHeader bool
|
||||
AutoWrapText bool
|
||||
ReflowDuringAutoWrap bool
|
||||
ColWidth int
|
||||
ColumnSeparator string
|
||||
RowSeparator string
|
||||
CenterSeparator string
|
||||
HeaderAlignment int
|
||||
FooterAlignment int
|
||||
Alignment int
|
||||
ColumnAlignment []int
|
||||
NewLine string
|
||||
HeaderLine bool
|
||||
RowLine bool
|
||||
AutoMergeCells bool
|
||||
Borders tablewriter.Border
|
||||
}
|
||||
|
||||
// NewPrettyTablesOptions creates PrettyTablesOptions with default settings
|
||||
func NewPrettyTablesOptions() *PrettyTablesOptions {
|
||||
return &PrettyTablesOptions{
|
||||
AutoFormatHeader: true,
|
||||
AutoWrapText: true,
|
||||
ReflowDuringAutoWrap: true,
|
||||
ColWidth: tablewriter.MAX_ROW_WIDTH,
|
||||
ColumnSeparator: tablewriter.COLUMN,
|
||||
RowSeparator: tablewriter.ROW,
|
||||
CenterSeparator: tablewriter.CENTER,
|
||||
HeaderAlignment: tablewriter.ALIGN_DEFAULT,
|
||||
FooterAlignment: tablewriter.ALIGN_DEFAULT,
|
||||
Alignment: tablewriter.ALIGN_DEFAULT,
|
||||
ColumnAlignment: []int{},
|
||||
NewLine: tablewriter.NEWLINE,
|
||||
HeaderLine: true,
|
||||
RowLine: false,
|
||||
AutoMergeCells: false,
|
||||
Borders: tablewriter.Border{Left: true, Right: true, Bottom: true, Top: true},
|
||||
}
|
||||
}
|
||||
|
||||
// FromHTMLNode renders text output from a pre-parsed HTML document.
|
||||
func FromHTMLNode(doc *html.Node, o ...Options) (string, error) {
|
||||
var options Options
|
||||
if len(o) > 0 {
|
||||
options = o[0]
|
||||
}
|
||||
|
||||
ctx := textifyTraverseContext{
|
||||
buf: bytes.Buffer{},
|
||||
options: options,
|
||||
}
|
||||
if err := ctx.traverse(doc); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// NOTE this was commended because it was interacting negatively
|
||||
// with pre blocks where a line might start with a space.
|
||||
//
|
||||
// text := strings.TrimSpace(newlineRe.ReplaceAllString(
|
||||
// strings.Replace(ctx.buf.String(), "\n ", "\n", -1), "\n\n"),
|
||||
// )
|
||||
text := ctx.buf.String()
|
||||
return text, nil
|
||||
}
|
||||
|
||||
// FromReader renders text output after parsing HTML for the specified
|
||||
// io.Reader.
|
||||
func FromReader(reader io.Reader, options ...Options) (string, error) {
|
||||
newReader, err := bom.NewReaderWithoutBom(reader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
doc, err := html.Parse(newReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return FromHTMLNode(doc, options...)
|
||||
}
|
||||
|
||||
// FromString parses HTML from the input string, then renders the text form.
|
||||
func FromString(input string, options ...Options) (string, error) {
|
||||
bs := bom.CleanBom([]byte(input))
|
||||
text, err := FromReader(bytes.NewReader(bs), options...)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return text, nil
|
||||
}
|
||||
|
||||
// spacingRe matches any run of spaces, carriage returns, newlines and tabs.
var spacingRe = regexp.MustCompile(`[ \r\n\t]+`)

// newlineRe matches two or more consecutive newlines.
var newlineRe = regexp.MustCompile(`\n\n+`)
|
||||
|
||||
// traverseTableCtx holds text-related context.
|
||||
type textifyTraverseContext struct {
|
||||
buf bytes.Buffer
|
||||
|
||||
prefix string
|
||||
tableCtx tableTraverseContext
|
||||
options Options
|
||||
endsWithSpace bool
|
||||
justClosedDiv bool
|
||||
blockquoteLevel int
|
||||
lineLength int
|
||||
isPre bool
|
||||
}
|
||||
|
||||
// tableTraverseContext holds the state needed to build the ASCII form of a
// table.
type tableTraverseContext struct {
	// header holds the rendered th cells.
	header []string
	// body holds one slice of rendered td cells per tr.
	body [][]string
	// footer holds the rendered td cells found inside tfoot.
	footer []string
	// tmpRow is the index into body of the row currently being filled.
	tmpRow int
	// isInFooter is set while the children of tfoot are traversed.
	isInFooter bool
}
|
||||
|
||||
func (tableCtx *tableTraverseContext) init() {
|
||||
tableCtx.body = [][]string{}
|
||||
tableCtx.header = []string{}
|
||||
tableCtx.footer = []string{}
|
||||
tableCtx.isInFooter = false
|
||||
tableCtx.tmpRow = 0
|
||||
}
|
||||
|
||||
func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
|
||||
ctx.justClosedDiv = false
|
||||
|
||||
// TODO add support for atom.Img, it should be: [I am the alt text | http://path/to/img]
|
||||
switch node.DataAtom {
|
||||
case atom.Br:
|
||||
return ctx.emit("\n")
|
||||
|
||||
case atom.H1, atom.H2, atom.H3:
|
||||
subCtx := textifyTraverseContext{}
|
||||
if err := subCtx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
str := strings.TrimSpace(subCtx.buf.String())
|
||||
if ctx.options.TextOnly {
|
||||
return ctx.emit(str + ".\n\n")
|
||||
}
|
||||
dividerLen := 0
|
||||
for _, line := range strings.Split(str, "\n") {
|
||||
if lineLen := len([]rune(line)); lineLen-1 > dividerLen {
|
||||
dividerLen = lineLen
|
||||
}
|
||||
}
|
||||
var divider string
|
||||
if node.DataAtom == atom.H1 {
|
||||
divider = strings.Repeat("=", dividerLen)
|
||||
} else if node.DataAtom == atom.H2 {
|
||||
divider = strings.Repeat("*", dividerLen)
|
||||
} else if node.DataAtom == atom.H3 {
|
||||
divider = strings.Repeat("-", dividerLen)
|
||||
} else {
|
||||
divider = strings.Repeat(".", dividerLen)
|
||||
}
|
||||
|
||||
if node.DataAtom != atom.H1 && node.DataAtom != atom.H2 {
|
||||
return ctx.emit("\n\n" + str + "\n" + divider + "\n\n")
|
||||
}
|
||||
return ctx.emit("\n\n" + divider + "\n" + str + "\n" + divider + "\n\n")
|
||||
|
||||
case atom.Blockquote:
|
||||
ctx.blockquoteLevel++
|
||||
if !ctx.options.TextOnly {
|
||||
ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) + " "
|
||||
}
|
||||
if err := ctx.emit("\n"); err != nil {
|
||||
return err
|
||||
}
|
||||
if ctx.blockquoteLevel == 1 {
|
||||
if err := ctx.emit("\n"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
ctx.blockquoteLevel--
|
||||
if !ctx.options.TextOnly {
|
||||
ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel)
|
||||
}
|
||||
if ctx.blockquoteLevel > 0 {
|
||||
ctx.prefix += " "
|
||||
}
|
||||
return ctx.emit("\n\n")
|
||||
|
||||
case atom.Div:
|
||||
if ctx.lineLength > 0 {
|
||||
if err := ctx.emit("\n"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
var err error
|
||||
if !ctx.justClosedDiv {
|
||||
err = ctx.emit("\n")
|
||||
}
|
||||
ctx.justClosedDiv = true
|
||||
return err
|
||||
|
||||
case atom.Li:
|
||||
if !ctx.options.TextOnly {
|
||||
if err := ctx.emit("* "); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return ctx.emit("\n")
|
||||
|
||||
case atom.Dt:
|
||||
if !ctx.options.TextOnly {
|
||||
if err := ctx.emit("* "); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return ctx.emit("\n")
|
||||
|
||||
case atom.Dd:
|
||||
if !ctx.options.TextOnly {
|
||||
if err := ctx.emit("\t "); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return ctx.emit("\n")
|
||||
|
||||
case atom.B, atom.Strong:
|
||||
subCtx := textifyTraverseContext{}
|
||||
subCtx.endsWithSpace = true
|
||||
if err := subCtx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
str := subCtx.buf.String()
|
||||
if ctx.options.TextOnly {
|
||||
return ctx.emit(str + ".")
|
||||
}
|
||||
return ctx.emit("*" + str + "*")
|
||||
|
||||
case atom.A:
|
||||
linkText := ""
|
||||
// For simple link element content with single text node only, peek at the link text.
|
||||
if node.FirstChild != nil && node.FirstChild.NextSibling == nil && node.FirstChild.Type == html.TextNode {
|
||||
linkText = node.FirstChild.Data
|
||||
}
|
||||
|
||||
// If image is the only child, take its alt text as the link text.
|
||||
if img := node.FirstChild; img != nil && node.LastChild == img && img.DataAtom == atom.Img {
|
||||
if altText := getAttrVal(img, "alt"); altText != "" {
|
||||
if err := ctx.emit(altText); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
hrefLink := ""
|
||||
if attrVal := getAttrVal(node, "href"); attrVal != "" {
|
||||
attrVal = ctx.normalizeHrefLink(attrVal)
|
||||
// Don't print link href if it matches link element content or if the link is empty.
|
||||
if (attrVal != "" && linkText != attrVal) && !ctx.options.OmitLinks && !ctx.options.TextOnly {
|
||||
hrefLink = "( " + attrVal + " )"
|
||||
}
|
||||
}
|
||||
|
||||
return ctx.emit(hrefLink)
|
||||
|
||||
case atom.P, atom.Ul, atom.Dl:
|
||||
return ctx.paragraphHandler(node)
|
||||
|
||||
case atom.Table, atom.Tfoot, atom.Th, atom.Tr, atom.Td:
|
||||
if ctx.options.PrettyTables {
|
||||
return ctx.handleTableElement(node)
|
||||
} else if node.DataAtom == atom.Table {
|
||||
return ctx.paragraphHandler(node)
|
||||
}
|
||||
return ctx.traverseChildren(node)
|
||||
|
||||
case atom.Pre:
|
||||
ctx.isPre = true
|
||||
err := ctx.traverseChildren(node)
|
||||
ctx.isPre = false
|
||||
return err
|
||||
|
||||
case atom.Style, atom.Script, atom.Head:
|
||||
// Ignore the subtree.
|
||||
return nil
|
||||
|
||||
default:
|
||||
return ctx.traverseChildren(node)
|
||||
}
|
||||
}
|
||||
|
||||
// paragraphHandler renders node children surrounded by double newlines.
|
||||
func (ctx *textifyTraverseContext) paragraphHandler(node *html.Node) error {
|
||||
if err := ctx.emit("\n\n"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
return ctx.emit("\n\n")
|
||||
}
|
||||
|
||||
// handleTableElement is only to be invoked when options.PrettyTables is active.
|
||||
func (ctx *textifyTraverseContext) handleTableElement(node *html.Node) error {
|
||||
if !ctx.options.PrettyTables {
|
||||
panic("handleTableElement invoked when PrettyTables not active")
|
||||
}
|
||||
|
||||
switch node.DataAtom {
|
||||
case atom.Table:
|
||||
if err := ctx.emit("\n\n"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Re-intialize all table context.
|
||||
ctx.tableCtx.init()
|
||||
|
||||
// Browse children, enriching context with table data.
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
table := tablewriter.NewWriter(buf)
|
||||
if ctx.options.PrettyTablesOptions != nil {
|
||||
options := ctx.options.PrettyTablesOptions
|
||||
table.SetAutoFormatHeaders(options.AutoFormatHeader)
|
||||
table.SetAutoWrapText(options.AutoWrapText)
|
||||
table.SetReflowDuringAutoWrap(options.ReflowDuringAutoWrap)
|
||||
table.SetColWidth(options.ColWidth)
|
||||
table.SetColumnSeparator(options.ColumnSeparator)
|
||||
table.SetRowSeparator(options.RowSeparator)
|
||||
table.SetCenterSeparator(options.CenterSeparator)
|
||||
table.SetHeaderAlignment(options.HeaderAlignment)
|
||||
table.SetFooterAlignment(options.FooterAlignment)
|
||||
table.SetAlignment(options.Alignment)
|
||||
table.SetColumnAlignment(options.ColumnAlignment)
|
||||
table.SetNewLine(options.NewLine)
|
||||
table.SetHeaderLine(options.HeaderLine)
|
||||
table.SetRowLine(options.RowLine)
|
||||
table.SetAutoMergeCells(options.AutoMergeCells)
|
||||
table.SetBorders(options.Borders)
|
||||
}
|
||||
table.SetHeader(ctx.tableCtx.header)
|
||||
table.SetFooter(ctx.tableCtx.footer)
|
||||
table.AppendBulk(ctx.tableCtx.body)
|
||||
|
||||
// Render the table using ASCII.
|
||||
table.Render()
|
||||
if err := ctx.emit(buf.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return ctx.emit("\n\n")
|
||||
|
||||
case atom.Tfoot:
|
||||
ctx.tableCtx.isInFooter = true
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
ctx.tableCtx.isInFooter = false
|
||||
|
||||
case atom.Tr:
|
||||
ctx.tableCtx.body = append(ctx.tableCtx.body, []string{})
|
||||
if err := ctx.traverseChildren(node); err != nil {
|
||||
return err
|
||||
}
|
||||
ctx.tableCtx.tmpRow++
|
||||
|
||||
case atom.Th:
|
||||
res, err := ctx.renderEachChild(node)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ctx.tableCtx.header = append(ctx.tableCtx.header, res)
|
||||
|
||||
case atom.Td:
|
||||
res, err := ctx.renderEachChild(node)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if ctx.tableCtx.isInFooter {
|
||||
ctx.tableCtx.footer = append(ctx.tableCtx.footer, res)
|
||||
} else {
|
||||
ctx.tableCtx.body[ctx.tableCtx.tmpRow] = append(ctx.tableCtx.body[ctx.tableCtx.tmpRow], res)
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ctx *textifyTraverseContext) traverse(node *html.Node) error {
|
||||
switch node.Type {
|
||||
default:
|
||||
return ctx.traverseChildren(node)
|
||||
|
||||
case html.TextNode:
|
||||
var data string
|
||||
if ctx.isPre {
|
||||
data = node.Data
|
||||
// TODO something is happening that is weird here.
|
||||
// It looks good as data here, but something happens with spacing that
|
||||
// just isnt right... not sure what it is
|
||||
} else {
|
||||
data = strings.TrimSpace(spacingRe.ReplaceAllString(node.Data, " "))
|
||||
}
|
||||
return ctx.emit(data)
|
||||
|
||||
case html.ElementNode:
|
||||
return ctx.handleElement(node)
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *textifyTraverseContext) traverseChildren(node *html.Node) error {
|
||||
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := ctx.traverse(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ctx *textifyTraverseContext) emit(data string) error {
|
||||
if data == "" {
|
||||
return nil
|
||||
}
|
||||
var (
|
||||
lines = ctx.breakLongLines(data)
|
||||
err error
|
||||
)
|
||||
for _, line := range lines {
|
||||
runes := []rune(line)
|
||||
startsWithSpace := unicode.IsSpace(runes[0])
|
||||
if !startsWithSpace && !ctx.endsWithSpace && !strings.HasPrefix(data, ".") {
|
||||
if err = ctx.buf.WriteByte(' '); err != nil {
|
||||
return err
|
||||
}
|
||||
ctx.lineLength++
|
||||
}
|
||||
ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1])
|
||||
for _, c := range line {
|
||||
if _, err = ctx.buf.WriteString(string(c)); err != nil {
|
||||
return err
|
||||
}
|
||||
ctx.lineLength++
|
||||
if c == '\n' {
|
||||
ctx.lineLength = 0
|
||||
if ctx.prefix != "" {
|
||||
if _, err = ctx.buf.WriteString(ctx.prefix); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const maxLineLen = 74
|
||||
|
||||
func (ctx *textifyTraverseContext) breakLongLines(data string) []string {
|
||||
// Only break lines when in blockquotes.
|
||||
if ctx.blockquoteLevel == 0 {
|
||||
return []string{data}
|
||||
}
|
||||
var (
|
||||
ret = []string{}
|
||||
runes = []rune(data)
|
||||
l = len(runes)
|
||||
existing = ctx.lineLength
|
||||
)
|
||||
if existing >= maxLineLen {
|
||||
ret = append(ret, "\n")
|
||||
existing = 0
|
||||
}
|
||||
for l+existing > maxLineLen {
|
||||
i := maxLineLen - existing
|
||||
for i >= 0 && !unicode.IsSpace(runes[i]) {
|
||||
i--
|
||||
}
|
||||
if i == -1 {
|
||||
// No spaces, so go the other way.
|
||||
i = maxLineLen - existing
|
||||
for i < l && !unicode.IsSpace(runes[i]) {
|
||||
i++
|
||||
}
|
||||
}
|
||||
ret = append(ret, string(runes[:i])+"\n")
|
||||
for i < l && unicode.IsSpace(runes[i]) {
|
||||
i++
|
||||
}
|
||||
runes = runes[i:]
|
||||
l = len(runes)
|
||||
existing = 0
|
||||
}
|
||||
if len(runes) > 0 {
|
||||
ret = append(ret, string(runes))
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (ctx *textifyTraverseContext) normalizeHrefLink(link string) string {
|
||||
link = strings.TrimSpace(link)
|
||||
link = strings.TrimPrefix(link, "mailto:")
|
||||
return link
|
||||
}
|
||||
|
||||
// renderEachChild visits each direct child of a node and collects the sequence of
|
||||
// textuual representaitons separated by a single newline.
|
||||
func (ctx *textifyTraverseContext) renderEachChild(node *html.Node) (string, error) {
|
||||
buf := &bytes.Buffer{}
|
||||
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
||||
s, err := FromHTMLNode(c, ctx.options)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if _, err = buf.WriteString(s); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if c.NextSibling != nil {
|
||||
if err = buf.WriteByte('\n'); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
func getAttrVal(node *html.Node, attrName string) string {
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == attrName {
|
||||
return attr.Val
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// urlList is an ordered collection of link targets.
type urlList struct {
	l []string
}

// String formats the list as a "References >>" section with one numbered
// entry per line, numbering from 1.
func (u urlList) String() string {
	var out strings.Builder
	out.WriteString("\n\nReferences >>\n")
	for idx := range u.l {
		fmt.Fprintf(&out, "%d. %s\n", idx+1, u.l[idx])
	}
	return out.String()
}

// Append adds s to the end of the list.
func (u *urlList) Append(s string) {
	u.l = append(u.l, s)
}

// NewUrlList returns an empty list with room for initCap entries. Capacities
// below 5 are raised to 5.
func NewUrlList(initCap int) urlList {
	capacity := 5
	if initCap > capacity {
		capacity = initCap
	}
	return urlList{l: make([]string, 0, capacity)}
}
|
||||
|
||||
func Render(ur string) (string, error) {
|
||||
u, err := url.Parse(ur)
|
||||
if err != nil {
|
||||
return "", errors.Join(errors.New("Could not parse url"),err)
|
||||
}
|
||||
page, err := getURL(u)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
commentRe := regexp.MustCompile(commentFormat)
|
||||
page = commentRe.ReplaceAllString(page, ``)
|
||||
uList := NewUrlList(-1)
|
||||
linkRe := regexp.MustCompile(linkFormat)
|
||||
for _, s := range linkRe.FindAllStringSubmatch(page, -1) {
|
||||
page = strings.Replace(page, s[0], fmt.Sprintf(`%s[%d]`, s[2], len(uList.l)+1), -1)
|
||||
href := s[1]
|
||||
if !strings.Contains(href, "://") {
|
||||
href = u.JoinPath(s[1]).String()
|
||||
}
|
||||
uList.Append(href)
|
||||
}
|
||||
plain, _ := FromString(page)
|
||||
return fmt.Sprintf("%s\n\n%s", plain, uList.String()), nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Fprintf(os.Stderr, "Expected a URL as an argument\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
s, err := Render(os.Args[1])
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s\n", err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Println(s)
|
||||
}
|
|
@ -0,0 +1,222 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func getURL(u *url.URL) (string, error) {
|
||||
switch u.Scheme {
|
||||
case "http", "https":
|
||||
resp, err := http.Get(u.String())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(body), nil
|
||||
case "gemini":
|
||||
status, meta, resp := GeminiRequest(u, 0)
|
||||
if meta == "error" {
|
||||
return "", errors.New(resp)
|
||||
} else if status == 1 {
|
||||
/*
|
||||
TODO
|
||||
- Query user, using meta as prompt
|
||||
- Rerequest at same address, but with prompt response as querystring
|
||||
- Return the value of the rerequest
|
||||
*/
|
||||
} else if status == 2 {
|
||||
meta = strings.ToLower(meta)
|
||||
if strings.Contains(meta, "html") {
|
||||
return resp, nil
|
||||
} else if strings.Contains(meta, "gemini") {
|
||||
return gem2html(resp), nil
|
||||
} else if strings.Contains(meta, "markdown") || strings.HasSuffix(u.String(), ".md") {
|
||||
return md2html(resp), nil
|
||||
} else {
|
||||
return "<html><body><pre>"+resp+"</pre></body></html>", nil
|
||||
}
|
||||
} else {
|
||||
return fmt.Sprintf(`<html><body><h1>%d Error (Gemini)</h1><p>%s</p></body></html>`, status, resp), nil
|
||||
}
|
||||
case "gopher":
|
||||
if u.Port() == "" {
|
||||
u.Host = u.Host + ":70"
|
||||
}
|
||||
conn, err := net.Dial("tcp", u.Host)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer conn.Close()
|
||||
gType := "1"
|
||||
p := u.Path
|
||||
if len(u.Path) < 2 {
|
||||
p = "/" + "\n"
|
||||
} else {
|
||||
gType = p[1:2]
|
||||
p = p[2:] + "\n"
|
||||
}
|
||||
_, err = conn.Write([]byte(p))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
resp, err := io.ReadAll(conn)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
switch gType {
|
||||
case "0":
|
||||
return `<html><body><pre>`+string(resp)+`</pre></body></html>`, nil
|
||||
case "1":
|
||||
return gopher2html(string(resp)), nil
|
||||
default:
|
||||
return "", errors.New("Unsupported gopher type")
|
||||
}
|
||||
default:
|
||||
return "", fmt.Errorf("Unsupported URL scheme: %s\n", u.Scheme)
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// gopherLine is one parsed line of a gopher menu.
type gopherLine struct {
	url   string // gopher URL built from the host, port, type and selector fields
	text  string // display string
	gType rune   // item type character (first byte of the line)
}

// splitGopherLine parses one tab-separated gopher menu line. The URL is left
// empty when the line does not carry all of the selector, host and port
// fields.
func splitGopherLine(l string) gopherLine {
	var out gopherLine
	parts := strings.Split(l, "\t")

	if len(parts[0]) > 0 {
		out.gType = rune(parts[0][0])
		out.text = strings.TrimRight(parts[0][1:], "\n\r ")
	}
	// parts: display, selector, host, port.
	if len(parts) >= 4 {
		out.url = "gopher://" + parts[2] + ":" + parts[3] + "/" + string(out.gType) + parts[1]
	}
	return out
}

// gopher2html converts a gopher menu to an HTML document. Consecutive info
// lines (type 'i') are grouped into a pre block, and every other line becomes
// a link followed by a line break. Lines that carry no usable URL are shown as
// plain text. Blank lines are skipped and conversion stops at the "." line.
func gopher2html(s string) string {
	esc := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")

	var b strings.Builder
	b.WriteString(`<html><body>`)
	inPre := false
	for _, line := range strings.Split(s, "\n") {
		line = strings.TrimSpace(line)
		if line == "." {
			break
		}
		if line == "" {
			continue
		}

		gl := splitGopherLine(line)
		if gl.gType == 'i' {
			if !inPre {
				b.WriteString(`<pre>`)
				inPre = true
			}
			b.WriteString(esc.Replace(gl.text))
			b.WriteByte('\n')
			continue
		}

		if inPre {
			b.WriteString(`</pre>`)
			inPre = false
		}
		if gl.url == "" {
			b.WriteString(esc.Replace(gl.text))
			b.WriteString(`<br>`)
			continue
		}
		// A double quote would end the href attribute early.
		href := strings.ReplaceAll(gl.url, `"`, "%22")
		fmt.Fprintf(&b, `<a href="%s">%s</a><br>`, href, esc.Replace(gl.text))
	}
	if inPre {
		b.WriteString(`</pre>`)
	}
	b.WriteString(`</body></html>`)
	return b.String()
}
|
||||
|
||||
// gem2html converts a text/gemini document to HTML, line by line:
//
//	```        toggles a pre block; lines inside are copied verbatim
//	=> URL T   becomes a link to URL with text T (or the URL when T is empty)
//	#, ##, ### become h1, h2 and h3
//	* item     becomes a list item; consecutive items share one ul
//	> text     becomes a blockquote
//	other      non-blank lines become paragraphs
//
// All text content is HTML-escaped.
func gem2html(s string) string {
	esc := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")

	var b strings.Builder
	b.WriteString(`<html><body>`)
	inPre, inList := false, false

	for _, line := range strings.Split(s, "\n") {
		line = strings.TrimSuffix(line, "\r")

		if strings.HasPrefix(line, "```") {
			if inList {
				b.WriteString(`</ul>`)
				inList = false
			}
			if inPre {
				b.WriteString(`</pre>`)
			} else {
				b.WriteString(`<pre>`)
			}
			inPre = !inPre
			continue
		}
		if inPre {
			b.WriteString(esc.Replace(line))
			b.WriteByte('\n')
			continue
		}

		isItem := strings.HasPrefix(line, "* ")
		if inList && !isItem {
			b.WriteString(`</ul>`)
			inList = false
		}

		switch {
		case isItem:
			if !inList {
				b.WriteString(`<ul>`)
				inList = true
			}
			b.WriteString(`<li>` + esc.Replace(strings.TrimSpace(line[2:])) + `</li>`)
		case strings.HasPrefix(line, "=>"):
			fields := strings.Fields(line[2:])
			if len(fields) == 0 {
				continue
			}
			target := fields[0]
			label := strings.Join(fields[1:], " ")
			if label == "" {
				label = target
			}
			// A double quote would end the href attribute early.
			b.WriteString(`<a href="` + strings.ReplaceAll(target, `"`, "%22") + `">` + esc.Replace(label) + `</a><br>`)
		case strings.HasPrefix(line, "###"):
			b.WriteString(`<h3>` + esc.Replace(strings.TrimSpace(line[3:])) + `</h3>`)
		case strings.HasPrefix(line, "##"):
			b.WriteString(`<h2>` + esc.Replace(strings.TrimSpace(line[2:])) + `</h2>`)
		case strings.HasPrefix(line, "#"):
			b.WriteString(`<h1>` + esc.Replace(strings.TrimSpace(line[1:])) + `</h1>`)
		case strings.HasPrefix(line, ">"):
			b.WriteString(`<blockquote>` + esc.Replace(strings.TrimSpace(line[1:])) + `</blockquote>`)
		case strings.TrimSpace(line) == "":
			// Blank lines only separate blocks.
		default:
			b.WriteString(`<p>` + esc.Replace(line) + `</p>`)
		}
	}

	if inPre {
		b.WriteString(`</pre>`)
	}
	if inList {
		b.WriteString(`</ul>`)
	}
	b.WriteString(`</body></html>`)
	return b.String()
}
|
||||
|
||||
// md2html returns an HTML document for the Markdown source s.
//
// Markdown is not converted yet. The source is shown verbatim in an escaped
// pre block so that its line structure survives the HTML-to-text rendering.
// TODO: replace with a real Markdown conversion.
func md2html(s string) string {
	esc := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	return "<html><body><pre>" + esc.Replace(s) + "</pre></body></html>"
}
|
||||
|
||||
// GeminiRequest requests u over the gemini protocol and returns the
// single-digit status class, the meta field of the response header and the
// response body.
//
// Transport and protocol failures are reported with meta set to "error" and
// the body holding a description. For non-success statuses the body is
// replaced by a description of the failure, or by the prompt for status 1.
// Redirects (status 3) are followed, up to 10 of them, and relative targets
// are resolved against u. u.Host gains the default port 1965 when u carries
// none.
func GeminiRequest(u *url.URL, redirectCount int) (int, string, string) {
	if redirectCount >= 10 {
		return 3, "error", "Too many redirects"
	}
	if u.Port() == "" {
		u.Host = u.Host + ":1965"
	}

	// NOTE(review): certificate verification is disabled here. Presumably
	// this is because gemini servers commonly use self-signed certificates;
	// confirm whether certificate pinning should be added.
	conf := &tls.Config{
		MinVersion:         tls.VersionTLS12,
		InsecureSkipVerify: true,
	}
	conn, err := tls.Dial("tcp", u.Host, conf)
	if err != nil {
		return -1, "error", err.Error()
	}
	defer conn.Close()

	if _, err = conn.Write([]byte(u.String() + "\r\n")); err != nil {
		return -1, "error", err.Error()
	}
	res, err := io.ReadAll(conn)
	if err != nil {
		return -1, "error", err.Error()
	}

	// The response is a header line, CRLF, then the body.
	resp := strings.SplitN(string(res), "\r\n", 2)
	if len(resp) != 2 {
		return -1, "error", "Invalid response from server"
	}

	// The header is a two-character status, a separator, then the meta field.
	header := strings.SplitN(resp[0], " ", 2)
	if len([]rune(header[0])) != 2 {
		header = strings.SplitN(resp[0], "\t", 2)
		if len([]rune(header[0])) != 2 {
			return -1, "error", "Invalid response format from server"
		}
	}
	// The meta field may be missing altogether.
	meta := ""
	if len(header) == 2 {
		meta = header[1]
	}

	// Get status code single digit form
	status, err := strconv.Atoi(string(header[0][0]))
	if err != nil {
		return -1, "error", "Invalid status response from server"
	}

	body := resp[1]
	switch status {
	case 2:
		// Success: the body is returned as received.
	case 1:
		body = meta
	case 3:
		// Resolving against u handles both absolute and relative targets.
		newURL, perr := u.Parse(meta)
		if perr != nil {
			body = "Redirect attempted to invalid URL"
			break
		}
		if newURL.Scheme != "gemini" {
			return 3, "error", fmt.Sprintf("Redirect to unsupported scheme: %s", newURL.Scheme)
		}
		return GeminiRequest(newURL, redirectCount+1)
	case 4:
		body = fmt.Sprintf("Temporary failure; %s", meta)
	case 5:
		body = fmt.Sprintf("Permanent failure; %s", meta)
	case 6:
		body = "Client certificate required (unsupported by 'net-get')"
	default:
		body = "Invalid response status from server"
	}
	return status, meta, body
}
|
||||
|
Loading…
Reference in New Issue