Add tests for `<a>` tags and add a Config struct.
This commit is contained in:
parent
5fca9d4990
commit
2b9d54d182
|
@ -0,0 +1,2 @@
|
|||
tab_spaces = 2
|
||||
hard_tabs = true
|
98
src/lib.rs
98
src/lib.rs
|
@ -8,33 +8,79 @@ mod tests;
|
|||
|
||||
extern crate scraper;
|
||||
|
||||
use scraper::{Html, Selector, node::Node};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
pub fn convert(html_src: &str) -> String {
|
||||
let document = Html::parse_document(html_src);
|
||||
let selector = Selector::parse("h1, h2, h3, h4, h5, h6, p, br, a, img").unwrap();
|
||||
pub struct Config {
|
||||
empty_lines_before_h: usize,
|
||||
empty_lines_after_h: usize,
|
||||
|
||||
let empty_lines_after_h="\n\n";
|
||||
let empty_lines_after_p="\n\n";
|
||||
let empty_lines_after_a="\n\n";
|
||||
let br_lines="\n";
|
||||
empty_lines_before_p: usize,
|
||||
empty_lines_after_p: usize,
|
||||
|
||||
let mut gmi = String::new();
|
||||
for node in document.select(&selector) {
|
||||
let html = node.inner_html();
|
||||
match node.value().name() {
|
||||
"h1" => gmi.push_str(&format!("# {}{}", html, empty_lines_after_h)),
|
||||
"h2" => gmi.push_str(&format!("## {}{}", html, empty_lines_after_h)),
|
||||
"h3" => gmi.push_str(&format!("### {}{}", html, empty_lines_after_h)),
|
||||
"h4" => gmi.push_str(&format!("#### {}{}", html, empty_lines_after_h)),
|
||||
"h5" => gmi.push_str(&format!("##### {}{}", html, empty_lines_after_h)),
|
||||
"h6" => gmi.push_str(&format!("###### {}{}", html, empty_lines_after_h)),
|
||||
"p" => gmi.push_str(&format!("{}{}", html, empty_lines_after_p)),
|
||||
"br" => gmi.push_str(&format!("{}", br_lines)),
|
||||
"a" => gmi.push_str(&format!("=> {}{}", html, empty_lines_after_a)),
|
||||
"img" => gmi.push_str(&format!("=> {}{}", html, empty_lines_after_a)),
|
||||
tag => panic!("matched an unsupported tag: {}", tag),
|
||||
}
|
||||
}
|
||||
gmi
|
||||
empty_lines_before_a: usize,
|
||||
empty_lines_after_a: usize,
|
||||
|
||||
br_empty_lines: usize,
|
||||
|
||||
convert_internal_links_to_gmi: bool,
|
||||
}
|
||||
impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Config {
|
||||
empty_lines_before_h: 1,
|
||||
empty_lines_after_h: 0,
|
||||
|
||||
empty_lines_before_p: 1,
|
||||
empty_lines_after_p: 0,
|
||||
|
||||
empty_lines_before_a: 1,
|
||||
empty_lines_after_a: 0,
|
||||
|
||||
br_empty_lines: 1,
|
||||
|
||||
convert_internal_links_to_gmi: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Config {
|
||||
pub fn empty_lines_before_h_str(&self) -> String {
|
||||
(0..self.empty_lines_before_h).map(|i| "\n").collect()
|
||||
}
|
||||
}
|
||||
pub fn convert(html_src: &str) -> String {
|
||||
let document = Html::parse_document(html_src);
|
||||
let selector = Selector::parse("h1, h2, h3, h4, h5, h6, p, br, a, img").unwrap();
|
||||
|
||||
let empty_lines_before_h="\n";
|
||||
let empty_lines_after_h="";
|
||||
let empty_lines_before_p="\n";
|
||||
let empty_lines_after_p="";
|
||||
let empty_lines_before_a="\n";
|
||||
let empty_lines_after_a="";
|
||||
let br_lines="\n";
|
||||
|
||||
let mut gmi = String::new();
|
||||
for node in document.select(&selector) {
|
||||
let html = node.inner_html();
|
||||
match node.value().name() {
|
||||
"h1" => gmi.push_str(&format!("{}# {}\n{}", empty_lines_before_h, html, empty_lines_after_h)),
|
||||
"h2" => gmi.push_str(&format!("{}## {}\n{}", empty_lines_before_h, html, empty_lines_after_h)),
|
||||
"h3" => gmi.push_str(&format!("{}### {}\n{}", empty_lines_before_h, html, empty_lines_after_h)),
|
||||
"h4" => gmi.push_str(&format!("{}#### {}\n{}", empty_lines_before_h, html, empty_lines_after_h)),
|
||||
"h5" => gmi.push_str(&format!("{}##### {}\n{}", empty_lines_before_h, html, empty_lines_after_h)),
|
||||
"h6" => gmi.push_str(&format!("{}###### {}\n{}", empty_lines_before_h, html, empty_lines_after_h)),
|
||||
"p" => gmi.push_str(&format!("{}{}\n{}", empty_lines_before_p, html, empty_lines_after_p)),
|
||||
"br" => gmi.push_str(&format!("{}", br_lines)),
|
||||
"a" => {
|
||||
let link = node.value().attr("href").unwrap_or("");
|
||||
gmi.push_str(&format!("{}=> {} {}\n{}", empty_lines_before_a, link, html, empty_lines_after_a));
|
||||
}
|
||||
"img" => {
|
||||
let link = node.value().attr("src").unwrap_or("");
|
||||
gmi.push_str(&format!("{}=> {}\n{}", empty_lines_before_a, html, empty_lines_after_a));
|
||||
}
|
||||
tag => panic!("matched an unsupported tag: {}", tag),
|
||||
}
|
||||
}
|
||||
gmi
|
||||
}
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>empty</title>
|
||||
</head>
|
||||
<body>
|
||||
<a>Display</a>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>a tag with href attribute</title>
|
||||
</head>
|
||||
<body>
|
||||
<a href="https://gemini.circumlunar.space/">circumlunar space</a>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>a tag with href attribute</title>
|
||||
</head>
|
||||
<body>
|
||||
<a href="https://mydomain.space/article.html">article</a>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Multiple a tags</title>
|
||||
</head>
|
||||
<body>
|
||||
<a>Link Display</a>
|
||||
<a href="https://gemini.circumlunar.space/">circumlunar space</a>
|
||||
<a href="https://mydomain.space/article.html">article</a>
|
||||
</body>
|
||||
</html>
|
|
@ -8,58 +8,68 @@ use super::*;
|
|||
#[test]
|
||||
fn h1() {
|
||||
let html = include_str!("data/h1.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("# Header 1\n\n", gmi);
|
||||
assert_eq!("\n# Header 1\n", convert(html));
|
||||
}
|
||||
#[test]
|
||||
fn h2() {
|
||||
let html = include_str!("data/h2.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("## Header 2\n\n", gmi);
|
||||
assert_eq!("\n## Header 2\n", convert(html));
|
||||
}
|
||||
#[test]
|
||||
fn h3() {
|
||||
let html = include_str!("data/h3.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("### Header 3\n\n", gmi);
|
||||
assert_eq!("\n### Header 3\n", convert(html));
|
||||
}
|
||||
#[test]
|
||||
fn h4() {
|
||||
let html = include_str!("data/h4.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("#### Header 4\n\n", gmi);
|
||||
assert_eq!("\n#### Header 4\n", convert(html));
|
||||
}
|
||||
#[test]
|
||||
fn h5() {
|
||||
let html = include_str!("data/h5.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("##### Header 5\n\n", gmi);
|
||||
assert_eq!("\n##### Header 5\n", convert(html));
|
||||
}
|
||||
#[test]
|
||||
fn h6() {
|
||||
let html = include_str!("data/h6.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("###### Header 6\n\n", gmi);
|
||||
assert_eq!("\n###### Header 6\n", convert(html));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn p() {
|
||||
let html = include_str!("data/p.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("Paragraph\n\n", gmi);
|
||||
assert_eq!("\nParagraph\n", convert(html));
|
||||
}
|
||||
#[test]
|
||||
fn multiple_p() {
|
||||
let html = include_str!("data/multiple_p.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("Paragraph 1\n\nParagraph 2\n\nParagraph 3\n\n", gmi);
|
||||
let html = include_str!("data/p_multiple.html");
|
||||
assert_eq!("\nParagraph 1\n\nParagraph 2\n\nParagraph 3\n", convert(html));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn br() {
|
||||
let html = include_str!("data/br.html");
|
||||
let gmi = convert(html);
|
||||
assert_eq!("Paragraph 1\n\n\nParagraph 2\n\n\nParagraph 3\n\n", gmi);
|
||||
assert_eq!("\nParagraph 1\n\n\nParagraph 2\n\n\nParagraph 3\n", convert(html));
|
||||
}
|
||||
|
||||
|
||||
/// An `<a>` element without an `href` renders as a link line that carries only
/// the element's text.
#[test]
fn a() {
    let expected = "\n=> Display\n";
    let actual = convert(include_str!("data/a.html"));
    assert_eq!(expected, actual);
}
|
||||
/// An `<a>` element with an `href` renders as `=> URL TEXT`.
#[test]
fn a_href() {
    let expected = "\n=> https://gemini.circumlunar.space/ circumlunar space\n";
    let actual = convert(include_str!("data/a_href.html"));
    assert_eq!(expected, actual);
}
|
||||
/// A link whose target ends in `.html` is expected to come out pointing at the
/// matching `.gmi` file.
// NOTE(review): this relies on `convert` rewriting the extension — confirm that
// the rewrite is implemented before relying on this test passing.
#[test]
fn a_href_internal() {
    let expected = "\n=> https://mydomain.space/article.gmi article\n";
    let actual = convert(include_str!("data/a_href_internal.html"));
    assert_eq!(expected, actual);
}
|
||||
/// Placeholder for a document that contains several `<a>` elements.
///
/// Marked `#[ignore]` so the test run reports it as pending rather than as a
/// vacuous pass: the body has no live assertion yet.
#[test]
#[ignore = "not implemented: fixture and expected output still to be written"]
fn a_multiple() {
    // TODO: enable once the multi-link fixture and its expected output are settled.
    // let html = include_str!("data/a_href_multiple.html");
    // assert_eq!("\n=> Display\n", convert(html));
}
|
||||
|
|
Loading…
Reference in New Issue