Add tests for `<a>` tags and add a `Config` struct.

This commit is contained in:
chmod777 2021-02-26 10:51:56 +00:00
parent 5fca9d4990
commit 2b9d54d182
8 changed files with 142 additions and 46 deletions

2
rustfmt.toml Normal file
View File

@ -0,0 +1,2 @@
tab_spaces = 2
hard_tabs = true

View File

@ -8,33 +8,79 @@ mod tests;
extern crate scraper;
use scraper::{Html, Selector, node::Node};
use scraper::{Html, Selector};
pub fn convert(html_src: &str) -> String {
let document = Html::parse_document(html_src);
let selector = Selector::parse("h1, h2, h3, h4, h5, h6, p, br, a, img").unwrap();
pub struct Config {
empty_lines_before_h: usize,
empty_lines_after_h: usize,
let empty_lines_after_h="\n\n";
let empty_lines_after_p="\n\n";
let empty_lines_after_a="\n\n";
let br_lines="\n";
empty_lines_before_p: usize,
empty_lines_after_p: usize,
let mut gmi = String::new();
for node in document.select(&selector) {
let html = node.inner_html();
match node.value().name() {
"h1" => gmi.push_str(&format!("# {}{}", html, empty_lines_after_h)),
"h2" => gmi.push_str(&format!("## {}{}", html, empty_lines_after_h)),
"h3" => gmi.push_str(&format!("### {}{}", html, empty_lines_after_h)),
"h4" => gmi.push_str(&format!("#### {}{}", html, empty_lines_after_h)),
"h5" => gmi.push_str(&format!("##### {}{}", html, empty_lines_after_h)),
"h6" => gmi.push_str(&format!("###### {}{}", html, empty_lines_after_h)),
"p" => gmi.push_str(&format!("{}{}", html, empty_lines_after_p)),
"br" => gmi.push_str(&format!("{}", br_lines)),
"a" => gmi.push_str(&format!("=> {}{}", html, empty_lines_after_a)),
"img" => gmi.push_str(&format!("=> {}{}", html, empty_lines_after_a)),
tag => panic!("matched an unsupported tag: {}", tag),
}
}
gmi
empty_lines_before_a: usize,
empty_lines_after_a: usize,
br_empty_lines: usize,
convert_internal_links_to_gmi: bool,
}
impl Default for Config {
fn default() -> Self {
Config {
empty_lines_before_h: 1,
empty_lines_after_h: 0,
empty_lines_before_p: 1,
empty_lines_after_p: 0,
empty_lines_before_a: 1,
empty_lines_after_a: 0,
br_empty_lines: 1,
convert_internal_links_to_gmi: true,
}
}
}
impl Config {
	/// Returns the separator emitted before a heading: one `"\n"` for each
	/// of the `empty_lines_before_h` configured lines (an empty string when
	/// the count is zero).
	pub fn empty_lines_before_h_str(&self) -> String {
		"\n".repeat(self.empty_lines_before_h)
	}
}
/// Converts an HTML document to Gemini text ("gemtext").
///
/// The document is parsed and every `h1`–`h6`, `p`, `br`, `a` and `img`
/// element matched by the selector is rendered, in the order the selector
/// yields them:
///
/// * headings become a `#`-prefixed line, one `#` per heading level;
/// * paragraphs are emitted as their inner HTML;
/// * `br` emits a single newline;
/// * `a` becomes `=> <href> <inner html>`, or `=> <inner html>` when the
///   element has no `href` (or an empty one);
/// * `img` becomes `=> <src>`.
///
/// Headings, paragraphs and links are each preceded by one newline and
/// terminated by one newline.
///
/// NOTE(review): the spacing is hard-coded here and [`Config`] is not
/// consulted yet; `href` and `src` values are copied verbatim.
///
/// # Panics
///
/// Panics if the selector yields a tag that has no match arm below.
pub fn convert(html_src: &str) -> String {
	let document = Html::parse_document(html_src);
	let selector = Selector::parse("h1, h2, h3, h4, h5, h6, p, br, a, img").unwrap();

	let empty_lines_before_h = "\n";
	let empty_lines_after_h = "";

	let empty_lines_before_p = "\n";
	let empty_lines_after_p = "";

	let empty_lines_before_a = "\n";
	let empty_lines_after_a = "";

	let br_lines = "\n";

	let mut gmi = String::new();
	for node in document.select(&selector) {
		let html = node.inner_html();
		let tag = node.value().name();
		match tag {
			"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
				// The arm only matches "h" followed by one digit, so the
				// second byte is the heading level.
				let level = usize::from(tag.as_bytes()[1] - b'0');
				gmi.push_str(&format!(
					"{}{} {}\n{}",
					empty_lines_before_h,
					"#".repeat(level),
					html,
					empty_lines_after_h
				));
			}
			"p" => gmi.push_str(&format!(
				"{}{}\n{}",
				empty_lines_before_p, html, empty_lines_after_p
			)),
			"br" => gmi.push_str(br_lines),
			"a" => match node.value().attr("href") {
				Some(link) if !link.is_empty() => gmi.push_str(&format!(
					"{}=> {} {}\n{}",
					empty_lines_before_a, link, html, empty_lines_after_a
				)),
				// Without a target, emit only the label so the line does
				// not contain a doubled space after `=>`.
				_ => gmi.push_str(&format!(
					"{}=> {}\n{}",
					empty_lines_before_a, html, empty_lines_after_a
				)),
			},
			"img" => {
				// Use the image's `src` as the link target.
				let link = node.value().attr("src").unwrap_or("");
				gmi.push_str(&format!(
					"{}=> {}\n{}",
					empty_lines_before_a, link, empty_lines_after_a
				));
			}
			other => panic!("matched an unsupported tag: {}", other),
		}
	}
	gmi
}

9
src/tests/data/a.html Normal file
View File

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html>
<head>
<title>empty</title>
</head>
<body>
<a>Display</a>
</body>
</html>

View File

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html>
<head>
<title>a tag with href attribute</title>
</head>
<body>
<a href="https://gemini.circumlunar.space/">circumlunar space</a>
</body>
</html>

View File

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html>
<head>
<title>a tag with href attribute</title>
</head>
<body>
<a href="https://mydomain.space/article.html">article</a>
</body>
</html>

View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<title>Multiple a tags</title>
</head>
<body>
<a>Link Display</a>
<a href="https://gemini.circumlunar.space/">circumlunar space</a>
<a href="https://mydomain.space/article.html">article</a>
</body>
</html>

View File

@ -8,58 +8,68 @@ use super::*;
/// An `h1` element is rendered as a `# ` line preceded by one newline.
#[test]
fn h1() {
	let html = include_str!("data/h1.html");
	assert_eq!("\n# Header 1\n", convert(html));
}
/// An `h2` element is rendered as a `## ` line preceded by one newline.
#[test]
fn h2() {
	let html = include_str!("data/h2.html");
	assert_eq!("\n## Header 2\n", convert(html));
}
/// An `h3` element is rendered as a `### ` line preceded by one newline.
#[test]
fn h3() {
	let html = include_str!("data/h3.html");
	assert_eq!("\n### Header 3\n", convert(html));
}
/// An `h4` element is rendered as a `#### ` line preceded by one newline.
#[test]
fn h4() {
	let html = include_str!("data/h4.html");
	assert_eq!("\n#### Header 4\n", convert(html));
}
/// An `h5` element is rendered as a `##### ` line preceded by one newline.
#[test]
fn h5() {
	let html = include_str!("data/h5.html");
	assert_eq!("\n##### Header 5\n", convert(html));
}
/// An `h6` element is rendered as a `###### ` line preceded by one newline.
#[test]
fn h6() {
	let html = include_str!("data/h6.html");
	assert_eq!("\n###### Header 6\n", convert(html));
}
/// A single paragraph is rendered as its text, preceded by one newline and
/// terminated by one newline.
#[test]
fn p() {
	let html = include_str!("data/p.html");
	assert_eq!("\nParagraph\n", convert(html));
}
/// Consecutive paragraphs are each preceded by one newline, which leaves
/// exactly one empty line between them.
#[test]
fn multiple_p() {
	let html = include_str!("data/p_multiple.html");
	assert_eq!("\nParagraph 1\n\nParagraph 2\n\nParagraph 3\n", convert(html));
}
/// A `br` contributes one extra newline to the output around the paragraphs
/// of the fixture.
#[test]
fn br() {
	let html = include_str!("data/br.html");
	assert_eq!("\nParagraph 1\n\n\nParagraph 2\n\n\nParagraph 3\n", convert(html));
}
/// An `a` element without an `href` is expected to render as a link line
/// that carries only the element's text.
#[test]
fn a() {
	let gmi = convert(include_str!("data/a.html"));
	assert_eq!("\n=> Display\n", gmi);
}
/// An `a` element with an `href` is expected to render as
/// `=> <href> <text>`.
#[test]
fn a_href() {
	let gmi = convert(include_str!("data/a_href.html"));
	assert_eq!("\n=> https://gemini.circumlunar.space/ circumlunar space\n", gmi);
}
/// A link to an `.html` page is expected to be rewritten to the matching
/// `.gmi` page.
///
/// NOTE(review): `convert` in this file copies `href` verbatim, so the
/// rewrite asserted here does not appear to be implemented yet — confirm
/// whether this test is expected to fail for now.
#[test]
fn a_href_internal() {
	let gmi = convert(include_str!("data/a_href_internal.html"));
	assert_eq!("\n=> https://mydomain.space/article.gmi article\n", gmi);
}
#[test]
fn a_multiple() {
// TODO: placeholder — the assertions below are commented out, so this test
// currently passes without checking anything.
// let html = include_str!("data/a_href_multiple.html");
// assert_eq!("\n=> Display\n", convert(html));
}