atom: allow excluding pages from feed with regex

This commit is contained in:
xfnw 2024-06-20 22:02:25 -04:00
parent 06d5d0f390
commit 0d7ce98a1b
4 changed files with 54 additions and 16 deletions

39
Cargo.lock generated
View File

@@ -2,6 +2,15 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anstyle"
version = "1.0.7"
@@ -76,6 +85,7 @@ dependencies = [
"git2",
"html-escaper",
"orgize",
"regex",
"rowan",
"serde",
"serde_derive",
@@ -534,6 +544,35 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "rowan"
version = "0.15.15"

View File

@@ -13,6 +13,7 @@ clap = { version = "4.5.3", default-features = false, features = ["derive", "std
git2 = { version = "0.18.3", default-features = false }
html-escaper = "0.2.0"
orgize = "=0.10.0-alpha.10"
regex = { version = "1.10.5", default-features = false, features = ["std", "perf"] }
rowan = "0.15.15"
serde = { version = "1.0.197", default-features = false }
serde_derive = { version = "1.0.197" }

View File

@@ -1,8 +1,9 @@
use crate::git::ModifyMap;
use chrono::{DateTime, NaiveDateTime};
use html_escaper::Escape;
use regex::RegexSet;
use std::{
collections::{BTreeMap, BTreeSet},
collections::BTreeMap,
fmt::{self, Write},
path::PathBuf,
};
@@ -49,7 +50,7 @@ impl fmt::Display for AtomDateTime {
pub fn entries<'a>(
titles: &'a BTreeMap<PathBuf, (String, PathBuf)>,
mtime: &'a ModifyMap,
exclude: &Option<BTreeSet<String>>,
exclude: &RegexSet,
) -> Result<Vec<AtomEntry<'a>>, Box<dyn std::error::Error>> {
let mut entries = vec![];
@@ -59,10 +60,8 @@ pub fn entries<'a>(
None => continue,
};
if let Some(exclude) = exclude {
if exclude.contains(path) {
continue;
}
if exclude.is_match(path) {
continue;
}
let (updated, author) = mtime.get(old).ok_or("missing modification info")?;

View File

@@ -3,15 +3,9 @@
use clap::Parser;
use git2::{Object, Repository};
use orgize::ParseConfig;
use regex::RegexSet;
use serde_derive::Deserialize;
use std::{
cmp::min,
collections::{BTreeMap, BTreeSet},
error::Error,
fs,
io::Write,
path::PathBuf,
};
use std::{cmp::min, collections::BTreeMap, error::Error, fs, io::Write, path::PathBuf};
mod atom;
mod git;
@@ -31,7 +25,7 @@ struct ClamConfig {
title: String,
id: Option<String>,
url: String,
exclude: Option<BTreeSet<String>>,
exclude: Option<Vec<String>>,
}
fn generate(
@@ -77,8 +71,13 @@ fn generate(
if let Ok(config) = fs::read_to_string(".clam.toml") {
let config: ClamConfig = toml_edit::de::from_str(&config)?;
let exclude = if let Some(e) = config.exclude {
RegexSet::new(e)?
} else {
RegexSet::empty()
};
let feed = atom::entries(&titles, &mtime, &config.exclude)?;
let feed = atom::entries(&titles, &mtime, &exclude)?;
let mut f = fs::File::create("feed.xml")?;
f.write_all(