generate files while walking git tree

This reduces peak RAM usage and allows for future expansion
to process non-traditional files such as symlinks.
This commit is contained in:
xfnw 2024-04-10 17:32:24 -04:00
parent 8151294d48
commit 4b226472e9
3 changed files with 137 additions and 112 deletions

View File

@ -1,6 +1,6 @@
use git2::{Oid, Repository, Time};
use std::{collections::BTreeMap, error::Error, path::PathBuf};
use orgize::ParseConfig;
use std::{collections::BTreeMap, error::Error, fs, path::PathBuf};
pub type CreateMap = BTreeMap<PathBuf, (Time, String)>;
pub type ModifyMap = BTreeMap<PathBuf, (Time, String)>;
@ -69,7 +69,12 @@ pub fn walk_callback(
repo: &Repository,
dir: &str,
entry: &git2::TreeEntry,
dir_map: &mut BTreeMap<String, Vec<(String, Vec<u8>)>>,
org_cfg: &ParseConfig,
ctime: &BTreeMap<PathBuf, (Time, String)>,
mtime: &BTreeMap<PathBuf, (Time, String)>,
year_ago: i64,
short_id: &str,
titles: &mut BTreeMap<PathBuf, (String, PathBuf)>,
) -> Result<(), Box<dyn Error>> {
let object = entry.to_object(repo)?;
let name = entry.name().ok_or("invalid unicode in a file name")?;
@ -79,13 +84,22 @@ pub fn walk_callback(
Err(_) => {
// is probably a directory
dir_map.insert(format!("{}{}/", dir, name), vec![]);
fs::create_dir_all(format!("{}{}/", dir, name))?;
return Ok(());
}
};
let directory = dir_map.get_mut(dir).ok_or("VERBODEN TOEGANG")?;
directory.push((name.to_string(), blob.content().to_vec()));
crate::html::generate_page(
dir,
name,
blob.content(),
org_cfg,
ctime,
mtime,
year_ago,
short_id,
titles,
)?;
Ok(())
}

View File

@ -1,10 +1,26 @@
use crate::git::{CreateMap, ModifyMap};
use chrono::{DateTime, Datelike, NaiveDateTime};
use html_escaper::{Escape, Trusted};
use orgize::{
ast::{PropertyDrawer, TodoType},
ast::{Keyword, PropertyDrawer, TodoType},
export::{Container, Event, HtmlEscape, HtmlExport, TraversalContext, Traverser},
ParseConfig,
};
use rowan::ast::{support, AstNode};
use slugify::slugify;
use std::cmp::min;
use std::{cmp::min, collections::BTreeMap, error::Error, fs, io::Write, path::PathBuf};
/// Values substituted into the HTML page template for one rendered org file.
///
/// NOTE(review): the `Boilerplate` derive presumably provides the `Display`
/// impl behind the `to_string()` call in `generate_page` — confirm against the
/// boilerplate crate; the template file itself is not visible here.
#[derive(boilerplate::Boilerplate)]
struct PageHtml<'a> {
/// Page title: value of the document's `TITLE` keyword, or `"untitled"`.
title: String,
/// HTML produced by the `Handler` export of the parsed document.
body: String,
/// Short commit id supplied by the caller.
commit: &'a str,
/// Author string stored next to the creation time in the creation map.
author: &'a str,
/// Creation timestamp converted to a naive UTC date-time.
created: NaiveDateTime,
/// Modification timestamp converted to a naive UTC date-time.
modified: NaiveDateTime,
/// Number of components in the source file's path (file name included).
numdir: usize,
/// True when the modification time is earlier than the `year_ago` cutoff.
old_page: bool,
}
#[derive(Default)]
pub struct Handler {
@ -158,3 +174,75 @@ impl Handler {
}
}
}
/// Writes the output file for a single blob located at `dir` + `name`.
///
/// A file whose extension is `org` is parsed with `org_cfg`, rendered through
/// the `PageHtml` template and written to the same path with the extension
/// replaced by `html`. Any other file is written out byte-for-byte.
///
/// For every rendered org file an entry is added to `titles`, keyed by the
/// generated `.html` path and holding the page title together with the
/// original `.org` path.
///
/// # Errors
///
/// Returns an error when an org file is not valid UTF-8, when `ctime` or
/// `mtime` has no entry for the path, when a timestamp cannot be converted to
/// a date, or when creating or writing the output file fails.
pub fn generate_page(
    dir: &str,
    name: &str,
    file: &[u8],
    org_cfg: &ParseConfig,
    ctime: &CreateMap,
    mtime: &ModifyMap,
    year_ago: i64,
    short_id: &str,
    titles: &mut BTreeMap<PathBuf, (String, PathBuf)>,
) -> Result<(), Box<dyn Error>> {
    let source_path = PathBuf::from(format!("{}{}", dir, name));
    let is_org = source_path.extension().and_then(std::ffi::OsStr::to_str) == Some("org");

    // Decide where the output goes and, for org files, what replaces the raw bytes.
    let (target_path, rendered) = if is_org {
        let text = std::str::from_utf8(file)?;
        let parsed = org_cfg.clone().parse(text);

        // https://github.com/PoiScript/orgize/issues/70#issuecomment-1916068875
        // When several TITLE keywords are present, the last one wins.
        let title = parsed
            .document()
            .section()
            .and_then(|section| {
                support::children::<Keyword>(section.syntax())
                    .filter(|keyword| keyword.key().eq_ignore_ascii_case("TITLE"))
                    .last()
                    .map(|keyword| keyword.value().trim().to_string())
            })
            .unwrap_or_else(|| "untitled".to_string());

        let (created, author) = ctime.get(&source_path).ok_or("missing creation time")?;
        let modified = mtime
            .get(&source_path)
            .ok_or("missing modification time")?
            .0;

        let numdir = source_path.iter().count();
        let mut exporter = Handler {
            numdir,
            ..Default::default()
        };
        parsed.traverse(&mut exporter);

        // Pages last modified before the cutoff are flagged as old.
        let old_page = modified.seconds() - year_ago < 0;

        // Converts epoch seconds to a naive UTC date-time, failing with `problem`.
        let to_naive = |secs: i64, problem: &'static str| {
            DateTime::from_timestamp(secs, 0)
                .map(|stamp| stamp.naive_utc())
                .ok_or(problem)
        };

        let page = PageHtml {
            title: title.clone(),
            body: exporter.exp.finish(),
            commit: short_id,
            author,
            created: to_naive(created.seconds(), "broken creation date")?,
            modified: to_naive(modified.seconds(), "broken modification date")?,
            numdir,
            old_page,
        };

        let html_path = source_path.with_extension("html");
        titles.insert(html_path.clone(), (title, source_path));
        (html_path, Some(page.to_string().into_bytes()))
    } else {
        (source_path, None)
    };

    // Rendered HTML when available, otherwise the untouched blob contents.
    let payload: &[u8] = rendered.as_deref().unwrap_or(file);
    let mut output = fs::File::create(target_path)?;
    output.write_all(payload)?;
    Ok(())
}

View File

@ -1,9 +1,8 @@
use chrono::{DateTime, Datelike, NaiveDateTime};
#![allow(clippy::too_many_arguments)]
use clap::Parser;
use git2::{Oid, Repository};
use html_escaper::{Escape, Trusted};
use orgize::{ast::Keyword, ParseConfig};
use rowan::ast::{support, AstNode};
use git2::{Object, Repository};
use orgize::ParseConfig;
use serde_derive::Deserialize;
use std::{cmp::min, collections::BTreeMap, error::Error, fs, io::Write, path::PathBuf};
@ -20,18 +19,6 @@ struct Opt {
branch: String,
}
/// Values substituted into the HTML page template for one rendered org file.
///
/// NOTE(review): the `Boilerplate` derive presumably provides the `Display`
/// impl behind the `to_string()` call made on this struct — confirm against
/// the boilerplate crate; the template file itself is not visible here.
#[derive(boilerplate::Boilerplate)]
struct PageHtml<'a> {
/// Page title: value of the document's `TITLE` keyword, or `"untitled"`.
title: String,
/// HTML produced by the `html::Handler` export of the parsed document.
body: String,
/// Short commit id supplied by the caller.
commit: &'a str,
/// Author string stored next to the creation time in the creation map.
author: &'a str,
/// Creation timestamp converted to a naive UTC date-time.
created: NaiveDateTime,
/// Modification timestamp converted to a naive UTC date-time.
modified: NaiveDateTime,
/// Number of components in the source file's path (file name included).
numdir: usize,
/// True when the modification time is earlier than the `year_ago` cutoff.
old_page: bool,
}
#[derive(Deserialize, Debug)]
struct ClamConfig {
title: String,
@ -42,12 +29,14 @@ struct ClamConfig {
fn generate(
org_cfg: &ParseConfig,
repo: &Repository,
dir_map: &BTreeMap<String, Vec<(String, Vec<u8>)>>,
short_id: &str,
// FIXME: needing both a short_id and oid is pretty silly, however git2
// annoyingly does not provide an easy way to derive one from the other
oid: Oid,
commit: Object,
) -> Result<(), Box<dyn Error>> {
let short_id = commit.short_id().unwrap();
let short_id = short_id.as_str().unwrap();
let commit = commit.into_commit().unwrap();
let oid = commit.id();
let tree = commit.tree().unwrap();
let (ctime, mtime) = git::make_time_tree(repo, oid)?;
{
@ -62,74 +51,21 @@ fn generate(
let year_ago: i64 = year_ago.try_into()?;
let mut titles = BTreeMap::new();
for (dir, files) in dir_map.iter() {
fs::create_dir_all(dir)?;
for file in files.iter() {
let mut full_path: PathBuf = format!("{}{}", dir, file.0).into();
let pcontent: Option<Vec<u8>> =
match full_path.extension().and_then(std::ffi::OsStr::to_str) {
Some("org") => {
let fstr = std::str::from_utf8(file.1.as_slice())?;
let res = org_cfg.clone().parse(fstr);
// https://github.com/PoiScript/orgize/issues/70#issuecomment-1916068875
let mut title = "untitled".to_string();
if let Some(section) = res.document().section() {
for keyword in support::children::<Keyword>(section.syntax()) {
if keyword.key().eq_ignore_ascii_case("TITLE") {
title = keyword.value().trim().to_string();
}
}
}
let (created, author) =
ctime.get(&full_path).ok_or("missing creation time")?;
let modified = mtime.get(&full_path).ok_or("missing modification time")?.0;
let numdir = full_path.iter().count();
let mut html_export = html::Handler {
numdir,
..Default::default()
};
res.traverse(&mut html_export);
let old_page = modified.seconds() - year_ago < 0;
let template = PageHtml {
title: title.clone(),
body: html_export.exp.finish(),
commit: short_id,
author,
created: DateTime::from_timestamp(created.seconds(), 0)
.ok_or("broken creation date")?
.naive_utc(),
modified: DateTime::from_timestamp(modified.seconds(), 0)
.ok_or("broken modification date")?
.naive_utc(),
numdir,
old_page,
};
let old_path = full_path.clone();
full_path.set_extension("html");
titles.insert(full_path.clone(), (title, old_path));
Some(template.to_string().into_bytes())
}
_ => None,
};
let content = match &pcontent {
Some(c) => c,
None => &file.1,
};
let mut f = fs::File::create(full_path)?;
f.write_all(content)?;
}
}
tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| {
git::walk_callback(
repo,
dir,
entry,
org_cfg,
&ctime,
&mtime,
year_ago,
short_id,
&mut titles,
)
.unwrap();
0
})?;
if let Ok(config) = fs::read_to_string(".clam.toml") {
let config: ClamConfig = toml::from_str(&config)?;
@ -160,19 +96,6 @@ fn main() {
let repo = Repository::open(&opt.repository).unwrap();
let commit = repo.revparse_single(&opt.branch).unwrap();
let short_id = commit.short_id().unwrap();
let short_id = short_id.as_str().unwrap();
let commit = commit.into_commit().unwrap();
let oid = commit.id();
let tree = commit.tree().unwrap();
let mut dir_map = BTreeMap::new();
dir_map.insert("".to_string(), vec![]);
tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| {
git::walk_callback(&repo, dir, entry, &mut dir_map).unwrap();
0
})
.unwrap();
// TODO: get this stuff from .clam.toml or something
let org_cfg = ParseConfig {
@ -187,5 +110,5 @@ fn main() {
..Default::default()
};
generate(&org_cfg, &repo, &dir_map, short_id, oid).unwrap();
generate(&org_cfg, &repo, commit).unwrap();
}