diff --git a/Cargo.lock b/Cargo.lock index b5d986e..d4e9d43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2177,6 +2177,7 @@ dependencies = [ "schemars", "serde", "serde_json", + "sitemap-rs", "slug", "smartstring", "syntect", @@ -2691,9 +2692,19 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "sitemap-rs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9973bbcfa8fb93f9bee6625b6bfbabcc918f734034d229c18642dd870ac6b2df" +dependencies = [ + "chrono", + "xml-builder", +] [[package]] name = "sketches-ddsketch" @@ -3619,6 +3630,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xml-builder" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c023e38161c176b6ed516e2c398acd755cd19983661eff519b811c01646e10f" + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index be0b4f2..d3b2813 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ rhai = { version = "1.23.6", features = ["only_i32", "f32_float", "internals", " schemars = "1.0.4" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.145" +sitemap-rs = "0.3.0" slug = "0.1.6" smartstring = "1.0.1" syntect = "5.2.0" diff --git a/poet/Cargo.toml b/poet/Cargo.toml index afecf4f..95e7783 100644 --- a/poet/Cargo.toml +++ b/poet/Cargo.toml @@ -45,6 +45,7 @@ rhai_components = { path = "../rhai_components", version = "0.5" } schemars = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +sitemap-rs = { workspace = true } slug = 
{ workspace = true } smartstring = { workspace = true } syntect = { workspace = true } diff --git a/poet/src/build_project/build_project_params.rs b/poet/src/build_project/build_project_params.rs index e172f08..4cd8aa9 100644 --- a/poet/src/build_project/build_project_params.rs +++ b/poet/src/build_project/build_project_params.rs @@ -10,6 +10,7 @@ pub struct BuildProjectParams { pub asset_path_renderer: AssetPathRenderer, pub esbuild_metafile: Arc, pub generated_page_base_path: String, + pub generate_sitemap: bool, pub is_watching: bool, pub rhai_template_renderer: RhaiTemplateRenderer, pub source_filesystem: Arc, diff --git a/poet/src/build_project/mod.rs b/poet/src/build_project/mod.rs index dcfad34..3ee1ef3 100644 --- a/poet/src/build_project/mod.rs +++ b/poet/src/build_project/mod.rs @@ -7,6 +7,7 @@ mod content_document_rendering_context; use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; +use std::path::Path; use std::path::PathBuf; use std::sync::Arc; @@ -40,6 +41,7 @@ use crate::filesystem::Filesystem as _; use crate::filesystem::memory::Memory; use crate::find_front_matter_in_mdast::find_front_matter_in_mdast; use crate::find_table_of_contents_in_mdast::find_table_of_contents_in_mdast; +use crate::generate_sitemap::create_sitemap; use crate::string_to_mdast::string_to_mdast; fn render_document<'render>( @@ -101,11 +103,12 @@ fn render_document<'render>( pub async fn build_project( BuildProjectParams { - asset_path_renderer, + mut asset_path_renderer, esbuild_metafile, generated_page_base_path, is_watching, rhai_template_renderer, + generate_sitemap, source_filesystem, }: BuildProjectParams, ) -> Result { @@ -259,7 +262,7 @@ pub async fn build_project( let content_document_reference_collection_dashmap: DashMap = Default::default(); let content_document_basename_by_id_arc = Arc::new(content_document_basename_by_id); - let content_document_by_basename_arc = Arc::new(content_document_by_basename); + let 
content_document_by_basename_arc = Arc::new(content_document_by_basename.clone()); let content_document_collections_ranked_arc = Arc::new(content_document_collections_ranked); let content_document_linker = ContentDocumentLinker { content_document_basename_by_id: content_document_basename_by_id_arc.clone(), @@ -323,6 +326,26 @@ pub async fn build_project( } }); + if generate_sitemap { + info!("Building sitemap"); + + match create_sitemap( + &mut asset_path_renderer.base_path, + content_document_by_basename.values(), + ) { + Ok(sitemap) => { + if let Err(err) = + memory_filesystem.set_file_contents_sync(&Path::new("sitemap.xml"), &sitemap) + { + error_collection.register_error("sitemap.xml".to_string(), err); + } + } + Err(err) => { + error_collection.register_error("sitemap.xml".to_string(), err); + } + } + } + if error_collection.is_empty() { Ok(BuildProjectResultStub { esbuild_metafile, diff --git a/poet/src/cmd/make/static_pages.rs b/poet/src/cmd/make/static_pages.rs index ce9dba6..cf24c37 100644 --- a/poet/src/cmd/make/static_pages.rs +++ b/poet/src/cmd/make/static_pages.rs @@ -27,6 +27,9 @@ pub struct StaticPages { #[arg(long)] public_path: String, + #[arg(long, default_value = "false")] + sitemap: bool, + #[arg(value_parser = validate_is_directory)] source_directory: PathBuf, } @@ -55,6 +58,7 @@ impl Handler for StaticPages { generated_page_base_path: self.public_path.clone(), is_watching: false, rhai_template_renderer, + generate_sitemap: self.sitemap, source_filesystem, }) .await?; diff --git a/poet/src/cmd/serve/mod.rs b/poet/src/cmd/serve/mod.rs index 419ef3a..9206f16 100644 --- a/poet/src/cmd/serve/mod.rs +++ b/poet/src/cmd/serve/mod.rs @@ -10,9 +10,9 @@ use actix_web::App; use actix_web::HttpServer; use actix_web::web::Data; use anyhow::Result; -use indoc::formatdoc; use async_trait::async_trait; use clap::Parser; +use indoc::formatdoc; use log::info; use crate::app_dir_desktop_entry::AppDirDesktopEntry; @@ -60,6 +60,9 @@ pub struct Serve { #[arg(long)] 
public_path: String, + + #[arg(long, default_value = "false")] + sitemap: bool, } impl BuildsProject for Serve { @@ -114,6 +117,7 @@ impl Handler for Serve { generated_page_base_path: self.public_path.clone(), is_watching: false, rhai_template_renderer: rhai_template_renderer.clone(), + generate_sitemap: self.sitemap, source_filesystem: source_filesystem.clone(), }) .await? diff --git a/poet/src/cmd/watch/mod.rs b/poet/src/cmd/watch/mod.rs index 989e675..222d638 100644 --- a/poet/src/cmd/watch/mod.rs +++ b/poet/src/cmd/watch/mod.rs @@ -47,6 +47,9 @@ pub struct Watch { #[arg(value_parser = validate_is_directory)] source_directory: PathBuf, + + #[arg(long, default_value = "false")] + sitemap: bool, } impl BuildsProject for Watch { @@ -134,6 +137,7 @@ impl Handler for Watch { on_content_file_changed, rhai_template_renderer_holder: rhai_template_renderer_holder.clone(), session_manager, + generate_sitemap: self.sitemap, source_filesystem: source_filesystem.clone(), })); diff --git a/poet/src/cmd/watch/service/filesystem_http_route_index_builder.rs b/poet/src/cmd/watch/service/filesystem_http_route_index_builder.rs index 2c374a6..7909536 100644 --- a/poet/src/cmd/watch/service/filesystem_http_route_index_builder.rs +++ b/poet/src/cmd/watch/service/filesystem_http_route_index_builder.rs @@ -20,7 +20,7 @@ pub struct FilesystemHttpRouteIndexBuilder { } impl FilesystemHttpRouteIndexBuilder { - async fn do_build_filesystem_htto_route_index(&self) { + async fn do_build_filesystem_http_route_index(&self) { let BuildProjectResult { memory_filesystem, .. } = match self.build_project_result_holder.get().await { @@ -51,7 +51,7 @@ impl FilesystemHttpRouteIndexBuilder { impl Service for FilesystemHttpRouteIndexBuilder { async fn run(&self) -> Result<()> { loop { - self.do_build_filesystem_htto_route_index().await; + self.do_build_filesystem_http_route_index().await; tokio::select! 
{ _ = self.build_project_result_holder.update_notifier.notified() => continue, diff --git a/poet/src/cmd/watch/service/project_builder.rs b/poet/src/cmd/watch/service/project_builder.rs index 134b102..cb005e6 100644 --- a/poet/src/cmd/watch/service/project_builder.rs +++ b/poet/src/cmd/watch/service/project_builder.rs @@ -31,6 +31,7 @@ pub struct ProjectBuilder { pub on_content_file_changed: Arc, pub rhai_template_renderer_holder: RhaiTemplateRendererHolder, pub session_manager: SessionManager, + pub generate_sitemap: bool, pub source_filesystem: Arc, } @@ -60,6 +61,7 @@ impl ProjectBuilder { generated_page_base_path: self.generated_page_base_path.clone(), is_watching: true, rhai_template_renderer, + generate_sitemap: self.generate_sitemap, source_filesystem: self.source_filesystem.clone(), }) .await diff --git a/poet/src/filesystem_http_route_index.rs b/poet/src/filesystem_http_route_index.rs index 3cd775e..ce32eb8 100644 --- a/poet/src/filesystem_http_route_index.rs +++ b/poet/src/filesystem_http_route_index.rs @@ -29,7 +29,7 @@ impl FilesystemHttpRouteIndex { Ok(this) } - pub fn register_file(&self, file: FileEntry) -> Result<()> { + fn register_file(&self, file: FileEntry) -> Result<()> { let filename = file.relative_path.to_string_lossy().to_string(); if filename.ends_with("/index.html") { @@ -43,6 +43,8 @@ impl FilesystemHttpRouteIndex { } else if filename == "index.html" { self.routes.insert("".to_string(), file.clone()); self.routes.insert("index.html".to_string(), file.clone()); + } else if filename == "sitemap.xml" { + self.routes.insert(filename, file.clone()); } else { return Err(anyhow!("Unexpected filename: '{filename}'")); } diff --git a/poet/src/generate_sitemap.rs b/poet/src/generate_sitemap.rs new file mode 100644 index 0000000..b93b8b8 --- /dev/null +++ b/poet/src/generate_sitemap.rs @@ -0,0 +1,59 @@ +use anyhow::{Error, Result}; +use chrono::Utc; +use sitemap_rs::url::Url; +use sitemap_rs::url_set::UrlSet; + +use 
crate::content_document_basename::ContentDocumentBasename;
+use crate::content_document_reference::ContentDocumentReference;
+
+/// Renders the XML sitemap for the site root and every content document.
+///
+/// The first entry is `base_url` exactly as passed in (priority `0.8`). Each
+/// content document whose basename path is not the root index is then added
+/// as `{base_url}{path}` (priority `0.5`). All entries share one
+/// "last modified" timestamp taken when this function is called.
+///
+/// `base_url` is modified in place: as soon as the first content document is
+/// visited, a trailing `/` is appended if it is missing, so that page paths
+/// can be joined onto it.
+///
+/// # Errors
+///
+/// Returns an error when a URL or the URL set is rejected by `sitemap_rs`,
+/// when the XML cannot be written, or when the output is not valid UTF-8.
+pub fn create_sitemap(
+    base_url: &mut String,
+    content_document_by_basename: std::collections::hash_map::Values<
+        '_,
+        ContentDocumentBasename,
+        ContentDocumentReference,
+    >,
+) -> Result<String, Error> {
+    let last_modified = Utc::now().fixed_offset();
+    let mut urls: Vec<Url> = vec![Url::new(
+        base_url.clone(),
+        Some(last_modified),
+        None,
+        Some(0.8),
+        None,
+        None,
+        None,
+    )?];
+
+    for reference in content_document_by_basename {
+        let basename = reference.basename_path.to_string_lossy();
+        // Drop `index` only when it is the whole final path component
+        // (`index` -> ``, `docs/index` -> `docs/`). Removing every occurrence
+        // would corrupt paths such as `guides/reindexing`.
+        let page_path = basename
+            .strip_suffix("index")
+            .filter(|parent| parent.is_empty() || parent.ends_with('/'))
+            .unwrap_or(&basename);
+
+        if !base_url.ends_with('/') {
+            base_url.push('/');
+        }
+
+        // An empty path is the root index, which is already the first entry.
+        if !page_path.is_empty() {
+            urls.push(Url::new(
+                format!("{base_url}{page_path}"),
+                Some(last_modified),
+                None,
+                Some(0.5),
+                None,
+                None,
+                None,
+            )?);
+        }
+    }
+
+    let mut buf: Vec<u8> = Vec::new();
+    // Hand serialization failures back to the caller instead of panicking;
+    // the caller records them against `sitemap.xml` like any other error.
+    UrlSet::new(urls)?
+        .write(&mut buf)
+        .map_err(|err| Error::msg(format!("Unable to serialize sitemap: {err:?}")))?;
+
+    Ok(String::from_utf8(buf)?)
+}
diff --git a/poet/src/lib.rs b/poet/src/lib.rs index 01b48d9..4e58749 100644 --- a/poet/src/lib.rs +++ b/poet/src/lib.rs @@ -37,6 +37,7 @@ pub mod find_front_matter_in_mdast; pub mod find_table_of_contents_in_mdast; pub mod find_text_content_in_mdast; pub mod flexible_datetime; +pub mod generate_sitemap; pub mod holder; pub mod is_external_link; pub mod is_valid_desktop_entry_string; diff --git a/poet/src/search_index.rs b/poet/src/search_index.rs index 1085e86..5aca27c 100644 --- a/poet/src/search_index.rs +++ b/poet/src/search_index.rs @@ -119,6 +119,7 @@ mod tests { generated_page_base_path: public_path, is_watching: false, rhai_template_renderer, + generate_sitemap: false, source_filesystem, }) .await diff --git a/rhai_components/src/component_syntax/eval_tag_stack_node.rs b/rhai_components/src/component_syntax/eval_tag_stack_node.rs index 34c9c3e..06f0d18 100644 --- a/rhai_components/src/component_syntax/eval_tag_stack_node.rs +++ b/rhai_components/src/component_syntax/eval_tag_stack_node.rs @@ -111,7 +111,11 @@ pub fn eval_tag_stack_node( Ok(rhai_call_template_function( eval_context.engine(), &opening_tag.tag_name.name, - (context, Dynamic::from_map(props), Dynamic::from(result.to_string())), + ( + context, + Dynamic::from_map(props), + Dynamic::from(result.to_string()), + ), ) .map_err(|err| { EvalAltResult::ErrorRuntime( diff --git a/rhai_components/src/rhai_call_template_function.rs b/rhai_components/src/rhai_call_template_function.rs index c27c702..f90eff1 100644 --- a/rhai_components/src/rhai_call_template_function.rs +++ b/rhai_components/src/rhai_call_template_function.rs @@ -19,7 +19,8 @@ pub fn rhai_call_template_function( let tmp_ast = AST::new([], module); - let result = engine.call_fn::(&mut Scope::new(), &tmp_ast, "template", args)?; + let result = + engine.call_fn::(&mut Scope::new(), &tmp_ast, "template", args)?; Ok(result.into()) } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..0510007 --- /dev/null +++
b/src/lib.rs @@ -0,0 +1,71 @@ +mod anyhow_error_aggregate; +mod app_dir_desktop_entry; +mod assert_valid_desktop_entry_string; +mod asset_manager; +mod asset_path_renderer; +mod build_project; +mod build_prompt_document_controller; +mod build_prompt_document_controller_collection; +mod build_prompt_document_controller_params; +mod build_timer; +pub mod cmd; +mod compile_shortcodes; +mod content_document; +mod content_document_basename; +mod content_document_collection; +mod content_document_collection_ranked; +mod content_document_component_context; +mod content_document_front_matter; +mod content_document_hierarchy; +mod content_document_in_collection; +mod content_document_linker; +mod content_document_reference; +mod content_document_source; +mod content_document_tree_node; +mod copy_esbuild_metafile_assets_to; +mod document_error; +mod document_error_collection; +mod esbuild_metafile_holder; +mod escape_html; +mod escape_html_attribute; +mod eval_content_document_mdast; +mod eval_mdx_element; +mod eval_prompt_document_mdast; +mod eval_prompt_document_mdast_params; +mod filesystem; +mod filesystem_http_route_index; +mod filesystem_http_route_index_holder; +mod find_front_matter_in_mdast; +mod find_table_of_contents_in_mdast; +mod find_text_content_in_mdast; +mod flexible_datetime; +mod generate_sitemap; +mod holder; +mod is_external_link; +mod is_valid_desktop_entry_string; +mod mcp; +mod mcp_resource_provider_content_documents; +mod mdast_children_to_heading_id; +mod mdast_to_tantivy_document; +mod parse_markdown_metadata_line; +mod prompt_controller_collection_holder; +mod prompt_document_component_context; +mod prompt_document_controller; +mod prompt_document_front_matter; +mod read_esbuild_metafile_or_default; +mod rhai_components; +mod rhai_functions; +mod rhai_safe_random_affix; +mod rhai_template_factory; +mod rhai_template_renderer; +mod rhai_template_renderer_holder; +mod search_index; +mod search_index_fields; +mod search_index_found_document; +mod 
search_index_query_params; +mod search_index_reader; +mod search_index_reader_holder; +mod search_index_schema; +mod search_tool; +mod string_to_mdast; +mod table_of_contents;