From c4662b6240970c1108ed5f65d06b6846adb7d278 Mon Sep 17 00:00:00 2001 From: Propfend Date: Mon, 10 Nov 2025 15:23:11 +0000 Subject: [PATCH 1/3] created configuration file. Sitemap and robots.txt are configured and generated through it. Logs are shown. And fields are optional. --- Cargo.lock | 28 ++++++++++++++++++++ Cargo.toml | 4 ++- src/build_project/mod.rs | 56 +++++++++++++++++++++++++++++++++++++++- src/cmd/serve/mod.rs | 2 +- src/configuration.rs | 19 ++++++++++++++ src/generate_robot.rs | 20 ++++++++++++++ src/generate_sitemap.rs | 55 +++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 +++ 8 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 src/configuration.rs create mode 100644 src/generate_robot.rs create mode 100644 src/generate_sitemap.rs diff --git a/Cargo.lock b/Cargo.lock index 40fe906..3c00b03 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2157,9 +2157,11 @@ dependencies = [ "petgraph", "poet-rhai", "rayon", + "robots_txt", "schemars", "serde", "serde_json", + "sitemap-rs", "slug", "syntect", "tantivy", @@ -2438,6 +2440,16 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "robots_txt" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e8e523889c75c35b3a018da4dbe311d9b248db161786624141b10859800fa0d" +dependencies = [ + "unicase", + "url", +] + [[package]] name = "rust-stemmers" version = "1.2.0" @@ -2664,6 +2676,16 @@ dependencies = [ "libc", ] +[[package]] +name = "sitemap-rs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9973bbcfa8fb93f9bee6625b6bfbabcc918f734034d229c18642dd870ac6b2df" +dependencies = [ + "chrono", + "xml-builder", +] + [[package]] name = "sketches-ddsketch" version = "0.3.0" @@ -3618,6 +3640,12 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +[[package]] +name = "xml-builder" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c023e38161c176b6ed516e2c398acd755cd19983661eff519b811c01646e10f" + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index cdb1d35..b789334 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,9 +43,11 @@ petgraph = { version = "0.8.2", features = ["serde", "serde_derive"] } rayon = "1.11.0" # rhai = { version = "1.22.2", features = ["no_closure", "serde", "serde_json", "sync"] } rhai = { package="poet-rhai", version="0.2.0", features = ["internals", "metadata", "no_closure", "serde", "serde_json", "sync"] } +robots_txt = "0.7.0" schemars = "1.0.4" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.145" +sitemap-rs = "0.3.0" slug = "0.1.6" syntect = "5.2.0" tantivy = "0.25.0" @@ -57,4 +59,4 @@ url = "2.5.6" uuid = { version = "1.18.1", features = ["rng", "serde", "v4"] } [dev-dependencies] -tempfile = "3.21.0" +tempfile = "3.21.0" \ No newline at end of file diff --git a/src/build_project/mod.rs b/src/build_project/mod.rs index 89a6a33..b98dad7 100644 --- a/src/build_project/mod.rs +++ b/src/build_project/mod.rs @@ -7,6 +7,8 @@ mod content_document_rendering_context; use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; +use std::env; +use std::path::Path; use std::path::PathBuf; use std::sync::Arc; @@ -15,6 +17,7 @@ use anyhow::anyhow; use dashmap::DashMap; use log::debug; use log::info; +use log::warn; use rayon::prelude::*; use rhai::Dynamic; use syntect::parsing::SyntaxSet; @@ -24,6 +27,7 @@ use crate::build_project::build_project_params::BuildProjectParams; use crate::build_project::build_project_result_stub::BuildProjectResultStub; use crate::build_project::content_document_rendering_context::ContentDocumentRenderingContext; use crate::build_timer::BuildTimer; 
+use crate::configuration::parse_configuration_file; use crate::content_document::ContentDocument; use crate::content_document_basename::ContentDocumentBasename; use crate::content_document_collection::ContentDocumentCollection; @@ -40,6 +44,8 @@ use crate::filesystem::Filesystem as _; use crate::filesystem::memory::Memory; use crate::find_front_matter_in_mdast::find_front_matter_in_mdast; use crate::find_table_of_contents_in_mdast::find_table_of_contents_in_mdast; +use crate::generate_robot::generate_robots; +use crate::generate_sitemap::generate_sitemap; use crate::string_to_mdast::string_to_mdast; fn render_document<'render>( @@ -259,7 +265,7 @@ pub async fn build_project( let content_document_reference_collection_dashmap: DashMap = Default::default(); let content_document_basename_by_id_arc = Arc::new(content_document_basename_by_id); - let content_document_by_basename_arc = Arc::new(content_document_by_basename); + let content_document_by_basename_arc = Arc::new(content_document_by_basename.clone()); let content_document_collections_ranked_arc = Arc::new(content_document_collections_ranked); let content_document_linker = ContentDocumentLinker { content_document_basename_by_id: content_document_basename_by_id_arc.clone(), @@ -323,6 +329,54 @@ pub async fn build_project( } }); + let config_path = match env::var("CONFIG_FILE") { + Ok(path) => path, + Err(_) => "./config.toml".to_string(), + }; + + info!("Parsing configuration file"); + + match parse_configuration_file(&config_path) { + Ok(configuration) => { + if configuration.sitemap { + info!("Building Sitemap"); + + match generate_sitemap( + &asset_path_renderer.base_path, + content_document_by_basename.values(), + ) { + Ok(sitemap) => { + if let Err(err) = memory_filesystem + .set_file_contents_sync(&Path::new("sitemap.xml"), &sitemap) + { + error_collection.register_error(config_path.clone(), err); + } + } + Err(err) => { + error_collection.register_error("sitemap.xml".to_string(), err); + } + } + } + + if 
configuration.robots { + info!("Building Robots"); + match generate_robots(asset_path_renderer.base_path) { + Ok(robots) => { + if let Err(err) = memory_filesystem + .set_file_contents_sync(&Path::new("robots.txt"), &robots) + { + error_collection.register_error(config_path, err); + } + } + Err(err) => error_collection.register_error("robots.txt".to_string(), err), + }; + } + } + Err(err) => warn!( + "Could not parse configuraton file {config_path}: {err}. Make sure it exists and is a valid TOML document" + ), + }; + if error_collection.is_empty() { Ok(BuildProjectResultStub { esbuild_metafile, diff --git a/src/cmd/serve/mod.rs b/src/cmd/serve/mod.rs index 419ef3a..5d162b8 100644 --- a/src/cmd/serve/mod.rs +++ b/src/cmd/serve/mod.rs @@ -10,9 +10,9 @@ use actix_web::App; use actix_web::HttpServer; use actix_web::web::Data; use anyhow::Result; -use indoc::formatdoc; use async_trait::async_trait; use clap::Parser; +use indoc::formatdoc; use log::info; use crate::app_dir_desktop_entry::AppDirDesktopEntry; diff --git a/src/configuration.rs b/src/configuration.rs new file mode 100644 index 0000000..16db302 --- /dev/null +++ b/src/configuration.rs @@ -0,0 +1,19 @@ +use std::fs; + +use anyhow::{Error, Result}; +use serde::Deserialize; + +#[derive(Deserialize)] +pub struct Configuration { + #[serde(default)] + pub sitemap: bool, + #[serde(default)] + pub robots: bool, +} + +pub fn parse_configuration_file(config_path: &String) -> Result { + let configuration_file: String = fs::read_to_string(config_path)?; + let config: Configuration = toml::from_str(&configuration_file)?; + + Ok(config) +} diff --git a/src/generate_robot.rs b/src/generate_robot.rs new file mode 100644 index 0000000..51472f0 --- /dev/null +++ b/src/generate_robot.rs @@ -0,0 +1,20 @@ +use std::str::FromStr; + +use anyhow::{Error, Result}; +use robots_txt::Robots; +use url::Url; + +pub fn generate_robots(base_url: String) -> Result { + let base_url = Url::from_str(&base_url)?.to_string(); + + let robots = 
Robots::builder() + .start_section("*") + .allow("/") + .disallow("") + .crawl_delay(3.0) + .sitemap(Url::from_str(&format!("{base_url}sitemap.xml"))?) + .end_section() + .build(); + + Ok(robots.to_string()) +} diff --git a/src/generate_sitemap.rs b/src/generate_sitemap.rs new file mode 100644 index 0000000..abc0e39 --- /dev/null +++ b/src/generate_sitemap.rs @@ -0,0 +1,55 @@ +use anyhow::{Error, Result}; +use chrono::Utc; +use sitemap_rs::url::Url; +use sitemap_rs::url_set::UrlSet; + +use crate::content_document_basename::ContentDocumentBasename; +use crate::content_document_reference::ContentDocumentReference; + +pub fn generate_sitemap( + base_url: &String, + content_document_by_basename: std::collections::hash_map::Values< + '_, + ContentDocumentBasename, + ContentDocumentReference, + >, +) -> Result { + let last_modified = Utc::now().fixed_offset(); + let mut urls: Vec = vec![Url::new( + base_url.clone(), + Some(last_modified), + None, + Some(0.8), + None, + None, + None, + )?]; + + for reference in content_document_by_basename { + let mut page_path = reference + .basename_path + .to_string_lossy() + .into_owned() + .replace("index", ""); + + if page_path != "" { + page_path = format!("{base_url}{page_path}"); + + urls.push(Url::new( + page_path, + Some(last_modified), + None, + Some(0.5), + None, + None, + None, + )?); + } + } + + let url_set: UrlSet = UrlSet::new(urls)?; + let mut buf: Vec = Vec::::new(); + url_set.write(&mut buf).unwrap(); + + Ok(String::from_utf8(buf)?) 
+} diff --git a/src/lib.rs b/src/lib.rs index 8d132f1..b1601b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ mod build_prompt_document_controller_params; mod build_timer; pub mod cmd; mod compile_shortcodes; +mod configuration; mod content_document; mod content_document_basename; mod content_document_collection; @@ -39,6 +40,8 @@ mod find_front_matter_in_mdast; mod find_table_of_contents_in_mdast; mod find_text_content_in_mdast; mod flexible_datetime; +mod generate_robot; +mod generate_sitemap; mod holder; mod is_external_link; mod is_valid_desktop_entry_string; From 689e6d49a4c975136556d991d62d5f660ebdb6cc Mon Sep 17 00:00:00 2001 From: Propfend Date: Tue, 11 Nov 2025 14:15:09 +0000 Subject: [PATCH 2/3] only sitemap is generated, it is behind a flag. Watch and serve commands generate that as well. --- Cargo.lock | 11 ---- Cargo.toml | 1 - src/build_project/build_project_params.rs | 1 + src/build_project/mod.rs | 65 +++++-------------- src/cmd/make/static_pages.rs | 4 ++ src/cmd/serve/mod.rs | 4 ++ src/cmd/watch/mod.rs | 4 ++ .../filesystem_http_route_index_builder.rs | 4 +- src/cmd/watch/service/project_builder.rs | 2 + src/configuration.rs | 19 ------ src/filesystem_http_route_index.rs | 4 +- src/generate_robot.rs | 20 ------ src/generate_sitemap.rs | 2 +- src/lib.rs | 2 - src/search_index.rs | 1 + 15 files changed, 39 insertions(+), 105 deletions(-) delete mode 100644 src/configuration.rs delete mode 100644 src/generate_robot.rs diff --git a/Cargo.lock b/Cargo.lock index 3c00b03..2a8871e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2157,7 +2157,6 @@ dependencies = [ "petgraph", "poet-rhai", "rayon", - "robots_txt", "schemars", "serde", "serde_json", @@ -2440,16 +2439,6 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" -[[package]] -name = "robots_txt" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "1e8e523889c75c35b3a018da4dbe311d9b248db161786624141b10859800fa0d" -dependencies = [ - "unicase", - "url", -] - [[package]] name = "rust-stemmers" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index b789334..7d79197 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,6 @@ petgraph = { version = "0.8.2", features = ["serde", "serde_derive"] } rayon = "1.11.0" # rhai = { version = "1.22.2", features = ["no_closure", "serde", "serde_json", "sync"] } rhai = { package="poet-rhai", version="0.2.0", features = ["internals", "metadata", "no_closure", "serde", "serde_json", "sync"] } -robots_txt = "0.7.0" schemars = "1.0.4" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.145" diff --git a/src/build_project/build_project_params.rs b/src/build_project/build_project_params.rs index 5bfa432..d81bb57 100644 --- a/src/build_project/build_project_params.rs +++ b/src/build_project/build_project_params.rs @@ -12,5 +12,6 @@ pub struct BuildProjectParams { pub generated_page_base_path: String, pub is_watching: bool, pub rhai_template_renderer: RhaiTemplateRenderer, + pub generate_sitemap: bool, pub source_filesystem: Arc, } diff --git a/src/build_project/mod.rs b/src/build_project/mod.rs index b98dad7..aae16e6 100644 --- a/src/build_project/mod.rs +++ b/src/build_project/mod.rs @@ -7,7 +7,6 @@ mod content_document_rendering_context; use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; -use std::env; use std::path::Path; use std::path::PathBuf; use std::sync::Arc; @@ -17,7 +16,6 @@ use anyhow::anyhow; use dashmap::DashMap; use log::debug; use log::info; -use log::warn; use rayon::prelude::*; use rhai::Dynamic; use syntect::parsing::SyntaxSet; @@ -27,7 +25,6 @@ use crate::build_project::build_project_params::BuildProjectParams; use crate::build_project::build_project_result_stub::BuildProjectResultStub; use crate::build_project::content_document_rendering_context::ContentDocumentRenderingContext; 
use crate::build_timer::BuildTimer; -use crate::configuration::parse_configuration_file; use crate::content_document::ContentDocument; use crate::content_document_basename::ContentDocumentBasename; use crate::content_document_collection::ContentDocumentCollection; @@ -44,8 +41,7 @@ use crate::filesystem::Filesystem as _; use crate::filesystem::memory::Memory; use crate::find_front_matter_in_mdast::find_front_matter_in_mdast; use crate::find_table_of_contents_in_mdast::find_table_of_contents_in_mdast; -use crate::generate_robot::generate_robots; -use crate::generate_sitemap::generate_sitemap; +use crate::generate_sitemap::create_sitemap; use crate::string_to_mdast::string_to_mdast; fn render_document<'render>( @@ -112,6 +108,7 @@ pub async fn build_project( generated_page_base_path, is_watching, rhai_template_renderer, + generate_sitemap, source_filesystem, }: BuildProjectParams, ) -> Result { @@ -329,53 +326,25 @@ pub async fn build_project( } }); - let config_path = match env::var("CONFIG_FILE") { - Ok(path) => path, - Err(_) => "./config.toml".to_string(), - }; - - info!("Parsing configuration file"); - - match parse_configuration_file(&config_path) { - Ok(configuration) => { - if configuration.sitemap { - info!("Building Sitemap"); - - match generate_sitemap( - &asset_path_renderer.base_path, - content_document_by_basename.values(), - ) { - Ok(sitemap) => { - if let Err(err) = memory_filesystem - .set_file_contents_sync(&Path::new("sitemap.xml"), &sitemap) - { - error_collection.register_error(config_path.clone(), err); - } - } - Err(err) => { - error_collection.register_error("sitemap.xml".to_string(), err); - } + if generate_sitemap { + info!("Building sitemap"); + + match create_sitemap( + &asset_path_renderer.base_path, + content_document_by_basename.values(), + ) { + Ok(sitemap) => { + if let Err(err) = + memory_filesystem.set_file_contents_sync(&Path::new("sitemap.xml"), &sitemap) + { + error_collection.register_error("sitemap.xml".to_string(), err); } } - 
- if configuration.robots { - info!("Building Robots"); - match generate_robots(asset_path_renderer.base_path) { - Ok(robots) => { - if let Err(err) = memory_filesystem - .set_file_contents_sync(&Path::new("robots.txt"), &robots) - { - error_collection.register_error(config_path, err); - } - } - Err(err) => error_collection.register_error("robots.txt".to_string(), err), - }; + Err(err) => { + error_collection.register_error("sitemap.xml".to_string(), err); } } - Err(err) => warn!( - "Could not parse configuraton file {config_path}: {err}. Make sure it exists and is a valid TOML document" - ), - }; + } if error_collection.is_empty() { Ok(BuildProjectResultStub { diff --git a/src/cmd/make/static_pages.rs b/src/cmd/make/static_pages.rs index ce9dba6..cf24c37 100644 --- a/src/cmd/make/static_pages.rs +++ b/src/cmd/make/static_pages.rs @@ -27,6 +27,9 @@ pub struct StaticPages { #[arg(long)] public_path: String, + #[arg(long, default_value = "false")] + sitemap: bool, + #[arg(value_parser = validate_is_directory)] source_directory: PathBuf, } @@ -55,6 +58,7 @@ impl Handler for StaticPages { generated_page_base_path: self.public_path.clone(), is_watching: false, rhai_template_renderer, + generate_sitemap: self.sitemap, source_filesystem, }) .await?; diff --git a/src/cmd/serve/mod.rs b/src/cmd/serve/mod.rs index 5d162b8..9206f16 100644 --- a/src/cmd/serve/mod.rs +++ b/src/cmd/serve/mod.rs @@ -60,6 +60,9 @@ pub struct Serve { #[arg(long)] public_path: String, + + #[arg(long, default_value = "false")] + sitemap: bool, } impl BuildsProject for Serve { @@ -114,6 +117,7 @@ impl Handler for Serve { generated_page_base_path: self.public_path.clone(), is_watching: false, rhai_template_renderer: rhai_template_renderer.clone(), + generate_sitemap: self.sitemap, source_filesystem: source_filesystem.clone(), }) .await? 
diff --git a/src/cmd/watch/mod.rs b/src/cmd/watch/mod.rs index 989e675..222d638 100644 --- a/src/cmd/watch/mod.rs +++ b/src/cmd/watch/mod.rs @@ -47,6 +47,9 @@ pub struct Watch { #[arg(value_parser = validate_is_directory)] source_directory: PathBuf, + + #[arg(long, default_value = "false")] + sitemap: bool, } impl BuildsProject for Watch { @@ -134,6 +137,7 @@ impl Handler for Watch { on_content_file_changed, rhai_template_renderer_holder: rhai_template_renderer_holder.clone(), session_manager, + generate_sitemap: self.sitemap, source_filesystem: source_filesystem.clone(), })); diff --git a/src/cmd/watch/service/filesystem_http_route_index_builder.rs b/src/cmd/watch/service/filesystem_http_route_index_builder.rs index 2c374a6..7909536 100644 --- a/src/cmd/watch/service/filesystem_http_route_index_builder.rs +++ b/src/cmd/watch/service/filesystem_http_route_index_builder.rs @@ -20,7 +20,7 @@ pub struct FilesystemHttpRouteIndexBuilder { } impl FilesystemHttpRouteIndexBuilder { - async fn do_build_filesystem_htto_route_index(&self) { + async fn do_build_filesystem_http_route_index(&self) { let BuildProjectResult { memory_filesystem, .. } = match self.build_project_result_holder.get().await { @@ -51,7 +51,7 @@ impl FilesystemHttpRouteIndexBuilder { impl Service for FilesystemHttpRouteIndexBuilder { async fn run(&self) -> Result<()> { loop { - self.do_build_filesystem_htto_route_index().await; + self.do_build_filesystem_http_route_index().await; tokio::select! 
{ _ = self.build_project_result_holder.update_notifier.notified() => continue, diff --git a/src/cmd/watch/service/project_builder.rs b/src/cmd/watch/service/project_builder.rs index 134b102..cb005e6 100644 --- a/src/cmd/watch/service/project_builder.rs +++ b/src/cmd/watch/service/project_builder.rs @@ -31,6 +31,7 @@ pub struct ProjectBuilder { pub on_content_file_changed: Arc, pub rhai_template_renderer_holder: RhaiTemplateRendererHolder, pub session_manager: SessionManager, + pub generate_sitemap: bool, pub source_filesystem: Arc, } @@ -60,6 +61,7 @@ impl ProjectBuilder { generated_page_base_path: self.generated_page_base_path.clone(), is_watching: true, rhai_template_renderer, + generate_sitemap: self.generate_sitemap, source_filesystem: self.source_filesystem.clone(), }) .await diff --git a/src/configuration.rs b/src/configuration.rs deleted file mode 100644 index 16db302..0000000 --- a/src/configuration.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::fs; - -use anyhow::{Error, Result}; -use serde::Deserialize; - -#[derive(Deserialize)] -pub struct Configuration { - #[serde(default)] - pub sitemap: bool, - #[serde(default)] - pub robots: bool, -} - -pub fn parse_configuration_file(config_path: &String) -> Result { - let configuration_file: String = fs::read_to_string(config_path)?; - let config: Configuration = toml::from_str(&configuration_file)?; - - Ok(config) -} diff --git a/src/filesystem_http_route_index.rs b/src/filesystem_http_route_index.rs index 3cd775e..ce32eb8 100644 --- a/src/filesystem_http_route_index.rs +++ b/src/filesystem_http_route_index.rs @@ -29,7 +29,7 @@ impl FilesystemHttpRouteIndex { Ok(this) } - pub fn register_file(&self, file: FileEntry) -> Result<()> { + fn register_file(&self, file: FileEntry) -> Result<()> { let filename = file.relative_path.to_string_lossy().to_string(); if filename.ends_with("/index.html") { @@ -43,6 +43,8 @@ impl FilesystemHttpRouteIndex { } else if filename == "index.html" { self.routes.insert("".to_string(), 
file.clone()); self.routes.insert("index.html".to_string(), file.clone()); + } else if filename == "sitemap.xml" { + self.routes.insert(filename, file.clone()); } else { return Err(anyhow!("Unexpected filename: '{filename}'")); } diff --git a/src/generate_robot.rs b/src/generate_robot.rs deleted file mode 100644 index 51472f0..0000000 --- a/src/generate_robot.rs +++ /dev/null @@ -1,20 +0,0 @@ -use std::str::FromStr; - -use anyhow::{Error, Result}; -use robots_txt::Robots; -use url::Url; - -pub fn generate_robots(base_url: String) -> Result { - let base_url = Url::from_str(&base_url)?.to_string(); - - let robots = Robots::builder() - .start_section("*") - .allow("/") - .disallow("") - .crawl_delay(3.0) - .sitemap(Url::from_str(&format!("{base_url}sitemap.xml"))?) - .end_section() - .build(); - - Ok(robots.to_string()) -} diff --git a/src/generate_sitemap.rs b/src/generate_sitemap.rs index abc0e39..ef2a4e4 100644 --- a/src/generate_sitemap.rs +++ b/src/generate_sitemap.rs @@ -6,7 +6,7 @@ use sitemap_rs::url_set::UrlSet; use crate::content_document_basename::ContentDocumentBasename; use crate::content_document_reference::ContentDocumentReference; -pub fn generate_sitemap( +pub fn create_sitemap( base_url: &String, content_document_by_basename: std::collections::hash_map::Values< '_, diff --git a/src/lib.rs b/src/lib.rs index b1601b0..0510007 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,6 @@ mod build_prompt_document_controller_params; mod build_timer; pub mod cmd; mod compile_shortcodes; -mod configuration; mod content_document; mod content_document_basename; mod content_document_collection; @@ -40,7 +39,6 @@ mod find_front_matter_in_mdast; mod find_table_of_contents_in_mdast; mod find_text_content_in_mdast; mod flexible_datetime; -mod generate_robot; mod generate_sitemap; mod holder; mod is_external_link; diff --git a/src/search_index.rs b/src/search_index.rs index f477f2e..48a2ad8 100644 --- a/src/search_index.rs +++ b/src/search_index.rs @@ -119,6 +119,7 
@@ mod tests { generated_page_base_path: public_path, is_watching: false, rhai_template_renderer, + generate_sitemap: false, source_filesystem, }) .await From 4c1ab803eb4e0575d5db8949ec2df735d8faf652 Mon Sep 17 00:00:00 2001 From: Propfend Date: Tue, 23 Dec 2025 15:34:23 +0000 Subject: [PATCH 3/3] fix last slash in sitemap urls --- Cargo.lock | 10 ++++++---- poet/Cargo.toml | 1 + poet/src/build_project/build_project_params.rs | 2 +- poet/src/build_project/mod.rs | 4 ++-- poet/src/generate_sitemap.rs | 6 +++++- poet/src/lib.rs | 1 + .../src/component_syntax/eval_tag_stack_node.rs | 6 +++++- rhai_components/src/rhai_call_template_function.rs | 3 ++- 8 files changed, 23 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44f836b..d4e9d43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2690,6 +2690,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "sitemap-rs" version = "0.3.0" @@ -2699,10 +2705,6 @@ dependencies = [ "chrono", "xml-builder", ] -name = "simd-adler32" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" [[package]] name = "sketches-ddsketch" diff --git a/poet/Cargo.toml b/poet/Cargo.toml index afecf4f..95e7783 100644 --- a/poet/Cargo.toml +++ b/poet/Cargo.toml @@ -45,6 +45,7 @@ rhai_components = { path = "../rhai_components", version = "0.5" } schemars = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +sitemap-rs = { workspace = true } slug = { workspace = true } smartstring = { workspace = true } syntect = { workspace = true } diff --git a/poet/src/build_project/build_project_params.rs b/poet/src/build_project/build_project_params.rs index 7f7e808..4cd8aa9 100644 --- 
a/poet/src/build_project/build_project_params.rs +++ b/poet/src/build_project/build_project_params.rs @@ -10,8 +10,8 @@ pub struct BuildProjectParams { pub asset_path_renderer: AssetPathRenderer, pub esbuild_metafile: Arc, pub generated_page_base_path: String, + pub generate_sitemap: bool, pub is_watching: bool, pub rhai_template_renderer: RhaiTemplateRenderer, - pub generate_sitemap: bool, pub source_filesystem: Arc, } diff --git a/poet/src/build_project/mod.rs b/poet/src/build_project/mod.rs index 7831ff4..3ee1ef3 100644 --- a/poet/src/build_project/mod.rs +++ b/poet/src/build_project/mod.rs @@ -103,7 +103,7 @@ fn render_document<'render>( pub async fn build_project( BuildProjectParams { - asset_path_renderer, + mut asset_path_renderer, esbuild_metafile, generated_page_base_path, is_watching, @@ -330,7 +330,7 @@ pub async fn build_project( info!("Building sitemap"); match create_sitemap( - &asset_path_renderer.base_path, + &mut asset_path_renderer.base_path, content_document_by_basename.values(), ) { Ok(sitemap) => { diff --git a/poet/src/generate_sitemap.rs b/poet/src/generate_sitemap.rs index ef2a4e4..b93b8b8 100644 --- a/poet/src/generate_sitemap.rs +++ b/poet/src/generate_sitemap.rs @@ -7,7 +7,7 @@ use crate::content_document_basename::ContentDocumentBasename; use crate::content_document_reference::ContentDocumentReference; pub fn create_sitemap( - base_url: &String, + base_url: &mut String, content_document_by_basename: std::collections::hash_map::Values< '_, ContentDocumentBasename, @@ -32,6 +32,10 @@ pub fn create_sitemap( .into_owned() .replace("index", ""); + if !base_url.ends_with('/') { + base_url.push_str("/"); + } + if page_path != "" { page_path = format!("{base_url}{page_path}"); diff --git a/poet/src/lib.rs b/poet/src/lib.rs index 01b48d9..4e58749 100644 --- a/poet/src/lib.rs +++ b/poet/src/lib.rs @@ -37,6 +37,7 @@ pub mod find_front_matter_in_mdast; pub mod find_table_of_contents_in_mdast; pub mod find_text_content_in_mdast; pub mod 
flexible_datetime; +pub mod generate_sitemap; pub mod holder; pub mod is_external_link; pub mod is_valid_desktop_entry_string; diff --git a/rhai_components/src/component_syntax/eval_tag_stack_node.rs b/rhai_components/src/component_syntax/eval_tag_stack_node.rs index 34c9c3e..06f0d18 100644 --- a/rhai_components/src/component_syntax/eval_tag_stack_node.rs +++ b/rhai_components/src/component_syntax/eval_tag_stack_node.rs @@ -111,7 +111,11 @@ pub fn eval_tag_stack_node( Ok(rhai_call_template_function( eval_context.engine(), &opening_tag.tag_name.name, - (context, Dynamic::from_map(props), Dynamic::from(result.to_string())), + ( + context, + Dynamic::from_map(props), + Dynamic::from(result.to_string()), + ), ) .map_err(|err| { EvalAltResult::ErrorRuntime( diff --git a/rhai_components/src/rhai_call_template_function.rs b/rhai_components/src/rhai_call_template_function.rs index c27c702..f90eff1 100644 --- a/rhai_components/src/rhai_call_template_function.rs +++ b/rhai_components/src/rhai_call_template_function.rs @@ -19,7 +19,8 @@ pub fn rhai_call_template_function( let tmp_ast = AST::new([], module); - let result = engine.call_fn::(&mut Scope::new(), &tmp_ast, "template", args)?; + let result = + engine.call_fn::(&mut Scope::new(), &tmp_ast, "template", args)?; Ok(result.into()) }