diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs
index d42de9d..de35382 100644
--- a/crates/cli/src/main.rs
+++ b/crates/cli/src/main.rs
@@ -1423,8 +1423,15 @@ struct AuditResult {
 struct AuditFileResult {
     file: String,
     bytes: usize,
+    /// A file is considered matched if wagl already has a file-memory item with `source:<path>`.
     matched: bool,
     matched_id: Option<String>,
+    /// Current file content hash (normalized).
+    file_hash: String,
+    /// Stored item content hash (normalized), if a match was found.
+    stored_hash: Option<String>,
+    /// True if a matched file's current content differs from the stored item text.
+    drifted: bool,
 }
 
 fn collect_memory_files(root: &Path) -> anyhow::Result<Vec<PathBuf>> {
@@ -1450,22 +1457,59 @@ fn collect_memory_files(root: &Path) -> anyhow::Result<Vec<PathBuf>> {
 fn audit_files(root: &Path, files: &[PathBuf], items: &[MemoryItem]) -> AuditResult {
     let mut results = Vec::new();
     let mut matched = 0;
+
+    fn audit_norm(s: &str) -> &str {
+        // Normalize for file-memory matching.
+        // Treat trailing whitespace/newlines as non-semantic to avoid duplicate imports
+        // when editors add/remove a final newline.
+        s.trim_end()
+    }
+
+    fn blake3_hex(s: &str) -> String {
+        blake3::hash(s.as_bytes()).to_hex().to_string()
+    }
+
     for path in files {
         let content = std::fs::read_to_string(path).unwrap_or_default();
         let bytes = content.len();
-        let matched_item = items
+
+        let file_hash = blake3_hex(audit_norm(&content));
+
+        let src_tag = format!("source:{}", path.display());
+        let matched_id = items
             .iter()
-            .find(|item| item.text == content)
+            .find(|item| {
+                item.tags.iter().any(|t| t == "file-memory")
+                    && item.tags.iter().any(|t| t == &src_tag)
+            })
             .map(|item| item.id.clone());
-        let is_match = matched_item.is_some();
+
+        let is_match = matched_id.is_some();
         if is_match {
             matched += 1;
         }
+
+        let stored_hash = matched_id.as_deref().and_then(|id| {
+            items
+                .iter()
+                .find(|item| item.id == id)
+                .map(|item| blake3_hex(audit_norm(&item.text)))
+        });
+
+        // If we matched by source tag, check whether content drifted (e.g., file edited).
+        let drifted = stored_hash
+            .as_deref()
+            .map(|h| h != file_hash.as_str())
+            .unwrap_or(false);
+
         results.push(AuditFileResult {
             file: path.display().to_string(),
             bytes,
             matched: is_match,
-            matched_id: matched_item,
+            matched_id,
+            file_hash,
+            stored_hash,
+            drifted,
         });
     }
 
@@ -1489,9 +1533,17 @@ async fn import_missing_files(db: &MemoryDb, audit: &AuditResult) -> anyhow::Res
         if content.trim().is_empty() {
             continue;
         }
+
+        // Include a stable content hash tag to aid future dedupe/debugging.
+        // (Normalization matches audit behavior: ignore trailing whitespace.)
+        let content_hash = blake3::hash(content.trim_end().as_bytes())
+            .to_hex()
+            .to_string();
+
         let tags = vec![
             "file-memory".to_string(),
             format!("source:{}", path.display()),
+            format!("contenthash:blake3:{}", content_hash),
         ];
         let item = MemoryItem {
             id: uuid::Uuid::new_v4().to_string(),