Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 56 additions & 4 deletions crates/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1423,8 +1423,15 @@ struct AuditResult {
/// Per-file outcome of auditing one memory file against the stored memory items.
struct AuditFileResult {
/// Path of the audited file, rendered via `Path::display`.
file: String,
/// Length in bytes of the file content as read; 0 when the file could not be read
/// as text, because the read error is replaced by an empty string.
bytes: usize,
/// A file is considered matched if wagl already has a `file-memory` item that also
/// carries the tag `source:<path>`. Matching is by tag, not by content.
matched: bool,
/// Id of the first stored item that satisfied the tag match, or `None` if no item did.
matched_id: Option<String>,
/// BLAKE3 hex digest of the current file content, normalized by trimming
/// trailing whitespace before hashing.
file_hash: String,
/// BLAKE3 hex digest of the matched item's text under the same normalization;
/// `None` when no item matched.
stored_hash: Option<String>,
/// True if a matched file's normalized hash differs from the stored item's
/// (e.g. the file was edited after import). Always false when there is no match.
drifted: bool,
}

fn collect_memory_files(root: &Path) -> anyhow::Result<Vec<PathBuf>> {
Expand All @@ -1450,22 +1457,59 @@ fn collect_memory_files(root: &Path) -> anyhow::Result<Vec<PathBuf>> {
fn audit_files(root: &Path, files: &[PathBuf], items: &[MemoryItem]) -> AuditResult {
let mut results = Vec::new();
let mut matched = 0;

/// Returns the canonical form of `s` used when comparing file-memory content.
///
/// Only trailing whitespace (including newlines) is removed; leading and interior
/// whitespace is preserved. Editors routinely add or drop a final newline, and
/// treating that as a real change would cause the same file to be imported twice.
fn audit_norm(s: &str) -> &str {
    s.trim_end_matches(char::is_whitespace)
}

/// Hashes the UTF-8 bytes of `s` with BLAKE3 and returns the digest as a hex string.
fn blake3_hex(s: &str) -> String {
    let digest = blake3::hash(s.as_bytes());
    digest.to_hex().to_string()
}

for path in files {
let content = std::fs::read_to_string(path).unwrap_or_default();
let bytes = content.len();
let matched_item = items

let file_hash = blake3_hex(audit_norm(&content));

let src_tag = format!("source:{}", path.display());
let matched_id = items
.iter()
.find(|item| item.text == content)
.find(|item| {
item.tags.iter().any(|t| t == "file-memory")
&& item.tags.iter().any(|t| t == &src_tag)
})
.map(|item| item.id.clone());
let is_match = matched_item.is_some();

let is_match = matched_id.is_some();
if is_match {
matched += 1;
}

let stored_hash = matched_id.as_deref().and_then(|id| {
items
.iter()
.find(|item| item.id == id)
.map(|item| blake3_hex(audit_norm(&item.text)))
});

// If we matched by source tag, check whether content drifted (e.g., file edited).
let drifted = stored_hash
.as_deref()
.map(|h| h != file_hash.as_str())
.unwrap_or(false);

results.push(AuditFileResult {
file: path.display().to_string(),
bytes,
matched: is_match,
matched_id: matched_item,
matched_id,
file_hash,
stored_hash,
drifted,
});
}

Expand All @@ -1489,9 +1533,17 @@ async fn import_missing_files(db: &MemoryDb, audit: &AuditResult) -> anyhow::Res
if content.trim().is_empty() {
continue;
}

// Include a stable content hash tag to aid future dedupe/debugging.
// (Normalization matches audit behavior: ignore trailing whitespace.)
let content_hash = blake3::hash(content.trim_end().as_bytes())
.to_hex()
.to_string();

let tags = vec![
"file-memory".to_string(),
format!("source:{}", path.display()),
format!("contenthash:blake3:{}", content_hash),
];
let item = MemoryItem {
id: uuid::Uuid::new_v4().to_string(),
Expand Down