diff --git a/.decapod/generated/artifacts/provenance/artifact_manifest.json b/.decapod/generated/artifacts/provenance/artifact_manifest.json index 2af79c2f..e5fcbce4 100644 --- a/.decapod/generated/artifacts/provenance/artifact_manifest.json +++ b/.decapod/generated/artifacts/provenance/artifact_manifest.json @@ -2,7 +2,7 @@ "artifacts": [ { "path": "README.md", - "sha256": "50c89e477e08d9f4d96f06b5a4961c7119856e6481f4102b299fc63fd1f057a6" + "sha256": "661b34c00d5be075a55d08f8e200d4b0896769f52d073c8e65c7c5ab14c9155e" } ], "kind": "artifact_manifest", diff --git a/CHANGELOG.md b/CHANGELOG.md index cb4baa91..562e391a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- tolerate empty capsule files in release check and manifest schema/interface validation + ## [0.46.3](https://github.com/DecapodLabs/decapod/compare/v0.46.2...v0.46.3) - 2026-03-01 ### Added diff --git a/README.md b/README.md index ad2f7fe2..4f402819 100644 --- a/README.md +++ b/README.md @@ -88,11 +88,15 @@ Override any constitution default with plain English in `.decapod/OVERRIDE.md`. ## Why this exists -AI coding agents are extraordinarily good at generating code. They are extraordinarily bad at knowing when to stop, what not to touch, and whether the thing they built is the thing you asked for. +Coding agents suck. But it's not their fault. -The failure mode isn't "bad code." It's unaccountable code: no intent recorded, no boundaries enforced, no proof that completion criteria were met. You get a PR that compiles. You have no idea if it's right. +You can't solve the world inside the agent. Like any serious technology, agents need infrastructure — a way to interface with the host machine (files, repos, terminals, policies) in a way that's intelligent, bounded, and provable. -Decapod closes that gap. Agents call it mid-run to lock intent, enforce boundaries, and prove completion. 
It shapes what goes into inference without doing inference itself. +The Unix philosophy ("do one thing well") breaks down the moment the "one thing" becomes: reason over ambiguous intent, plan work, write code, validate it, manage state, coordinate tools, and ship safely. We expect agents to generate great code. They mostly can. But the gaps aren't something you patch by making the agent fatter. The gaps exist because the agent isn't the right place for control-plane responsibilities. + +Right now, agent makers keep stuffing more into the agent: task management, memory, rules, planning, codegen, toolchains, browsers — until it's mediocre at everything. Agents shouldn't be responsible for control-plane work. They shouldn't be your TODO database. They shouldn't be the place you encode a team's behavioral expectations. They shouldn't be the system of record for "what got done" or "what's allowed." That belongs in infrastructure. + +Decapod is a repo-native governance kernel that agents call into — like a device driver for agent work. It makes intent explicit, boundaries explicit, and completion provable. The agent stays the brain. Decapod becomes the control plane that turns agent output into something shippable. State is local and durable in `.decapod/`. Context, decisions, and traces persist across sessions and stay retrievable over time. Nothing hides. Nothing phones home. @@ -125,7 +129,7 @@ AI Agent(s) <----> Decapod <----> Repository + Policy - **Parallel-safe.** Multiple agents, one repo, no collisions. - **Proof-gated completion.** `VERIFIED` requires passing proof-plan results, not narrative. - **Fully auditable.** Every decision, trace, and proof artifact lives in `.decapod/` as plain files. -- **Context internalization.** Turn long documents into mountable, verifiable context adapters so agents stop re-ingesting the same 50-page spec every session. 
+- **Context internalization.** Turn long documents into mountable, verifiable context adapters with explicit source hashes, determinism labels, session-scoped attach leases, and explicit detach so agents stop re-ingesting the same 50-page spec every session. The deep surface area — interfaces, capsules, eval kernel, knowledge promotions, obligation graphs — lives in the embedded constitution. Ask your agent to explore it. diff --git a/constitution/core/INTERFACES.md b/constitution/core/INTERFACES.md index c9e47f51..789a116f 100644 --- a/constitution/core/INTERFACES.md +++ b/constitution/core/INTERFACES.md @@ -27,6 +27,7 @@ This registry defines the canonical binding interface surfaces. | `interfaces/DEMANDS_SCHEMA.md` | User-demand schema + precedence rules | Yes | | `interfaces/RISK_POLICY_GATE.md` | Deterministic PR risk-policy gate semantics | Yes | | `interfaces/INTERNALIZATION_SCHEMA.md` | Internalized context artifact schema + lifecycle contract | Yes | +| `interfaces/jsonschema/internalization/*.json` | Stable JSON Schemas for internalization manifests and CLI results | Yes | | `interfaces/AGENT_CONTEXT_PACK.md` | Agent context-pack layout and mutation contract | Yes | | `interfaces/PROJECT_SPECS.md` | Canonical local `specs/*.md` contract and constitution mapping | Yes | @@ -43,6 +44,12 @@ This registry defines the canonical binding interface surfaces. 
- Agent memory/context pack semantics: `interfaces/AGENT_CONTEXT_PACK.md` - Canonical local project specs contract: `interfaces/PROJECT_SPECS.md` - Internalized context artifact lifecycle: `interfaces/INTERNALIZATION_SCHEMA.md` +- Internalization JSON schemas: +- `interfaces/jsonschema/internalization/InternalizationManifest.schema.json` +- `interfaces/jsonschema/internalization/InternalizationCreateResult.schema.json` +- `interfaces/jsonschema/internalization/InternalizationAttachResult.schema.json` +- `interfaces/jsonschema/internalization/InternalizationDetachResult.schema.json` +- `interfaces/jsonschema/internalization/InternalizationInspectResult.schema.json` --- diff --git a/constitution/interfaces/CLAIMS.md b/constitution/interfaces/CLAIMS.md index 804febf9..2f8df6e5 100644 --- a/constitution/interfaces/CLAIMS.md +++ b/constitution/interfaces/CLAIMS.md @@ -43,6 +43,8 @@ Columns: | claim.foundation.daemonless_repo_native_canonicality | Decapod remains daemonless and repo-native for promotion-relevant state and evidence. | `specs/SYSTEM.md` | partially_enforced | `decapod validate` + repo-native manifest/provenance gates | Operationally enforced in current control plane; hardening continues through gate expansion. | | claim.foundation.proof_gated_promotion | Promotion-relevant outcomes are invalid without executable proof and machine-verifiable artifacts. | `specs/SYSTEM.md` | partially_enforced | `decapod validate` + workspace publish proof gates | Publish paths enforce this today; broader policy coupling is still evolving. | | claim.doc.readme_human_only | README is human-facing product documentation; agent-operational rules must live in entrypoint and constitution surfaces. | `core/DECAPOD.md` | not_enforced | planned: docs-surface partition gate | Prevents README from becoming implicit agent policy. 
| +| claim.internalize.explicit_attach_lease | Internalized context may affect inference only through an explicit session-scoped attach lease; ambient reuse is forbidden. | `interfaces/INTERNALIZATION_SCHEMA.md` | partially_enforced | `decapod internalize attach` + `decapod internalize detach` + `decapod validate` internalization gate | Lease files and provenance logs are enforced; downstream inference callers must honor the contract. | +| claim.internalize.best_effort_not_replayable | Best-effort internalizer profiles must never claim replayability and must record binary/runtime fingerprints. | `interfaces/INTERNALIZATION_SCHEMA.md` | enforced | `decapod internalize create` + `decapod internalize inspect` + `decapod validate` internalization gate | Prevents fake reproducibility claims for non-deterministic profiles. | | claim.agent.invocation_checkpoints_required | Agents must call Decapod before plan commitment, before mutation, and after mutation for proof. | `interfaces/CONTROL_PLANE.md` | partially_enforced | `decapod todo` ownership records + `decapod validate` + required tests | Enforcement is partly procedural until explicit checkpoint trace gate exists. | | claim.agent.no_capability_hallucination | Agents must not claim capabilities absent from the Decapod command surface. | `interfaces/CONTROL_PLANE.md` | not_enforced | planned: capability-claim consistency gate | Missing surfaces must be reported as gaps, not fabricated behavior. | | claim.proof.executable_check | A "proof" is an executable check that can fail loudly (tests, linters, validators, etc). No new DSL. | `core/PLUGINS.md` | enforced | `decapod validate` | Definition is normative; proof registry (Epoch 1) will formalize. 
| diff --git a/constitution/interfaces/INTERNALIZATION_SCHEMA.md b/constitution/interfaces/INTERNALIZATION_SCHEMA.md index c029226c..f9084333 100644 --- a/constitution/interfaces/INTERNALIZATION_SCHEMA.md +++ b/constitution/interfaces/INTERNALIZATION_SCHEMA.md @@ -3,169 +3,143 @@ **Authority:** interface (machine-readable contract) **Layer:** Interfaces **Binding:** Yes -**Scope:** schema, invariants, and lifecycle for internalized context artifacts -**Non-goals:** internalizer implementation details, model training +**Scope:** schema, invariants, CLI lifecycle, and proof gates for internalized context artifacts +**Non-goals:** model training, hidden memory, background services --- ## 1. Purpose -Internalized context artifacts let agents convert long documents into mountable, verifiable context adapters. This eliminates redundant long-context ingestion across sessions while maintaining full auditability. +Internalized context artifacts let agents reuse long-document context without re-sending the full document on every call. -An internalization is **not training**. It is a governed artifact produced by a pluggable external tool (an "internalizer profile") and managed by Decapod's artifact lifecycle. +An internalization is **not training** and **not hidden state**. It is a governed repo-local artifact produced on demand by a pluggable profile tool, bound to exact source bytes, and attachable only through an explicit lease-bearing mount step. --- -## 2. Artifact Layout +## 2. Capability Decision + Scope + +### Added + +One capability family: `internalize.*` + +- `internalize.create` creates or reuses a content-addressed internalization artifact. +- `internalize.attach` creates a session-scoped mount lease with explicit expiry. +- `internalize.detach` revokes the mount explicitly before lease expiry. +- `internalize.inspect` proves exact bindings, integrity status, and determinism labeling. + +### Not Added + +- No background daemon or auto-mounting. 
+- No silent GPU dependency. +- No implicit session reuse across tools. +- No claim that best-effort profiles are replayable. +- No general-purpose ambient memory layer. + +--- + +## 3. Artifact Layout ```text .decapod/generated/artifacts/internalizations/<artifact_id>/ - manifest.json # InternalizationManifest (see schema below) - adapter.bin # adapter payload (or pointer) + manifest.json + adapter.bin ``` ---- +Session-scoped active mount leases are stored at: -## 3. InternalizationManifest Schema (v1.0.0) - -```json -{ - "schema_version": "1.0.0", - "id": "", - "source_hash": "", - "source_path": "", - "extraction_method": "", - "chunking_params": {}, - "base_model_id": "", - "internalizer_profile": "", - "internalizer_version": "", - "adapter_format": "", - "created_at": "", - "ttl_seconds": 0, - "expires_at": "", - "provenance": [ - { - "op": "internalize.create", - "timestamp": "", - "actor": "", - "inputs_hash": "" - } - ], - "replay_recipe": { - "command": "decapod", - "args": ["internalize", "create", "--source", "..."], - "env": {} - }, - "adapter_hash": "", - "adapter_path": "adapter.bin", - "capabilities_contract": { - "allowed_scopes": ["qa", "summarization"], - "permitted_tools": ["*"], - "allow_code_gen": false - }, - "risk_tier": { - "creation": "compute-risky", - "attach": "behavior-changing", - "inspect": "read-only" - } -} +```text +.decapod/generated/sessions/<session_id>/internalize_mounts/ + mount_<artifact_id>.json ``` --- -## 4. Result Schemas +## 4. Manifest Contract -### InternalizationCreateResult +Schema version: `1.2.0` -```json -{ - "schema_version": "1.0.0", - "success": true, - "artifact_id": "", - "artifact_path": "", - "manifest": { "...InternalizationManifest..." 
}, - "source_hash": "", - "adapter_hash": "" -} -``` +Required fields include: -### InternalizationAttachResult - -```json -{ - "schema_version": "1.0.0", - "success": true, - "artifact_id": "", - "session_id": "", - "attached_at": "", - "expires_at": "", - "capabilities_contract": { "...CapabilitiesContract..." }, - "risk_classification": "behavior-changing", - "provenance_entry": { "...ProvenanceEntry..." } -} -``` +- `source_hash` +- `base_model_id` +- `internalizer_profile` +- `internalizer_version` +- `adapter_hash` +- `determinism_class` +- `binary_hash` +- `runtime_fingerprint` +- `replay_recipe` +- `capabilities_contract` -### InternalizationInspectResult - -```json -{ - "schema_version": "1.0.0", - "artifact_id": "", - "manifest": { "...InternalizationManifest..." }, - "integrity": { - "source_hash_valid": true, - "adapter_hash_valid": true, - "manifest_consistent": true, - "expired": false - }, - "status": "valid" -} -``` +Determinism rules: ---- +- `determinism_class` is `deterministic` or `best_effort` +- only deterministic profiles may claim `replay_recipe.mode=replayable` +- best-effort profiles must be `non_replayable` +- best-effort manifests must carry `binary_hash` and `runtime_fingerprint` -## 5. Invariants +Capabilities rules: -1. **Source binding:** `source_hash` must be the SHA-256 of the document at creation time. No silent changes. -2. **Base model binding:** `base_model_id` must be recorded; adapters are model-specific. -3. **Reproducibility:** `internalizer_profile` + `internalizer_version` + `replay_recipe` must be sufficient to reproduce the artifact. -4. **Explicit attach:** Agents cannot reference an internalization without a logged `internalize.attach` operation. -5. **TTL enforcement:** If `expires_at` is set and in the past, `attach` MUST fail. -6. **Adapter integrity:** `adapter_hash` must match the SHA-256 of the payload file at attach time. -7. 
**Provenance logging:** Every `attach` operation appends a provenance entry to the session directory. +- default scope is `qa` +- `allow_code_gen=false` by default +- attach must enforce `permitted_tools` --- -## 6. Risk Classification +## 5. CLI Surface -| Operation | Risk Level | Rationale | -|-----------|-----------|-----------| -| `create` | compute-risky | Invokes external tool; no repo mutation beyond artifact dir | -| `attach` | behavior-changing | Affects inference behavior; logged as dependency | -| `inspect` | read-only | No side effects | +### `decapod internalize create` ---- +Creates or reuses a content-addressed artifact from: +- `--source` +- `--model` +- `--profile` +- `--ttl` +- `--scope` -## 7. Internalizer Profiles +### `decapod internalize attach` -Profiles are pluggable external tools stored in `.decapod/generated/profiles/internalizers/.json`. +Creates a session-scoped mount lease from: +- `--id` +- `--session` +- `--tool` +- `--lease-seconds` -Profile schema: -```json -{ - "name": "", - "version": "", - "executable": "", - "default_params": {}, - "adapter_format": "" -} -``` +### `decapod internalize detach` + +Revokes the session-scoped mount lease: +- `--id` +- `--session` + +### `decapod internalize inspect` + +Proves artifact status: +- `valid` +- `best-effort` +- `expired` +- `integrity-failed` + +--- + +## 6. Provable Acceptance Criteria + +An internalization is provable only if: -The built-in `noop` profile produces an empty adapter for pipeline testing without GPU dependencies. +1. `source_hash` binds to exact source bytes. +2. `base_model_id` is recorded. +3. `adapter_hash` matches the adapter payload. +4. replayability claims match determinism policy. +5. use requires a successful attach lease. +6. expired artifacts cannot be attached. +7. expired mount leases fail validation if left active. +8. the attach tool is allowed by `permitted_tools`. --- -## Links +## 7. 
Stable JSON Schemas -- `core/PLUGINS.md` - Subsystem registry -- `core/INTERFACES.md` - Interface contracts registry +- `constitution/interfaces/jsonschema/internalization/InternalizationManifest.schema.json` +- `constitution/interfaces/jsonschema/internalization/InternalizationCreateResult.schema.json` +- `constitution/interfaces/jsonschema/internalization/InternalizationAttachResult.schema.json` +- `constitution/interfaces/jsonschema/internalization/InternalizationDetachResult.schema.json` +- `constitution/interfaces/jsonschema/internalization/InternalizationInspectResult.schema.json` diff --git a/constitution/interfaces/jsonschema/internalization/InternalizationAttachResult.schema.json b/constitution/interfaces/jsonschema/internalization/InternalizationAttachResult.schema.json new file mode 100644 index 00000000..f4cf0d1d --- /dev/null +++ b/constitution/interfaces/jsonschema/internalization/InternalizationAttachResult.schema.json @@ -0,0 +1,17 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/attach-result-1.2.0.json", + "title": "InternalizationAttachResult", + "type": "object", + "required": [ + "schema_version", + "success", + "artifact_id", + "session_id", + "tool", + "attached_at", + "lease_id", + "lease_seconds", + "lease_expires_at" + ] +} diff --git a/constitution/interfaces/jsonschema/internalization/InternalizationCreateResult.schema.json b/constitution/interfaces/jsonschema/internalization/InternalizationCreateResult.schema.json new file mode 100644 index 00000000..c319facc --- /dev/null +++ b/constitution/interfaces/jsonschema/internalization/InternalizationCreateResult.schema.json @@ -0,0 +1,16 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/create-result-1.2.0.json", + "title": "InternalizationCreateResult", + "type": "object", + "required": [ + "schema_version", + "success", + "artifact_id", + 
"artifact_path", + "cache_hit", + "manifest", + "source_hash", + "adapter_hash" + ] +} diff --git a/constitution/interfaces/jsonschema/internalization/InternalizationDetachResult.schema.json b/constitution/interfaces/jsonschema/internalization/InternalizationDetachResult.schema.json new file mode 100644 index 00000000..226e7eb2 --- /dev/null +++ b/constitution/interfaces/jsonschema/internalization/InternalizationDetachResult.schema.json @@ -0,0 +1,15 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/detach-result-1.2.0.json", + "title": "InternalizationDetachResult", + "type": "object", + "required": [ + "schema_version", + "success", + "artifact_id", + "session_id", + "detached_at", + "lease_id", + "detached" + ] +} diff --git a/constitution/interfaces/jsonschema/internalization/InternalizationInspectResult.schema.json b/constitution/interfaces/jsonschema/internalization/InternalizationInspectResult.schema.json new file mode 100644 index 00000000..5bcdf3c1 --- /dev/null +++ b/constitution/interfaces/jsonschema/internalization/InternalizationInspectResult.schema.json @@ -0,0 +1,13 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/inspect-result-1.2.0.json", + "title": "InternalizationInspectResult", + "type": "object", + "required": [ + "schema_version", + "artifact_id", + "manifest", + "integrity", + "status" + ] +} diff --git a/constitution/interfaces/jsonschema/internalization/InternalizationManifest.schema.json b/constitution/interfaces/jsonschema/internalization/InternalizationManifest.schema.json new file mode 100644 index 00000000..5f272db0 --- /dev/null +++ b/constitution/interfaces/jsonschema/internalization/InternalizationManifest.schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/manifest-1.2.0.json", + "title": 
"InternalizationManifest", + "type": "object", + "required": [ + "schema_version", + "id", + "source_hash", + "source_path", + "base_model_id", + "internalizer_profile", + "internalizer_version", + "adapter_format", + "created_at", + "ttl_seconds", + "provenance", + "replay_recipe", + "adapter_hash", + "adapter_path", + "capabilities_contract", + "risk_tier", + "determinism_class", + "binary_hash", + "runtime_fingerprint" + ] +} diff --git a/constitution/methodology/ARCHITECTURE.md b/constitution/methodology/ARCHITECTURE.md index 5a6808f8..b491d271 100644 --- a/constitution/methodology/ARCHITECTURE.md +++ b/constitution/methodology/ARCHITECTURE.md @@ -69,6 +69,34 @@ Binding system rules live in `specs/SYSTEM.md` and `specs/INTENT.md`. --- +## 6. Internalized Context Artifact Sequence + +```text +Agent + | + | decapod internalize create --source doc.md --model base-model --profile noop + v +Decapod CLI + | + | hashes source + resolves profile + writes manifest/adapter + v +.decapod/generated/artifacts/internalizations/ + | + | decapod internalize attach --id --session --tool --lease-seconds 1800 + v +Session-scoped mount lease + | + | inference payload references artifact_id only while lease is active + v +Inference caller + | + | decapod internalize detach --id --session + v +Lease revoked +``` + +--- + ## Links ### Core Router diff --git a/src/core/validate.rs b/src/core/validate.rs index d85c0ec9..137d6864 100644 --- a/src/core/validate.rs +++ b/src/core/validate.rs @@ -21,6 +21,7 @@ use crate::core::scaffold::DECAPOD_GITIGNORE_RULES; use crate::core::store::{Store, StoreKind}; use crate::core::workunit::{self, WorkUnitManifest, WorkUnitStatus}; use crate::plugins::aptitude::{SkillCard, SkillResolution}; +use crate::plugins::internalize::{self, DeterminismClass, InternalizationManifest, ReplayClass}; use crate::{db, primitives, todo}; use regex::Regex; use serde_json; @@ -2234,6 +2235,207 @@ fn validate_skill_resolutions_if_present( Ok(()) } +fn 
validate_internalization_artifacts_if_present( + ctx: &ValidationContext, + repo_root: &Path, +) -> Result<(), error::DecapodError> { + info("Internalization Artifact Gate"); + + let artifacts_dir = repo_root + .join(".decapod") + .join("generated") + .join("artifacts") + .join("internalizations"); + if !artifacts_dir.exists() { + skip( + "No internalization artifacts found; skipping internalization gate", + ctx, + ); + return Ok(()); + } + + let mut files = 0usize; + for entry in fs::read_dir(&artifacts_dir).map_err(error::DecapodError::IoError)? { + let entry = entry.map_err(error::DecapodError::IoError)?; + let path = entry.path(); + if !path.is_dir() { + continue; + } + let manifest_path = path.join("manifest.json"); + if !manifest_path.exists() { + fail( + &format!( + "Internalization artifact is missing manifest.json ({})", + path.display() + ), + ctx, + ); + continue; + } + + files += 1; + let raw = fs::read_to_string(&manifest_path).map_err(error::DecapodError::IoError)?; + let manifest: InternalizationManifest = serde_json::from_str(&raw).map_err(|e| { + error::DecapodError::ValidationError(format!( + "invalid internalization manifest {}: {}", + manifest_path.display(), + e + )) + })?; + + if manifest.schema_version != internalize::SCHEMA_VERSION { + fail( + &format!( + "Internalization manifest schema mismatch in {} (actual={}, expected={})", + manifest_path.display(), + manifest.schema_version, + internalize::SCHEMA_VERSION + ), + ctx, + ); + } + if manifest.base_model_id.trim().is_empty() { + fail( + &format!( + "Internalization manifest missing base_model_id ({})", + manifest_path.display() + ), + ctx, + ); + } + if manifest.capabilities_contract.permitted_tools.is_empty() { + fail( + &format!( + "Internalization manifest must declare permitted_tools ({})", + manifest_path.display() + ), + ctx, + ); + } + if manifest.replay_recipe.mode == ReplayClass::Replayable + && manifest.determinism_class != DeterminismClass::Deterministic + { + fail( + &format!( 
+ "Internalization manifest claims replayable despite non-deterministic profile ({})", + manifest_path.display() + ), + ctx, + ); + } + if manifest.determinism_class == DeterminismClass::BestEffort + && (manifest.binary_hash.trim().is_empty() + || manifest.runtime_fingerprint.trim().is_empty()) + { + fail( + &format!( + "Best-effort internalization manifest must include binary_hash and runtime_fingerprint ({})", + manifest_path.display() + ), + ctx, + ); + } + + let inspect = + internalize::inspect_internalization(&repo_root.join(".decapod"), &manifest.id) + .map_err(|e| { + error::DecapodError::ValidationError(format!( + "internalization inspect failed for {}: {}", + manifest_path.display(), + e + )) + })?; + if !inspect.integrity.adapter_hash_valid { + fail( + &format!( + "Internalization adapter hash mismatch ({})", + manifest_path.display() + ), + ctx, + ); + } + if inspect.integrity.source_verification == "mismatch" { + fail( + &format!( + "Internalization source hash mismatch ({})", + manifest_path.display() + ), + ctx, + ); + } + if !inspect.integrity.replayable_claim_valid { + fail( + &format!( + "Internalization replay metadata is inconsistent ({})", + manifest_path.display() + ), + ctx, + ); + } + } + + let sessions_dir = repo_root + .join(".decapod") + .join("generated") + .join("sessions"); + if sessions_dir.exists() { + for session_entry in fs::read_dir(&sessions_dir).map_err(error::DecapodError::IoError)? { + let session_entry = session_entry.map_err(error::DecapodError::IoError)?; + let mounts_dir = session_entry.path().join("internalize_mounts"); + if !mounts_dir.exists() { + continue; + } + for mount_entry in fs::read_dir(&mounts_dir).map_err(error::DecapodError::IoError)? 
{ + let mount_entry = mount_entry.map_err(error::DecapodError::IoError)?; + let mount_path = mount_entry.path(); + if mount_path.extension().and_then(|s| s.to_str()) != Some("json") { + continue; + } + let raw = fs::read_to_string(&mount_path).map_err(error::DecapodError::IoError)?; + let mount: serde_json::Value = serde_json::from_str(&raw).map_err(|e| { + error::DecapodError::ValidationError(format!( + "invalid internalization mount lease {}: {}", + mount_path.display(), + e + )) + })?; + let lease_expires_at = mount + .get("lease_expires_at") + .and_then(|v| v.as_str()) + .unwrap_or(""); + if lease_expires_at.is_empty() { + fail( + &format!( + "Internalization mount missing lease_expires_at ({})", + mount_path.display() + ), + ctx, + ); + continue; + } + if lease_expires_at < internalize::now_iso8601().as_str() { + fail( + &format!( + "Internalization mount lease expired but still present ({})", + mount_path.display() + ), + ctx, + ); + } + } + } + } + + pass( + &format!( + "Internalization artifact contract checked for {} artifact(s)", + files + ), + ctx, + ); + Ok(()) +} + fn validate_schema_determinism( ctx: &ValidationContext, _decapod_dir: &Path, @@ -4390,6 +4592,13 @@ pub fn run_validation( "validate_skill_resolutions_if_present", validate_skill_resolutions_if_present(ctx, decapod_dir) ); + gate!( + s, + timings, + ctx, + "validate_internalization_artifacts_if_present", + validate_internalization_artifacts_if_present(ctx, decapod_dir) + ); gate!( s, timings, diff --git a/src/lib.rs b/src/lib.rs index ac6dfa79..f4f488c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2377,21 +2377,28 @@ fn resolve_release_capsule(project_root: &Path) -> Result<(String, String), erro let fallback_path = core::context_capsule::context_capsule_path(project_root, &fallback); let capsule = if fallback_path.exists() { let raw = fs::read_to_string(&fallback_path).map_err(error::DecapodError::IoError)?; - let parsed: core::context_capsule::DeterministicContextCapsule = 
serde_json::from_str(&raw) - .map_err(|e| { + // If the on-disk capsule is empty (e.g. after a shallow checkout or + // force-push that left a zero-length tracked file), fall through to + // the freshly-generated capsule instead of failing with a parse error. + if raw.trim().is_empty() { + fallback + } else { + let parsed: core::context_capsule::DeterministicContextCapsule = + serde_json::from_str(&raw).map_err(|e| { + error::DecapodError::ValidationError(format!( + "invalid release capsule JSON at '{}': {}", + fallback_path.display(), + e + )) + })?; + parsed.with_recomputed_hash().map_err(|e| { error::DecapodError::ValidationError(format!( - "invalid release capsule JSON at '{}': {}", + "failed to recompute release capsule hash at '{}': {}", fallback_path.display(), e )) - })?; - parsed.with_recomputed_hash().map_err(|e| { - error::DecapodError::ValidationError(format!( - "failed to recompute release capsule hash at '{}': {}", - fallback_path.display(), - e - )) - })? + })? + } } else { fallback }; @@ -2548,31 +2555,36 @@ fn validate_policy_lineage( } let raw_capsule = fs::read_to_string(&abs).map_err(error::DecapodError::IoError)?; - let parsed: core::context_capsule::DeterministicContextCapsule = - serde_json::from_str(&raw_capsule).map_err(|e| { + // If the on-disk capsule is empty (e.g. shallow checkout or force-push + // left a zero-length tracked file), skip integrity checks — the capsule + // will be regenerated on the next resolve pass. 
+ if !raw_capsule.trim().is_empty() { + let parsed: core::context_capsule::DeterministicContextCapsule = + serde_json::from_str(&raw_capsule).map_err(|e| { + error::DecapodError::ValidationError(format!( + "{manifest_label} policy_lineage capsule at '{}' is not valid deterministic capsule JSON: {}", + capsule_path, e + )) + })?; + let normalized = parsed.with_recomputed_hash().map_err(|e| { error::DecapodError::ValidationError(format!( - "{manifest_label} policy_lineage capsule at '{}' is not valid deterministic capsule JSON: {}", + "{manifest_label} policy_lineage capsule hash computation failed for '{}': {}", capsule_path, e )) })?; - let normalized = parsed.with_recomputed_hash().map_err(|e| { - error::DecapodError::ValidationError(format!( - "{manifest_label} policy_lineage capsule hash computation failed for '{}': {}", - capsule_path, e - )) - })?; - if parsed.capsule_hash != normalized.capsule_hash { - return Err(error::DecapodError::ValidationError(format!( - "{manifest_label} policy_lineage capsule file '{}' has internal hash mismatch", - capsule_path - ))); - } - if capsule_hash != normalized.capsule_hash { - return Err(error::DecapodError::ValidationError(format!( - "{manifest_label} policy_lineage capsule_hash mismatch for '{}'", - capsule_path - ))); + if parsed.capsule_hash != normalized.capsule_hash { + return Err(error::DecapodError::ValidationError(format!( + "{manifest_label} policy_lineage capsule file '{}' has internal hash mismatch", + capsule_path + ))); + } + if capsule_hash != normalized.capsule_hash { + return Err(error::DecapodError::ValidationError(format!( + "{manifest_label} policy_lineage capsule_hash mismatch for '{}'", + capsule_path + ))); + } } Ok(PolicyLineage { @@ -4027,6 +4039,7 @@ fn schema_catalog() -> std::collections::BTreeMap<&'static str, serde_json::Valu schemas.insert("lcm", lcm::schema()); schemas.insert("map", map_ops::schema()); schemas.insert("eval", eval::schema()); + schemas.insert("internalize", 
internalize::schema()); schemas.insert( "command_registry", serde_json::json!({ diff --git a/src/plugins/internalize.rs b/src/plugins/internalize.rs index c91d2df7..948bd700 100644 --- a/src/plugins/internalize.rs +++ b/src/plugins/internalize.rs @@ -20,8 +20,6 @@ use std::path::{Path, PathBuf}; use std::process::Command as ProcessCommand; use std::time::{SystemTime, UNIX_EPOCH}; -// ── CLI ──────────────────────────────────────────────────────────────── - #[derive(clap::Args, Debug)] pub struct InternalizeCli { #[clap(subcommand)] @@ -32,94 +30,92 @@ pub struct InternalizeCli { pub enum InternalizeCommand { /// Produce an internalized context artifact from a source document Create { - /// Path to source document (file path) #[clap(long)] source: String, - /// Base model identifier this adapter targets #[clap(long)] model: String, - /// Internalizer profile name (default: noop) #[clap(long, default_value = "noop")] profile: String, - /// Time-to-live in seconds (0 = no expiry) #[clap(long, default_value_t = 0)] ttl: u64, - /// Allowed usage scopes (repeatable: qa, summarization, code-gen) #[clap(long = "scope", value_delimiter = ',')] scopes: Vec, - /// Output format: 'json' or 'text' #[clap(long, default_value = "json")] format: String, }, - /// Attach an internalized context artifact to an active agent session + /// Attach an internalized context artifact to a session-scoped mount lease Attach { - /// Artifact ID to attach #[clap(long)] id: String, - /// Session identifier to attach to #[clap(long)] session: String, - /// Output format: 'json' or 'text' + #[clap(long, default_value = "decapod-cli")] + tool: String, + #[clap(long, default_value_t = 1800)] + lease_seconds: u64, + #[clap(long, default_value = "json")] + format: String, + }, + /// Explicitly revoke a session-scoped internalization mount + Detach { + #[clap(long)] + id: String, + #[clap(long)] + session: String, #[clap(long, default_value = "json")] format: String, }, /// Inspect an internalized 
context artifact (manifest + integrity) Inspect { - /// Artifact ID to inspect #[clap(long)] id: String, - /// Output format: 'json' or 'text' #[clap(long, default_value = "json")] format: String, }, } -// ── Schemas (stable, versioned) ──────────────────────────────────────── +pub const SCHEMA_VERSION: &str = "1.2.0"; +pub const DEFAULT_ATTACH_LEASE_SECONDS: u64 = 1800; -pub const SCHEMA_VERSION: &str = "1.0.0"; +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum DeterminismClass { + Deterministic, + BestEffort, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ReplayClass { + Replayable, + NonReplayable, +} -/// Internalization manifest — the core artifact model. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct InternalizationManifest { - /// Schema version for forward compatibility. pub schema_version: String, - /// Unique artifact identifier (ULID). pub id: String, - /// SHA-256 hash of the source document. pub source_hash: String, - /// Original source path or URI. pub source_path: String, - /// Extraction method used (profile name). pub extraction_method: String, - /// Chunking parameters (profile-specific). pub chunking_params: BTreeMap, - /// Base model identifier this adapter was produced for. pub base_model_id: String, - /// Internalizer profile identifier. pub internalizer_profile: String, - /// Internalizer profile version. pub internalizer_version: String, - /// Adapter format (e.g., "lora", "compressed-context", "noop"). pub adapter_format: String, - /// ISO 8601 creation timestamp. pub created_at: String, - /// TTL in seconds (0 = no expiry). pub ttl_seconds: u64, - /// ISO 8601 expiry timestamp (null if ttl is 0). #[serde(skip_serializing_if = "Option::is_none")] pub expires_at: Option, - /// Provenance chain: ordered list of operations that produced this artifact. 
pub provenance: Vec, - /// Replay recipe: deterministic command to reproduce this artifact. pub replay_recipe: ReplayRecipe, - /// SHA-256 hash of the adapter payload. pub adapter_hash: String, - /// Relative path to adapter payload within artifact directory. pub adapter_path: String, - /// Capabilities contract: what this internalization is allowed for. pub capabilities_contract: CapabilitiesContract, - /// Risk classification for this artifact. pub risk_tier: RiskTier, + pub determinism_class: DeterminismClass, + pub binary_hash: String, + pub runtime_fingerprint: String, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -132,28 +128,24 @@ pub struct ProvenanceEntry { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct ReplayRecipe { + pub mode: ReplayClass, pub command: String, pub args: Vec, pub env: BTreeMap, + pub reason: String, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct CapabilitiesContract { - /// Allowed usage scopes (e.g., ["qa", "summarization"]). pub allowed_scopes: Vec, - /// Tools permitted to mount this adapter. pub permitted_tools: Vec, - /// Whether code generation is allowed. pub allow_code_gen: bool, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct RiskTier { - /// Risk level for creation: "compute-risky" (external tool invoked). pub creation: String, - /// Risk level for attach: "behavior-changing" (affects inference). pub attach: String, - /// Risk level for inspect: "read-only" (no side effects). pub inspect: String, } @@ -167,34 +159,47 @@ impl Default for RiskTier { } } -/// Result of `decapod internalize create`. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct InternalizationCreateResult { pub schema_version: String, pub success: bool, pub artifact_id: String, pub artifact_path: String, + pub cache_hit: bool, pub manifest: InternalizationManifest, pub source_hash: String, pub adapter_hash: String, } -/// Result of `decapod internalize attach`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InternalizationAttachResult { pub schema_version: String, pub success: bool, pub artifact_id: String, pub session_id: String, + pub tool: String, pub attached_at: String, + pub lease_id: String, + pub lease_seconds: u64, + pub lease_expires_at: String, pub expires_at: Option, pub capabilities_contract: CapabilitiesContract, pub risk_classification: String, - /// Provenance entry logged to the session. + pub source_verification: String, pub provenance_entry: ProvenanceEntry, } -/// Result of `decapod internalize inspect`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InternalizationDetachResult { + pub schema_version: String, + pub success: bool, + pub artifact_id: String, + pub session_id: String, + pub detached_at: String, + pub lease_id: String, + pub detached: bool, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InternalizationInspectResult { pub schema_version: String, @@ -207,29 +212,24 @@ pub struct InternalizationInspectResult { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct IntegrityCheck { pub source_hash_valid: bool, + pub source_verification: String, pub adapter_hash_valid: bool, pub manifest_consistent: bool, pub expired: bool, + pub replayable_claim_valid: bool, } -// ── Internalizer Profile Abstraction ─────────────────────────────────── - -/// An internalizer profile describes an external tool that converts -/// a document + base model into an adapter artifact. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct InternalizerProfile { pub name: String, pub version: String, - /// Executable path or "builtin:noop" for the stub. pub executable: String, - /// Default chunking parameters. pub default_params: BTreeMap, - /// Output adapter format. pub adapter_format: String, + pub determinism_class: DeterminismClass, } impl InternalizerProfile { - /// Built-in noop profile: produces a zero-byte adapter for pipeline testing. pub fn noop() -> Self { Self { name: "noop".to_string(), @@ -237,16 +237,15 @@ impl InternalizerProfile { executable: "builtin:noop".to_string(), default_params: BTreeMap::new(), adapter_format: "noop".to_string(), + determinism_class: DeterminismClass::Deterministic, } } - /// Resolve a profile by name. Returns the noop stub for "noop", - /// otherwise looks for a profile JSON in `.decapod/generated/profiles/internalizers/`. pub fn resolve(name: &str, store_root: &Path) -> Result { if name == "noop" { return Ok(Self::noop()); } - let profile_path = store_root + let profile_path = control_root(store_root) .join("generated") .join("profiles") .join("internalizers") @@ -255,37 +254,55 @@ impl InternalizerProfile { return Err(InternalizeError::ProfileNotFound(name.to_string())); } let raw = fs::read_to_string(&profile_path).map_err(InternalizeError::Io)?; - let profile: Self = serde_json::from_str(&raw).map_err(InternalizeError::Json)?; - Ok(profile) + serde_json::from_str(&raw).map_err(InternalizeError::Json) + } + + pub fn binary_hash(&self) -> Result { + if self.executable == "builtin:noop" { + return sha256_bytes(self.executable.as_bytes()); + } + let path = Path::new(&self.executable); + if !path.exists() { + return Err(InternalizeError::ProfileExecution(format!( + "Internalizer binary not found: {}", + self.executable + ))); + } + sha256_file(path) + } + + pub fn runtime_fingerprint(&self) -> String { + format!( + "os={} arch={} executable={}", + std::env::consts::OS, + std::env::consts::ARCH, + 
self.executable + ) } - /// Execute the internalizer. For "builtin:noop", produces an empty adapter. - /// For external executables, invokes them with JSON on stdin and reads adapter from output dir. pub fn execute( &self, source_path: &Path, - _base_model: &str, + base_model: &str, output_dir: &Path, ) -> Result<(PathBuf, BTreeMap), InternalizeError> { let adapter_file = output_dir.join("adapter.bin"); if self.executable == "builtin:noop" { - // Noop: write empty adapter fs::write(&adapter_file, b"").map_err(InternalizeError::Io)?; return Ok((adapter_file, self.default_params.clone())); } - // External executable: invoke with structured JSON input let input = serde_json::json!({ "source_path": source_path.to_string_lossy(), - "base_model": _base_model, + "base_model": base_model, "output_dir": output_dir.to_string_lossy(), "params": self.default_params, }); let output = ProcessCommand::new(&self.executable) .arg("--input") - .arg(serde_json::to_string(&input).unwrap()) + .arg(serde_json::to_string(&input).unwrap_or_default()) .output() .map_err(InternalizeError::Io)?; @@ -305,17 +322,13 @@ impl InternalizerProfile { ))); } - // Try to parse output metadata from stdout let stdout = String::from_utf8_lossy(&output.stdout); - let params: BTreeMap = - serde_json::from_str(&stdout).unwrap_or_else(|_| self.default_params.clone()); + let params = serde_json::from_str(&stdout).unwrap_or_else(|_| self.default_params.clone()); Ok((adapter_file, params)) } } -// ── Error Types ──────────────────────────────────────────────────────── - #[derive(Debug)] pub enum InternalizeError { Io(std::io::Error), @@ -323,6 +336,10 @@ pub enum InternalizeError { ProfileNotFound(String), ProfileExecution(String), ArtifactNotFound(String), + MountNotFound { + artifact_id: String, + session_id: String, + }, SourceIntegrityFailed { expected: String, actual: String, @@ -335,6 +352,10 @@ pub enum InternalizeError { artifact_id: String, expired_at: String, }, + ToolNotPermitted { + tool: String, + 
artifact_id: String, + }, ValidationError(String), } @@ -346,30 +367,37 @@ impl std::fmt::Display for InternalizeError { Self::ProfileNotFound(n) => write!(f, "Internalizer profile '{}' not found", n), Self::ProfileExecution(s) => write!(f, "Profile execution error: {}", s), Self::ArtifactNotFound(id) => write!(f, "Artifact '{}' not found", id), - Self::SourceIntegrityFailed { expected, actual } => { - write!( - f, - "Source integrity check failed: expected {}, got {}", - expected, actual - ) - } - Self::AdapterIntegrityFailed { expected, actual } => { - write!( - f, - "Adapter integrity check failed: expected {}, got {}", - expected, actual - ) - } + Self::MountNotFound { + artifact_id, + session_id, + } => write!( + f, + "No active mount for artifact '{}' in session '{}'", + artifact_id, session_id + ), + Self::SourceIntegrityFailed { expected, actual } => write!( + f, + "Source integrity check failed: expected {}, got {}", + expected, actual + ), + Self::AdapterIntegrityFailed { expected, actual } => write!( + f, + "Adapter integrity check failed: expected {}, got {}", + expected, actual + ), Self::Expired { artifact_id, expired_at, - } => { - write!( - f, - "Artifact '{}' expired at {}; renew with a new create", - artifact_id, expired_at - ) - } + } => write!( + f, + "Artifact '{}' expired at {}; renew with a new create", + artifact_id, expired_at + ), + Self::ToolNotPermitted { tool, artifact_id } => write!( + f, + "Tool '{}' is not permitted to mount artifact '{}'", + tool, artifact_id + ), Self::ValidationError(s) => write!(f, "Validation error: {}", s), } } @@ -383,8 +411,6 @@ impl From for crate::core::error::DecapodError { } } -// ── Helpers ──────────────────────────────────────────────────────────── - fn sha256_file(path: &Path) -> Result { let bytes = fs::read(path).map_err(InternalizeError::Io)?; sha256_bytes(&bytes) @@ -396,18 +422,12 @@ fn sha256_bytes(bytes: &[u8]) -> Result { Ok(format!("{:x}", hasher.finalize())) } -fn now_iso8601() -> String { - 
let d = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default(); - // Simple ISO 8601 without chrono dependency - let secs = d.as_secs(); +fn iso8601_from_epoch(secs: u64) -> String { let days = secs / 86400; let time_of_day = secs % 86400; let hours = time_of_day / 3600; let minutes = (time_of_day % 3600) / 60; let seconds = time_of_day % 60; - // Approximate date calculation (good enough for timestamps) let mut year = 1970i64; let mut remaining_days = days as i64; loop { @@ -456,8 +476,40 @@ fn now_iso8601() -> String { ) } +fn now_unix() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() +} + +pub fn now_iso8601() -> String { + iso8601_from_epoch(now_unix()) +} + +fn iso8601_after_secs(secs: u64) -> String { + iso8601_from_epoch(now_unix().saturating_add(secs)) +} + +fn control_root(store_root: &Path) -> PathBuf { + if store_root.file_name().and_then(|s| s.to_str()) == Some("data") + && store_root + .parent() + .and_then(|p| p.file_name()) + .and_then(|s| s.to_str()) + == Some(".decapod") + { + store_root + .parent() + .map(Path::to_path_buf) + .unwrap_or_else(|| store_root.to_path_buf()) + } else { + store_root.to_path_buf() + } +} + fn artifacts_dir(store_root: &Path) -> PathBuf { - store_root + control_root(store_root) .join("generated") .join("artifacts") .join("internalizations") @@ -467,136 +519,242 @@ fn artifact_dir(store_root: &Path, id: &str) -> PathBuf { artifacts_dir(store_root).join(id) } -// ── Core Operations ──────────────────────────────────────────────────── +fn session_dir(store_root: &Path, session_id: &str) -> PathBuf { + control_root(store_root) + .join("generated") + .join("sessions") + .join(session_id) +} -pub fn create_internalization( - store_root: &Path, - source: &str, - model: &str, - profile_name: &str, - ttl: u64, - scopes: &[String], -) -> Result { - let source_path = Path::new(source); - if !source_path.exists() { - return Err(InternalizeError::Io(std::io::Error::new( - 
std::io::ErrorKind::NotFound, - format!("Source document not found: {}", source), - ))); - } +fn mount_dir(store_root: &Path, session_id: &str) -> PathBuf { + session_dir(store_root, session_id).join("internalize_mounts") +} - // Hash the source - let source_hash = sha256_file(source_path)?; +fn mount_id(artifact_id: &str) -> String { + format!("mount_{}", artifact_id) +} - // Resolve profile - let profile = InternalizerProfile::resolve(profile_name, store_root)?; +fn mount_path(store_root: &Path, session_id: &str, artifact_id: &str) -> PathBuf { + mount_dir(store_root, session_id).join(format!("{}.json", mount_id(artifact_id))) +} - // Create artifact directory - let id = ulid::Ulid::new().to_string(); - let art_dir = artifact_dir(store_root, &id); - fs::create_dir_all(&art_dir).map_err(InternalizeError::Io)?; +fn is_non_local_source(source: &str) -> bool { + source == "-" || source.starts_with("http://") || source.starts_with("https://") +} - // Execute internalizer - let (adapter_path, chunking_params) = profile.execute(source_path, model, &art_dir)?; +fn is_expired(expires_at: Option<&str>) -> bool { + expires_at.is_some_and(|exp| now_iso8601().as_str() > exp) +} - // Hash the adapter - let adapter_hash = sha256_file(&adapter_path)?; +fn verify_source_binding( + manifest: &InternalizationManifest, +) -> Result<(bool, String), InternalizeError> { + if manifest.source_path == "-" { + return Ok((false, "best-effort-stdin-source".to_string())); + } + if manifest.source_path.starts_with("http://") || manifest.source_path.starts_with("https://") { + return Ok((false, "best-effort-nonlocal-source".to_string())); + } - let now = now_iso8601(); + let source_path = Path::new(&manifest.source_path); + if !source_path.exists() { + return Ok((false, "best-effort-source-unavailable".to_string())); + } - // Compute expiry - let expires_at = if ttl > 0 { - let d = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default(); - let exp_secs = d.as_secs() + ttl; - // 
Recompute timestamp for expiry - let mut year = 1970i64; - let mut remaining = exp_secs as i64; - loop { - let diy = if year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) { - 366 * 86400 - } else { - 365 * 86400 - }; - if remaining < diy { - break; - } - remaining -= diy; - year += 1; - } - let day_secs = remaining; - let days = day_secs / 86400; - let tod = day_secs % 86400; - let h = tod / 3600; - let m = (tod % 3600) / 60; - let s = tod % 60; - let leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0); - let md = [ - 31, - if leap { 29 } else { 28 }, - 31, - 30, - 31, - 30, - 31, - 31, - 30, - 31, - 30, - 31, - ]; - let mut mon = 0usize; - let mut rem = days; - for (i, &d) in md.iter().enumerate() { - if rem < d { - mon = i; - break; - } - rem -= d; - } - Some(format!( - "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", - year, - mon + 1, - rem + 1, - h, - m, - s - )) + let actual = sha256_file(source_path)?; + if actual == manifest.source_hash { + Ok((true, "verified".to_string())) } else { - None - }; + Ok((false, "mismatch".to_string())) + } +} - // Default scopes - let effective_scopes = if scopes.is_empty() { - vec!["qa".to_string(), "summarization".to_string()] - } else { - scopes.to_vec() - }; +fn tool_is_permitted(contract: &CapabilitiesContract, tool: &str) -> bool { + contract + .permitted_tools + .iter() + .any(|entry| entry == "*" || entry == tool) +} - let allow_code_gen = effective_scopes.iter().any(|s| s == "code-gen"); +fn artifact_id_for_request( + source_hash: &str, + source_path: &str, + model: &str, + profile: &InternalizerProfile, + ttl: u64, + scopes: &[String], +) -> Result { + let mut normalized_scopes = scopes.to_vec(); + normalized_scopes.sort(); + normalized_scopes.dedup(); + let binding = serde_json::json!({ + "schema_version": SCHEMA_VERSION, + "source_hash": source_hash, + "source_path": source_path, + "base_model_id": model, + "internalizer_profile": profile.name, + "internalizer_version": profile.version, + "adapter_format": 
profile.adapter_format, + "determinism_class": profile.determinism_class, + "ttl_seconds": ttl, + "scopes": normalized_scopes, + "chunking_params": profile.default_params, + }); + let bytes = serde_json::to_vec(&binding).map_err(InternalizeError::Json)?; + let hex = sha256_bytes(&bytes)?; + Ok(format!("int_{}", &hex[..24])) +} - // Build replay recipe +fn build_replay_recipe( + profile: &InternalizerProfile, + binary_hash: &str, + source_path: &str, + model: &str, + ttl: u64, + scopes: &[String], +) -> ReplayRecipe { let mut replay_args = vec![ "internalize".to_string(), "create".to_string(), "--source".to_string(), - source.to_string(), + source_path.to_string(), "--model".to_string(), model.to_string(), "--profile".to_string(), - profile_name.to_string(), + profile.name.clone(), ]; if ttl > 0 { replay_args.push("--ttl".to_string()); replay_args.push(ttl.to_string()); } - for s in &effective_scopes { + for scope in scopes { replay_args.push("--scope".to_string()); - replay_args.push(s.clone()); + replay_args.push(scope.clone()); + } + + let (mode, reason) = match profile.determinism_class { + DeterminismClass::Deterministic if !binary_hash.is_empty() => ( + ReplayClass::Replayable, + "deterministic profile with pinned binary hash".to_string(), + ), + DeterminismClass::Deterministic => ( + ReplayClass::NonReplayable, + "deterministic profile missing pinned binary hash".to_string(), + ), + DeterminismClass::BestEffort => ( + ReplayClass::NonReplayable, + "best_effort profile may depend on nondeterministic runtime or hardware".to_string(), + ), + }; + + ReplayRecipe { + mode, + command: "decapod".to_string(), + args: replay_args, + env: BTreeMap::new(), + reason, + } +} + +fn replayable_claim_valid(manifest: &InternalizationManifest) -> bool { + match manifest.replay_recipe.mode { + ReplayClass::Replayable => { + manifest.determinism_class == DeterminismClass::Deterministic + && !manifest.binary_hash.trim().is_empty() + } + ReplayClass::NonReplayable => { + if 
manifest.determinism_class == DeterminismClass::BestEffort { + !manifest.binary_hash.trim().is_empty() + && !manifest.runtime_fingerprint.trim().is_empty() + } else { + true + } + } } +} + +pub fn create_internalization( + store_root: &Path, + source: &str, + model: &str, + profile_name: &str, + ttl: u64, + scopes: &[String], +) -> Result { + if is_non_local_source(source) { + return Err(InternalizeError::ValidationError( + "MVP only supports local file sources; URL and stdin sources are intentionally not implemented" + .to_string(), + )); + } + + let source_path = Path::new(source); + if !source_path.exists() { + return Err(InternalizeError::Io(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("Source document not found: {}", source), + ))); + } + let canonical_source = fs::canonicalize(source_path).map_err(InternalizeError::Io)?; + let source_hash = sha256_file(&canonical_source)?; + let profile = InternalizerProfile::resolve(profile_name, store_root)?; + let effective_scopes = if scopes.is_empty() { + vec!["qa".to_string()] + } else { + let mut normalized = scopes.to_vec(); + normalized.sort(); + normalized.dedup(); + normalized + }; + let allow_code_gen = effective_scopes.iter().any(|s| s == "code-gen"); + let binary_hash = profile.binary_hash()?; + let runtime_fingerprint = profile.runtime_fingerprint(); + let source_path_string = canonical_source.to_string_lossy().to_string(); + let artifact_id = artifact_id_for_request( + &source_hash, + &source_path_string, + model, + &profile, + ttl, + &effective_scopes, + )?; + let art_dir = artifact_dir(store_root, &artifact_id); + let manifest_path = art_dir.join("manifest.json"); + if manifest_path.exists() { + let raw = fs::read_to_string(&manifest_path).map_err(InternalizeError::Io)?; + let manifest: InternalizationManifest = + serde_json::from_str(&raw).map_err(InternalizeError::Json)?; + return Ok(InternalizationCreateResult { + schema_version: SCHEMA_VERSION.to_string(), + success: true, + 
artifact_id, + artifact_path: art_dir.to_string_lossy().to_string(), + cache_hit: true, + source_hash: manifest.source_hash.clone(), + adapter_hash: manifest.adapter_hash.clone(), + manifest, + }); + } + + fs::create_dir_all(&art_dir).map_err(InternalizeError::Io)?; + let (adapter_path, chunking_params) = profile.execute(&canonical_source, model, &art_dir)?; + let adapter_hash = sha256_file(&adapter_path)?; + let now = now_iso8601(); + let expires_at = if ttl > 0 { + Some(iso8601_after_secs(ttl)) + } else { + None + }; + + let replay_recipe = build_replay_recipe( + &profile, + &binary_hash, + &source_path_string, + model, + ttl, + &effective_scopes, + ); let provenance_entry = ProvenanceEntry { op: "internalize.create".to_string(), timestamp: now.clone(), @@ -606,49 +764,46 @@ pub fn create_internalization( let manifest = InternalizationManifest { schema_version: SCHEMA_VERSION.to_string(), - id: id.clone(), + id: artifact_id.clone(), source_hash: source_hash.clone(), - source_path: source.to_string(), + source_path: source_path_string, extraction_method: profile.name.clone(), chunking_params, base_model_id: model.to_string(), internalizer_profile: profile.name.clone(), internalizer_version: profile.version.clone(), adapter_format: profile.adapter_format.clone(), - created_at: now.clone(), + created_at: now, ttl_seconds: ttl, expires_at, provenance: vec![provenance_entry], - replay_recipe: ReplayRecipe { - command: "decapod".to_string(), - args: replay_args, - env: BTreeMap::new(), - }, + replay_recipe, adapter_hash: adapter_hash.clone(), adapter_path: "adapter.bin".to_string(), capabilities_contract: CapabilitiesContract { allowed_scopes: effective_scopes, - permitted_tools: vec!["*".to_string()], + permitted_tools: vec!["decapod-cli".to_string()], allow_code_gen, }, risk_tier: RiskTier::default(), + determinism_class: profile.determinism_class, + binary_hash, + runtime_fingerprint, }; - // Write manifest let manifest_json = 
serde_json::to_string_pretty(&manifest).map_err(InternalizeError::Json)?; - fs::write(art_dir.join("manifest.json"), &manifest_json).map_err(InternalizeError::Io)?; + fs::write(&manifest_path, manifest_json).map_err(InternalizeError::Io)?; - let result = InternalizationCreateResult { + Ok(InternalizationCreateResult { schema_version: SCHEMA_VERSION.to_string(), success: true, - artifact_id: id, + artifact_id, artifact_path: art_dir.to_string_lossy().to_string(), + cache_hit: false, manifest, source_hash, adapter_hash, - }; - - Ok(result) + }) } pub fn inspect_internalization( @@ -657,7 +812,6 @@ pub fn inspect_internalization( ) -> Result { let art_dir = artifact_dir(store_root, id); let manifest_path = art_dir.join("manifest.json"); - if !manifest_path.exists() { return Err(InternalizeError::ArtifactNotFound(id.to_string())); } @@ -666,27 +820,22 @@ pub fn inspect_internalization( let manifest: InternalizationManifest = serde_json::from_str(&raw).map_err(InternalizeError::Json)?; - // Verify adapter integrity + let (source_hash_valid, source_verification) = verify_source_binding(&manifest)?; let adapter_full_path = art_dir.join(&manifest.adapter_path); let adapter_hash_valid = if adapter_full_path.exists() { - let actual = sha256_file(&adapter_full_path)?; - actual == manifest.adapter_hash - } else { - false - }; - - // Check expiry - let expired = if let Some(ref exp) = manifest.expires_at { - let now = now_iso8601(); - now > *exp + sha256_file(&adapter_full_path)? 
== manifest.adapter_hash } else { false }; + let expired = is_expired(manifest.expires_at.as_deref()); + let replayable_claim_valid = replayable_claim_valid(&manifest); let status = if expired { "expired".to_string() - } else if !adapter_hash_valid { + } else if !adapter_hash_valid || source_verification == "mismatch" || !replayable_claim_valid { "integrity-failed".to_string() + } else if source_verification.starts_with("best-effort") { + "best-effort".to_string() } else { "valid".to_string() }; @@ -696,10 +845,12 @@ pub fn inspect_internalization( artifact_id: id.to_string(), manifest, integrity: IntegrityCheck { - source_hash_valid: true, // Source may not be local; skipped for inspect + source_hash_valid, + source_verification, adapter_hash_valid, manifest_consistent: true, expired, + replayable_claim_valid, }, status, }) @@ -709,8 +860,9 @@ pub fn attach_internalization( store_root: &Path, id: &str, session_id: &str, + tool: &str, + lease_seconds: u64, ) -> Result { - // First inspect to check integrity and expiry let inspection = inspect_internalization(store_root, id)?; if inspection.integrity.expired { @@ -719,61 +871,248 @@ pub fn attach_internalization( expired_at: inspection .manifest .expires_at + .clone() .unwrap_or_else(|| "unknown".to_string()), }); } - + if inspection.integrity.source_verification == "mismatch" { + let actual = if Path::new(&inspection.manifest.source_path).exists() { + sha256_file(Path::new(&inspection.manifest.source_path))? 
+ } else { + "unavailable".to_string() + }; + return Err(InternalizeError::SourceIntegrityFailed { + expected: inspection.manifest.source_hash.clone(), + actual, + }); + } if !inspection.integrity.adapter_hash_valid { return Err(InternalizeError::AdapterIntegrityFailed { expected: inspection.manifest.adapter_hash.clone(), actual: "corrupted".to_string(), }); } + if !inspection.integrity.replayable_claim_valid { + return Err(InternalizeError::ValidationError( + "Artifact replayability metadata is inconsistent with determinism policy".to_string(), + )); + } + if !tool_is_permitted(&inspection.manifest.capabilities_contract, tool) { + return Err(InternalizeError::ToolNotPermitted { + tool: tool.to_string(), + artifact_id: id.to_string(), + }); + } - let now = now_iso8601(); - + let attached_at = now_iso8601(); + let lease_id = mount_id(id); + let lease_expires_at = iso8601_after_secs(lease_seconds); let provenance_entry = ProvenanceEntry { op: "internalize.attach".to_string(), - timestamp: now.clone(), + timestamp: attached_at.clone(), actor: format!("session:{}", session_id), inputs_hash: inspection.manifest.adapter_hash.clone(), }; - // Log the attach event to the session's provenance directory - let session_prov_dir = store_root - .join("generated") - .join("sessions") - .join(session_id); - let _ = fs::create_dir_all(&session_prov_dir); + let mounts_dir = mount_dir(store_root, session_id); + fs::create_dir_all(&mounts_dir).map_err(InternalizeError::Io)?; + let mount = serde_json::json!({ + "schema_version": SCHEMA_VERSION, + "artifact_id": id, + "session_id": session_id, + "tool": tool, + "lease_id": lease_id, + "lease_seconds": lease_seconds, + "mounted_at": attached_at, + "lease_expires_at": lease_expires_at, + "adapter_hash": inspection.manifest.adapter_hash, + "source_verification": inspection.integrity.source_verification, + "capabilities_contract": inspection.manifest.capabilities_contract, + "risk_classification": inspection.manifest.risk_tier.attach + }); 
+ fs::write( + mount_path(store_root, session_id, id), + serde_json::to_string_pretty(&mount).map_err(InternalizeError::Json)?, + ) + .map_err(InternalizeError::Io)?; + + let session_prov_dir = session_dir(store_root, session_id); + fs::create_dir_all(&session_prov_dir).map_err(InternalizeError::Io)?; let attach_log = session_prov_dir.join(format!("internalize_attach_{}.json", id)); let attach_entry = serde_json::json!({ "op": "internalize.attach", "artifact_id": id, "session_id": session_id, - "timestamp": now, + "tool": tool, + "lease_id": lease_id, + "lease_seconds": lease_seconds, + "lease_expires_at": lease_expires_at, + "timestamp": attached_at, "adapter_hash": inspection.manifest.adapter_hash, "capabilities_contract": inspection.manifest.capabilities_contract, "risk_classification": inspection.manifest.risk_tier.attach, + "source_verification": inspection.integrity.source_verification, }); - let _ = fs::write( - &attach_log, - serde_json::to_string_pretty(&attach_entry).unwrap_or_default(), - ); + fs::write( + attach_log, + serde_json::to_string_pretty(&attach_entry).map_err(InternalizeError::Json)?, + ) + .map_err(InternalizeError::Io)?; Ok(InternalizationAttachResult { schema_version: SCHEMA_VERSION.to_string(), success: true, artifact_id: id.to_string(), session_id: session_id.to_string(), - attached_at: now, + tool: tool.to_string(), + attached_at, + lease_id, + lease_seconds, + lease_expires_at, expires_at: inspection.manifest.expires_at, capabilities_contract: inspection.manifest.capabilities_contract, risk_classification: inspection.manifest.risk_tier.attach, + source_verification: inspection.integrity.source_verification, provenance_entry, }) } -// ── CLI Runner ───────────────────────────────────────────────────────── +pub fn detach_internalization( + store_root: &Path, + id: &str, + session_id: &str, +) -> Result { + let mount_file = mount_path(store_root, session_id, id); + if !mount_file.exists() { + return Err(InternalizeError::MountNotFound { + 
artifact_id: id.to_string(), + session_id: session_id.to_string(), + }); + } + + let raw = fs::read_to_string(&mount_file).map_err(InternalizeError::Io)?; + let mount: serde_json::Value = serde_json::from_str(&raw).map_err(InternalizeError::Json)?; + let lease_id = mount + .get("lease_id") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + fs::remove_file(&mount_file).map_err(InternalizeError::Io)?; + + let detached_at = now_iso8601(); + let session_prov_dir = session_dir(store_root, session_id); + fs::create_dir_all(&session_prov_dir).map_err(InternalizeError::Io)?; + let detach_log = session_prov_dir.join(format!("internalize_detach_{}.json", id)); + let detach_entry = serde_json::json!({ + "op": "internalize.detach", + "artifact_id": id, + "session_id": session_id, + "lease_id": lease_id, + "timestamp": detached_at, + }); + fs::write( + detach_log, + serde_json::to_string_pretty(&detach_entry).map_err(InternalizeError::Json)?, + ) + .map_err(InternalizeError::Io)?; + + Ok(InternalizationDetachResult { + schema_version: SCHEMA_VERSION.to_string(), + success: true, + artifact_id: id.to_string(), + session_id: session_id.to_string(), + detached_at, + lease_id, + detached: true, + }) +} + +pub fn manifest_json_schema() -> serde_json::Value { + serde_json::json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/manifest-1.2.0.json", + "title": "InternalizationManifest", + "type": "object", + "required": [ + "schema_version", "id", "source_hash", "source_path", "base_model_id", + "internalizer_profile", "internalizer_version", "adapter_format", "created_at", + "ttl_seconds", "provenance", "replay_recipe", "adapter_hash", "adapter_path", + "capabilities_contract", "risk_tier", "determinism_class", "binary_hash", + "runtime_fingerprint" + ], + "properties": { + "schema_version": { "const": SCHEMA_VERSION }, + "id": { "type": "string", "pattern": "^int_[a-f0-9]{24}$" }, + "source_hash": 
{ "type": "string", "pattern": "^[a-f0-9]{64}$" }, + "determinism_class": { "enum": ["deterministic", "best_effort"] }, + "binary_hash": { "type": "string", "minLength": 1 }, + "runtime_fingerprint": { "type": "string", "minLength": 1 } + } + }) +} + +pub fn create_result_json_schema() -> serde_json::Value { + serde_json::json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/create-result-1.2.0.json", + "title": "InternalizationCreateResult", + "type": "object", + "required": [ + "schema_version", "success", "artifact_id", "artifact_path", + "cache_hit", "manifest", "source_hash", "adapter_hash" + ] + }) +} + +pub fn attach_result_json_schema() -> serde_json::Value { + serde_json::json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/attach-result-1.2.0.json", + "title": "InternalizationAttachResult", + "type": "object", + "required": [ + "schema_version", "success", "artifact_id", "session_id", "tool", + "attached_at", "lease_id", "lease_seconds", "lease_expires_at" + ] + }) +} + +pub fn detach_result_json_schema() -> serde_json::Value { + serde_json::json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/detach-result-1.2.0.json", + "title": "InternalizationDetachResult", + "type": "object", + "required": [ + "schema_version", "success", "artifact_id", "session_id", + "detached_at", "lease_id", "detached" + ] + }) +} + +pub fn inspect_result_json_schema() -> serde_json::Value { + serde_json::json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://decapod.dev/schemas/internalization/inspect-result-1.2.0.json", + "title": "InternalizationInspectResult", + "type": "object", + "required": ["schema_version", "artifact_id", "manifest", "integrity", "status"] + }) +} + +pub fn schema() -> serde_json::Value { + serde_json::json!({ + 
"name": "internalize", + "version": SCHEMA_VERSION, + "description": "Internalized context artifact lifecycle with explicit create, attach lease, detach, and inspect gates", + "commands": [ + { "name": "create", "parameters": ["source", "model", "profile", "ttl", "scope", "format"] }, + { "name": "attach", "parameters": ["id", "session", "tool", "lease_seconds", "format"] }, + { "name": "detach", "parameters": ["id", "session", "format"] }, + { "name": "inspect", "parameters": ["id", "format"] } + ] + }) +} pub fn run_internalize_cli( _store: &Store, @@ -795,25 +1134,38 @@ pub fn run_internalize_cli( println!("{}", serde_json::to_string_pretty(&result).unwrap()); } else { println!("Created internalization artifact: {}", result.artifact_id); - println!(" Source hash: {}", result.source_hash); - println!(" Adapter hash: {}", result.adapter_hash); - println!(" Path: {}", result.artifact_path); } } InternalizeCommand::Attach { + id, + session, + tool, + lease_seconds, + format, + } => { + let result = attach_internalization(store_root, &id, &session, &tool, lease_seconds)?; + if format == "json" { + println!("{}", serde_json::to_string_pretty(&result).unwrap()); + } else { + println!( + "Attached {} to session {} until {}", + result.artifact_id, result.session_id, result.lease_expires_at + ); + } + } + InternalizeCommand::Detach { id, session, format, } => { - let result = attach_internalization(store_root, &id, &session)?; + let result = detach_internalization(store_root, &id, &session)?; if format == "json" { println!("{}", serde_json::to_string_pretty(&result).unwrap()); } else { println!( - "Attached {} to session {}", + "Detached {} from session {}", result.artifact_id, result.session_id ); - println!(" Risk: {}", result.risk_classification); } } InternalizeCommand::Inspect { id, format } => { @@ -823,10 +1175,6 @@ pub fn run_internalize_cli( } else { println!("Artifact: {}", result.artifact_id); println!(" Status: {}", result.status); - println!(" Source hash: 
{}", result.manifest.source_hash); - println!(" Adapter hash: {}", result.manifest.adapter_hash); - println!(" Profile: {}", result.manifest.internalizer_profile); - println!(" Model: {}", result.manifest.base_model_id); } } } diff --git a/tests/plugins/internalize.rs b/tests/plugins/internalize.rs index 65c27a3b..e48923c6 100644 --- a/tests/plugins/internalize.rs +++ b/tests/plugins/internalize.rs @@ -1,11 +1,9 @@ //! Tests for internalized context artifacts. -//! -//! Proves: manifest determinism, source hash binding, TTL expiry blocking, -//! schema stability, and the full create → attach → inspect lifecycle. use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::Command; + use tempfile::TempDir; fn decapod_bin() -> String { @@ -13,19 +11,17 @@ fn decapod_bin() -> String { } fn setup_project() -> (TempDir, PathBuf) { - let temp_dir = TempDir::new().expect("Failed to create temp dir"); + let temp_dir = TempDir::new().expect("temp dir"); let temp_path = temp_dir.path().to_path_buf(); - // Init decapod let output = Command::new(decapod_bin()) .current_dir(&temp_path) .args(["init", "--force"]) .env("DECAPOD_VALIDATE_SKIP_GIT_GATES", "1") .output() - .expect("Failed to run decapod init"); + .expect("run decapod init"); assert!(output.status.success(), "decapod init failed"); - // Create a sample source document fs::write( temp_path.join("sample_doc.txt"), "This is a sample document for internalization testing.\nIt has multiple lines.\nAnd some content.", @@ -35,32 +31,37 @@ fn setup_project() -> (TempDir, PathBuf) { (temp_dir, temp_path) } -fn run_decapod(dir: &PathBuf, args: &[&str]) -> (bool, String) { +fn run_decapod(dir: &Path, args: &[&str]) -> (bool, String) { let output = Command::new(decapod_bin()) .current_dir(dir) .args(args) .env("DECAPOD_VALIDATE_SKIP_GIT_GATES", "1") .output() - .expect("Failed to execute decapod"); + .expect("execute decapod"); let stdout = String::from_utf8_lossy(&output.stdout).to_string(); let 
stderr = String::from_utf8_lossy(&output.stderr).to_string(); (output.status.success(), format!("{}\n{}", stdout, stderr)) } -// ── Schema Stability Tests ───────────────────────────────────────────── +fn parse_json_from_output(output: &str) -> serde_json::Value { + let start = output.find('{').expect("json start"); + let end = output.rfind('}').expect("json end"); + serde_json::from_str(&output[start..=end]).expect("json parse") +} #[test] fn test_internalization_manifest_schema_roundtrip() { use decapod::plugins::internalize::{ - CapabilitiesContract, InternalizationManifest, ProvenanceEntry, ReplayRecipe, RiskTier, + CapabilitiesContract, DeterminismClass, InternalizationManifest, ProvenanceEntry, + ReplayClass, ReplayRecipe, RiskTier, SCHEMA_VERSION, }; use std::collections::BTreeMap; let manifest = InternalizationManifest { - schema_version: "1.0.0".to_string(), - id: "01TESTID000000000000000000".to_string(), - source_hash: "abc123".to_string(), + schema_version: SCHEMA_VERSION.to_string(), + id: "int_0123456789abcdef01234567".to_string(), + source_hash: "a".repeat(64), source_path: "/tmp/doc.txt".to_string(), extraction_method: "noop".to_string(), chunking_params: BTreeMap::new(), @@ -75,80 +76,62 @@ fn test_internalization_manifest_schema_roundtrip() { op: "internalize.create".to_string(), timestamp: "2026-02-28T00:00:00Z".to_string(), actor: "decapod-cli".to_string(), - inputs_hash: "abc123".to_string(), + inputs_hash: "a".repeat(64), }], replay_recipe: ReplayRecipe { + mode: ReplayClass::Replayable, command: "decapod".to_string(), args: vec!["internalize".to_string(), "create".to_string()], env: BTreeMap::new(), + reason: "deterministic profile with pinned binary hash".to_string(), }, - adapter_hash: "def456".to_string(), + adapter_hash: "b".repeat(64), adapter_path: "adapter.bin".to_string(), capabilities_contract: CapabilitiesContract { allowed_scopes: vec!["qa".to_string()], - permitted_tools: vec!["*".to_string()], + permitted_tools: 
vec!["decapod-cli".to_string()], allow_code_gen: false, }, risk_tier: RiskTier::default(), + determinism_class: DeterminismClass::Deterministic, + binary_hash: "c".repeat(64), + runtime_fingerprint: "os=linux arch=x86_64 executable=builtin:noop".to_string(), }; - // Serialize let json = serde_json::to_string_pretty(&manifest).unwrap(); - - // Deserialize let roundtrip: InternalizationManifest = serde_json::from_str(&json).unwrap(); - - assert_eq!(manifest, roundtrip, "Manifest must survive JSON roundtrip"); + assert_eq!(manifest, roundtrip); } #[test] -fn test_create_result_schema_has_required_fields() { - use decapod::plugins::internalize::InternalizationCreateResult; - - let json = r#"{ - "schema_version": "1.0.0", - "success": true, - "artifact_id": "test", - "artifact_path": "/tmp/test", - "manifest": { - "schema_version": "1.0.0", - "id": "test", - "source_hash": "abc", - "source_path": "/tmp/doc.txt", - "extraction_method": "noop", - "chunking_params": {}, - "base_model_id": "model", - "internalizer_profile": "noop", - "internalizer_version": "1.0.0", - "adapter_format": "noop", - "created_at": "2026-02-28T00:00:00Z", - "ttl_seconds": 0, - "provenance": [], - "replay_recipe": {"command": "decapod", "args": [], "env": {}}, - "adapter_hash": "def", - "adapter_path": "adapter.bin", - "capabilities_contract": {"allowed_scopes": [], "permitted_tools": [], "allow_code_gen": false}, - "risk_tier": {"creation": "compute-risky", "attach": "behavior-changing", "inspect": "read-only"} - }, - "source_hash": "abc", - "adapter_hash": "def" - }"#; - - let result: InternalizationCreateResult = serde_json::from_str(json).unwrap(); - assert!(result.success); - assert_eq!(result.schema_version, "1.0.0"); +fn test_schema_files_exist_and_parse() { + let repo_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let schema_dir = repo_root.join("constitution/interfaces/jsonschema/internalization"); + let files = [ + "InternalizationManifest.schema.json", + 
"InternalizationCreateResult.schema.json", + "InternalizationAttachResult.schema.json", + "InternalizationDetachResult.schema.json", + "InternalizationInspectResult.schema.json", + ]; + + for file in files { + let raw = fs::read_to_string(schema_dir.join(file)).expect("read schema fixture"); + let parsed: serde_json::Value = serde_json::from_str(&raw).expect("parse schema fixture"); + assert!( + parsed.get("$id").is_some(), + "schema {} must declare $id", + file + ); + } } -// ── Manifest Determinism ─────────────────────────────────────────────── - #[test] fn test_manifest_deterministic_for_same_inputs() { use decapod::plugins::internalize::create_internalization; let temp_dir = TempDir::new().unwrap(); let store_root = temp_dir.path().to_path_buf(); - - // Create source doc let doc_path = temp_dir.path().join("doc.txt"); fs::write(&doc_path, "deterministic content").unwrap(); @@ -158,93 +141,75 @@ fn test_manifest_deterministic_for_same_inputs() { "model-v1", "noop", 0, - &[], + &["qa".to_string()], ) .unwrap(); - let r2 = create_internalization( &store_root, doc_path.to_str().unwrap(), "model-v1", "noop", 0, - &[], + &["qa".to_string()], ) .unwrap(); - // Source hashes must be identical for same input - assert_eq!(r1.source_hash, r2.source_hash); - // Adapter hashes must be identical for noop (empty adapter) - assert_eq!(r1.adapter_hash, r2.adapter_hash); - // But artifact IDs must differ (ULIDs) - assert_ne!(r1.artifact_id, r2.artifact_id); + assert!(!r1.cache_hit); + assert!(r2.cache_hit); + assert_eq!(r1.artifact_id, r2.artifact_id); } -// ── Source Hash Binding ──────────────────────────────────────────────── - #[test] -fn test_source_hash_changes_when_document_changes() { - use decapod::plugins::internalize::create_internalization; +fn test_source_hash_binding_is_enforced_on_attach() { + use decapod::plugins::internalize::{attach_internalization, create_internalization}; let temp_dir = TempDir::new().unwrap(); let store_root = temp_dir.path().to_path_buf(); 
- let doc_path = temp_dir.path().join("doc.txt"); - - // First version fs::write(&doc_path, "version 1").unwrap(); - let r1 = create_internalization( + + let created = create_internalization( &store_root, doc_path.to_str().unwrap(), "model-v1", "noop", 0, - &[], + &["qa".to_string()], ) .unwrap(); - // Modify document fs::write(&doc_path, "version 2").unwrap(); - let r2 = create_internalization( + + let err = attach_internalization( &store_root, - doc_path.to_str().unwrap(), - "model-v1", - "noop", - 0, - &[], + &created.artifact_id, + "session-1", + "decapod-cli", + 1800, ) - .unwrap(); - - assert_ne!( - r1.source_hash, r2.source_hash, - "Source hash must change when document changes" - ); + .unwrap_err(); + assert!(format!("{}", err).contains("Source integrity check failed")); } -// ── TTL Enforcement ──────────────────────────────────────────────────── - #[test] fn test_ttl_blocks_attach_after_expiry() { use decapod::plugins::internalize::{attach_internalization, create_internalization}; let temp_dir = TempDir::new().unwrap(); let store_root = temp_dir.path().to_path_buf(); - let doc_path = temp_dir.path().join("doc.txt"); fs::write(&doc_path, "content").unwrap(); - // Create with TTL=1 second let result = create_internalization( &store_root, doc_path.to_str().unwrap(), "model-v1", "noop", 1, - &[], + &["qa".to_string()], ) .unwrap(); - // Manually set expires_at to the past let art_dir = store_root .join("generated") .join("artifacts") @@ -260,119 +225,78 @@ fn test_ttl_blocks_attach_after_expiry() { ) .unwrap(); - // Attempt attach — should fail with Expired - let err = attach_internalization(&store_root, &result.artifact_id, "test-session"); - assert!(err.is_err(), "Attach must fail on expired artifact"); - let err_msg = format!("{}", err.unwrap_err()); - assert!( - err_msg.contains("expired"), - "Error must mention expiry: {}", - err_msg + let err = attach_internalization( + &store_root, + &result.artifact_id, + "test-session", + "decapod-cli", + 1800, ); + 
assert!(err.is_err()); } -// ── Full Lifecycle: Create → Inspect → Attach ───────────────────────── - #[test] -fn test_full_lifecycle_create_inspect_attach() { +fn test_full_lifecycle_create_attach_detach_inspect() { use decapod::plugins::internalize::{ - attach_internalization, create_internalization, inspect_internalization, + DeterminismClass, ReplayClass, attach_internalization, create_internalization, + detach_internalization, inspect_internalization, }; let temp_dir = TempDir::new().unwrap(); let store_root = temp_dir.path().to_path_buf(); - let doc_path = temp_dir.path().join("doc.txt"); fs::write(&doc_path, "lifecycle test document").unwrap(); - // CREATE let create_result = create_internalization( &store_root, doc_path.to_str().unwrap(), "claude-sonnet-4-6", "noop", 0, - &["qa".to_string(), "summarization".to_string()], + &["qa".to_string()], ) .unwrap(); - - assert!(create_result.success); - assert_eq!(create_result.manifest.base_model_id, "claude-sonnet-4-6"); - assert_eq!(create_result.manifest.internalizer_profile, "noop"); - assert!(!create_result.source_hash.is_empty()); - - // INSPECT - let inspect_result = inspect_internalization(&store_root, &create_result.artifact_id).unwrap(); - - assert_eq!(inspect_result.status, "valid"); - assert!(inspect_result.integrity.adapter_hash_valid); - assert!(!inspect_result.integrity.expired); assert_eq!( - inspect_result.manifest.source_hash, - create_result.source_hash + create_result.manifest.determinism_class, + DeterminismClass::Deterministic + ); + assert_eq!( + create_result.manifest.replay_recipe.mode, + ReplayClass::Replayable ); - // ATTACH - let attach_result = - attach_internalization(&store_root, &create_result.artifact_id, "session-001").unwrap(); + let inspect_result = inspect_internalization(&store_root, &create_result.artifact_id).unwrap(); + assert_eq!(inspect_result.status, "valid"); + assert!(inspect_result.integrity.replayable_claim_valid); - assert!(attach_result.success); - 
assert_eq!(attach_result.session_id, "session-001"); - assert_eq!(attach_result.risk_classification, "behavior-changing"); - assert_eq!(attach_result.provenance_entry.op, "internalize.attach"); + let attach_result = attach_internalization( + &store_root, + &create_result.artifact_id, + "session-001", + "decapod-cli", + 900, + ) + .unwrap(); + assert_eq!(attach_result.lease_seconds, 900); - // Verify provenance was logged to session dir - let session_dir = store_root + let mount_path = store_root .join("generated") .join("sessions") - .join("session-001"); - assert!( - session_dir.exists(), - "Session provenance directory must be created" - ); + .join("session-001") + .join("internalize_mounts") + .join(format!("mount_{}.json", create_result.artifact_id)); + assert!(mount_path.exists()); + + let detach_result = + detach_internalization(&store_root, &create_result.artifact_id, "session-001").unwrap(); + assert!(detach_result.detached); + assert!(!mount_path.exists()); } -// ── Noop Profile Tests ───────────────────────────────────────────────── - #[test] -fn test_noop_profile_produces_empty_adapter() { - use decapod::plugins::internalize::InternalizerProfile; - - let temp_dir = TempDir::new().unwrap(); - let output_dir = temp_dir.path().to_path_buf(); - let doc_path = temp_dir.path().join("doc.txt"); - fs::write(&doc_path, "test").unwrap(); - - let profile = InternalizerProfile::noop(); - assert_eq!(profile.name, "noop"); - assert_eq!(profile.adapter_format, "noop"); - - let (adapter_path, _params) = profile.execute(&doc_path, "model", &output_dir).unwrap(); - assert!(adapter_path.exists()); - - let content = fs::read(&adapter_path).unwrap(); - assert!(content.is_empty(), "Noop adapter must produce empty file"); -} - -// ── Risk Tier Tests ──────────────────────────────────────────────────── - -#[test] -fn test_risk_tier_defaults() { - use decapod::plugins::internalize::RiskTier; - - let tier = RiskTier::default(); - assert_eq!(tier.creation, "compute-risky"); - 
assert_eq!(tier.attach, "behavior-changing"); - assert_eq!(tier.inspect, "read-only"); -} - -// ── CLI Integration (end-to-end via binary) ──────────────────────────── - -#[test] -fn test_cli_create_and_inspect() { +fn test_cli_create_attach_detach_inspect() { let (_temp_dir, temp_path) = setup_project(); - // Create internalization let (success, output) = run_decapod( &temp_path, &[ @@ -388,72 +312,54 @@ fn test_cli_create_and_inspect() { "json", ], ); - assert!( - success, - "internalize create should succeed. Output:\n{}", - output - ); - - // Parse the JSON output to get artifact ID - let stdout_lines: Vec<&str> = output.lines().collect(); - let json_start = stdout_lines - .iter() - .position(|l| l.trim_start().starts_with('{')); - assert!(json_start.is_some(), "Output should contain JSON"); + assert!(success, "create should succeed:\n{}", output); + let created = parse_json_from_output(&output); + let artifact_id = created["artifact_id"].as_str().unwrap(); - // Find matching closing brace - let json_str = &output[output.find('{').unwrap()..]; - let result: serde_json::Value = - serde_json::from_str(&json_str[..json_str.rfind('}').unwrap() + 1]) - .expect("Should parse create result JSON"); - - let artifact_id = result["artifact_id"].as_str().unwrap(); - assert!(!artifact_id.is_empty()); - - // Inspect the artifact let (success, output) = run_decapod( &temp_path, &[ "internalize", - "inspect", + "attach", "--id", artifact_id, + "--session", + "session-123", + "--tool", + "decapod-cli", + "--lease-seconds", + "600", "--format", "json", ], ); - assert!( - success, - "internalize inspect should succeed. 
Output:\n{}", - output - ); - - let inspect_json_str = &output[output.find('{').unwrap()..]; - let inspect_result: serde_json::Value = - serde_json::from_str(&inspect_json_str[..inspect_json_str.rfind('}').unwrap() + 1]) - .expect("Should parse inspect result JSON"); + assert!(success, "attach should succeed:\n{}", output); - assert_eq!(inspect_result["status"].as_str().unwrap(), "valid"); - assert_eq!(inspect_result["artifact_id"].as_str().unwrap(), artifact_id); -} - -#[test] -fn test_cli_create_with_missing_source_fails() { - let (_temp_dir, temp_path) = setup_project(); - - let (success, _output) = run_decapod( + let (success, output) = run_decapod( &temp_path, &[ "internalize", - "create", - "--source", - "nonexistent.txt", - "--model", - "test-model", + "detach", + "--id", + artifact_id, + "--session", + "session-123", + "--format", + "json", ], ); - assert!( - !success, - "internalize create with missing source should fail" + assert!(success, "detach should succeed:\n{}", output); + + let (success, output) = run_decapod( + &temp_path, + &[ + "internalize", + "inspect", + "--id", + artifact_id, + "--format", + "json", + ], ); + assert!(success, "inspect should succeed:\n{}", output); } diff --git a/tests/validate_optional_artifact_gates.rs b/tests/validate_optional_artifact_gates.rs index dd2cf648..794eef38 100644 --- a/tests/validate_optional_artifact_gates.rs +++ b/tests/validate_optional_artifact_gates.rs @@ -498,3 +498,110 @@ fn validate_fails_on_invalid_context_capsule_policy_contract_if_present() { stderr ); } + +#[test] +fn validate_fails_on_internalization_source_hash_drift_if_present() { + let (_tmp, dir, password) = setup_repo(); + let doc_path = dir.join("doc.txt"); + fs::write(&doc_path, "version 1").expect("write source doc"); + + let create = run_decapod( + &dir, + &[ + "internalize", + "create", + "--source", + "doc.txt", + "--model", + "test-model", + "--profile", + "noop", + "--format", + "json", + ], + &[("DECAPOD_VALIDATE_SKIP_GIT_GATES", 
"1")], + ); + assert!( + create.status.success(), + "create failed: {}", + combined_output(&create) + ); + + fs::write(&doc_path, "version 2").expect("mutate source doc"); + + let validate = run_decapod( + &dir, + &["validate"], + &[ + ("DECAPOD_AGENT_ID", "unknown"), + ("DECAPOD_SESSION_PASSWORD", &password), + ("DECAPOD_VALIDATE_SKIP_GIT_GATES", "1"), + ], + ); + assert!(!validate.status.success()); + let stderr = combined_output(&validate); + assert!(stderr.contains("Internalization source hash mismatch")); +} + +#[test] +fn validate_fails_on_best_effort_internalization_claiming_replayable() { + let (_tmp, dir, password) = setup_repo(); + let doc_path = dir.join("doc.txt"); + fs::write(&doc_path, "version 1").expect("write source doc"); + + let create = run_decapod( + &dir, + &[ + "internalize", + "create", + "--source", + "doc.txt", + "--model", + "test-model", + "--profile", + "noop", + "--format", + "json", + ], + &[("DECAPOD_VALIDATE_SKIP_GIT_GATES", "1")], + ); + assert!( + create.status.success(), + "create failed: {}", + combined_output(&create) + ); + let created: serde_json::Value = serde_json::from_slice(&create.stdout).expect("create json"); + let artifact_id = created["artifact_id"].as_str().expect("artifact id"); + let manifest_path = dir + .join(".decapod") + .join("generated") + .join("artifacts") + .join("internalizations") + .join(artifact_id) + .join("manifest.json"); + let raw = fs::read_to_string(&manifest_path).expect("read manifest"); + let mut manifest: serde_json::Value = serde_json::from_str(&raw).expect("parse manifest"); + manifest["determinism_class"] = serde_json::Value::String("best_effort".to_string()); + manifest["replay_recipe"]["mode"] = serde_json::Value::String("replayable".to_string()); + fs::write( + &manifest_path, + serde_json::to_vec_pretty(&manifest).expect("serialize manifest"), + ) + .expect("write manifest"); + + let validate = run_decapod( + &dir, + &["validate"], + &[ + ("DECAPOD_AGENT_ID", "unknown"), + 
("DECAPOD_SESSION_PASSWORD", &password), + ("DECAPOD_VALIDATE_SKIP_GIT_GATES", "1"), + ], + ); + assert!(!validate.status.success()); + let stderr = combined_output(&validate); + assert!( + stderr.contains("claims replayable despite non-deterministic profile") + || stderr.contains("replay metadata is inconsistent") + ); +}