Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .beads/issues.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@
{"id":"ge-hch.5.15.22","title":"Implement: Director Config UI","description":"Extend AI Settings modal with Director configuration.\n\n## Acceptance Criteria\n- [ ] Risk threshold slider (0.1-0.8, default 0.4) in settings\n- [ ] Enable Director checkbox (default checked)\n- [ ] Settings persist in localStorage\n- [ ] Changes take effect on next choice (no reload)\n- [ ] Invalid values clamped to valid range\n\n## Implementation Notes\n- Extend renderSettingsPanel() in api-key-manager.js\n- Add Director Settings section\n- Bind to settings.directorRiskThreshold and settings.directorEnabled\n\n## Related Feature\nge-hch.5.15.7 (Director Configuration UI)","status":"closed","priority":2,"issue_type":"task","assignee":"@OpenCode","created_at":"2026-01-16T15:04:07.947028051-08:00","created_by":"rgardler","updated_at":"2026-01-16T22:07:33.585947557-08:00","closed_at":"2026-01-16T22:07:33.585947557-08:00","close_reason":"Completed","dependencies":[{"issue_id":"ge-hch.5.15.22","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:04:07.948288344-08:00","created_by":"rgardler"}],"comments":[{"id":195,"issue_id":"ge-hch.5.15.22","author":"rgardler","text":"Added Director controls to AI Settings (enable toggle + risk threshold slider with clamping + persistence). Settings feed inkrunner.js to govern Director usage. No UI yet for telemetry, deferred to ge-hch.5.15.24.","created_at":"2026-01-17T06:07:17Z"},{"id":196,"issue_id":"ge-hch.5.15.22","author":"rgardler","text":"Settings panel now hides the entire AI config when AI choices are disabled, plus Director controls collapse when either AI or Director toggles are off. Keeps UI compact and avoids misleading controls.","created_at":"2026-01-17T06:11:54Z"}]}
{"id":"ge-hch.5.15.23","title":"Tests: Director Config UI","description":"UI tests for Director configuration.\n\n## Acceptance Criteria\n- [ ] Test: changing threshold updates getSettings().directorRiskThreshold\n- [ ] Test: invalid threshold (2.0) clamped to valid range\n- [ ] Test: high threshold (0.8) accepts more proposals than low (0.2)\n- [ ] Test: disabling Director falls back to naive injection\n\n## Related Feature\nge-hch.5.15.7 (Director Configuration UI)","status":"closed","priority":2,"issue_type":"task","assignee":"@OpenCode","created_at":"2026-01-16T15:04:07.991961562-08:00","created_by":"rgardler","updated_at":"2026-01-17T01:40:45.906548983-08:00","closed_at":"2026-01-17T01:40:45.906582258-08:00","dependencies":[{"issue_id":"ge-hch.5.15.23","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:04:07.992789597-08:00","created_by":"rgardler"}],"comments":[{"id":197,"issue_id":"ge-hch.5.15.23","author":"rgardler","text":"Added deterministic mock proposal hook to inkrunner and updated Playwright tests to use mock proposals for Director acceptance tests. This avoids hitting external LLM endpoints and makes approval counts deterministic. Files changed: web/demo/js/inkrunner.js, tests/demo.telemetry.spec.ts. (Assignee: @OpenCode)","created_at":"2026-01-17T07:29:10Z"},{"id":198,"issue_id":"ge-hch.5.15.23","author":"rgardler","text":"Completed Director UI tests and deterministic mock hooks. Added/updated: web/demo/js/inkrunner.js, web/demo/js/director.js, tests/demo.telemetry.spec.ts, tests/unit/director.test.js. Ran unit tests (npm run test:unit) and Playwright demo tests locally; both passed. PR https://github.com/TheWizardsCode/GEngine/pull/156 merged. Deleting local branch feature/ge-hch.5.15-director and remote counterpart after merge. Closing per acceptance criteria: threshold updates, clamping, high/low threshold behavior, and Director disable fallback are covered by tests. 
(Assignee: @OpenCode)","created_at":"2026-01-17T09:40:44Z"}]}
{"id":"ge-hch.5.15.24","title":"Implement: Decision Telemetry","description":"Add telemetry emission to director.js.\n\n## Acceptance Criteria\n- [ ] emitDecisionTelemetry(decision, metrics) function\n- [ ] Emits director_decision event with proposal_id, timestamp, decision, reason, riskScore, latencyMs, metrics\n- [ ] Uses telemetry.js if available, console.log fallback\n- [ ] Buffers last 50 events in sessionStorage\n\n## Related Feature\nge-hch.5.15.8 (Decision Telemetry Emitter)","acceptance_criteria":"- emitDecisionTelemetry(decision, metrics) function\\n- Emits director_decision event with proposal_id, timestamp, decision, reason, riskScore, latencyMs, metrics\\n- Includes timing fields: writerMs, directorMs, totalMs (ms) in the payload\\n- Uses telemetry.js if available, console.log fallback\\n- Buffers last 50 events in sessionStorage","status":"closed","priority":2,"issue_type":"task","assignee":"@Patch","created_at":"2026-01-16T15:04:16.411083197-08:00","created_by":"rgardler","updated_at":"2026-01-17T15:29:04.002562756-08:00","closed_at":"2026-01-17T15:29:04.002562756-08:00","close_reason":"Merged PR #162 — Completed","dependencies":[{"issue_id":"ge-hch.5.15.24","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:04:16.413016807-08:00","created_by":"rgardler"}],"comments":[{"id":203,"issue_id":"ge-hch.5.15.24","author":"rgardler","text":"Added acceptance criteria: include timing fields writerMs, directorMs, and totalMs in the director_decision telemetry payload. These should record writer latency, director latency, and combined total latency in milliseconds. 
Update unit and integration tests to assert presence and numeric types for these fields.","created_at":"2026-01-17T20:55:27Z"},{"id":204,"issue_id":"ge-hch.5.15.24","author":"rgardler","text":"Implemented timing fields (writerMs, directorMs, totalMs) in director_decision telemetry, ensuring evaluate and all rejection paths emit them; updated unit + Playwright demo tests to assert presence and numeric types. Ran npm test (unit + Playwright) successfully on branch feature/ge-hch.5.15.24-telemetry.","created_at":"2026-01-17T21:07:40Z"}]}
{"id":"ge-hch.5.15.25","title":"Tests: Decision Telemetry","description":"Unit tests for telemetry emission.\n\n## Acceptance Criteria\n- [ ] Test: decision emits event with all required fields\n- [ ] Test: timestamp is valid ISO8601\n- [ ] Test: missing proposal_id generates UUID\n- [ ] Test: after 5 choices, sessionStorage contains 5 events\n\n## Related Feature\nge-hch.5.15.8 (Decision Telemetry Emitter)","status":"open","priority":2,"issue_type":"task","assignee":"Probe","created_at":"2026-01-16T15:04:16.491963828-08:00","created_by":"rgardler","updated_at":"2026-01-16T15:04:16.491963828-08:00","dependencies":[{"issue_id":"ge-hch.5.15.25","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:04:16.507643334-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.25","title":"Tests: Decision Telemetry","description":"Unit tests for telemetry emission.\n\n## Acceptance Criteria\n- [ ] Test: decision emits event with all required fields\n- [ ] Test: timestamp is valid ISO8601\n- [ ] Test: missing proposal_id generates UUID\n- [ ] Test: after 5 choices, sessionStorage contains 5 events\n\n## Related Feature\nge-hch.5.15.8 (Decision Telemetry Emitter)","status":"in_progress","priority":2,"issue_type":"task","assignee":"@Patch","created_at":"2026-01-16T15:04:16.491963828-08:00","created_by":"rgardler","updated_at":"2026-01-18T01:03:32.447823274-08:00","dependencies":[{"issue_id":"ge-hch.5.15.25","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:04:16.507643334-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.26","title":"Docs: Decision Telemetry Schema","description":"Document telemetry event schema.\n\n## Acceptance Criteria\n- [ ] Document director_decision event fields\n- [ ] Include example JSON event\n- [ ] Note sessionStorage buffer behavior\n- [ ] Reference to telemetry-schema.md design doc\n\n## Related Feature\nge-hch.5.15.8 (Decision Telemetry Emitter)","status":"open","priority":2,"issue_type":"task","assignee":"Scribbler","created_at":"2026-01-16T15:04:16.562304471-08:00","created_by":"rgardler","updated_at":"2026-01-16T15:04:16.562304471-08:00","dependencies":[{"issue_id":"ge-hch.5.15.26","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:04:16.563250103-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.3","title":"Risk Scorer (3+3 Metrics)","description":"Compute a risk score that predicts whether a branch will feel coherent to the player.\n\n## Player Experience Change\nPlayers will see fewer 'off' or jarring AI branches. Branches that don't fit the narrative pacing or have low Writer confidence are filtered out.\n\n## Acceptance Criteria\n- [ ] Computes weighted risk score (0.0–1.0), where 0.0=safe, 1.0=high risk\n- [ ] Active metrics implemented:\n - `proposal_confidence_risk`: `1.0 - proposal.metadata.confidence_score`\n - `narrative_pacing_risk`: based on branch length vs. expected range\n - `return_path_confidence_risk`: from return-path checker\n- [ ] Placeholder metrics return configurable defaults (0.3):\n - `thematic_consistency_risk`, `lore_adherence_risk`, `character_voice_risk`\n- [ ] Consistent: same input → same output\n- [ ] Determinism test: 10 calls with same input produce identical riskScore\n- [ ] Unit test: high-confidence proposal (0.9) → low risk score (\u003c0.3)\n- [ ] Unit test: low-confidence proposal (0.3) → high risk score (\u003e0.5)\n- [ ] Unit test: very long branch (\u003e500 tokens in exposition phase) → elevated pacing risk\n\n## Minimal Implementation\n- Create `computeRiskScore(proposal, context, config)` function\n- Implement 3 active metrics\n- Weighted average with default weights from design doc\n\n## Dependencies\n- ge-hch.5.15.1 (Decision Flow Engine)\n- ge-hch.5.15.2 (Return-Path Feasibility Checker)\n\n## Deliverables\n- Risk scorer in director.js\n- Unit tests for each metric\n- Config schema for weights","status":"closed","priority":1,"issue_type":"feature","assignee":"Patch","created_at":"2026-01-16T15:01:50.954803291-08:00","created_by":"rgardler","updated_at":"2026-01-17T11:36:20.913696503-08:00","closed_at":"2026-01-17T11:36:20.913696503-08:00","close_reason":"PR merged (gh-158) — risk scorer implemented","external_ref":"gh-158","labels":["Status: PR 
Created"],"dependencies":[{"issue_id":"ge-hch.5.15.3","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:01:50.955629677-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.3","depends_on_id":"ge-hch.5.15.1","type":"blocks","created_at":"2026-01-16T15:04:32.2862167-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.3","depends_on_id":"ge-hch.5.15.2","type":"blocks","created_at":"2026-01-16T15:04:32.327828266-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.4","title":"Embedding Service (transformers.js)","description":"Provide local semantic similarity using transformers.js for future intelligent risk metrics.\n\n## Player Experience Change\nNone immediately — this is infrastructure for deferred metrics (thematic consistency, LORE adherence, character voice). Enables future improvements without additional API costs.\n\n## Acceptance Criteria\n- [ ] Model runs in WebWorker (UI thread not blocked)\n- [ ] API: `embed(text)` returns embedding vector\n- [ ] API: `similarity(vec1, vec2)` returns cosine similarity (0.0–1.0)\n- [ ] Model loads lazily on first `embed()` call\n- [ ] Graceful fallback: if model fails to load, `embed()` returns null, `similarity()` returns 0.5\n- [ ] Unit test: `similarity('happy', 'joyful')` \u003e 0.7\n- [ ] Unit test: `similarity('happy', 'database')` \u003c 0.4\n- [ ] Unit test: `embed(null)` returns null gracefully\n- [ ] Performance test: first embed() \u003c 3s (model load); subsequent \u003c 100ms\n\n## Minimal Implementation\n- Create `web/demo/js/embedding-service.js`\n- Load `Xenova/all-MiniLM-L6-v2` via transformers.js\n- WebWorker wrapper for non-blocking inference\n- Cache embeddings for repeated texts\n\n## Dependencies\n- None (parallel development)\n\n## Deliverables\n- `web/demo/js/embedding-service.js`\n- WebWorker script\n- Unit tests with sample texts","status":"open","priority":2,"issue_type":"feature","created_at":"2026-01-16T15:02:02.704393975-08:00","created_by":"rgardler","updated_at":"2026-01-16T15:02:02.704393975-08:00","dependencies":[{"issue_id":"ge-hch.5.15.4","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:02:02.70547581-08:00","created_by":"rgardler"}]}
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/director.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ describe('Director core', () => {
global.window = global.window || {};
global.window.Telemetry = { emit: jest.fn() };

// Simple sessionStorage mock
const store = new Map();
global.sessionStorage = {
getItem: (k) => (store.has(k) ? store.get(k) : null),
setItem: (k, v) => store.set(k, String(v)),
removeItem: (k) => store.delete(k),
clear: () => store.clear(),
};

// Default ProposalValidator that approves
global.window.ProposalValidator = {
quickValidate: jest.fn(() => ({ valid: true }))
Expand All @@ -23,6 +32,7 @@ describe('Director core', () => {
afterEach(() => {
// Per-test teardown: remove the globals this suite touches so that state
// from one test cannot leak into the next.
// `__proposalValidReturnPaths` is presumably set by individual tests as a
// return-path override (not visible in this hunk — confirm against the tests).
delete global.window.__proposalValidReturnPaths;
// Drop the in-memory sessionStorage mock so any buffered telemetry events
// written through it do not carry over to the following test.
delete global.sessionStorage;
});

it('approves a low-risk proposal and returns latencyMs', async () => {
Expand Down Expand Up @@ -219,4 +229,45 @@ describe('Director core', () => {
expect(payload).toHaveProperty('decision');
});

it('telemetry payload includes required fields and ISO timestamp; fills missing proposal_id; buffers 5 decisions', async () => {
  // Story stub with a single `campfire` entry under `_namedContent`, which is
  // the same name the proposal uses as its `return_path`.
  const story = { mainContentContainer: { _namedContent: { campfire: {} } } };
  const baseProposal = {
    content: { text: 'Short content', return_path: 'campfire' },
    metadata: { confidence_score: 0.9 },
  };

  // Fields every `director_decision` payload is expected to carry.
  const requiredFields = [
    'proposal_id',
    'decision',
    'reason',
    'riskScore',
    'latencyMs',
    'writerMs',
    'directorMs',
    'totalMs',
    'timestamp',
    'metrics',
  ];

  // ISO 8601 date-time: YYYY-MM-DDTHH:mm:ss[.fraction](Z|±HH:mm).
  const iso8601 = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})$/;

  // Snapshot of all `director_decision` emits recorded by the Telemetry mock.
  const decisionCalls = () =>
    global.window.Telemetry.emit.mock.calls.filter(([eventName]) => eventName === 'director_decision');

  const payloads = [];
  let seenCalls = decisionCalls().length;
  for (let i = 0; i < 5; i += 1) {
    // The first proposal deliberately has no id so the emitter must supply one.
    const proposal = { ...baseProposal, id: i === 0 ? undefined : `p-${i}` };
    const res = await Director.evaluate(proposal, { story }, { riskThreshold: 0.5 });
    expect(res).toHaveProperty('decision');

    // Require a NEW emit for this evaluation; without this check a missed emit
    // would silently re-record the previous iteration's payload.
    const calls = decisionCalls();
    expect(calls.length).toBeGreaterThan(seenCalls);
    seenCalls = calls.length;

    payloads.push(calls[calls.length - 1][1]);
  }

  payloads.forEach((p) => {
    // Required fields
    requiredFields.forEach((field) => {
      expect(p).toHaveProperty(field);
    });

    // Timestamp is valid ISO8601: it must be a string in ISO 8601 shape that
    // also parses to a real instant. A bare "does not throw" check would
    // accept numeric epochs, null, and non-ISO date strings.
    expect(typeof p.timestamp).toBe('string');
    expect(p.timestamp).toMatch(iso8601);
    expect(Number.isNaN(Date.parse(p.timestamp))).toBe(false);
  });

  // First call had no proposal.id; ensure uuid filled
  expect(payloads[0].proposal_id).toMatch(/^uuid-|^[0-9a-f]{8}-/i);

  // sessionStorage contains 5 events
  const stored = JSON.parse(global.sessionStorage.getItem('ge-hch.director.telemetry') || '[]');
  expect(Array.isArray(stored)).toBe(true);
  expect(stored.length).toBe(5);
});

});