Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .beads/issues.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
{"id":"ge-hch.5.15.26","title":"Docs: Decision Telemetry Schema","description":"Document telemetry event schema.\n\n## Acceptance Criteria\n- [ ] Document director_decision event fields\n- [ ] Include example JSON event\n- [ ] Note sessionStorage buffer behavior\n- [ ] Reference to telemetry-schema.md design doc\n\n## Related Feature\nge-hch.5.15.8 (Decision Telemetry Emitter)","notes":"PR #175 merged; doc aligned to web demo emitter; branch cleaned up","status":"closed","priority":2,"issue_type":"task","assignee":"Scribbler","created_at":"2026-01-16T15:04:16.562304471-08:00","created_by":"rgardler","updated_at":"2026-01-18T01:52:07.656506577-08:00","closed_at":"2026-01-18T01:52:07.656516168-08:00","close_reason":"Docs updated","dependencies":[{"issue_id":"ge-hch.5.15.26","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:04:16.563250103-08:00","created_by":"rgardler"}],"comments":[{"id":215,"issue_id":"ge-hch.5.15.26","author":"rgardler","text":"Updated docs/dev/m2-design/telemetry-schema.md: expanded telemetry with field table, clarified fit-vs-risk convention, added sessionStorage buffering note, and linked to director-algorithm telemetry emission section. (ge-hch.5.15.26)","created_at":"2026-01-18T09:44:20Z"},{"id":216,"issue_id":"ge-hch.5.15.26","author":"rgardler","text":"Aligned doc to current web demo telemetry emitter (Option A): documented flat director_decision payload (decision/reason/riskScore/timing + metrics.*), added example with payload block, and clarified sessionStorage ring buffer key ge-hch.director.telemetry (last 50).","created_at":"2026-01-18T09:48:39Z"}]}
{"id":"ge-hch.5.15.3","title":"Risk Scorer (3+3 Metrics)","description":"Compute a risk score that predicts whether a branch will feel coherent to the player.\n\n## Player Experience Change\nPlayers will see fewer 'off' or jarring AI branches. Branches that don't fit the narrative pacing or have low Writer confidence are filtered out.\n\n## Acceptance Criteria\n- [ ] Computes weighted risk score (0.0–1.0), where 0.0=safe, 1.0=high risk\n- [ ] Active metrics implemented:\n - `proposal_confidence_risk`: `1.0 - proposal.metadata.confidence_score`\n - `narrative_pacing_risk`: based on branch length vs. expected range\n - `return_path_confidence_risk`: from return-path checker\n- [ ] Placeholder metrics return configurable defaults (0.3):\n - `thematic_consistency_risk`, `lore_adherence_risk`, `character_voice_risk`\n- [ ] Consistent: same input → same output\n- [ ] Determinism test: 10 calls with same input produce identical riskScore\n- [ ] Unit test: high-confidence proposal (0.9) → low risk score (\u003c0.3)\n- [ ] Unit test: low-confidence proposal (0.3) → high risk score (\u003e0.5)\n- [ ] Unit test: very long branch (\u003e500 tokens in exposition phase) → elevated pacing risk\n\n## Minimal Implementation\n- Create `computeRiskScore(proposal, context, config)` function\n- Implement 3 active metrics\n- Weighted average with default weights from design doc\n\n## Dependencies\n- ge-hch.5.15.1 (Decision Flow Engine)\n- ge-hch.5.15.2 (Return-Path Feasibility Checker)\n\n## Deliverables\n- Risk scorer in director.js\n- Unit tests for each metric\n- Config schema for weights","status":"closed","priority":1,"issue_type":"feature","assignee":"Patch","created_at":"2026-01-16T15:01:50.954803291-08:00","created_by":"rgardler","updated_at":"2026-01-17T11:36:20.913696503-08:00","closed_at":"2026-01-17T11:36:20.913696503-08:00","close_reason":"PR merged (gh-158) — risk scorer implemented","external_ref":"gh-158","labels":["Status: PR 
Created"],"dependencies":[{"issue_id":"ge-hch.5.15.3","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:01:50.955629677-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.3","depends_on_id":"ge-hch.5.15.1","type":"blocks","created_at":"2026-01-16T15:04:32.2862167-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.3","depends_on_id":"ge-hch.5.15.2","type":"blocks","created_at":"2026-01-16T15:04:32.327828266-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.4","title":"Embedding Service (transformers.js)","description":"Provide local semantic similarity using transformers.js for future intelligent risk metrics.\n\n## Player Experience Change\nNone immediately — this is infrastructure for deferred metrics (thematic consistency, LORE adherence, character voice). Enables future improvements without additional API costs.\n\n## Acceptance Criteria\n- [ ] Model runs in WebWorker (UI thread not blocked)\n- [ ] API: `embed(text)` returns embedding vector\n- [ ] API: `similarity(vec1, vec2)` returns cosine similarity (0.0–1.0)\n- [ ] Model loads lazily on first `embed()` call\n- [ ] Graceful fallback: if model fails to load, `embed()` returns null, `similarity()` returns 0.5\n- [ ] Unit test: `similarity('happy', 'joyful')` \u003e 0.7\n- [ ] Unit test: `similarity('happy', 'database')` \u003c 0.4\n- [ ] Unit test: `embed(null)` returns null gracefully\n- [ ] Performance test: first embed() \u003c 3s (model load); subsequent \u003c 100ms\n\n## Minimal Implementation\n- Create `web/demo/js/embedding-service.js`\n- Load `Xenova/all-MiniLM-L6-v2` via transformers.js\n- WebWorker wrapper for non-blocking inference\n- Cache embeddings for repeated texts\n\n## Dependencies\n- None (parallel development)\n\n## Deliverables\n- `web/demo/js/embedding-service.js`\n- WebWorker script\n- Unit tests with sample texts","notes":"PR #176 merged; embedding service caching delivered; branch cleaned up","status":"closed","priority":2,"issue_type":"feature","assignee":"@Patch","created_at":"2026-01-16T15:02:02.704393975-08:00","created_by":"rgardler","updated_at":"2026-01-18T01:57:54.712774906-08:00","closed_at":"2026-01-18T01:57:54.712785828-08:00","external_ref":"https://github.com/TheWizardsCode/GEngine/pull/176","labels":["Status: PR Created"],"dependencies":[{"issue_id":"ge-hch.5.15.4","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:02:02.70547581-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.5","title":"Player Preference Tracker","description":"Track which types of AI branches the player accepts/rejects to personalize future offers.\n\n## Player Experience Change\nOver time, the system learns player preferences. Players who prefer exploration branches will see more exploration options offered; players who reject dialogue-heavy branches will see fewer.\n\n## Acceptance Criteria\n- [ ] Records: `{ branchType, accepted: boolean, timestamp }` on each Director decision\n- [ ] Computes preference score per branch type (0.0–1.0, based on accept ratio)\n- [ ] Persists in localStorage key `ge-hch.ai-preferences`\n- [ ] Cold-start: returns 0.5 for all types when no history\n- [ ] API: `getPreference(branchType)` → number\n- [ ] API: `recordOutcome(branchType, accepted)` → void\n- [ ] Unit test: after 3 accepts + 1 reject of 'dialogue', preference \u003e 0.6\n- [ ] Unit test: after 0 history, preference = 0.5\n- [ ] Unit test: after 100+ events, preference calculation remains performant (\u003c10ms)\n- [ ] Integration: risk scorer uses preference to adjust player_preference_risk\n\n## Minimal Implementation\n- Create `web/demo/js/player-preference.js`\n- Track accept/reject counts per branch type\n- Simple ratio calculation with smoothing\n\n## Dependencies\n- ge-hch.5.15.3 (Risk Scorer)\n\n## Deliverables\n- `web/demo/js/player-preference.js`\n- Unit tests\n- Integration with localStorage","status":"open","priority":2,"issue_type":"feature","created_at":"2026-01-16T15:02:12.247694133-08:00","created_by":"rgardler","updated_at":"2026-01-16T15:02:12.247694133-08:00","dependencies":[{"issue_id":"ge-hch.5.15.5","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:02:12.248718041-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.5","depends_on_id":"ge-hch.5.15.3","type":"blocks","created_at":"2026-01-16T15:04:32.372750464-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.5","title":"Player Preference Tracker","description":"Track which types of AI branches the player accepts/rejects to personalize future offers.\n\n## Player Experience Change\nOver time, the system learns player preferences. Players who prefer exploration branches will see more exploration options offered; players who reject dialogue-heavy branches will see fewer.\n\n## Acceptance Criteria\n- [ ] Records: `{ branchType, accepted: boolean, timestamp }` on each Director decision\n- [ ] Computes preference score per branch type (0.0–1.0, based on accept ratio)\n- [ ] Persists in localStorage key `ge-hch.ai-preferences`\n- [ ] Cold-start: returns 0.5 for all types when no history\n- [ ] API: `getPreference(branchType)` → number\n- [ ] API: `recordOutcome(branchType, accepted)` → void\n- [ ] Unit test: after 3 accepts + 1 reject of 'dialogue', preference \u003e 0.6\n- [ ] Unit test: after 0 history, preference = 0.5\n- [ ] Unit test: after 100+ events, preference calculation remains performant (\u003c10ms)\n- [ ] Integration: risk scorer uses preference to adjust player_preference_risk\n\n## Minimal Implementation\n- Create `web/demo/js/player-preference.js`\n- Track accept/reject counts per branch type\n- Simple ratio calculation with smoothing\n\n## Dependencies\n- ge-hch.5.15.3 (Risk Scorer)\n\n## Deliverables\n- `web/demo/js/player-preference.js`\n- Unit tests\n- Integration with localStorage","status":"in_progress","priority":2,"issue_type":"feature","assignee":"@Patch","created_at":"2026-01-16T15:02:12.247694133-08:00","created_by":"rgardler","updated_at":"2026-01-18T02:17:56.541875938-08:00","external_ref":"https://github.com/TheWizardsCode/GEngine/pull/177","labels":["Status: PR 
Created"],"dependencies":[{"issue_id":"ge-hch.5.15.5","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:02:12.248718041-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.5","depends_on_id":"ge-hch.5.15.3","type":"blocks","created_at":"2026-01-16T15:04:32.372750464-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.6","title":"Director Integration \u0026 Injection","description":"Replace naive injection with Director-governed injection so only quality branches reach players.\n\n## Player Experience Change\n**This is the key user-facing change.** Players no longer see an AI choice at every decision point. Instead, AI choices only appear when the Director determines they're contextually appropriate. Some choice points may have no AI option. Players experience improved narrative coherence — AI options feel like they belong in the story.\n\n## Acceptance Criteria\n- [ ] inkrunner calls `director.evaluate()` before injecting AI choice\n- [ ] AI choice injected only if `decision === 'approve'`\n- [ ] Silent skip if `decision === 'reject'` (no AI choice shown, no error)\n- [ ] Loading indicator shows 'Evaluating AI choice...' during Director evaluation\n- [ ] Total latency (Writer + Director) visible in console as telemetry event\n- [ ] Playthrough test: complete demo.ink with mix of accepted/rejected AI branches\n- [ ] Playthrough test: no runtime errors when Director rejects all branches\n- [ ] Playtest test: 3 internal players complete demo; rate coherence ≥4/5\n\n## Minimal Implementation\n- Modify `generateAIChoice()` in inkrunner.js\n- After writer returns proposal, call `director.evaluate(proposal, storyContext)`\n- Conditionally inject based on decision\n- Log rejection reasons to console\n\n## Dependencies\n- ge-hch.5.15.1 (Decision Flow Engine)\n- ge-hch.5.15.2 (Return-Path Feasibility Checker)\n- ge-hch.5.15.3 (Risk Scorer)\n\n## Deliverables\n- Modified inkrunner.js\n- Integration tests with mocked Director","status":"closed","priority":1,"issue_type":"feature","assignee":"Patch","created_at":"2026-01-16T15:02:22.771903526-08:00","created_by":"rgardler","updated_at":"2026-01-17T12:15:52.250196378-08:00","closed_at":"2026-01-17T12:15:52.250196378-08:00","close_reason":"Completed: tests added and demo verification passed; PR #160 
merged","dependencies":[{"issue_id":"ge-hch.5.15.6","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:02:22.772701315-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.6","depends_on_id":"ge-hch.5.15.1","type":"blocks","created_at":"2026-01-16T15:04:32.417478849-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.6","depends_on_id":"ge-hch.5.15.2","type":"blocks","created_at":"2026-01-16T15:04:32.458937914-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.6","depends_on_id":"ge-hch.5.15.3","type":"blocks","created_at":"2026-01-16T15:04:32.500388326-08:00","created_by":"rgardler"}],"comments":[{"id":199,"issue_id":"ge-hch.5.15.6","author":"rgardler","text":"Added Director gating coverage in inkrunner unit tests: approve path injects AI button with telemetry; reject path skips silently with telemetry; mixed approve/reject sequences retain authored choices. Targeted unit run: npm run test:unit -- --runTestsByPath tests/unit/inkrunner.test.js tests/unit/director.test.js (pass).","created_at":"2026-01-17T19:59:31Z"},{"id":200,"issue_id":"ge-hch.5.15.6","author":"rgardler","text":"Extended inkrunner AI integration tests to simulate reject-all playthrough and mixed approve/reject sequences: authored choices remain when Director rejects every proposal; only approved AI choice shows when one passes. Re-ran targeted unit suites: npm run test:unit -- --runTestsByPath tests/unit/inkrunner.test.js tests/unit/director.test.js (pass).","created_at":"2026-01-17T20:02:15Z"},{"id":201,"issue_id":"ge-hch.5.15.6","author":"rgardler","text":"Ran full Playwright demo suite after freeing port 4173 (killed http-server pid 7186). Command: npm run test:demo. Result: 17 passed, 1 skipped (tap-only context). Confirms Director gating and telemetry behaviors in UI flows.","created_at":"2026-01-17T20:03:26Z"}]}
{"id":"ge-hch.5.15.7","title":"Director Configuration UI","description":"Let players tune Director sensitivity via the settings panel.\n\n## Player Experience Change\nPlayers can adjust how selective the Director is. Lower risk threshold = stricter filtering (fewer AI branches but higher quality). Higher threshold = more permissive (more AI branches but potentially less coherent). Power users can disable Director entirely to return to naive injection mode.\n\n## Acceptance Criteria\n- [ ] Risk threshold slider (0.1–0.8, default 0.4) in AI Settings modal\n- [ ] 'Enable Director' checkbox (default: checked)\n- [ ] When disabled, falls back to naive injection (all valid proposals accepted)\n- [ ] Settings persist in localStorage\n- [ ] UI changes take effect on next choice point (no page reload needed)\n- [ ] Unit test: changing threshold updates `getSettings().directorRiskThreshold`\n- [ ] Unit test: invalid threshold value (e.g., 2.0) is clamped to valid range\n- [ ] Integration test: high threshold (0.8) accepts more proposals than low threshold (0.2)\n\n## Minimal Implementation\n- Extend `renderSettingsPanel()` in api-key-manager.js\n- Add 'Director Settings' section below 'AI Settings'\n- Bind slider to `settings.directorRiskThreshold`\n- Bind checkbox to `settings.directorEnabled`\n\n## Dependencies\n- ge-hch.5.15.6 (Director Integration \u0026 Injection)\n\n## Deliverables\n- Extended api-key-manager.js\n- UI tests","status":"open","priority":2,"issue_type":"feature","created_at":"2026-01-16T15:02:32.281278376-08:00","created_by":"rgardler","updated_at":"2026-01-16T15:02:32.281278376-08:00","dependencies":[{"issue_id":"ge-hch.5.15.7","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:02:32.282245731-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.7","depends_on_id":"ge-hch.5.15.6","type":"blocks","created_at":"2026-01-16T15:04:32.543472979-08:00","created_by":"rgardler"}]}
{"id":"ge-hch.5.15.8","title":"Decision Telemetry Emitter","description":"Emit telemetry events for Director decisions to enable future analysis and tuning.\n\n## Player Experience Change\nNone directly visible. Enables the team to analyze Director performance, identify common rejection reasons, and tune risk weights based on real data.\n\n## Acceptance Criteria\n- [ ] Emits `director_decision` event on each `evaluate()` call\n- [ ] Event includes: `{ proposal_id, timestamp, decision, reason, riskScore, latencyMs, metrics: { confidence, pacing, returnPath, thematic, lore, voice } }`\n- [ ] Uses existing telemetry.js if available; console.log fallback otherwise\n- [ ] Events stored in sessionStorage buffer for offline analysis (last 50 events)\n- [ ] Unit test: decision emits event with all required fields\n- [ ] Unit test: event timestamp is valid ISO8601\n- [ ] Unit test: event without proposal_id still emits with generated UUID\n- [ ] Integration test: after 5 choices, sessionStorage contains 5 telemetry events\n\n## Minimal Implementation\n- Create `emitDecisionTelemetry(decision, metrics)` in director.js\n- Integrate with telemetry.js or console.log\n- Buffer recent events in sessionStorage\n\n## Dependencies\n- ge-hch.5.15.1 (Decision Flow Engine)\n\n## Deliverables\n- Telemetry emitter in director.js\n- Event schema documentation","status":"closed","priority":2,"issue_type":"feature","assignee":"@Patch","created_at":"2026-01-16T15:02:44.228894318-08:00","created_by":"rgardler","updated_at":"2026-01-17T12:34:58.682680447-08:00","closed_at":"2026-01-17T12:34:58.682680447-08:00","close_reason":"Completed","external_ref":"https://github.com/TheWizardsCode/GEngine/pull/161","labels":["Status: PR 
Created"],"dependencies":[{"issue_id":"ge-hch.5.15.8","depends_on_id":"ge-hch.5.15","type":"parent-child","created_at":"2026-01-16T15:02:44.229808395-08:00","created_by":"rgardler"},{"issue_id":"ge-hch.5.15.8","depends_on_id":"ge-hch.5.15.1","type":"blocks","created_at":"2026-01-16T15:04:32.584486358-08:00","created_by":"rgardler"}],"comments":[{"id":202,"issue_id":"ge-hch.5.15.8","author":"rgardler","text":"Implemented director_decision telemetry emitter with sessionStorage buffer (50), ISO timestamps, UUID fallback. Added unit tests for schema, timestamp validity, buffer cap, evaluate integration; ran jest: tests/unit/director.telemetry.test.js tests/unit/director.test.js tests/unit/inkrunner.test.js (all pass).","created_at":"2026-01-17T20:24:00Z"}]}
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/director.risk-preference.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Swap the real player-preference module for a stub whose getPreference
// is a jest mock function returning 0.5 by default.
jest.mock('../../web/demo/js/player-preference', () => ({
  getPreference: jest.fn(() => 0.5),
}));

const PlayerPreference = require('../../web/demo/js/player-preference');

// Expose the mocked module on the global object while this file is being
// evaluated, i.e. before Director is required further down. Jest only runs
// beforeAll hooks after the whole test file has been evaluated, so an
// assignment inside such a hook would happen after Director has been loaded.
global.PlayerPreference = PlayerPreference;

// Remove the global again once every test in this file has finished.
afterAll(() => {
  delete global.PlayerPreference;
});

const Director = require('../../web/demo/js/director');

// Shared fixture passed to Director.computeRiskScore in every test below:
// a 'dialogue' branch with a confidence score of 0.8.
const proposal = {
  content: { text: 'Some branch text', branch_type: 'dialogue' },
  metadata: {
    confidence_score: 0.8,
  },
};

/**
 * Exercises Director.computeRiskScore against the player-preference signal:
 * a value served by the PlayerPreference mock, a NaN value, and a
 * getPreference function supplied through the config argument.
 */
describe('Director computeRiskScore with player preference', () => {
  // Drop recorded calls and any stubbed return value left by the previous test.
  beforeEach(() => {
    PlayerPreference.getPreference.mockReset();
  });

  it('uses PlayerPreference.getPreference when available', () => {
    PlayerPreference.getPreference.mockReturnValue(0.9);

    const riskScore = Director.computeRiskScore(proposal, {}, {});

    expect(riskScore).toBeLessThan(0.27);
    // The lookup must be keyed by the proposal's branch type.
    expect(PlayerPreference.getPreference).toHaveBeenCalledWith('dialogue');
  });

  it('falls back to 0.5 when preference is NaN', () => {
    PlayerPreference.getPreference.mockReturnValue(NaN);

    const riskScore = Director.computeRiskScore(proposal, {}, {});

    expect(PlayerPreference.getPreference).toHaveBeenCalled();
    // Per the test title, a NaN preference should be treated as the neutral
    // 0.5, so the score is expected to stay above the 0.25 bound.
    expect(riskScore).toBeGreaterThan(0.25);
  });

  it('accepts config override getPreference', () => {
    // The preference source and its weight both come from the config here.
    const riskScore = Director.computeRiskScore(proposal, {}, {
      getPreference: () => 1.0,
      weights: { player_preference: 0.2 },
    });

    expect(riskScore).toBeLessThan(0.25);
  });
});
Loading