Skip to content

Commit 40a2802

Browse files
author
StackMemory Bot (CLI)
committed
feat(conductor): auto-PR creation, evolve dry-run, intelligence tests
- Auto-create GitHub PRs after successful agent commits via gh CLI with --no-pr flag to disable; PRs also created for finalized stale agents
- Add --dry-run flag to conductor learn --evolve — shows diff without writing, for reviewing proposed template changes before applying
- Add 19 tests for conductor intelligence: getRetryStrategy, estimateIssueComplexity, selectModelForIssue, predictDifficulty
1 parent 6753e32 commit 40a2802

File tree

4 files changed

+455
-1
lines changed

4 files changed

+455
-1
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@stackmemoryai/stackmemory",
3-
"version": "1.6.1",
3+
"version": "1.6.2",
44
"description": "Lossless, project-scoped memory for AI coding tools. Durable context across sessions with 56 MCP tools, FTS5 search, conductor orchestrator, loop/watch monitoring, snapshot capture, pre-flight overlap checks, Claude/Codex/OpenCode wrappers, Linear sync, and automatic hooks.",
55
"engines": {
66
"node": ">=20.0.0",
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
import { describe, it, expect } from 'vitest';
2+
import {
3+
getRetryStrategy,
4+
estimateIssueComplexity,
5+
selectModelForIssue,
6+
type AgentOutcomeEntry,
7+
} from '../orchestrator.js';
8+
import { predictDifficulty } from '../orchestrate.js';
9+
import type { LinearIssue } from '../../../integrations/linear/client.js';
10+
11+
// ── Helpers ──
12+
13+
/**
 * Builds an `AgentOutcomeEntry` fixture for the tests in this file.
 *
 * The defaults describe a first-attempt failure on issue `STA-100` in the
 * `implementing` phase with no commits. `overrides` is spread last, so any
 * field it carries replaces the corresponding default.
 *
 * @param overrides - Fields to replace on the default entry.
 * @returns The default entry with `overrides` applied.
 */
function makeOutcome(
  overrides: Partial<AgentOutcomeEntry> = {}
): AgentOutcomeEntry {
  return {
    // Evaluated on every call, so each fixture is stamped with the current time.
    timestamp: new Date().toISOString(),
    issue: 'STA-100',
    attempt: 1,
    outcome: 'failure',
    phase: 'implementing',
    toolCalls: 30,
    filesModified: 2,
    tokensUsed: 15000,
    durationMs: 60000,
    hasCommits: false,
    ...overrides,
  };
}
30+
31+
/**
 * Builds a `LinearIssue` fixture for the tests in this file.
 *
 * The defaults describe issue `STA-100` in the `Todo` state with priority 3,
 * a short description and no labels. `overrides` is spread last, so any field
 * it carries replaces the corresponding default.
 *
 * @param overrides - Fields to replace on the default issue.
 * @returns The default issue with `overrides` applied.
 */
function makeIssue(overrides: Partial<LinearIssue> = {}): LinearIssue {
  return {
    id: 'id-1',
    identifier: 'STA-100',
    title: 'Fix something',
    description: 'A short description',
    state: { id: 's1', name: 'Todo', type: 'unstarted' },
    priority: 3,
    labels: [],
    // Fixed timestamps keep the issue fixture identical across runs.
    createdAt: '2026-03-01T00:00:00Z',
    updatedAt: '2026-03-01T00:00:00Z',
    url: 'https://linear.app/test/STA-100',
    ...overrides,
  };
}
46+
47+
// ── getRetryStrategy ──
48+
49+
// Exercises getRetryStrategy(issueId, outcomes): whether a retry is advised
// and which adjustment hints are returned for different failure histories.
describe('getRetryStrategy', () => {
  it('returns shouldRetry: true with empty outcomes', () => {
    const result = getRetryStrategy('STA-100', []);
    expect(result.shouldRetry).toBe(true);
    expect(result.adjustments).toEqual([]);
  });

  it('returns shouldRetry: false when last failure was rate_limit (429)', () => {
    const outcomes = [
      makeOutcome({
        issue: 'STA-100',
        errorTail: 'Error: 429 Too Many Requests\nrate limit exceeded',
      }),
    ];
    const result = getRetryStrategy('STA-100', outcomes);
    expect(result.shouldRetry).toBe(false);
    expect(result.reason).toContain('rate limit');
  });

  it('returns shouldRetry: false when issue failed 2+ times on same phase', () => {
    // Two failures for the same issue, both in the 'testing' phase.
    const outcomes = [
      makeOutcome({ issue: 'STA-200', phase: 'testing' }),
      makeOutcome({ issue: 'STA-200', phase: 'testing' }),
    ];
    const result = getRetryStrategy('STA-200', outcomes);
    expect(result.shouldRetry).toBe(false);
    expect(result.reason).toContain('structural problem');
    // The reason is expected to name the phase that kept failing.
    expect(result.reason).toContain('testing');
  });

  it('returns adjustments including timeout hint when failure was timeout', () => {
    const outcomes = [
      makeOutcome({
        issue: 'STA-100',
        errorTail: 'ETIMEDOUT: operation timed out',
      }),
    ];
    const result = getRetryStrategy('STA-100', outcomes);
    expect(result.shouldRetry).toBe(true);
    // Matches on wording only, so the exact hint text is free to change.
    expect(result.adjustments.some((a) => /timeout|timed/i.test(a))).toBe(true);
  });

  it('returns adjustments including lint hint when failure was lint error', () => {
    const outcomes = [
      makeOutcome({
        issue: 'STA-100',
        errorTail: 'eslint: 3 errors found\nformatting issues',
      }),
    ];
    const result = getRetryStrategy('STA-100', outcomes);
    expect(result.shouldRetry).toBe(true);
    expect(result.adjustments.some((a) => /lint/i.test(a))).toBe(true);
  });

  it('returns adjustments including test hint when failure was test error', () => {
    const outcomes = [
      makeOutcome({
        issue: 'STA-100',
        errorTail: 'FAIL src/test.ts\nexpect(received).toBe(expected)',
      }),
    ];
    const result = getRetryStrategy('STA-100', outcomes);
    expect(result.shouldRetry).toBe(true);
    expect(result.adjustments.some((a) => /test/i.test(a))).toBe(true);
  });
});
115+
116+
// ── estimateIssueComplexity ──
117+
118+
// Exercises estimateIssueComplexity(issue, attempt?): the complexity bucket
// ('simple' | 'moderate' | 'complex') returned for different issue fixtures.
describe('estimateIssueComplexity', () => {
  it("returns 'simple' for short description with bug label", () => {
    const issue = makeIssue({
      description: 'Short bug',
      labels: [{ id: 'l1', name: 'bug' }],
      priority: 4,
    });
    expect(estimateIssueComplexity(issue)).toBe('simple');
  });

  it("returns 'complex' for long description with feature label and high priority", () => {
    const issue = makeIssue({
      description: 'A'.repeat(900),
      labels: [{ id: 'l1', name: 'feature' }],
      priority: 1,
      estimate: 5,
    });
    expect(estimateIssueComplexity(issue)).toBe('complex');
  });

  it("returns 'moderate' for average issue", () => {
    const issue = makeIssue({
      description: 'A'.repeat(500),
      priority: 3,
      estimate: 3,
    });
    expect(estimateIssueComplexity(issue)).toBe('moderate');
  });

  it('bumps complexity when attempt > 1', () => {
    const issue = makeIssue({
      description: 'A moderate task',
      priority: 4,
    });
    const base = estimateIssueComplexity(issue, 1);
    const bumped = estimateIssueComplexity(issue, 2);
    // Bumped should be >= base complexity level. Note this only asserts that
    // a second attempt never lowers the level; an unchanged level also passes.
    const levels = ['simple', 'moderate', 'complex'];
    expect(levels.indexOf(bumped)).toBeGreaterThanOrEqual(levels.indexOf(base));
  });
});
159+
160+
// ── selectModelForIssue ──
161+
162+
// Exercises selectModelForIssue(complexity, config): the model name chosen
// per complexity bucket, and the override when config.model is set.
describe('selectModelForIssue', () => {
  // Shared config with no `model` field; individual tests add one as needed.
  const baseConfig = {
    activeStates: ['Todo'],
    terminalStates: ['Done', 'Cancelled'],
    inProgressState: 'In Progress',
    inReviewState: 'In Review',
    pollIntervalMs: 30000,
    maxConcurrent: 3,
    workspaceRoot: '/tmp',
    repoRoot: '/tmp',
    baseBranch: 'main',
    appServerPath: '',
    turnTimeoutMs: 3600000,
    maxRetries: 1,
    hookTimeoutMs: 60000,
    // `as const` keeps the literal type 'cli' instead of widening to string.
    agentMode: 'cli' as const,
  };

  it("returns sonnet for 'simple'", () => {
    const model = selectModelForIssue('simple', baseConfig);
    expect(model).toContain('sonnet');
  });

  it("returns sonnet for 'moderate'", () => {
    const model = selectModelForIssue('moderate', baseConfig);
    expect(model).toContain('sonnet');
  });

  it("returns opus for 'complex'", () => {
    const model = selectModelForIssue('complex', baseConfig);
    expect(model).toContain('opus');
  });

  it("returns forced model when config.model is set (not 'auto')", () => {
    const config = { ...baseConfig, model: 'claude-haiku-3' };
    // The configured model is expected back regardless of complexity.
    expect(selectModelForIssue('simple', config)).toBe('claude-haiku-3');
    expect(selectModelForIssue('complex', config)).toBe('claude-haiku-3');
  });
});
201+
202+
// ── predictDifficulty ──
203+
204+
// Exercises predictDifficulty(labels, description, priority, outcomes): the
// difficulty bucket, the reasons list and the confidence value it returns.
describe('predictDifficulty', () => {
  it("returns 'easy' for bug fix with short description", () => {
    const result = predictDifficulty(['bug'], 'Fix null check', 4, []);
    expect(result.difficulty).toBe('easy');
    expect(result.reasons.length).toBeGreaterThan(0);
  });

  it("returns 'hard' for feature with long description and high priority", () => {
    const result = predictDifficulty(['feature'], 'A'.repeat(600), 1, []);
    expect(result.difficulty).toBe('hard');
  });

  it("returns 'medium' as default", () => {
    const result = predictDifficulty([], 'Some task', 3, []);
    expect(result.difficulty).toBe('medium');
    expect(result.reasons).toContain(
      'No strong signals — defaulting to medium'
    );
  });

  it('increases confidence with more signals', () => {
    // Same inputs as the 'hard' case above versus a near-empty issue.
    const minimal = predictDifficulty([], 'x', 4, []);
    const rich = predictDifficulty(['feature'], 'A'.repeat(600), 1, []);
    expect(rich.confidence).toBeGreaterThan(minimal.confidence);
  });

  it('uses historical outcomes to adjust difficulty', () => {
    // 4 matching outcomes with 100% failure rate → hard
    const outcomes: AgentOutcomeEntry[] = Array.from({ length: 4 }, () =>
      makeOutcome({
        outcome: 'failure',
        labels: ['feature'],
        toolCalls: 90,
      })
    );
    const result = predictDifficulty(['feature'], 'Implement X', 3, outcomes);
    expect(result.difficulty).toBe('hard');
    // At least one reason must mention the history that drove the result.
    expect(
      result.reasons.some(
        (r) => /historical/i.test(r) || /failure rate/i.test(r)
      )
    ).toBe(true);
  });
});

0 commit comments

Comments
 (0)