Skip to content

Commit 67ac1d6

Browse files
author
StackMemory Bot (CLI)
committed
feat(conductor): add learning loop — outcome logging, templated prompts, learn command
- Log structured AgentOutcomeEntry to outcomes.jsonl on every agent completion/failure
- Extract error tail from output.log for failure pattern analysis
- Externalize buildPrompt() to support custom prompt-template.md with {{VARIABLE}} substitution
- Add `conductor learn` command: analyzes outcomes, reports success rate, failure phases, error patterns, and actionable recommendations
- Supports --export (JSON), --failures-only, --last N filters
1 parent 9646080 commit 67ac1d6

File tree

2 files changed

+353
-10
lines changed

2 files changed

+353
-10
lines changed

src/cli/commands/orchestrate.ts

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ import { logger } from '../../core/monitoring/logger.js';
2525
import { Conductor } from './orchestrator.js';
2626
import {
2727
getAgentStatusDir,
28+
getOutcomesLogPath,
29+
type AgentOutcomeEntry,
2830
type AgentPhase,
2931
type AgentStatusFile,
3032
} from './orchestrator.js';
@@ -824,6 +826,234 @@ export function createConductorCommands(): Command {
824826
});
825827
});
826828

829+
// --- learn ---
830+
cmd
  .command('learn')
  .description('Analyze agent outcomes and generate improved prompt templates')
  .option('--last <n>', 'Analyze last N outcomes (default: all)', '0')
  .option('--failures-only', 'Only analyze failures', false)
  .option('--export', 'Export analysis as JSON', false)
  /**
   * `conductor learn` — reads the JSONL outcomes log, aggregates success,
   * retry and failure statistics, and either prints them as a JSON document
   * (`--export`) or renders a coloured report followed by recommendations.
   *
   * Filters are applied in this order: `--failures-only` first, then
   * `--last <n>`, so combining both yields the last N *failures*.
   */
  .action(async (options) => {
    const logPath = getOutcomesLogPath();
    if (!existsSync(logPath)) {
      console.log(
        `${c.yellow}No outcomes log found.${c.r} Run conductor to generate data.`
      );
      return;
    }

    // One JSON document per line; tolerate CRLF endings and blank or
    // whitespace-only lines.
    const lines = readFileSync(logPath, 'utf-8')
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter((line) => line.length > 0);

    // Parse defensively: a single truncated or corrupt record (for example
    // from an interrupted append) must not abort the whole analysis.
    const parsedOutcomes: AgentOutcomeEntry[] = [];
    let skippedLines = 0;
    for (const line of lines) {
      try {
        const parsed: unknown = JSON.parse(line);
        if (
          typeof parsed === 'object' &&
          parsed !== null &&
          'outcome' in parsed
        ) {
          parsedOutcomes.push(parsed as AgentOutcomeEntry);
        } else {
          skippedLines++;
        }
      } catch {
        skippedLines++;
      }
    }
    if (skippedLines > 0) {
      // Written to stderr so that `--export` output on stdout stays valid JSON.
      console.error(
        `${c.yellow}Skipped ${skippedLines} malformed line(s) in outcomes log.${c.r}`
      );
    }

    let outcomes = parsedOutcomes;
    if (options.failuresOnly) {
      outcomes = outcomes.filter((o) => o.outcome === 'failure');
    }

    // A non-numeric or non-positive value means "no limit".
    const lastN = parseInt(options.last, 10);
    if (lastN > 0) {
      outcomes = outcomes.slice(-lastN);
    }

    if (outcomes.length === 0) {
      console.log(`${c.gray}No matching outcomes to analyze.${c.r}`);
      return;
    }

    const selected = outcomes;
    const failed = selected.filter((o) => o.outcome === 'failure');

    // Aggregate stats
    const total = selected.length;
    const successes = selected.filter((o) => o.outcome === 'success').length;
    const failures = failed.length;
    const successRate = Math.round((successes / total) * 100);

    // Mean of a numeric field; missing or non-finite values count as 0 so a
    // single incomplete record cannot turn the whole average into NaN.
    const average = (pick: (o: AgentOutcomeEntry) => number): number =>
      selected.reduce((sum, o) => {
        const value = pick(o);
        return sum + (Number.isFinite(value) ? value : 0);
      }, 0) / total;

    const avgTokens = Math.round(average((o) => o.tokensUsed));
    const avgDuration = Math.round(average((o) => o.durationMs) / 60000); // ms -> minutes
    const avgTools = Math.round(average((o) => o.toolCalls));

    // Increment a counter in a string-keyed tally.
    const bump = (counts: Record<string, number>, key: string): void => {
      counts[key] = (counts[key] ?? 0) + 1;
    };

    // Map an error tail to a coarse category. Matching is case-insensitive and
    // checked in priority order; the first hit wins. Rate limiting requires a
    // standalone "429" or an explicit "rate limit" phrase so that words such as
    // "generate" or "iterate" are not misclassified.
    const classifyError = (tail: string): string => {
      const text = tail.toLowerCase();
      if (text.includes('lint')) return 'lint_failure';
      if (text.includes('test')) return 'test_failure';
      if (text.includes('timeout') || text.includes('timed out')) {
        return 'timeout';
      }
      if (text.includes('conflict')) return 'git_conflict';
      if (/\b429\b/.test(text) || /rate[\s_-]?limit/.test(text)) {
        return 'rate_limit';
      }
      return 'unknown';
    };

    // Phase distribution at failure + error pattern extraction
    const failPhases: Record<string, number> = {};
    const errorPatterns: Record<string, number> = {};
    for (const o of failed) {
      bump(failPhases, o.phase);
      if (o.errorTail) {
        bump(errorPatterns, classifyError(o.errorTail));
      }
    }

    // Retry analysis: any outcome whose attempt number is above 1.
    const retries = selected.filter((o) => o.attempt > 1);
    const retrySuccessRate =
      retries.length > 0
        ? Math.round(
            (retries.filter((o) => o.outcome === 'success').length /
              retries.length) *
              100
          )
        : 0;

    if (options.export) {
      const analysis = {
        total,
        successes,
        failures,
        successRate,
        avgTokens,
        avgDurationMin: avgDuration,
        avgToolCalls: avgTools,
        failPhases,
        retrySuccessRate,
        errorPatterns,
        outcomes: selected,
      };
      console.log(JSON.stringify(analysis, null, 2));
      return;
    }

    // Display report
    console.log(`\n ${c.b}${c.purple}Conductor Learning Report${c.r}\n`);

    const rateColor =
      successRate >= 80 ? c.green : successRate >= 50 ? c.yellow : c.red;
    console.log(
      ` ${c.b}Outcomes${c.r} ${c.white}${total}${c.r} total ${c.green}${successes}${c.r} success ${c.red}${failures}${c.r} failed ${rateColor}${successRate}%${c.r} success rate`
    );
    console.log(
      ` ${c.b}Averages${c.r} ${c.white}${avgDuration}m${c.r} duration ${c.white}${fmtTokens(avgTokens)}${c.r} tokens ${c.white}${avgTools}${c.r} tool calls`
    );

    if (retries.length > 0) {
      console.log(
        ` ${c.b}Retries${c.r} ${c.white}${retries.length}${c.r} attempts ${c.white}${retrySuccessRate}%${c.r} retry success rate`
      );
    }

    // Sort tally entries by descending count for display.
    const byCountDesc = (
      counts: Record<string, number>
    ): Array<[string, number]> =>
      Object.entries(counts).sort((a, b) => b[1] - a[1]);

    // Failure phase breakdown
    if (failures > 0) {
      console.log(`\n ${c.b}Failure Phases${c.r}`);
      for (const [phase, count] of byCountDesc(failPhases)) {
        const pct = Math.round((count / failures) * 100);
        const bar = progressBar(pct, 10);
        console.log(
          ` ${phaseIcon[phase as AgentPhase] || '○'} ${phase.padEnd(14)} ${bar} ${c.white}${count}${c.r} ${c.gray}(${pct}%)${c.r}`
        );
      }
    }

    // Error patterns
    if (Object.keys(errorPatterns).length > 0) {
      console.log(`\n ${c.b}Error Patterns${c.r}`);
      for (const [pattern, count] of byCountDesc(errorPatterns)) {
        console.log(
          ` ${c.red}${c.r} ${pattern.padEnd(16)} ${c.white}${count}${c.r}`
        );
      }
    }

    // Recommendations
    console.log(`\n ${c.b}Recommendations${c.r}`);
    const recs: string[] = [];

    if ((errorPatterns['lint_failure'] ?? 0) > 0) {
      recs.push(
        'Add explicit lint rules to prompt template (ESLint conventions, import style)'
      );
    }
    if ((errorPatterns['test_failure'] ?? 0) > 0) {
      recs.push(
        'Add "run tests before committing" emphasis, include test command in prompt'
      );
    }
    if ((errorPatterns['timeout'] ?? 0) > 0) {
      recs.push(
        'Reduce scope per issue or increase turnTimeoutMs in conductor config'
      );
    }
    // More than half of all failures happened in the implementing phase.
    if ((failPhases['implementing'] ?? 0) > failures * 0.5) {
      recs.push(
        'Agents stall during implementation — add examples or break issues smaller'
      );
    }
    if ((failPhases['reading'] ?? 0) > 0) {
      recs.push(
        'Agents fail during reading — improve issue descriptions or add context pointers'
      );
    }
    // Only flag retries once there are enough of them to be meaningful.
    if (retrySuccessRate < 30 && retries.length > 2) {
      recs.push(
        'Low retry success — consider better prior-attempt context injection'
      );
    }
    if (successRate >= 80) {
      recs.push('High success rate — current prompt template is working well');
    }

    if (recs.length === 0) {
      recs.push('Collect more data for actionable recommendations');
    }

    for (const rec of recs) {
      console.log(` ${c.cyan}${c.r} ${rec}`);
    }

    // Prompt template hint
    const templatePath = join(
      homedir(),
      '.stackmemory',
      'conductor',
      'prompt-template.md'
    );
    if (!existsSync(templatePath)) {
      console.log(
        `\n ${c.d}Tip: Create ${templatePath} to customize agent prompts.${c.r}`
      );
      console.log(
        ` ${c.d}Variables: {{ISSUE_ID}} {{TITLE}} {{DESCRIPTION}} {{LABELS}} {{PRIORITY}} {{ATTEMPT}} {{PRIOR_CONTEXT}}${c.r}`
      );
    } else {
      console.log(`\n ${c.d}Using custom template: ${templatePath}${c.r}`);
    }

    console.log('');
  });
1056+
8271057
// --- usage ---
8281058
cmd
8291059
.command('usage')

0 commit comments

Comments
 (0)