feat(conductor): smart retry strategy and GEPA conductor profile

StackMemory Bot (CLI) · StackMemory Bot (CLI) · commit ef1c109e1687 · 2026-03-11T22:06:28.000-04:00
- Add getRetryStrategy() — skips retries on rate limits and repeated
  same-phase failures, injects adjustments (timeout, lint, test hints)
  into retry prompt context
- Log failure outcomes in attemptRun catch block for retry analysis
- Add GEPA --profile conductor mode with conductor-specific evals
  (7 tasks testing prompt quality for bug fixes, features, retries)
- Add conductor mutation strategies: add_guardrails, improve_error_handling
- Wire retry adjustments into buildPrompt via PRIOR_CONTEXT variable
diff --git a/scripts/gepa/config.json b/scripts/gepa/config.json
@@ -55,5 +55,46 @@
     "trackTokenUsage": true,
     "trackErrors": true,
     "trackUserFeedback": true
+  },
+
+  "profiles": {
+    "claude-md": {
+      "target": {
+        "file": "CLAUDE.md",
+        "scope": "project",
+        "backup": true
+      },
+      "evolution": {
+        "mutationStrategies": [
+          "rephrase",
+          "add_examples",
+          "remove_redundancy",
+          "restructure",
+          "add_constraints",
+          "simplify"
+        ]
+      },
+      "evals": {
+        "files": ["stackmemory-tasks.jsonl", "coding-tasks.jsonl"]
+      }
+    },
+    "conductor": {
+      "target": {
+        "file": "~/.stackmemory/conductor/prompt-template.md",
+        "scope": "user",
+        "backup": true
+      },
+      "evolution": {
+        "mutationStrategies": [
+          "add_guardrails",
+          "improve_error_handling",
+          "add_examples",
+          "simplify"
+        ]
+      },
+      "evals": {
+        "files": ["conductor-tasks.jsonl"]
+      }
+    }
   }
 }
diff --git a/scripts/gepa/evals/conductor-tasks.jsonl b/scripts/gepa/evals/conductor-tasks.jsonl
@@ -0,0 +1,7 @@
+{"id": "cond-001", "name": "bug_fix_includes_validation", "prompt": "You are an AI coding agent given this prompt template output:\n\n---\nYou are working on Linear issue STA-142: Fix null pointer in frame lookup\n\n## Description\n\nWhen a frame is deleted and then looked up by ID, the sqlite adapter throws an unhandled null reference instead of returning undefined. Stack trace attached.\n\nLabels: bug\nPriority: High\n\n## Instructions\n\n1. Read the issue description carefully\n2. Implement the requested changes\n3. Write or update tests as needed\n4. Run lint and tests to verify\n5. Commit your changes with a descriptive message\n\nWork in the current directory. All changes will be on a dedicated branch.\n---\n\nDoes this prompt adequately guide an agent to fix a bug? Evaluate whether it includes: lint/test commands, commit format guidance, error handling expectations, and clear bug reproduction context. List what is present and what is missing.", "expected": {"mentions_lint_test": "prompt should include or guide agent to run lint and test commands", "commit_format": "prompt should specify commit message format like type(scope): message", "bug_context": "prompt should guide the agent to understand the bug before fixing", "validation_step": "prompt should include a verification/validation step after the fix"}, "weight": 1.5}
+{"id": "cond-002", "name": "feature_issue_guides_implementation", "prompt": "You are an AI coding agent given this prompt template output:\n\n---\nYou are working on Linear issue STA-200: Add tag filtering to frame search\n\n## Description\n\nUsers should be able to filter frames by tags in the search API. Add a `tags` parameter (string[]) to the search method that filters results to only include frames with matching tags. Use FTS5 for the text search portion.\n\nLabels: feature, search\nPriority: Medium\n\n## Instructions\n\n1. Read the issue description carefully\n2. Implement the requested changes\n3. Write or update tests as needed\n4. Run lint and tests to verify\n5. Commit your changes with a descriptive message\n\nWork in the current directory. All changes will be on a dedicated branch.\n---\n\nEvaluate whether this prompt adequately guides a feature implementation. Check for: implementation guidance, test requirements, code quality expectations, commit conventions, and whether it tells the agent to check existing patterns before writing new code.", "expected": {"implementation_guidance": "prompt should guide the agent through implementation steps", "test_requirements": "prompt should specify writing tests for the new feature", "existing_patterns": "prompt should tell agent to follow existing code patterns", "code_quality": "prompt should mention lint, type safety, or code quality checks", "commit_conventions": "prompt should specify commit message format"}, "weight": 1.5}
+{"id": "cond-003", "name": "retry_context_handling", "prompt": "You are an AI coding agent given this prompt template output for a retry attempt:\n\n---\nYou are working on Linear issue STA-305: Migrate config schema to v3\n\n## Description\n\nUpdate the config file schema from v2 to v3. Add the new 'integrations' top-level key and migrate existing linear settings under it.\n\nLabels: chore\nPriority: Medium\n\nThis is attempt 2. Check .stackmemory/conductor-context.md for context from prior attempts.\n\n## Instructions\n\n1. Read the issue description carefully\n2. Implement the requested changes\n3. Write or update tests as needed\n4. Run lint and tests to verify\n5. Commit your changes with a descriptive message\n\nWork in the current directory. All changes will be on a dedicated branch.\n---\n\nEvaluate whether this retry prompt adequately guides the agent on attempt 2. Check: does it tell the agent to read prior context first, does it explain what might have gone wrong, does it suggest a different approach, does it include enough context about what was already tried?", "expected": {"reads_prior_context": "prompt should instruct agent to read prior attempt context before starting", "avoids_repeating_mistakes": "prompt should guide agent to understand what failed previously", "different_approach": "prompt should suggest trying a different approach if prior attempt failed", "preserves_prior_work": "prompt should tell agent to check if partial work exists from prior attempt"}, "weight": 2.0}
+{"id": "cond-004", "name": "commit_format_guidance", "prompt": "You are an AI coding agent given this prompt template output:\n\n---\nYou are working on Linear issue STA-180: Add rate limiting to webhook endpoint\n\n## Description\n\nAdd rate limiting (100 req/min per IP) to the webhook handler to prevent abuse.\n\nLabels: feature, security\nPriority: High\n\n## Instructions\n\n1. Read the issue description carefully\n2. Implement the requested changes\n3. Write or update tests as needed\n4. Run lint and tests to verify\n5. Commit your changes with a descriptive message\n\nWork in the current directory. All changes will be on a dedicated branch.\n---\n\nEvaluate whether this prompt gives adequate commit guidance. Check: does it specify the commit message format (e.g. feat(scope): message), does it mention including the Linear issue ID, does it tell the agent to make atomic commits, does it specify branch naming conventions?", "expected": {"commit_format_specified": "prompt should specify type(scope): message format", "includes_issue_id": "prompt should tell agent to reference the Linear issue ID in commits", "atomic_commits": "prompt should guide making focused, atomic commits", "branch_conventions": "prompt should mention branch naming or confirm branch is set up"}, "weight": 1.3}
+{"id": "cond-005", "name": "no_description_handling", "prompt": "You are an AI coding agent given this prompt template output for an issue with no description:\n\n---\nYou are working on Linear issue STA-410: Fix typo in error message\n\nLabels: \nPriority: Low\n\n## Instructions\n\n1. Read the issue description carefully\n2. Implement the requested changes\n3. Write or update tests as needed\n4. Run lint and tests to verify\n5. Commit your changes with a descriptive message\n\nWork in the current directory. All changes will be on a dedicated branch.\n---\n\nEvaluate how well this prompt handles an issue with no description and no labels. Check: does the prompt degrade gracefully without a description, does it guide the agent to search the codebase for context, does it handle empty labels cleanly, does it still provide useful instructions despite minimal input?", "expected": {"graceful_without_description": "prompt should still be useful without a description section", "guides_codebase_search": "prompt should tell agent to search codebase for relevant context when description is missing", "handles_empty_labels": "empty labels should not produce awkward formatting", "minimal_input_useful": "prompt should provide enough structure even with minimal issue data"}, "weight": 1.8}
+{"id": "cond-006", "name": "urgent_priority_handling", "prompt": "You are an AI coding agent given this prompt template output for an urgent issue:\n\n---\nYou are working on Linear issue STA-501: Production crash in webhook handler\n\n## Description\n\nThe webhook handler is crashing in production with an unhandled promise rejection when the Linear API returns a 502. This is blocking all issue syncing. Error: UnhandledPromiseRejection at webhookHandler:45\n\nLabels: bug, production\nPriority: Urgent\n\n## Instructions\n\n1. Read the issue description carefully\n2. Implement the requested changes\n3. Write or update tests as needed\n4. Run lint and tests to verify\n5. Commit your changes with a descriptive message\n\nWork in the current directory. All changes will be on a dedicated branch.\n---\n\nEvaluate whether this prompt handles urgent/production issues appropriately. Check: does it convey urgency, does it guide the agent to prioritize a fix over perfect code, does it suggest checking for similar issues, does it emphasize testing the error path specifically?", "expected": {"conveys_urgency": "prompt should differentiate urgent issues from normal priority", "fix_over_perfection": "prompt should guide agent to prioritize a working fix for urgent issues", "error_path_testing": "prompt should emphasize testing the specific error scenario", "production_awareness": "prompt should include guidance about production-impacting changes"}, "weight": 1.5}
+{"id": "cond-007", "name": "template_variable_completeness", "prompt": "Review this conductor prompt template for completeness:\n\n---\nYou are working on Linear issue {{ISSUE_ID}}: {{TITLE}}\n\n## Description\n\n{{DESCRIPTION}}\n\nLabels: {{LABELS}}\nPriority: {{PRIORITY}}\n\n{{PRIOR_CONTEXT}}\n\n## Instructions\n\n1. Read the issue description carefully\n2. Implement the requested changes\n3. Write or update tests as needed\n4. Run lint and tests to verify\n5. Commit your changes with a descriptive message\n\nWork in the current directory. All changes will be on a dedicated branch.\n---\n\nEvaluate this template's completeness. Check: are all template variables used, does it include project-specific commands (npm run lint, npm run test:run), does it specify coding conventions, does it mention the commit format, does it handle the case where DESCRIPTION or LABELS might be empty?", "expected": {"all_variables_used": "template should use all available variables (ISSUE_ID, TITLE, DESCRIPTION, LABELS, PRIORITY, ATTEMPT, PRIOR_CONTEXT)", "project_commands": "template should include specific commands like npm run lint, npm run test:run, npm run build", "coding_conventions": "template should reference coding conventions or link to CLAUDE.md", "empty_variable_handling": "template should handle empty DESCRIPTION or LABELS gracefully"}, "weight": 1.3}
diff --git a/scripts/gepa/optimize.js b/scripts/gepa/optimize.js
@@ -38,6 +38,37 @@ if (fs.existsSync(envPath)) {
 const CONFIG_PATH = path.join(__dirname, 'config.json');
 const config = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
 
+// Profile support: --profile <name> overlays config.profiles[name] on config
+const profileIdx = process.argv.indexOf('--profile');
+// Strip the flag and its value (if any) so they never reach command parsing.
+// profileName is null without the flag, undefined when the value is missing.
+const profileName = profileIdx !== -1 ? process.argv.splice(profileIdx, 2)[1] : null;
+if (profileIdx !== -1) {
+  const profiles = config.profiles || {};
+  // Own-property lookup so names like "constructor" cannot match keys
+  // inherited from Object.prototype; a missing name fails loudly as well.
+  if (!profileName || !Object.prototype.hasOwnProperty.call(profiles, profileName)) {
+    const problem = profileName ? `Unknown profile "${profileName}"` : '--profile requires a name';
+    console.error(`Error: ${problem}. Available: ${Object.keys(profiles).join(', ')}`);
+    process.exit(1);
+  }
+
+  const profile = profiles[profileName];
+
+  // Merge profile overrides into config
+  if (profile.target) {
+    Object.assign(config.target, profile.target);
+  }
+  if (profile.evolution?.mutationStrategies) {
+    config.evolution.mutationStrategies = profile.evolution.mutationStrategies;
+  }
+  if (profile.evals?.files) {
+    config.evals.files = profile.evals.files;
+  }
+
+  console.log(`Using profile: ${profileName}`);
+}
+
 const GEPA_DIR = process.env.GEPA_DIR || __dirname;
 const GENERATIONS_DIR = path.join(GEPA_DIR, 'generations');
 const RESULTS_DIR = path.join(GEPA_DIR, 'results');
@@ -87,7 +118,10 @@ function getGenPath(gen, variant = null) {
  * Initialize GEPA with current CLAUDE.md
  */
 async function init(targetPath) {
-  const claudeMdPath = targetPath || path.join(process.cwd(), 'CLAUDE.md');
+  const resolvedTarget = targetPath || config.target.file || 'CLAUDE.md';
+  const claudeMdPath = resolvedTarget.startsWith('~')
+    ? path.join(process.env.HOME, resolvedTarget.slice(1))
+    : path.resolve(resolvedTarget);
 
   if (!fs.existsSync(claudeMdPath)) {
     console.error(`Error: ${claudeMdPath} not found`);
@@ -183,6 +217,10 @@ async function generateMutation(content, strategy, state) {
     add_constraints: `Add specific constraints and guardrails based on common failure modes. Be precise about what NOT to do.`,
 
     simplify: `Simplify complex instructions. Break down multi-step rules. Use bullet points over paragraphs.`,
+
+    add_guardrails: `Add guardrails for common agent failure modes: forgetting to run tests, wrong commit format, not reading prior context on retries, not handling empty fields. Add explicit "DO NOT" rules where agents commonly go wrong.`,
+
+    improve_error_handling: `Improve how the prompt handles edge cases and errors: empty descriptions, missing labels, retry attempts, urgent priorities. Add conditional sections and fallback instructions for when data is incomplete.`,
   };
 
   const prompt = `You are optimizing a CLAUDE.md system prompt for an AI coding agent.
@@ -386,10 +424,10 @@ async function runEval(variantName) {
 
   console.log(`Running evals on ${variantName}...`);
 
-  // Load eval tasks
-  const evalFiles = fs
-    .readdirSync(EVALS_DIR)
-    .filter((f) => f.endsWith('.jsonl'));
+  // Load eval tasks (use profile-specific files if set, otherwise all .jsonl)
+  const evalFiles = config.evals.files
+    ? config.evals.files.filter((f) => fs.existsSync(path.join(EVALS_DIR, f)))
+    : fs.readdirSync(EVALS_DIR).filter((f) => f.endsWith('.jsonl'));
   const tasks = evalFiles.flatMap((f) =>
     fs
       .readFileSync(path.join(EVALS_DIR, f), 'utf8')
@@ -849,5 +887,9 @@ Usage:
   node optimize.js run [generations]     Full optimization loop
   node optimize.js status                Show current status
   node optimize.js diff [a] [b]          Compare two variants
+
+Options:
+  --profile <name>                       Use a named profile (default: claude-md)
+                                         Available: claude-md, conductor
 `);
 }
diff --git a/src/cli/commands/orchestrator.ts b/src/cli/commands/orchestrator.ts