Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/ruff.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Lint and format-check the repository with Ruff on pushes/PRs to main.
name: Ruff

on:
  pull_request:
    branches: [ main ]
  push:
    branches: [ main ]

jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
      # v4/v5 run on the Node20 runtime; the v3/v4 (Node16) versions are
      # deprecated on GitHub-hosted runners.
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ruff
      - name: Run Ruff linter
        run: ruff check .
      - name: Run Ruff formatter
        run: ruff format --check .
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies = [
"langchain-anthropic>=0.3.16,<0.4.0",
"simple-term-menu>=1.6.4",
"langgraph-checkpoint-sqlite (>=2.0.10,<3.0.0)",
"langmem (==0.0.27)",
]

[project.scripts]
Expand Down Expand Up @@ -64,6 +65,9 @@ langchain-ollama = "^0.3.3"
langchain-anthropic = "^0.3.16"
simple-term-menu = "^1.6.4"

[tool.poetry.group.dev.dependencies]
ruff = "^0.12.4"

[tool.setuptools.packages.find]
where = ["src"]

Expand Down
8 changes: 8 additions & 0 deletions src/katalyst/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,11 @@
# Project specific
".katalyst",
}

# Token budgets for conversation summarization (consumed by the summarizer
# node). The stale TODO marker is removed: the names are already explanatory.
#
# Maximum number of tokens allowed in the final output (summary plus the
# remaining messages). Enforced only after summarization has run.
MAX_AGGREGATE_TOKENS_IN_SUMMARY_AND_OUTPUT = 50000  # 50k
# Token count at which the accumulated messages trigger summarization.
MAX_TOKENS_TO_TRIGGER_SUMMARY = 40000  # 40k
# Token budget for the generated summary itself.
MAX_TOKENS_IN_SUMMARY_ONLY = 8000  # 8k
17 changes: 15 additions & 2 deletions src/katalyst/coding_agent/nodes/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
from langchain_core.tools import StructuredTool
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent
from katalyst.coding_agent.nodes.summarizer import get_summarization_node
from langgraph.prebuilt.chat_agent_executor import AgentState as LangGraphAgentState
from typing import Any

class ReactAgentState(LangGraphAgentState):
    """
    Custom state for the ReactAgent.

    Extends LangGraph's prebuilt AgentState with an extra ``context``
    mapping so the state schema accepts an additional key alongside the
    standard message history.
    """
    # Arbitrary scratch data carried through the graph.
    # NOTE(review): presumably used by the summarization pre_model_hook to
    # persist its running summary between turns — confirm against langmem.
    context: dict[str, Any]


# Simple planner prompt - no complex guidelines
Expand Down Expand Up @@ -138,12 +147,16 @@ def sync_wrapper(**kwargs):
timeout=timeout,
api_base=api_base
)

summarization_node = get_summarization_node()
# Create the agent executor with checkpointer if available
state.agent_executor = create_react_agent(
model=agent_model,
tools=tools,
checkpointer=state.checkpointer if hasattr(state, 'checkpointer') else False
pre_model_hook=summarization_node,
state_schema=ReactAgentState,
checkpointer=state.checkpointer if hasattr(state, 'checkpointer') else False,
# uncomment for debugging
debug=False,
)

# Initialize conversation with the plan
Expand Down
101 changes: 101 additions & 0 deletions src/katalyst/coding_agent/nodes/summarizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from langmem.short_term import SummarizationNode
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages.utils import count_tokens_approximately
from katalyst.katalyst_core.services.llms import get_llm_client
from katalyst.katalyst_core.utils.logger import get_logger
from katalyst.app.config import MAX_AGGREGATE_TOKENS_IN_SUMMARY_AND_OUTPUT, MAX_TOKENS_TO_TRIGGER_SUMMARY, MAX_TOKENS_IN_SUMMARY_ONLY
logger = get_logger()

# Conversation-compaction prompt adapted from Claude Code's /compact prompt.
# (Reference: https://www.reddit.com/r/ClaudeAI/comments/1jr52qj/here_is_claude_codes_compact_prompt/)
# NOTE: this URL lives only in this comment — it is NOT part of the prompt
# string below, so code must not expect it inside SUMMARIZATION_PROMPT.
SUMMARIZATION_PROMPT = """
Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.

Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:

1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
- The user's explicit requests and intents
- Your approach to addressing the user's requests
- Key decisions, technical concepts and code patterns
- Specific details like file names, full code snippets, function signatures, file edits, etc
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.

Your summary should include the following sections:

1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Pay special attention to the most recent messages and include full code snippets where applicable and include a summary of why this file read or edit is important.
4. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
5. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
6. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include file names and code snippets where applicable.
7. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests without confirming with the user first.
8. If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.

Here's an example of how your output should be structured:

<example>
<analysis>
[Your thought process, ensuring all points are covered thoroughly and accurately]
</analysis>

<summary>
1. Primary Request and Intent:
[Detailed description]

2. Key Technical Concepts:
- [Concept 1]
- [Concept 2]
- [...]

3. Files and Code Sections:
- [File Name 1]
- [Summary of why this file is important]
- [Summary of the changes made to this file, if any]
- [Important Code Snippet]
- [File Name 2]
- [Important Code Snippet]
- [...]

4. Problem Solving:
[Description of solved problems and ongoing troubleshooting]

5. Pending Tasks:
- [Task 1]
- [Task 2]
- [...]

6. Current Work:
[Precise description of current work]

7. Optional Next Step:
[Optional Next step to take]

</summary>
</example>

Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.

"""


def get_summarization_node():
    """Build a langmem SummarizationNode that compacts the message history.

    The node counts tokens approximately on the running message list and,
    once MAX_TOKENS_TO_TRIGGER_SUMMARY is crossed, asks the "summarizer"
    LLM to compress older messages using SUMMARIZATION_PROMPT. The result
    is written back under the "messages" key, so downstream consumers see
    the summary followed by the remaining (recent) messages.
    """
    # Feed the full conversation first, then the summarization instructions.
    compaction_prompt = ChatPromptTemplate.from_messages(
        [
            ("placeholder", "{messages}"),
            ("user", SUMMARIZATION_PROMPT),
        ]
    )
    llm = get_llm_client("summarizer")
    return SummarizationNode(
        token_counter=count_tokens_approximately,
        # Model for the "summarizer" role; gpt-4.1 is the advised choice.
        model=llm,
        max_tokens=MAX_AGGREGATE_TOKENS_IN_SUMMARY_AND_OUTPUT,
        max_tokens_before_summary=MAX_TOKENS_TO_TRIGGER_SUMMARY,
        initial_summary_prompt=compaction_prompt,
        max_summary_tokens=MAX_TOKENS_IN_SUMMARY_ONLY,
        # Replace the existing "messages" with summary + remaining messages.
        output_messages_key="messages",
    )
2 changes: 2 additions & 0 deletions src/katalyst/katalyst_core/config/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"execution": "gpt-4.1", # Fast execution tasks (agent_react, tools)
"fallback": "gpt-4o", # Fallback model
"default_timeout": 45,

},
"anthropic": {
"reasoning": "claude-3-opus-20240229", # High-reasoning tasks
Expand Down Expand Up @@ -55,6 +56,7 @@
"tool_runner": "execution",
# Default for any other component
"default": "execution",
"summarizer": "execution",
}


Expand Down
145 changes: 145 additions & 0 deletions tests/unit/test_summarizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""Tests for the summarizer node module."""

import pytest
from unittest.mock import MagicMock, patch, call
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.messages.utils import count_tokens_approximately

from katalyst.coding_agent.nodes.summarizer import (
get_summarization_node,
SUMMARIZATION_PROMPT,
)
from katalyst.app.config import (
MAX_AGGREGATE_TOKENS_IN_SUMMARY_AND_OUTPUT,
MAX_TOKENS_TO_TRIGGER_SUMMARY,
MAX_TOKENS_IN_SUMMARY_ONLY,
)


class TestSummarizationNode:
    """Unit tests for get_summarization_node and SUMMARIZATION_PROMPT."""

    def setup_method(self):
        """Create a mock LLM client shared by all tests."""
        self.mock_llm = MagicMock()
        self.mock_llm.invoke.return_value = MagicMock(content="Test summary")

    @patch("katalyst.coding_agent.nodes.summarizer.get_llm_client")
    def test_get_summarization_node_creation(self, mock_get_llm_client):
        """get_summarization_node wires the config values into the node."""
        mock_get_llm_client.return_value = self.mock_llm

        node = get_summarization_node()

        # The LLM client must be requested for the "summarizer" component.
        mock_get_llm_client.assert_called_once_with("summarizer")

        # Every configuration knob comes straight from app config.
        assert node.token_counter == count_tokens_approximately
        assert node.model == self.mock_llm
        assert node.max_tokens == MAX_AGGREGATE_TOKENS_IN_SUMMARY_AND_OUTPUT
        assert node.max_tokens_before_summary == MAX_TOKENS_TO_TRIGGER_SUMMARY
        assert node.max_summary_tokens == MAX_TOKENS_IN_SUMMARY_ONLY
        assert node.output_messages_key == "messages"

    @patch("katalyst.coding_agent.nodes.summarizer.get_llm_client")
    def test_summarization_prompt_structure(self, mock_get_llm_client):
        """The prompt is a messages placeholder followed by the user prompt."""
        mock_get_llm_client.return_value = self.mock_llm

        node = get_summarization_node()

        messages = node.initial_summary_prompt.messages
        assert len(messages) == 2
        # BUGFIX: ("placeholder", "{messages}") builds a MessagesPlaceholder,
        # which has no `.prompt` attribute — the old assertion raised
        # AttributeError. Check the placeholder's variable name instead.
        assert type(messages[0]).__name__ == "MessagesPlaceholder"
        assert messages[0].variable_name == "messages"
        assert messages[1].prompt.template == SUMMARIZATION_PROMPT

    def test_summarization_prompt_content(self):
        """The prompt contains every required summary section."""
        # Verify key sections are present in the prompt.
        assert "Primary Request and Intent" in SUMMARIZATION_PROMPT
        assert "Key Technical Concepts" in SUMMARIZATION_PROMPT
        assert "Files and Code Sections" in SUMMARIZATION_PROMPT
        assert "Problem Solving" in SUMMARIZATION_PROMPT
        assert "Pending Tasks" in SUMMARIZATION_PROMPT
        assert "Current Work" in SUMMARIZATION_PROMPT
        assert "Optional Next Step" in SUMMARIZATION_PROMPT
        assert "<analysis>" in SUMMARIZATION_PROMPT
        assert "<summary>" in SUMMARIZATION_PROMPT

    @patch("katalyst.coding_agent.nodes.summarizer.get_llm_client")
    def test_multiple_node_instances(self, mock_get_llm_client):
        """Each call builds a fresh node with a fresh client lookup."""
        mock_get_llm_client.return_value = self.mock_llm

        node1 = get_summarization_node()
        node2 = get_summarization_node()

        # Should be separate instances, each resolving its own client.
        assert node1 is not node2
        assert mock_get_llm_client.call_count == 2

    @patch("katalyst.coding_agent.nodes.summarizer.get_llm_client")
    def test_node_token_counter_integration(self, mock_get_llm_client):
        """The approximate token counter is attached and usable."""
        mock_get_llm_client.return_value = self.mock_llm

        node = get_summarization_node()

        assert callable(node.token_counter)
        assert node.token_counter == count_tokens_approximately

        # Sanity-check it against a tiny message list.
        test_messages = [
            HumanMessage(content="Test message 1"),
            AIMessage(content="Test response 1"),
        ]
        token_count = node.token_counter(test_messages)
        assert isinstance(token_count, int)
        assert token_count > 0

    @patch("katalyst.coding_agent.nodes.summarizer.get_llm_client")
    def test_summarization_node_type(self, mock_get_llm_client):
        """The factory returns a langmem SummarizationNode."""
        mock_get_llm_client.return_value = self.mock_llm

        node = get_summarization_node()

        assert node.__class__.__name__ == "SummarizationNode"

    def test_prompt_constants_accessibility(self):
        """The prompt constant is a non-empty, well-formed string."""
        assert isinstance(SUMMARIZATION_PROMPT, str)
        assert len(SUMMARIZATION_PROMPT) > 0

        # The prompt embeds an example of the expected output structure.
        assert "<example>" in SUMMARIZATION_PROMPT
        assert "</example>" in SUMMARIZATION_PROMPT
        # BUGFIX: the Reddit reference is a code comment ABOVE the constant,
        # not part of the string, so the old `"reddit.com" in
        # SUMMARIZATION_PROMPT` assertion always failed. It is deliberately
        # not asserted on here.

    def test_logger_integration(self):
        """The summarizer module exposes a usable module-level logger.

        BUGFIX: the summarizer module is imported at the top of this file,
        so its module-level ``logger`` was bound at import time; patching
        ``get_logger`` inside a test can never be observed, and the old
        ``assert_called_once()`` on the patch always failed. Only the
        logger's existence is meaningfully testable here.
        """
        from katalyst.coding_agent.nodes.summarizer import logger

        assert logger is not None
Loading