diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 2c6e3af..2433744 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -506,6 +506,11 @@ def _clear_output_files(input_path: Path, all_projects: bool, file_ext: str) -> default=2000, help="Maximum messages per page for combined transcript (default: 2000). Sessions are never split across pages.", ) +@click.option( + "--shallow", + is_flag=True, + help="Render only user and assistant text messages (no tools, system, or thinking)", +) @click.option( "--debug", is_flag=True, @@ -528,6 +533,7 @@ def main( output_format: str, image_export_mode: Optional[str], page_size: int, + shallow: bool, debug: bool, ) -> None: """Convert Claude transcript JSONL files to HTML or Markdown. @@ -685,6 +691,7 @@ def main( output_format, image_export_mode, page_size=page_size, + shallow=shallow, ) # Count processed projects @@ -737,6 +744,7 @@ def main( not no_cache, image_export_mode=image_export_mode, page_size=page_size, + shallow=shallow, ) if input_path.is_file(): click.echo(f"Successfully converted {input_path} to {output_path}") diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 2a67308..a7718ab 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -698,6 +698,7 @@ def _generate_paginated_html( session_data: Dict[str, SessionCacheData], working_directories: List[str], silent: bool = False, + shallow: bool = False, ) -> Path: """Generate paginated HTML files for combined transcript. 
@@ -714,7 +715,7 @@ def _generate_paginated_html( Returns: Path to the first page (combined_transcripts.html) """ - from .html.renderer import generate_html + from .html.renderer import HtmlRenderer from .utils import format_timestamp # Check if page size changed - if so, invalidate all pages @@ -851,7 +852,9 @@ def _generate_paginated_html( # Generate HTML for this page page_title = f"{title} - Page {page_num}" if page_num > 1 else title - html_content = generate_html( + page_renderer = HtmlRenderer() + page_renderer.shallow = shallow + html_content = page_renderer.generate( page_messages, page_title, page_info=page_info, @@ -915,6 +918,7 @@ def convert_jsonl_to( silent: bool = False, image_export_mode: Optional[str] = None, page_size: int = 2000, + shallow: bool = False, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -930,6 +934,7 @@ def convert_jsonl_to( image_export_mode: Image export mode ("placeholder", "embedded", "referenced"). page_size: Maximum messages per page for combined transcript pagination. If None, uses format default (embedded for HTML, referenced for Markdown). + shallow: If True, render only user and assistant text messages. 
""" if not input_path.exists(): raise FileNotFoundError(f"Input path not found: {input_path}") @@ -1018,7 +1023,7 @@ def convert_jsonl_to( # Generate combined output file (check if regeneration needed) assert output_path is not None - renderer = get_renderer(format, image_export_mode) + renderer = get_renderer(format, image_export_mode, shallow=shallow) # Decide whether to use pagination (HTML only, directory mode, no date filter) use_pagination = False @@ -1065,6 +1070,7 @@ def convert_jsonl_to( session_data, working_directories, silent=silent, + shallow=shallow, ) else: # Use single-file generation for small projects or filtered views @@ -1117,6 +1123,7 @@ def convert_jsonl_to( cache_was_updated, image_export_mode, silent=silent, + shallow=shallow, ) return output_path @@ -1479,6 +1486,7 @@ def _generate_individual_session_files( cache_was_updated: bool = False, image_export_mode: Optional[str] = None, silent: bool = False, + shallow: bool = False, ) -> int: """Generate individual files for each session in the specified format. @@ -1514,7 +1522,7 @@ def _generate_individual_session_files( project_title = get_project_display_name(output_dir.name, working_directories) # Get renderer once outside the loop - renderer = get_renderer(format, image_export_mode) + renderer = get_renderer(format, image_export_mode, shallow=shallow) regenerated_count = 0 # Generate HTML file for each session @@ -1662,6 +1670,7 @@ def process_projects_hierarchy( image_export_mode: Optional[str] = None, silent: bool = True, page_size: int = 2000, + shallow: bool = False, ) -> Path: """Process the entire ~/.claude/projects/ hierarchy and create linked output files. 
@@ -1818,6 +1827,7 @@ def process_projects_hierarchy( silent=silent, image_export_mode=image_export_mode, page_size=page_size, + shallow=shallow, ) # Track timing diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py index 8d22a37..11ad1bb 100644 --- a/claude_code_log/html/renderer.py +++ b/claude_code_log/html/renderer.py @@ -541,7 +541,9 @@ def generate( title = "Claude Transcript" # Get root messages (tree) and session navigation from format-neutral renderer - root_messages, session_nav, _ = generate_template_messages(messages) + root_messages, session_nav, _ = generate_template_messages( + messages, shallow=self.shallow + ) # Flatten tree via pre-order traversal, formatting content along the way with log_timing("Content formatting (pre-order)", t_start): diff --git a/claude_code_log/markdown/renderer.py b/claude_code_log/markdown/renderer.py index 23c7e41..40dc56b 100644 --- a/claude_code_log/markdown/renderer.py +++ b/claude_code_log/markdown/renderer.py @@ -876,7 +876,9 @@ def generate( title = "Claude Transcript" # Get root messages (tree), session navigation, and rendering context - root_messages, session_nav, ctx = generate_template_messages(messages) + root_messages, session_nav, ctx = generate_template_messages( + messages, shallow=self.shallow + ) self._ctx = ctx parts = [f"", ""] diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 1f3c349..a115a9e 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -18,10 +18,12 @@ MessageMeta, MessageType, TranscriptEntry, + AssistantMessageModel, AssistantTranscriptEntry, SystemTranscriptEntry, SummaryTranscriptEntry, QueueOperationTranscriptEntry, + UserMessageModel, UserTranscriptEntry, ContentItem, TextContent, @@ -524,6 +526,7 @@ def __init__(self, project_summaries: list[dict[str, Any]]): def generate_template_messages( messages: list[TranscriptEntry], + shallow: bool = False, ) -> Tuple[list[TemplateMessage], list[dict[str, Any]], 
RenderingContext]: """Generate root messages and session navigation from transcript messages. @@ -562,6 +565,11 @@ def generate_template_messages( with log_timing("Filter messages", t_start): filtered_messages = _filter_messages(messages) + # Shallow mode: keep only user and assistant text messages (no tools, system, thinking) + if shallow: + with log_timing("Shallow filter", t_start): + filtered_messages = _filter_shallow(filtered_messages) + # Pass 1: Collect session metadata and token tracking with log_timing("Collect session info", t_start): sessions, session_order, show_tokens_for_message = _collect_session_info( @@ -575,6 +583,11 @@ def generate_template_messages( ): ctx = _render_messages(filtered_messages, sessions, show_tokens_for_message) + # Shallow post-render: remove text-derived types (bash, slash commands, etc.) + if shallow: + with log_timing("Shallow post-render filter", t_start): + ctx.messages = _filter_shallow_template_messages(ctx.messages) + # Prepare session navigation data (uses ctx for session header indices) session_nav: list[dict[str, Any]] = [] with log_timing( @@ -1558,6 +1571,72 @@ def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]: return filtered +def _filter_shallow(messages: list[TranscriptEntry]) -> list[TranscriptEntry]: + """Filter messages for shallow mode: keep only user and assistant text. + + Strips tool items from user entries and thinking/tool items from assistant + entries. System, summary, queue-operation, and sidechain entries are removed. 
+ """ + from copy import copy + + _strip_types = (ThinkingContent, ToolUseContent, ToolResultContent) + filtered: list[TranscriptEntry] = [] + for message in messages: + if not isinstance(message, (UserTranscriptEntry, AssistantTranscriptEntry)): + continue + # Drop sidechain (subagent) messages + if message.isSidechain: + continue + text_items: list[ContentItem] = [ + item + for item in message.message.content + if not isinstance(item, _strip_types) + ] + if text_items: + msg_copy = copy(message) + msg_model = copy(message.message) + msg_model.content = text_items + if isinstance(msg_copy, UserTranscriptEntry): + msg_copy.message = cast("UserMessageModel", msg_model) + else: + msg_copy.message = cast("AssistantMessageModel", msg_model) + filtered.append(msg_copy) + return filtered + + +# Content classes to exclude in shallow mode post-render. +# These are text-derived types created by the user factory that we don't want +# in a shallow view (bash commands, slash command prompts, compacted summaries). +# We check by class rather than message_type string because several of these +# classes (SlashCommandMessage, CommandOutputMessage, CompactedSummaryMessage) +# return "user" as their message_type. +_SHALLOW_EXCLUDE_CLASSES = ( + BashInputMessage, + BashOutputMessage, + SlashCommandMessage, + UserSlashCommandMessage, + CommandOutputMessage, + CompactedSummaryMessage, +) + + +def _filter_shallow_template_messages( + messages: list[TemplateMessage], +) -> list[TemplateMessage]: + """Post-render filter for shallow mode: remove text-derived message types. + + After _render_messages creates TemplateMessages, some user text content + gets classified into special types (bash commands, slash commands, etc.) + that should be excluded from shallow output. 
+ """ + return [ + msg + for msg in messages + if not isinstance(msg.content, _SHALLOW_EXCLUDE_CLASSES) + and not msg.is_sidechain + ] + + def _collect_session_info( messages: list[TranscriptEntry], session_summaries: dict[str, str], @@ -2014,6 +2093,8 @@ class Renderer: - Subclasses override methods to implement format-specific rendering """ + shallow: bool = False + def _dispatch_format(self, obj: Any, message: TemplateMessage) -> str: """Dispatch to format_{ClassName}(obj, message) based on object type.""" for cls in type(obj).__mro__: @@ -2284,13 +2365,18 @@ def is_outdated(self, file_path: Path) -> Optional[bool]: return None -def get_renderer(format: str, image_export_mode: Optional[str] = None) -> Renderer: +def get_renderer( + format: str, + image_export_mode: Optional[str] = None, + shallow: bool = False, +) -> Renderer: """Get a renderer instance for the specified format. Args: format: The output format ("html", "md", or "markdown"). image_export_mode: Image export mode ("placeholder", "embedded", "referenced"). If None, defaults to "embedded" for HTML and "referenced" for Markdown. + shallow: If True, render only user and assistant text messages. Returns: A Renderer instance for the specified format. 
@@ -2303,14 +2389,17 @@ def get_renderer(format: str, image_export_mode: Optional[str] = None) -> Render # For HTML, default to embedded mode (current behavior) mode = image_export_mode or "embedded" - return HtmlRenderer(image_export_mode=mode) + renderer = HtmlRenderer(image_export_mode=mode) elif format in ("md", "markdown"): from .markdown.renderer import MarkdownRenderer # For Markdown, default to referenced mode mode = image_export_mode or "referenced" - return MarkdownRenderer(image_export_mode=mode) - raise ValueError(f"Unsupported format: {format}") + renderer = MarkdownRenderer(image_export_mode=mode) + else: + raise ValueError(f"Unsupported format: {format}") + renderer.shallow = shallow + return renderer def is_html_outdated(html_file_path: Path) -> bool: diff --git a/test/test_shallow_mode.py b/test/test_shallow_mode.py new file mode 100644 index 0000000..485ea41 --- /dev/null +++ b/test/test_shallow_mode.py @@ -0,0 +1,824 @@ +#!/usr/bin/env python3 +"""Tests for --shallow rendering mode. + +Shallow mode filters out everything except user and assistant text messages: +no tools, no thinking, no system messages. 
+""" + +import json +import shutil +import uuid +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from claude_code_log.cli import main +from claude_code_log.converter import convert_jsonl_to, load_transcript +from claude_code_log.html.renderer import HtmlRenderer +from claude_code_log.markdown.renderer import MarkdownRenderer +from claude_code_log.models import ( + AssistantTranscriptEntry, + SystemTranscriptEntry, + ThinkingContent, + ToolResultContent, + ToolUseContent, + UserTranscriptEntry, +) +from claude_code_log.renderer import _filter_shallow, generate_template_messages + + +# -- Test data helpers -------------------------------------------------------- + + +def _user_entry( + text: str, + session_id: str = "sess-001", + timestamp: str = "2025-01-01T10:00:00Z", + extra_content: list | None = None, +) -> dict: + content: list = [{"type": "text", "text": text}] + if extra_content: + content.extend(extra_content) + return { + "type": "user", + "timestamp": timestamp, + "sessionId": session_id, + "uuid": f"u-{uuid.uuid4().hex[:8]}", + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": "/tmp", + "version": "1.0.0", + "message": {"role": "user", "content": content}, + } + + +def _assistant_entry( + text: str, + session_id: str = "sess-001", + timestamp: str = "2025-01-01T10:00:01Z", + extra_content: list | None = None, +) -> dict: + content: list = [{"type": "text", "text": text}] + if extra_content: + content.extend(extra_content) + return { + "type": "assistant", + "timestamp": timestamp, + "sessionId": session_id, + "uuid": f"a-{uuid.uuid4().hex[:8]}", + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": "/tmp", + "version": "1.0.0", + "message": { + "id": f"msg_{uuid.uuid4().hex[:16]}", + "type": "message", + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": content, + }, + } + + +def _system_entry( + text: str, + session_id: str = "sess-001", + 
timestamp: str = "2025-01-01T10:00:02Z", +) -> dict: + return { + "type": "system", + "timestamp": timestamp, + "sessionId": session_id, + "message": text, + } + + +def _tool_use_item(name: str = "Bash", tool_id: str = "tool_001") -> dict: + return { + "type": "tool_use", + "id": tool_id, + "name": name, + "input": {"command": "echo hello"}, + } + + +def _tool_result_item(tool_id: str = "tool_001") -> dict: + return { + "type": "tool_result", + "tool_use_id": tool_id, + "content": "hello", + "is_error": False, + } + + +def _thinking_item(text: str = "Let me think...") -> dict: + return {"type": "thinking", "thinking": text} + + +def _write_jsonl(entries: list[dict], path: Path) -> Path: + path.write_text("\n".join(json.dumps(e) for e in entries) + "\n", encoding="utf-8") + return path + + +# -- Unit tests for _filter_shallow ------------------------------------------ + + +class TestFilterShallow: + """Test the _filter_shallow function directly on parsed TranscriptEntry lists.""" + + def test_keeps_user_and_assistant_text(self, tmp_path): + """Plain user and assistant messages pass through.""" + entries = [ + _user_entry("Hello"), + _assistant_entry("Hi there!"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + result = _filter_shallow(messages) + assert len(result) == 2 + assert isinstance(result[0], UserTranscriptEntry) + assert isinstance(result[1], AssistantTranscriptEntry) + + def test_removes_system_entries(self, tmp_path): + """System entries are dropped entirely.""" + entries = [ + _user_entry("Hello"), + _system_entry("model changed"), + _assistant_entry("Hi"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + result = _filter_shallow(messages) + assert len(result) == 2 + assert all(not isinstance(m, SystemTranscriptEntry) for m in result) + + def test_strips_tool_use_from_assistant(self, tmp_path): + """Tool use items within assistant entries are stripped.""" + entries = [ + _user_entry("Do
something"), + _assistant_entry( + "I'll run a command.", + extra_content=[_tool_use_item()], + ), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + result = _filter_shallow(messages) + assert len(result) == 2 + # Assistant entry should have text but no tool_use + assistant = result[1] + assert isinstance(assistant, AssistantTranscriptEntry) + for item in assistant.message.content: + assert not isinstance(item, ToolUseContent) + + def test_strips_tool_result_from_user(self, tmp_path): + """Tool result items within user entries are stripped.""" + entries = [ + _user_entry( + "Here's the result", + extra_content=[_tool_result_item()], + ), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + result = _filter_shallow(messages) + assert len(result) == 1 + user = result[0] + assert isinstance(user, UserTranscriptEntry) + for item in user.message.content: + assert not isinstance(item, ToolResultContent) + + def test_strips_thinking_from_assistant(self, tmp_path): + """Thinking items within assistant entries are stripped.""" + entries = [ + _assistant_entry( + "Here's my answer.", + extra_content=[_thinking_item()], + ), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + result = _filter_shallow(messages) + assert len(result) == 1 + assistant = result[0] + assert isinstance(assistant, AssistantTranscriptEntry) + for item in assistant.message.content: + assert not isinstance(item, ThinkingContent) + + def test_drops_assistant_with_only_tool_use(self, tmp_path): + """Assistant entries with only tool_use (no text) are dropped entirely.""" + # Build an entry where the only content is a tool_use (no text at all) + entry = _assistant_entry("placeholder", extra_content=[_tool_use_item()]) + # Remove the text item, keeping only tool_use + entry["message"]["content"] = [_tool_use_item()] + messages = load_transcript(_write_jsonl([entry], tmp_path / "t.jsonl")) + result = _filter_shallow(messages) 
+ assert len(result) == 0 + + def test_removes_sidechain_entries(self, tmp_path): + """Sidechain (subagent) entries are dropped.""" + sidechain_user = _user_entry("Subagent prompt") + sidechain_user["isSidechain"] = True + sidechain_assistant = _assistant_entry("Subagent reply") + sidechain_assistant["isSidechain"] = True + entries = [ + _user_entry("Main prompt"), + sidechain_user, + sidechain_assistant, + _assistant_entry("Main reply"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + result = _filter_shallow(messages) + assert len(result) == 2 + for m in result: + assert isinstance(m, (UserTranscriptEntry, AssistantTranscriptEntry)) + assert not m.isSidechain + + def test_does_not_mutate_original(self, tmp_path): + """Filtering creates copies, not mutations of the original.""" + entries = [ + _assistant_entry( + "Some text", + extra_content=[_tool_use_item()], + ), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + first = messages[0] + assert isinstance(first, AssistantTranscriptEntry) + original_content_count = len(first.message.content) + _filter_shallow(messages) + assert len(first.message.content) == original_content_count + + +# -- Integration tests: generate_template_messages with shallow --------------- + + +class TestShallowTemplateMessages: + """Test shallow mode through the full generate_template_messages pipeline.""" + + def test_shallow_removes_tool_messages(self, tmp_path): + """Shallow mode should not produce tool_use or tool_result TemplateMessages.""" + entries = [ + _user_entry("Run something"), + _assistant_entry( + "Running it.", + extra_content=[_tool_use_item()], + timestamp="2025-01-01T10:00:01Z", + ), + _user_entry( + "", + extra_content=[_tool_result_item()], + timestamp="2025-01-01T10:00:02Z", + ), + _assistant_entry("Done!", timestamp="2025-01-01T10:00:03Z"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + root_messages, _, _ = 
generate_template_messages(messages, shallow=True) + # Flatten tree + all_types = set() + _collect_types(root_messages, all_types) + assert "tool_use" not in all_types + assert "tool_result" not in all_types + assert "user" in all_types + assert "assistant" in all_types + + def test_shallow_removes_thinking_messages(self, tmp_path): + """Shallow mode should not produce thinking TemplateMessages.""" + entries = [ + _user_entry("Think about this"), + _assistant_entry( + "Here's my answer.", + extra_content=[_thinking_item("deep thoughts")], + ), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + root_messages, _, _ = generate_template_messages(messages, shallow=True) + all_types = set() + _collect_types(root_messages, all_types) + assert "thinking" not in all_types + assert "assistant" in all_types + + def test_shallow_preserves_session_headers(self, tmp_path): + """Session headers are still generated in shallow mode.""" + entries = [ + _user_entry("Hello"), + _assistant_entry("Hi"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + root_messages, session_nav, _ = generate_template_messages( + messages, shallow=True + ) + assert len(root_messages) >= 1 + assert root_messages[0].is_session_header + assert len(session_nav) >= 1 + + def test_shallow_removes_bash_messages(self, tmp_path): + """Shallow mode removes bash-input and bash-output messages.""" + entries = [ + _user_entry("Check the directory"), + # bash-input is parsed from user text containing tags + _user_entry( + "ls -la", timestamp="2025-01-01T10:00:01Z" + ), + _user_entry( + "total 42\ndrwxr-xr-x", + timestamp="2025-01-01T10:00:02Z", + ), + _assistant_entry("Here are the files.", timestamp="2025-01-01T10:00:03Z"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + root_messages, _, _ = generate_template_messages(messages, shallow=True) + all_types = set() + _collect_types(root_messages, all_types) + assert 
"bash-input" not in all_types + assert "bash-output" not in all_types + + def test_shallow_removes_slash_command_messages(self, tmp_path): + """Shallow mode removes slash command messages (e.g. /exit).""" + entries = [ + _user_entry("Hello"), + _assistant_entry("Hi", timestamp="2025-01-01T10:00:01Z"), + # Slash command entries are user entries whose text matches /command + _user_entry("/exit", timestamp="2025-01-01T10:00:02Z"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + root_messages, _, ctx = generate_template_messages(messages, shallow=True) + all_types = set() + _collect_types(root_messages, all_types) + # /exit should not appear as any type + for msg in ctx.messages: + assert "/exit" not in getattr(msg.content, "text", ""), ( + f"Slash command '/exit' found in shallow output as {msg.type}" + ) + + def test_shallow_removes_sidechain_messages(self, tmp_path): + """Shallow mode removes sidechain (subagent) messages entirely.""" + sidechain_user = _user_entry("Subagent task", timestamp="2025-01-01T10:00:01Z") + sidechain_user["isSidechain"] = True + sidechain_assistant = _assistant_entry( + "Subagent result", timestamp="2025-01-01T10:00:02Z" + ) + sidechain_assistant["isSidechain"] = True + entries = [ + _user_entry("Do a task"), + sidechain_user, + sidechain_assistant, + _assistant_entry("Task done.", timestamp="2025-01-01T10:00:03Z"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + root_messages, _, ctx = generate_template_messages(messages, shallow=True) + # No sidechain messages should remain + for msg in ctx.messages: + assert not msg.is_sidechain, f"Sidechain message found: {msg.type}" + + def test_shallow_vs_normal_fewer_messages(self, tmp_path): + """Shallow mode produces fewer messages than normal mode.""" + entries = [ + _user_entry("Do something"), + _assistant_entry( + "OK, running bash.", + extra_content=[_tool_use_item()], + timestamp="2025-01-01T10:00:01Z", + ), + _user_entry( + 
"", + extra_content=[_tool_result_item()], + timestamp="2025-01-01T10:00:02Z", + ), + _assistant_entry("All done!", timestamp="2025-01-01T10:00:03Z"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + normal_roots, _, normal_ctx = generate_template_messages( + messages, shallow=False + ) + shallow_roots, _, shallow_ctx = generate_template_messages( + messages, shallow=True + ) + + normal_count = len(normal_ctx.messages) + shallow_count = len(shallow_ctx.messages) + assert shallow_count < normal_count + + +# -- HTML rendering tests ----------------------------------------------------- + + +class TestShallowHtmlRendering: + """Test shallow mode through the HTML renderer.""" + + def test_shallow_html_no_tool_divs(self, tmp_path): + """Shallow HTML should not contain tool_use or tool_result message divs.""" + entries = [ + _user_entry("Write a file"), + _assistant_entry( + "Creating the file.", + extra_content=[_tool_use_item("Write", "tool_w01")], + timestamp="2025-01-01T10:00:01Z", + ), + _user_entry( + "", + extra_content=[_tool_result_item("tool_w01")], + timestamp="2025-01-01T10:00:02Z", + ), + _assistant_entry("File created!", timestamp="2025-01-01T10:00:03Z"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + renderer = HtmlRenderer() + renderer.shallow = True + html = renderer.generate(messages, "Shallow Test") + + assert "class='message tool_use" not in html + assert "class='message tool_result" not in html + assert "Write a file" in html + assert "Creating the file" in html + assert "File created!" 
in html + + def test_shallow_html_no_thinking(self, tmp_path): + """Shallow HTML should not contain thinking message divs.""" + entries = [ + _user_entry("Explain something"), + _assistant_entry( + "Here's the explanation.", + extra_content=[_thinking_item("I need to consider...")], + ), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + renderer = HtmlRenderer() + renderer.shallow = True + html = renderer.generate(messages, "Shallow Test") + + assert "class='message thinking" not in html + assert "I need to consider" not in html + assert "Here's the explanation" in html + + +# -- Markdown rendering tests -------------------------------------------------- + + +class TestShallowMarkdownRendering: + """Test shallow mode through the Markdown renderer.""" + + def test_shallow_markdown_no_tool_content(self, tmp_path): + """Shallow Markdown should not contain tool names or tool output.""" + entries = [ + _user_entry("Write a file"), + _assistant_entry( + "Creating the file.", + extra_content=[_tool_use_item("Write", "tool_w01")], + timestamp="2025-01-01T10:00:01Z", + ), + _user_entry( + "", + extra_content=[_tool_result_item("tool_w01")], + timestamp="2025-01-01T10:00:02Z", + ), + _assistant_entry("File created!", timestamp="2025-01-01T10:00:03Z"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + renderer = MarkdownRenderer() + renderer.shallow = True + md = renderer.generate(messages, "Shallow Test") + + assert "Write a file" in md + assert "Creating the file" in md + assert "File created!" 
in md + # Tool-specific content should be absent + assert ( + "Write" not in md.split("File created!")[0].split("Creating the file.")[1] + ) + + def test_shallow_markdown_no_thinking(self, tmp_path): + """Shallow Markdown should not contain thinking blocks.""" + entries = [ + _user_entry("Explain this"), + _assistant_entry( + "Here's the explanation.", + extra_content=[_thinking_item("Let me reason about this...")], + ), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + renderer = MarkdownRenderer() + renderer.shallow = True + md = renderer.generate(messages, "Shallow Test") + + assert "Here's the explanation" in md + assert "Let me reason about this" not in md + assert "Thinking" not in md + + def test_shallow_markdown_preserves_session_structure(self, tmp_path): + """Shallow Markdown preserves session headers.""" + entries = [ + _user_entry("Hello"), + _assistant_entry("Hi there"), + ] + messages = load_transcript(_write_jsonl(entries, tmp_path / "t.jsonl")) + + renderer = MarkdownRenderer() + renderer.shallow = True + md = renderer.generate(messages, "Shallow Test") + + assert "# Shallow Test" in md + assert "Hello" in md + assert "Hi there" in md + + def test_shallow_markdown_on_real_projects(self, tmp_path): + """Shallow Markdown works on real project data.""" + real_projects = Path(__file__).parent / "test_data" / "real_projects" + if not real_projects.exists(): + pytest.skip("Real test projects not available") + + # Pick first JSONL file + jsonl_files = [] + for project_dir in real_projects.iterdir(): + if project_dir.is_dir(): + jsonl_files.extend(project_dir.glob("*.jsonl")) + if not jsonl_files: + pytest.skip("No JSONL files in real_projects") + + renderer = MarkdownRenderer() + renderer.shallow = True + messages = load_transcript(jsonl_files[0]) + md = renderer.generate(messages, "Shallow MD Test") + assert md + assert "# Shallow MD Test" in md + + +# -- CLI tests 
----------------------------------------------------------------
+
+
+class TestShallowCLI:
+    """Test the --shallow CLI flag end to end through Click's CliRunner."""
+
+    def test_shallow_flag_accepted(self, tmp_path: Path) -> None:
+        """CLI accepts --shallow without error and writes the output file."""
+        entries = [
+            _user_entry("Hello"),
+            _assistant_entry("Hi there"),
+        ]
+        _write_jsonl(entries, tmp_path / "test.jsonl")
+        output_file = tmp_path / "output.html"
+
+        runner = CliRunner()
+        result = runner.invoke(
+            main,
+            [str(tmp_path / "test.jsonl"), "-o", str(output_file), "--shallow"],
+        )
+        assert result.exit_code == 0, f"CLI failed: {result.output}"
+        assert output_file.exists()
+
+    def test_shallow_flag_filters_tools(self, tmp_path: Path) -> None:
+        """CLI --shallow produces HTML without tool messages but keeps the text."""
+        entries = [
+            _user_entry("Run a command"),
+            _assistant_entry(
+                "Running it.",
+                extra_content=[_tool_use_item("Bash", "tool_b01")],
+                timestamp="2025-01-01T10:00:01Z",
+            ),
+            _user_entry(
+                "",
+                extra_content=[_tool_result_item("tool_b01")],
+                timestamp="2025-01-01T10:00:02Z",
+            ),
+            _assistant_entry("Here's the output.", timestamp="2025-01-01T10:00:03Z"),
+        ]
+        _write_jsonl(entries, tmp_path / "test.jsonl")
+        output_file = tmp_path / "output.html"
+
+        runner = CliRunner()
+        result = runner.invoke(
+            main,
+            [str(tmp_path / "test.jsonl"), "-o", str(output_file), "--shallow"],
+        )
+        assert result.exit_code == 0, f"CLI failed: {result.output}"
+
+        html = output_file.read_text(encoding="utf-8")
+        assert "class='message tool_use" not in html
+        assert "class='message tool_result" not in html
+        assert "Run a command" in html
+        assert "Here's the output" in html
+
+    def test_shallow_with_markdown_format(self, tmp_path: Path) -> None:
+        """CLI --shallow works with --format md too."""
+        entries = [
+            _user_entry("Hello"),
+            _assistant_entry(
+                "Hi",
+                extra_content=[_tool_use_item()],
+            ),
+        ]
+        _write_jsonl(entries, tmp_path / "test.jsonl")
+        output_file = tmp_path / "output.md"
+
+        runner = CliRunner()
+        result = runner.invoke(
+            main,
+            [
+                str(tmp_path / "test.jsonl"),
+                "-o",
+                str(output_file),
+                "--shallow",
+                "--format",
+                "md",
+            ],
+        )
+        assert result.exit_code == 0, f"CLI failed: {result.output}"
+        assert output_file.exists()
+        md = output_file.read_text(encoding="utf-8")
+        assert "Hello" in md
+        # NOTE(review): presumes _tool_use_item() defaults to the "Bash" tool.
+        assert "Bash" not in md  # Tool name should not appear
+
+
+# -- Real project data tests --------------------------------------------------
+
+REAL_PROJECTS_DIR = Path(__file__).parent / "test_data" / "real_projects"
+
+
+@pytest.fixture(scope="module")
+def real_projects_path() -> Path:
+    """Return the real-projects data directory, skipping when it is absent."""
+    if not REAL_PROJECTS_DIR.exists():
+        pytest.skip("Real test projects not available")
+    return REAL_PROJECTS_DIR
+
+
+class TestShallowRealProjects:
+    """Test shallow mode against real project data from test_data/real_projects/."""
+
+    def _get_project_jsonl_files(self, projects_path: Path) -> list[Path]:
+        """Get all JSONL files from real projects (top-level only, no subagents).
+
+        Both the project directories and the files inside each one are sorted
+        so the tests visit them in the same order on every filesystem.
+        """
+        files: list[Path] = []
+        for project_dir in sorted(projects_path.iterdir()):
+            if project_dir.is_dir():
+                # Non-recursive glob: only files directly inside the project.
+                files.extend(sorted(project_dir.glob("*.jsonl")))
+        return files
+
+    def test_shallow_produces_valid_html(self, real_projects_path: Path) -> None:
+        """Shallow mode generates a non-empty HTML document for every real project file."""
+        files = self._get_project_jsonl_files(real_projects_path)
+        assert files, "No JSONL files found in real_projects"
+
+        renderer = HtmlRenderer()
+        renderer.shallow = True
+
+        for jsonl_file in files:
+            messages = load_transcript(jsonl_file)
+            html = renderer.generate(messages, f"Shallow: {jsonl_file.name}")
+            assert html, f"Empty HTML for {jsonl_file.name}"
+            # Check for a real document marker; an empty needle always matches.
+            assert "<!DOCTYPE html>" in html, f"No doctype for {jsonl_file.name}"
+
+    def test_shallow_has_no_excluded_messages(self, real_projects_path: Path) -> None:
+        """Shallow HTML contains no tool, thinking, bash, command, or summary divs."""
+        files = self._get_project_jsonl_files(real_projects_path)
+
+        renderer = HtmlRenderer()
+        renderer.shallow = True
+
+        # Message types that must never be rendered in shallow mode.
+        excluded_types = [
+            "tool_use",
+            "tool_result",
+            "thinking",
+            "bash-input",
+            "bash-output",
+            "user-slash-command",
+            "command-output",
+            "compacted-summary",
+        ]
+
+        for jsonl_file in files:
+            messages = load_transcript(jsonl_file)
+            html = renderer.generate(messages, "Shallow Test")
+            for msg_type in excluded_types:
+                count = html.count(f"class='message {msg_type}")
+                assert count == 0, (
+                    f"{jsonl_file.name}: found {count} {msg_type} messages"
+                )
+
+    def test_shallow_fewer_messages_than_normal(self, real_projects_path: Path) -> None:
+        """Shallow mode never produces more messages than normal mode."""
+        files = self._get_project_jsonl_files(real_projects_path)
+
+        for jsonl_file in files:
+            messages = load_transcript(jsonl_file)
+            _, _, normal_ctx = generate_template_messages(messages, shallow=False)
+            _, _, shallow_ctx = generate_template_messages(messages, shallow=True)
+
+            normal_count = len(normal_ctx.messages)
+            shallow_count = len(shallow_ctx.messages)
+
+            # Real projects typically have many tool calls, so shallow should
+            # have fewer messages. Some tiny projects might only have text,
+            # which is why equality is tolerated here.
+            assert shallow_count <= normal_count, (
+                f"{jsonl_file.name}: shallow ({shallow_count}) > normal ({normal_count})"
+            )
+
+    def test_shallow_preserves_user_and_assistant(self, real_projects_path: Path) -> None:
+        """Shallow mode yields only user/assistant message types (plus session headers)."""
+        files = self._get_project_jsonl_files(real_projects_path)
+
+        for jsonl_file in files:
+            messages = load_transcript(jsonl_file)
+            root_messages, _, _ = generate_template_messages(messages, shallow=True)
+            all_types: set[str] = set()
+            _collect_types(root_messages, all_types)
+
+            # Should only have user/assistant text types (plus session headers)
+            non_header_types = all_types - {
+                "session-header",
+                "session_header",
+            }
+            allowed = {
+                "user",
+                "assistant",
+                "user-steering",
+                "user-memory",
+            }
+            unexpected = non_header_types - allowed
+            assert not unexpected, (
+                f"{jsonl_file.name}: unexpected types in shallow: {unexpected}"
+            )
+
+    def test_shallow_directory_mode(self, real_projects_path: Path, tmp_path: Path) -> None:
+        """Shallow mode works on a directory of JSONL files."""
+        # Copy a project to tmp for isolated testing. Sorting makes the chosen
+        # project independent of filesystem iteration order.
+        project_dirs = sorted(d for d in real_projects_path.iterdir() if d.is_dir())
+        if not project_dirs:
+            pytest.skip("No project dirs in real_projects")
+
+        source = project_dirs[0]
+        dest = tmp_path / source.name
+        shutil.copytree(source, dest)
+
+        output = convert_jsonl_to(
+            "html",
+            dest,
+            use_cache=False,
+            generate_individual_sessions=False,
+            silent=True,
+            shallow=True,
+        )
+        html = output.read_text(encoding="utf-8")
+        # Check for a real document marker; an empty needle always matches.
+        assert "<!DOCTYPE html>" in html
+        assert "class='message tool_use" not in html
+        assert "class='message tool_result" not in html
+
+
+# -- Test data file tests (representative_messages.jsonl) ----------------------
+
+
+class TestShallowTestData:
+    """Test shallow mode on the bundled test data files."""
+
+    @pytest.fixture
+    def test_data_dir(self) -> Path:
+        """Return the test_data directory that sits next to this module."""
+        return Path(__file__).parent / "test_data"
+
+    def test_shallow_representative_messages(self, test_data_dir: Path) -> None:
+        """Shallow mode on representative_messages.jsonl removes tools."""
+        test_file = test_data_dir / "representative_messages.jsonl"
+        messages = load_transcript(test_file)
+
+        renderer = HtmlRenderer()
+        renderer.shallow = True
+        html = renderer.generate(messages, "Shallow Representative")
+
+        # Should have user and assistant content
+        assert "class='message user" in html
+        assert "class='message assistant" in html
+        # Should not have tool content
+        assert "class='message tool_use" not in html
+        assert "class='message tool_result" not in html
+
+    def test_shallow_sidechain(self, test_data_dir: Path) -> None:
+        """Shallow mode on sidechain data removes tool messages."""
+        test_file = test_data_dir / "sidechain.jsonl"
+        if not test_file.exists():
+            pytest.skip("sidechain.jsonl not available")
+        messages = load_transcript(test_file)
+
+        root_messages, _, _ = generate_template_messages(messages, shallow=True)
+        all_types: set[str] = set()
+        _collect_types(root_messages, all_types)
+        assert "tool_use" not in all_types
+        assert "tool_result" not in all_types
+
+
+# -- Helpers ------------------------------------------------------------------
+
+
+def _collect_types(messages: list, types: set[str]) -> None:
+    """Recursively collect all message types from a tree of TemplateMessages.
+
+    Args:
+        messages: Nodes exposing a ``type`` attribute and, optionally, a
+            ``children`` sequence of further nodes.
+        types: Set updated in place with every ``type`` found in the tree.
+    """
+    for msg in messages:
+        types.add(msg.type)
+        # A missing, ``None`` or empty ``children`` attribute marks a leaf.
+        children = getattr(msg, "children", None)
+        if children:
+            _collect_types(children, types)