Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions claude_code_log/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,11 @@ def _clear_output_files(input_path: Path, all_projects: bool, file_ext: str) ->
default=2000,
help="Maximum messages per page for combined transcript (default: 2000). Sessions are never split across pages.",
)
@click.option(
"--shallow",
is_flag=True,
help="Render only user and assistant text messages (no tools, system, or thinking)",
)
@click.option(
"--debug",
is_flag=True,
Expand All @@ -528,6 +533,7 @@ def main(
output_format: str,
image_export_mode: Optional[str],
page_size: int,
shallow: bool,
debug: bool,
) -> None:
"""Convert Claude transcript JSONL files to HTML or Markdown.
Expand Down Expand Up @@ -685,6 +691,7 @@ def main(
output_format,
image_export_mode,
page_size=page_size,
shallow=shallow,
)

# Count processed projects
Expand Down Expand Up @@ -737,6 +744,7 @@ def main(
not no_cache,
image_export_mode=image_export_mode,
page_size=page_size,
shallow=shallow,
)
if input_path.is_file():
click.echo(f"Successfully converted {input_path} to {output_path}")
Expand Down
18 changes: 14 additions & 4 deletions claude_code_log/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,7 @@ def _generate_paginated_html(
session_data: Dict[str, SessionCacheData],
working_directories: List[str],
silent: bool = False,
shallow: bool = False,
) -> Path:
Comment on lines +701 to 702
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Cache invalidation does not account for render mode (shallow vs full).

shallow is now threaded through generation, but staleness checks/early exits are still mode-agnostic. That can serve the wrong artifact when users switch modes on the same output path.

🛠️ Minimum mitigation direction
@@ def convert_jsonl_to(...):
-        if (
+        if (
             cache_manager is not None
             and not cache_was_updated
             and from_date is None
             and to_date is None
+            and not shallow
         ):
             ...
@@
-            should_regenerate = (
+            should_regenerate = shallow or (
                 is_stale
                 or renderer.is_outdated(output_path)
                 or from_date is not None
                 or to_date is not None
                 or not output_path.exists()
             )
@@
-            if format == "html" and cache_manager is not None:
+            if format == "html" and cache_manager is not None and not shallow:
                 cache_manager.update_html_cache(...)

A complete fix should include render mode in staleness identity (cache metadata and/or file marker checks) for combined, paginated, and session outputs.

Also applies to: 921-922, 1026-1027, 1073-1074, 1126-1127, 1489-1490, 1525-1526, 1673-1674, 1830-1831

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@claude_code_log/converter.py` around lines 701 - 702, Staleness checks and
early exits are ignoring the new shallow render mode, so add the shallow flag
into the cache/staleness identity used wherever staleness is computed for
combined, paginated, and session outputs: include shallow in the metadata
written to and read from cache files/marker files and incorporate it into
whatever key/hash/compute_staleness function is used by the early-exit logic
(the code paths that currently accept shallow: bool and return Path). Update the
marker read/write and the is_stale/check_staleness logic to compare the shallow
value as well as timestamps/inputs so switching shallow vs full on the same
output path won't return the wrong artifact.

"""Generate paginated HTML files for combined transcript.

Expand All @@ -714,7 +715,7 @@ def _generate_paginated_html(
Returns:
Path to the first page (combined_transcripts.html)
"""
from .html.renderer import generate_html
from .html.renderer import HtmlRenderer
from .utils import format_timestamp

# Check if page size changed - if so, invalidate all pages
Expand Down Expand Up @@ -851,7 +852,9 @@ def _generate_paginated_html(

# Generate HTML for this page
page_title = f"{title} - Page {page_num}" if page_num > 1 else title
html_content = generate_html(
page_renderer = HtmlRenderer()
page_renderer.shallow = shallow
html_content = page_renderer.generate(
page_messages,
page_title,
page_info=page_info,
Expand Down Expand Up @@ -915,6 +918,7 @@ def convert_jsonl_to(
silent: bool = False,
image_export_mode: Optional[str] = None,
page_size: int = 2000,
shallow: bool = False,
) -> Path:
"""Convert JSONL transcript(s) to the specified format.

Expand All @@ -930,6 +934,7 @@ def convert_jsonl_to(
image_export_mode: Image export mode ("placeholder", "embedded", "referenced").
page_size: Maximum messages per page for combined transcript pagination.
If image_export_mode is None, uses the format default (embedded for HTML, referenced for Markdown).
shallow: If True, render only user and assistant text messages.
"""
if not input_path.exists():
raise FileNotFoundError(f"Input path not found: {input_path}")
Expand Down Expand Up @@ -1018,7 +1023,7 @@ def convert_jsonl_to(

# Generate combined output file (check if regeneration needed)
assert output_path is not None
renderer = get_renderer(format, image_export_mode)
renderer = get_renderer(format, image_export_mode, shallow=shallow)

# Decide whether to use pagination (HTML only, directory mode, no date filter)
use_pagination = False
Expand Down Expand Up @@ -1065,6 +1070,7 @@ def convert_jsonl_to(
session_data,
working_directories,
silent=silent,
shallow=shallow,
)
else:
# Use single-file generation for small projects or filtered views
Expand Down Expand Up @@ -1117,6 +1123,7 @@ def convert_jsonl_to(
cache_was_updated,
image_export_mode,
silent=silent,
shallow=shallow,
)

return output_path
Expand Down Expand Up @@ -1479,6 +1486,7 @@ def _generate_individual_session_files(
cache_was_updated: bool = False,
image_export_mode: Optional[str] = None,
silent: bool = False,
shallow: bool = False,
) -> int:
"""Generate individual files for each session in the specified format.

Expand Down Expand Up @@ -1514,7 +1522,7 @@ def _generate_individual_session_files(
project_title = get_project_display_name(output_dir.name, working_directories)

# Get renderer once outside the loop
renderer = get_renderer(format, image_export_mode)
renderer = get_renderer(format, image_export_mode, shallow=shallow)
regenerated_count = 0

# Generate HTML file for each session
Expand Down Expand Up @@ -1662,6 +1670,7 @@ def process_projects_hierarchy(
image_export_mode: Optional[str] = None,
silent: bool = True,
page_size: int = 2000,
shallow: bool = False,
) -> Path:
"""Process the entire ~/.claude/projects/ hierarchy and create linked output files.

Expand Down Expand Up @@ -1818,6 +1827,7 @@ def process_projects_hierarchy(
silent=silent,
image_export_mode=image_export_mode,
page_size=page_size,
shallow=shallow,
)

# Track timing
Expand Down
4 changes: 3 additions & 1 deletion claude_code_log/html/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,9 @@ def generate(
title = "Claude Transcript"

# Get root messages (tree) and session navigation from format-neutral renderer
root_messages, session_nav, _ = generate_template_messages(messages)
root_messages, session_nav, _ = generate_template_messages(
messages, shallow=self.shallow
)

# Flatten tree via pre-order traversal, formatting content along the way
with log_timing("Content formatting (pre-order)", t_start):
Expand Down
4 changes: 3 additions & 1 deletion claude_code_log/markdown/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,9 @@ def generate(
title = "Claude Transcript"

# Get root messages (tree), session navigation, and rendering context
root_messages, session_nav, ctx = generate_template_messages(messages)
root_messages, session_nav, ctx = generate_template_messages(
messages, shallow=self.shallow
)
self._ctx = ctx

parts = [f"<!-- Generated by claude-code-log v{get_library_version()} -->", ""]
Expand Down
97 changes: 93 additions & 4 deletions claude_code_log/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
MessageMeta,
MessageType,
TranscriptEntry,
AssistantMessageModel,
AssistantTranscriptEntry,
SystemTranscriptEntry,
SummaryTranscriptEntry,
QueueOperationTranscriptEntry,
UserMessageModel,
UserTranscriptEntry,
ContentItem,
TextContent,
Expand Down Expand Up @@ -524,6 +526,7 @@ def __init__(self, project_summaries: list[dict[str, Any]]):

def generate_template_messages(
messages: list[TranscriptEntry],
shallow: bool = False,
) -> Tuple[list[TemplateMessage], list[dict[str, Any]], RenderingContext]:
"""Generate root messages and session navigation from transcript messages.

Expand Down Expand Up @@ -562,6 +565,11 @@ def generate_template_messages(
with log_timing("Filter messages", t_start):
filtered_messages = _filter_messages(messages)

# Shallow mode: keep only user and assistant text messages (no tools, system, thinking)
if shallow:
with log_timing("Shallow filter", t_start):
filtered_messages = _filter_shallow(filtered_messages)

# Pass 1: Collect session metadata and token tracking
with log_timing("Collect session info", t_start):
sessions, session_order, show_tokens_for_message = _collect_session_info(
Expand All @@ -575,6 +583,11 @@ def generate_template_messages(
):
ctx = _render_messages(filtered_messages, sessions, show_tokens_for_message)

# Shallow post-render: remove text-derived types (bash, slash commands, etc.)
if shallow:
with log_timing("Shallow post-render filter", t_start):
ctx.messages = _filter_shallow_template_messages(ctx.messages)

# Prepare session navigation data (uses ctx for session header indices)
session_nav: list[dict[str, Any]] = []
with log_timing(
Expand Down Expand Up @@ -1558,6 +1571,72 @@ def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]:
return filtered


def _filter_shallow(messages: list[TranscriptEntry]) -> list[TranscriptEntry]:
    """Reduce transcript entries to user/assistant text for shallow mode.

    Only non-sidechain ``UserTranscriptEntry`` and ``AssistantTranscriptEntry``
    items are considered; every other entry type is dropped. From each kept
    entry, thinking, tool-use, and tool-result content items are stripped.
    Entries left with no content after stripping are dropped as well.

    Args:
        messages: Transcript entries to filter.

    Returns:
        A new list of shallow copies of the surviving entries, each carrying
        a copied message model whose ``content`` holds only the kept items.
    """
    from copy import copy

    dropped_item_types = (ThinkingContent, ToolUseContent, ToolResultContent)

    result: list[TranscriptEntry] = []
    for entry in messages:
        # Anything that is not a user/assistant entry is out of scope.
        if not isinstance(entry, (UserTranscriptEntry, AssistantTranscriptEntry)):
            continue
        # Sidechain (subagent) traffic is excluded from the shallow view.
        if entry.isSidechain:
            continue

        remaining: list[ContentItem] = [
            part
            for part in entry.message.content
            if not isinstance(part, dropped_item_types)
        ]
        if not remaining:
            continue

        # Work on copies so the trimmed content is set on the copy,
        # not on the entry that was passed in.
        trimmed_entry = copy(entry)
        trimmed_model = copy(entry.message)
        trimmed_model.content = remaining
        if isinstance(trimmed_entry, UserTranscriptEntry):
            trimmed_entry.message = cast("UserMessageModel", trimmed_model)
        else:
            trimmed_entry.message = cast("AssistantMessageModel", trimmed_model)
        result.append(trimmed_entry)
    return result


# Content classes dropped from shallow output after rendering.
# These are text-derived types created by the user factory that do not belong
# in a shallow view (bash commands, slash command prompts, compacted summaries).
# Matching is done by class (via isinstance on the message content) rather than
# by message_type string, because several of these classes
# (SlashCommandMessage, CommandOutputMessage, CompactedSummaryMessage)
# return "user" as their message_type.
_SHALLOW_EXCLUDE_CLASSES = (
BashInputMessage,
BashOutputMessage,
SlashCommandMessage,
UserSlashCommandMessage,
CommandOutputMessage,
CompactedSummaryMessage,
)


def _filter_shallow_template_messages(
    messages: list[TemplateMessage],
) -> list[TemplateMessage]:
    """Drop rendered messages that do not belong in shallow output.

    Rendering can classify user text into special content types (bash
    commands, slash commands, and similar). This pass removes every message
    whose content is an instance of one of ``_SHALLOW_EXCLUDE_CLASSES``, as
    well as every message flagged as sidechain.

    Args:
        messages: Rendered template messages.

    Returns:
        A new list with the remaining messages in their original order.
    """
    kept: list[TemplateMessage] = []
    for template_msg in messages:
        # Text-derived special types are excluded by content class.
        if isinstance(template_msg.content, _SHALLOW_EXCLUDE_CLASSES):
            continue
        if template_msg.is_sidechain:
            continue
        kept.append(template_msg)
    return kept


def _collect_session_info(
messages: list[TranscriptEntry],
session_summaries: dict[str, str],
Expand Down Expand Up @@ -2014,6 +2093,8 @@ class Renderer:
- Subclasses override methods to implement format-specific rendering
"""

shallow: bool = False

def _dispatch_format(self, obj: Any, message: TemplateMessage) -> str:
"""Dispatch to format_{ClassName}(obj, message) based on object type."""
for cls in type(obj).__mro__:
Expand Down Expand Up @@ -2284,13 +2365,18 @@ def is_outdated(self, file_path: Path) -> Optional[bool]:
return None


def get_renderer(format: str, image_export_mode: Optional[str] = None) -> Renderer:
def get_renderer(
format: str,
image_export_mode: Optional[str] = None,
shallow: bool = False,
) -> Renderer:
"""Get a renderer instance for the specified format.

Args:
format: The output format ("html", "md", or "markdown").
image_export_mode: Image export mode ("placeholder", "embedded", "referenced").
If None, defaults to "embedded" for HTML and "referenced" for Markdown.
shallow: If True, render only user and assistant text messages.

Returns:
A Renderer instance for the specified format.
Expand All @@ -2303,14 +2389,17 @@ def get_renderer(format: str, image_export_mode: Optional[str] = None) -> Render

# For HTML, default to embedded mode (current behavior)
mode = image_export_mode or "embedded"
return HtmlRenderer(image_export_mode=mode)
renderer = HtmlRenderer(image_export_mode=mode)
elif format in ("md", "markdown"):
from .markdown.renderer import MarkdownRenderer

# For Markdown, default to referenced mode
mode = image_export_mode or "referenced"
return MarkdownRenderer(image_export_mode=mode)
raise ValueError(f"Unsupported format: {format}")
renderer = MarkdownRenderer(image_export_mode=mode)
else:
raise ValueError(f"Unsupported format: {format}")
renderer.shallow = shallow
return renderer


def is_html_outdated(html_file_path: Path) -> bool:
Expand Down
Loading
Loading