@@ -4,7 +4,7 @@ use core::fmt;
44use std:: fmt:: Write as _;
55use std:: ops:: Range ;
66
7- use markdown_weaver:: { Event , TagEnd } ;
7+ use markdown_weaver:: { Event , Tag , TagEnd } ;
88use markdown_weaver_escape:: { escape_html, escape_html_body_text_with_char_count} ;
99
1010use crate :: offset_map:: OffsetMapping ;
5858 // Emit gap from last_byte_offset to range.end
5959 self . emit_gap_before ( range. end ) ?;
6060 } else if !matches ! ( & event, Event :: End ( _) ) {
61+ // For paragraph-level start events, capture pre-gap position so the
62+ // paragraph's char_range includes leading whitespace/gap content.
63+ let is_para_start = matches ! (
64+ & event,
65+ Event :: Start (
66+ Tag :: Paragraph ( _)
67+ | Tag :: Heading { .. }
68+ | Tag :: CodeBlock ( _)
69+ | Tag :: List ( _)
70+ | Tag :: BlockQuote ( _)
71+ | Tag :: HtmlBlock
72+ )
73+ ) ;
74+ if is_para_start && self . paragraphs . should_track_boundaries ( ) {
75+ self . paragraphs . pre_gap_start =
76+ Some ( ( self . last_byte_offset , self . last_char_offset ) ) ;
77+ }
78+
6179 // For other events, emit any gap before range.start
6280 // (emit_syntax handles char offset tracking)
6381 self . emit_gap_before ( range. start ) ?;
@@ -79,16 +97,31 @@ where
7997 // else: Event updated offset (e.g. start_tag emitted opening syntax), keep that value
8098 }
8199
82- // Emit any trailing syntax
83- self . emit_gap_before ( self . source . len ( ) ) ?;
100+ // Check if document ends with a paragraph break (double newline) BEFORE emitting trailing.
101+ // If so, we'll reserve the final newline for a synthetic trailing paragraph.
102+ let ends_with_para_break = self . source . ends_with ( "\n \n " )
103+ || self . source . ends_with ( "\n \u{200C} \n " ) ;
104+
105+ // Determine where to stop emitting trailing syntax
106+ let trailing_emit_end = if ends_with_para_break {
107+ // Don't emit the final newline - save it for synthetic paragraph
108+ self . source . len ( ) . saturating_sub ( 1 )
109+ } else {
110+ self . source . len ( )
111+ } ;
112+
113+ // Emit trailing syntax up to the determined point
114+ self . emit_gap_before ( trailing_emit_end) ?;
84115
85116 // Handle unmapped trailing content (stripped by parser)
86117 // This includes trailing spaces that markdown ignores
87118 let doc_byte_len = self . source . len ( ) ;
88119 let doc_char_len = self . text_buffer . len_chars ( ) ;
89120
90- if self . last_byte_offset < doc_byte_len || self . last_char_offset < doc_char_len {
91- // Emit the trailing content as visible syntax
121+ if !ends_with_para_break
122+ && ( self . last_byte_offset < doc_byte_len || self . last_char_offset < doc_char_len)
123+ {
124+ // Emit the trailing content as visible syntax (only if not creating synthetic para)
92125 if self . last_byte_offset < doc_byte_len {
93126 let trailing = & self . source [ self . last_byte_offset ..] ;
94127 if !trailing. is_empty ( ) {
@@ -125,7 +158,7 @@ where
125158 }
126159 }
127160
128- // Add any remaining accumulated data for the last paragraph
161+ // Add any remaining accumulated data for the last paragraph FIRST
129162 // (content that wasn't followed by a paragraph boundary)
130163 if !self . current_para . offset_maps . is_empty ( )
131164 || !self . current_para . syntax_spans . is_empty ( )
@@ -139,6 +172,48 @@ where
139172 . push ( std:: mem:: take ( & mut self . ref_collector . refs ) ) ;
140173 }
141174
175+ // Now create a synthetic trailing paragraph if needed
176+ if ends_with_para_break {
177+ // Get the trailing content we reserved (the final newline)
178+ let trailing_content = & self . source [ trailing_emit_end..] ;
179+ let trailing_char_len = trailing_content. chars ( ) . count ( ) ;
180+
181+ let trailing_start_char = self . last_char_offset ;
182+ let trailing_start_byte = self . last_byte_offset ;
183+ let trailing_end_char = trailing_start_char + trailing_char_len;
184+ let trailing_end_byte = self . source . len ( ) ;
185+
186+ // Create paragraph range that includes the trailing content
187+ self . paragraphs . ranges . push ( (
188+ trailing_start_byte..trailing_end_byte,
189+ trailing_start_char..trailing_end_char,
190+ ) ) ;
191+
192+ // Start a new HTML segment for this trailing paragraph
193+ self . writer . new_segment ( ) ;
194+ let node_id = self . gen_node_id ( ) ;
195+
196+ // Write the actual trailing content plus ZWSP for cursor positioning
197+ write ! ( & mut self . writer, "<span id=\" {}\" >" , node_id) ?;
198+ escape_html ( & mut self . writer , trailing_content) ?;
199+ self . write ( "\u{200B} </span>" ) ?;
200+
201+ // Record offset mapping for the trailing content
202+ let mapping = OffsetMapping {
203+ byte_range : trailing_start_byte..trailing_end_byte,
204+ char_range : trailing_start_char..trailing_end_char,
205+ node_id,
206+ char_offset_in_node : 0 ,
207+ child_index : None ,
208+ utf16_len : trailing_char_len + 1 , // Content + ZWSP
209+ } ;
210+
211+ // Create offset_maps/syntax_spans/refs for this trailing paragraph
212+ self . offset_maps_by_para . push ( vec ! [ mapping] ) ;
213+ self . syntax_spans_by_para . push ( vec ! [ ] ) ;
214+ self . refs_by_para . push ( vec ! [ ] ) ;
215+ }
216+
142217 // Get HTML segments from writer
143218 let html_segments = self . writer . into_segments ( ) ;
144219
0 commit comments