From 794320d20252ef7a45e74188f764935cf9ba450e Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Wed, 17 Dec 2025 13:10:02 -0500 Subject: [PATCH 1/7] Ensure we're scanning correctly --- src/scanner/mod.rs | 70 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index bbd2100..577cbf1 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -715,21 +715,31 @@ impl<'a> Scanner<'a> { constrained: TokenType, unconstrained: TokenType, ) -> Result { + let inline_markup_chars = ['*', '_', '`', '+', '^', '~', '#']; + let mut end_of_inline_markers = vec![ + ' ', '\0', '.', ',', ';', ':', '\n', ')', '"', '!', '?', '\'', ']', '…', '“', '”', '‘', + '’', + ]; + let mut beginning_of_inline_markers = vec![' ', '\n', '\0', ']', '(', '"', '[']; + end_of_inline_markers.extend_from_slice(&inline_markup_chars); + beginning_of_inline_markers.extend_from_slice(&inline_markup_chars); // guard clause against dangling markup if self.peek() == ' ' && self.peek_back() == ' ' { self.add_text_until_next_markup() - } else if [ - ' ', '\0', '.', ',', ';', ':', '\n', ')', '"', '!', '?', '\'', ']', '…', '“', '”', '‘', - '’', - ] - .contains(&self.peek()) - || [' ', '\n', '\0', ']', '(', '"', '['].contains(&self.peek_back()) && self.peek() != c - { - self.add_token(constrained, false, 0) } else if self.peek() == c { - // we've got an unconstrained version + // the next character is the same + // we've got an unconstrained (i.e., "**foo**bar") version self.current += 1; self.add_token(unconstrained, false, 0) + + } + + // we're at the end of a span, or are butted up against another inline marker + else if end_of_inline_markers.contains(&self.peek()) || + // or we're at the beginning, or butted up against another inline marker + beginning_of_inline_markers.contains(&self.peek_back()) && self.peek() != c + { + self.add_token(constrained, false, 0) } else { self.add_text_until_next_markup() } @@ -1282,6 +1292,48 @@ mod tests { scan_and_assert_eq(&markup, expected_tokens); } + #[rstest] + #[case('*', TokenType::Strong)] + #[case('`', TokenType::Monospace)] + #[case('+', TokenType::Literal)] + #[case('^', TokenType::Superscript)] + #[case('~', TokenType::Subscript)] + #[case('#', TokenType::Mark)] + fn inline_formatting_by_other(#[case] markup_char: char, #[case] expected_token: TokenType) { + let markup = format!("Somx {}_bar_{} bar.", markup_char, markup_char); + let expected_tokens = vec![ + Token::new_default( + TokenType::Text, + "Somx ".to_string(), + Some("Somx ".to_string()), + 1, + 1, + 5, + ), + Token::new_default(expected_token, markup_char.to_string(), None, 1, 6, 6), + Token::new_default(TokenType::Emphasis, "_".to_string(), None, 1, 7, 7), + Token::new_default( + TokenType::Text, + "bar".to_string(), + Some("bar".to_string()), + 1, + 8, + 10, + ), + Token::new_default(TokenType::Emphasis, "_".to_string(), None, 1, 11, 11), + Token::new_default(expected_token, markup_char.to_string(), None, 1, 12, 12), + Token::new_default( + TokenType::Text, + " bar.".to_string(), + Some(" bar.".to_string()), + 1, + 13, + markup.len(), + ), + ]; + scan_and_assert_eq(&markup, expected_tokens); + } + #[rstest] #[case('*')] #[case('_')] From 4f8a32d544c570e02ecc5eb8becb471151d73b41 Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Wed, 17 Dec 2025 13:10:20 -0500 Subject: [PATCH 2/7] Beginning of failing test; note that the json is garbage --- tests/data/inlines/span-inside-spans.adoc | 5 + tests/data/inlines/span-inside-spans.json | 242 ++++++++++++++++++++++ tests/inline_tests.rs | 9 + 3 files changed, 256 insertions(+) create mode 100644 tests/data/inlines/span-inside-spans.adoc create mode 100644 tests/data/inlines/span-inside-spans.json diff --git a/tests/data/inlines/span-inside-spans.adoc b/tests/data/inlines/span-inside-spans.adoc new file mode 100644 index 0000000..51e1ae0 --- /dev/null +++ b/tests/data/inlines/span-inside-spans.adoc @@ -0,0 +1,5 @@ +*_foo_* + +**_foo_** + +**__foo__** diff --git a/tests/data/inlines/span-inside-spans.json b/tests/data/inlines/span-inside-spans.json new file mode 100644 index 0000000..84fe96d --- /dev/null +++ b/tests/data/inlines/span-inside-spans.json @@ -0,0 +1,242 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "*", + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 1, + "col": 1 + } + ] + }, + { + "name": "span", + "type": "inline", + "variant": "emphasis", + "form": "constrained", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "foo", + "location": [ + { + "line": 1, + "col": 3 + }, + { + "line": 1, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 2 + }, + { + "line": 1, + "col": 6 + } + ] + }, + { + "name": "span", + "type": "inline", + "variant": "strong", + "form": "constrained", + "inlines": [], + "location": [ + { + "line": 1, + "col": 7 + }, + { + "line": 1, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 1, + "col": 7 + } + ] + }, + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "**", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 2 + } + ] + }, + { + "name": "span", + "type": "inline", + "variant": "emphasis", + "form": "constrained", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "foo", + "location": [ + { + "line": 3, + "col": 4 + }, + { + "line": 3, + "col": 6 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 7 + } + ] + }, + { + "name": "span", + "type": "inline", + "variant": "strong", + "form": "unconstrained", + "inlines": [], + "location": [ + { + "line": 3, + "col": 8 + }, + { + "line": 3, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 9 + } + ] + }, + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "*x*", + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 2 + } + ] + }, + { + "name": "span", + "type": "inline", + "variant": "emphasis", + "form": "unconstrained", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "foo**", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 11 + } + ] +} diff --git a/tests/inline_tests.rs b/tests/inline_tests.rs index 0c4a3ba..664e3a0 100644 --- a/tests/inline_tests.rs +++ b/tests/inline_tests.rs @@ -36,6 +36,15 @@ fn test_spans_with_chars_between(#[case] markup_char: &str, #[case] variant: &st assert_parsed_doc_matches_expected_asg_from_str(&adoc_str, &asg_json_str) } +#[test] +fn test_spans_inside_spans() { + let adoc_str = fs::read_to_string("tests/data/inlines/span-inside-spans.adoc") + .expect("Unable to read asciidoc test template"); + let asg_json_str = fs::read_to_string("tests/data/inlines/span-inside-spans.json") + .expect("Unable to read asg json test template"); + assert_parsed_doc_matches_expected_asg_from_str(&adoc_str, &asg_json_str) +} + #[rstest] #[case::emphasis("_")] #[case::strong("*")] From b67557a84ec47d1dd2fded09ecb9f36737aeebfd Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Wed, 17 Dec 2025 13:14:08 -0500 Subject: [PATCH 3/7] Progress on spans inside spans, now to fix regression --- src/graph/inlines.rs | 20 ++- src/parser.rs | 12 ++ tests/data/inlines/span-inside-spans.json | 179 ++++++++++------------ 3 files changed, 102 insertions(+), 109 deletions(-) diff --git a/src/graph/inlines.rs b/src/graph/inlines.rs index d7ff8af..700c50b 100644 --- a/src/graph/inlines.rs +++ b/src/graph/inlines.rs @@ -12,7 +12,9 @@ use super::{ nodes::{Location, NodeTypes}, substitutions::CHARREF_MAP, }; -use crate::scanner::tokens::{Token, TokenType}; +use crate::{ + scanner::tokens::{Token, TokenType}, +}; /// Inlines enum containing literals, spans, and references (the latter not implemented) #[derive(Serialize, Clone, Debug)] @@ -385,14 +387,18 @@ impl InlineSpan { pub fn add_inline(&mut self, inline: Inline) { // update the locations self.location = Location::reconcile(self.location.clone(), inline.locations()); - // combine literals if necessary - if matches!(inline, Inline::InlineLiteral(_)) { - if let Some(Inline::InlineLiteral(prior_literal)) = self.inlines.last_mut() { - prior_literal.add_text_from_inline_literal(inline); - return; + if let Some(Inline::InlineSpan(last_span)) = self.inlines.last_mut() { + last_span.add_inline(inline); + } else { + // combine literals if necessary + if matches!(inline, Inline::InlineLiteral(_)) { + if let Some(Inline::InlineLiteral(prior_literal)) = self.inlines.last_mut() { + prior_literal.add_text_from_inline_literal(inline); + return; + } } + self.inlines.push(inline); } - self.inlines.push(inline); } fn new_footnote_ref(footnote_ref: InlineRef) -> Self { diff --git a/src/parser.rs b/src/parser.rs index 7917475..64396d4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -144,6 +144,7 @@ impl Parser { for result in tokens { match result { Ok(token) => { + let _ = dbg!(&token); let token_type = token.token_type(); self.token_into(token, &mut asg)?; @@ -832,6 +833,16 @@ impl Parser { last_inline.close(); self.in_inline_span = false; return Ok(()); + } else if let Inline::InlineSpan(last_span) = last_inline { + if let Some(last_internal_inline) = last_span.inlines.last_mut() { + if inline == *last_internal_inline { + last_internal_inline.reconcile_locations(inline.locations()); + last_internal_inline.close(); + } + } else { + last_span.add_inline(inline); + } + return Ok(()); } } // handle newline tokens prior to constrained spans @@ -1272,6 +1283,7 @@ impl Parser { } fn handle_dangling_spans(&mut self) { + dbg!(&self.inline_stack); // look for the last span in the stack if let Some(open_span_idx) = self .inline_stack diff --git a/tests/data/inlines/span-inside-spans.json b/tests/data/inlines/span-inside-spans.json index 84fe96d..f6e5f50 100644 --- a/tests/data/inlines/span-inside-spans.json +++ b/tests/data/inlines/span-inside-spans.json @@ -6,39 +6,42 @@ "name": "paragraph", "type": "block", "inlines": [ - { - "name": "text", - "type": "string", - "value": "*", - "location": [ - { - "line": 1, - "col": 1 - }, - { - "line": 1, - "col": 1 - } - ] - }, { "name": "span", "type": "inline", - "variant": "emphasis", + "variant": "strong", "form": "constrained", "inlines": [ { - "name": "text", - "type": "string", - "value": "foo", + "name": "span", + "type": "inline", + "variant": "emphasis", + "form": "constrained", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "foo", + "location": [ + { + "line": 1, + "col": 3 + }, + { + "line": 1, + "col": 5 + } + ] + } + ], "location": [ { "line": 1, - "col": 3 + "col": 2 }, { "line": 1, - "col": 5 + "col": 6 } ] } @@ -46,24 +49,7 @@ "location": [ { "line": 1, - "col": 2 - }, - { - "line": 1, - "col": 6 - } - ] - }, - { - "name": "span", - "type": "inline", - "variant": "strong", - "form": "constrained", - "inlines": [], - "location": [ - { - "line": 1, - "col": 7 + "col": 1 }, { "line": 1, @@ -87,39 +73,42 @@ "name": "paragraph", "type": "block", "inlines": [ - { - "name": "text", - "type": "string", - "value": "**", - "location": [ - { - "line": 3, - "col": 1 - }, - { - "line": 3, - "col": 2 - } - ] - }, { "name": "span", "type": "inline", - "variant": "emphasis", - "form": "constrained", + "variant": "strong", + "form": "unconstrained", "inlines": [ { - "name": "text", - "type": "string", - "value": "foo", + "name": "span", + "type": "inline", + "variant": "emphasis", + "form": "constrained", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "foo", + "location": [ + { + "line": 3, + "col": 4 + }, + { + "line": 3, + "col": 6 + } + ] + } + ], "location": [ { "line": 3, - "col": 4 + "col": 3 }, { "line": 3, - "col": 6 + "col": 7 } ] } @@ -127,24 +116,7 @@ "location": [ { "line": 3, - "col": 3 - }, - { - "line": 3, - "col": 7 - } - ] - }, - { - "name": "span", - "type": "inline", - "variant": "strong", - "form": "unconstrained", - "inlines": [], - "location": [ - { - "line": 3, - "col": 8 + "col": 1 }, { "line": 3, @@ -168,39 +140,42 @@ "name": "paragraph", "type": "block", "inlines": [ - { - "name": "text", - "type": "string", - "value": "*x*", - "location": [ - { - "line": 5, - "col": 1 - }, - { - "line": 5, - "col": 2 - } - ] - }, { "name": "span", "type": "inline", - "variant": "emphasis", + "variant": "strong", "form": "unconstrained", "inlines": [ { - "name": "text", - "type": "string", - "value": "foo**", + "name": "span", + "type": "inline", + "variant": "emphasis", + "form": "unconstrained", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "foo", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 7 + } + ] + } + ], "location": [ { "line": 5, - "col": 5 + "col": 3 }, { "line": 5, - "col": 11 + "col": 9 } ] } @@ -208,7 +183,7 @@ "location": [ { "line": 5, - "col": 3 + "col": 1 }, { "line": 5, @@ -239,4 +214,4 @@ "col": 11 } ] -} +} \ No newline at end of file From 870ac5e214fb73c4a0bd9b07b8bacc8b782f331c Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Thu, 18 Dec 2025 16:15:57 -0500 Subject: [PATCH 4/7] Fix regression --- src/graph/inlines.rs | 34 +++++++++++++++++++++++++---- src/parser.rs | 52 ++++++++++++++++++++++++++++++-------------- 2 files changed, 66 insertions(+), 20 deletions(-) diff --git a/src/graph/inlines.rs b/src/graph/inlines.rs index 700c50b..e16562c 100644 --- a/src/graph/inlines.rs +++ b/src/graph/inlines.rs @@ -12,9 +12,7 @@ use super::{ nodes::{Location, NodeTypes}, substitutions::CHARREF_MAP, }; -use crate::{ - scanner::tokens::{Token, TokenType}, -}; +use crate::scanner::tokens::{Token, TokenType}; /// Inlines enum containing literals, spans, and references (the latter not implemented) #[derive(Serialize, Clone, Debug)] @@ -158,7 +156,7 @@ impl Inline { pub fn extract_child_inlines(&mut self) -> VecDeque { match &self { - Inline::InlineSpan(span) => span.inlines.clone().into(), + Inline::InlineSpan(span) => span.extract_span_inlines(), _ => todo!(), } } @@ -412,6 +410,34 @@ impl InlineSpan { footnote } + // extracts the inlines inside the span, closing any open (dangling) spans that may be + // inside of it + fn extract_span_inlines(&self) -> VecDeque { + let mut children = VecDeque::new(); + for inline in self.inlines.iter() { + // handle any open spans + if inline.is_open() { + let mut working_inline = inline.clone(); + let open_span_literal = working_inline.produce_literal_from_self(); + let mut inline_children = working_inline.extract_child_inlines(); + if let Some(inline) = inline_children.front_mut() { + match inline { + Inline::InlineLiteral(literal) => { + literal.prepend_to_value(open_span_literal, literal.location.clone()); + } + _ => todo!(), + } + } else { + todo!() + } + children.extend(inline_children); + } else { + children.push_back(inline.clone()); + } + } + children + } + /// Deconstructs a footnote span into the relevant footnote definition ID (to be applied to /// the leafblock that contains the footnote text), an InlineSpan `Sup` that replaces the footnote /// with a link to said leafblock, and the vector of inlines that will be inserted into diff --git a/src/parser.rs b/src/parser.rs index 64396d4..90b75a1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -144,7 +144,6 @@ impl Parser { for result in tokens { match result { Ok(token) => { - let _ = dbg!(&token); let token_type = token.token_type(); self.token_into(token, &mut asg)?; @@ -835,9 +834,13 @@ impl Parser { return Ok(()); } else if let Inline::InlineSpan(last_span) = last_inline { if let Some(last_internal_inline) = last_span.inlines.last_mut() { + // okay so what needs to happen is that spans deconstruct themselves, + // I think if inline == *last_internal_inline { last_internal_inline.reconcile_locations(inline.locations()); last_internal_inline.close(); + } else { + last_span.add_inline(inline); } } else { last_span.add_inline(inline); @@ -1283,7 +1286,6 @@ impl Parser { } fn handle_dangling_spans(&mut self) { - dbg!(&self.inline_stack); // look for the last span in the stack if let Some(open_span_idx) = self .inline_stack @@ -1303,26 +1305,44 @@ impl Parser { _ => todo!(), } // put any appended inlines into the stack at the relevant position - for child_inline in children { - self.inline_stack.insert(open_span_idx, child_inline) + while children.len() > 0 { + if let Some(child) = children.pop_back() { + self.inline_stack.insert(open_span_idx, child); + } } // consolidate any resultant or remaining adjacent literals (this should be extracted to a function) - let mut temp_stack: VecDeque = VecDeque::new(); - let mut inline_stack_iter = self.inline_stack.iter_mut().peekable(); - while inline_stack_iter.peek().is_some() { - if let Some(current) = inline_stack_iter.next() { - if let Inline::InlineLiteral(current_literal) = current { - if let Some(Inline::InlineLiteral(next_literal)) = - inline_stack_iter.next() - { - current_literal.combine_literals(next_literal.clone()); - } + let mut temp_stack: Vec = vec![]; + while let Some(mut inline) = self.inline_stack.pop_front() { + if temp_stack.len() == 0 { + temp_stack.push(inline); + } else if inline.is_literal() { + if let Some(Inline::InlineLiteral(last_in_stack)) = temp_stack.last_mut() { + last_in_stack.combine_literals(inline.extract_literal()); + } else { + temp_stack.push(inline); } - temp_stack.push_back(current.clone()); + } else { + temp_stack.push(inline); } } - self.inline_stack = temp_stack; + // let mut inline_stack_iter = self.inline_stack.iter_mut().peekable(); + // // this doesn't work like it should! it's skipping the middle, because of the + // // way it's looping + // while inline_stack_iter.peek().is_some() { + // if let Some(current) = inline_stack_iter.next() { + // dbg!(¤t); + // if let Inline::InlineLiteral(current_literal) = current { + // if let Some(Inline::InlineLiteral(next_literal)) = + // inline_stack_iter.next() + // { + // current_literal.combine_literals(next_literal.clone()); + // } + // } + // temp_stack.push_back(current.clone()); + // } + // } + self.inline_stack = temp_stack.into(); } else { // ... or if there are no children, add the token to the back of the last one; this // is a little hacky, but it is cleaner compared to the rest of the code just to From 7ea5e8bdb71eac4ea53e53227c817a26ca639597 Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Thu, 18 Dec 2025 16:16:05 -0500 Subject: [PATCH 5/7] More broken tests --- .../data/inlines/span-inside-spans-mixed.adoc | 1 + .../data/inlines/span-inside-spans-mixed.json | 113 ++++++++++++++++++ tests/inline_tests.rs | 9 ++ 3 files changed, 123 insertions(+) create mode 100644 tests/data/inlines/span-inside-spans-mixed.adoc create mode 100644 tests/data/inlines/span-inside-spans-mixed.json diff --git a/tests/data/inlines/span-inside-spans-mixed.adoc b/tests/data/inlines/span-inside-spans-mixed.adoc new file mode 100644 index 0000000..cc9cfa1 --- /dev/null +++ b/tests/data/inlines/span-inside-spans-mixed.adoc @@ -0,0 +1 @@ +*_This_ shouldn't be a problem!* diff --git a/tests/data/inlines/span-inside-spans-mixed.json b/tests/data/inlines/span-inside-spans-mixed.json new file mode 100644 index 0000000..196154d --- /dev/null +++ b/tests/data/inlines/span-inside-spans-mixed.json @@ -0,0 +1,113 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "span", + "type": "inline", + "variant": "strong", + "form": "constrained", + "inlines": [ + { + "name": "span", + "type": "inline", + "variant": "emphasis", + "form": "constrained", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "This", + "location": [ + { + "line": 1, + "col": 3 + }, + { + "line": 1, + "col": 6 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 2 + }, + { + "line": 1, + "col": 7 + } + ] + }, + { + "name": "text", + "type": "string", + "value": " shouldn't be a problem!", + "location": [ + { + "line": 1, + "col": 8 + }, + { + "line": 1, + "col": 32 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 1, + "col": 32 + } + ] + }, + { + "name": "text", + "type": "string", + "value": " ", + "location": [ + { + "line": 1, + "col": 33 + }, + { + "line": 1, + "col": 33 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 1, + "col": 33 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 1, + "col": 33 + } + ] +} diff --git a/tests/inline_tests.rs b/tests/inline_tests.rs index 664e3a0..aae12d6 100644 --- a/tests/inline_tests.rs +++ b/tests/inline_tests.rs @@ -45,6 +45,15 @@ fn test_spans_inside_spans() { assert_parsed_doc_matches_expected_asg_from_str(&adoc_str, &asg_json_str) } +#[test] +fn test_spans_inside_spans_mixed() { + let adoc_str = fs::read_to_string("tests/data/inlines/span-inside-spans-mixed.adoc") + .expect("Unable to read asciidoc test template"); + let asg_json_str = fs::read_to_string("tests/data/inlines/span-inside-spans-mixed.json") + .expect("Unable to read asg json test template"); + assert_parsed_doc_matches_expected_asg_from_str(&adoc_str, &asg_json_str) +} + #[rstest] #[case::emphasis("_")] #[case::strong("*")] From 3ecd0c75388689d160aec4cc5d068d1396b1a5b9 Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Thu, 18 Dec 2025 16:27:15 -0500 Subject: [PATCH 6/7] Solve for final known bug edge case Update: formatting --- src/graph/inlines.rs | 6 +++- src/parser.rs | 18 ------------ src/scanner/mod.rs | 2 -- src/scanner/tokens.rs | 2 +- .../data/inlines/span-inside-spans-mixed.json | 28 +++++++++---------- 5 files changed, 20 insertions(+), 36 deletions(-) diff --git a/src/graph/inlines.rs b/src/graph/inlines.rs index e16562c..ce7e3d7 100644 --- a/src/graph/inlines.rs +++ b/src/graph/inlines.rs @@ -386,7 +386,11 @@ impl InlineSpan { // update the locations self.location = Location::reconcile(self.location.clone(), inline.locations()); if let Some(Inline::InlineSpan(last_span)) = self.inlines.last_mut() { - last_span.add_inline(inline); + if last_span.open { + last_span.add_inline(inline); + } else { + self.inlines.push(inline); + } } else { // combine literals if necessary if matches!(inline, Inline::InlineLiteral(_)) { diff --git a/src/parser.rs b/src/parser.rs index 90b75a1..b4b4a51 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -834,8 +834,6 @@ impl Parser { return Ok(()); } else if let Inline::InlineSpan(last_span) = last_inline { if let Some(last_internal_inline) = last_span.inlines.last_mut() { - // okay so what needs to happen is that spans deconstruct themselves, - // I think if inline == *last_internal_inline { last_internal_inline.reconcile_locations(inline.locations()); last_internal_inline.close(); @@ -1326,22 +1324,6 @@ impl Parser { temp_stack.push(inline); } } - // let mut inline_stack_iter = self.inline_stack.iter_mut().peekable(); - // // this doesn't work like it should! it's skipping the middle, because of the - // // way it's looping - // while inline_stack_iter.peek().is_some() { - // if let Some(current) = inline_stack_iter.next() { - // dbg!(¤t); - // if let Inline::InlineLiteral(current_literal) = current { - // if let Some(Inline::InlineLiteral(next_literal)) = - // inline_stack_iter.next() - // { - // current_literal.combine_literals(next_literal.clone()); - // } - // } - // temp_stack.push_back(current.clone()); - // } - // } self.inline_stack = temp_stack.into(); } else { // ... or if there are no children, add the token to the back of the last one; this diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 577cbf1..c756673 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -731,9 +731,7 @@ impl<'a> Scanner<'a> { // we've got an unconstrained (i.e., "**foo**bar") version self.current += 1; self.add_token(unconstrained, false, 0) - } - // we're at the end of a span, or are butted up against another inline marker else if end_of_inline_markers.contains(&self.peek()) || // or we're at the beginning, or butted up against another inline marker diff --git a/src/scanner/tokens.rs b/src/scanner/tokens.rs index d8069bc..1057bc2 100644 --- a/src/scanner/tokens.rs +++ b/src/scanner/tokens.rs @@ -280,7 +280,7 @@ pub enum TokenType { // garden-variety text Hyperlink, // http://whatever.txt - Email, // cats@dogs.foo + Email, // cats@dogs.foo Text, // character reference, such as "—" diff --git a/tests/data/inlines/span-inside-spans-mixed.json b/tests/data/inlines/span-inside-spans-mixed.json index 196154d..15ff2dc 100644 --- a/tests/data/inlines/span-inside-spans-mixed.json +++ b/tests/data/inlines/span-inside-spans-mixed.json @@ -45,21 +45,21 @@ } ] }, + { + "name": "text", + "type": "string", + "value": " shouldn't be a problem!", + "location": [ { - "name": "text", - "type": "string", - "value": " shouldn't be a problem!", - "location": [ - { - "line": 1, - "col": 8 - }, - { - "line": 1, - "col": 32 - } - ] + "line": 1, + "col": 8 + }, + { + "line": 1, + "col": 31 } + ] + } ], "location": [ { @@ -110,4 +110,4 @@ "col": 33 } ] -} +} \ No newline at end of file From 199f8c8d671f470ddd47cff2c50b09aa13c1a46a Mon Sep 17 00:00:00 2001 From: delfanbaum Date: Thu, 18 Dec 2025 16:27:30 -0500 Subject: [PATCH 7/7] Bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d868c00..9f50a3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "asciidocr" -version = "0.1.13" +version = "0.1.14" readme = "README.md" license = "MIT" edition = "2024"