Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "asciidocr"
version = "0.1.13"
version = "0.1.14"
readme = "README.md"
license = "MIT"
edition = "2024"
Expand Down
50 changes: 43 additions & 7 deletions src/graph/inlines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ impl Inline {

pub fn extract_child_inlines(&mut self) -> VecDeque<Inline> {
match &self {
Inline::InlineSpan(span) => span.inlines.clone().into(),
Inline::InlineSpan(span) => span.extract_span_inlines(),
_ => todo!(),
}
}
Expand Down Expand Up @@ -385,14 +385,22 @@ impl InlineSpan {
pub fn add_inline(&mut self, inline: Inline) {
// update the locations
self.location = Location::reconcile(self.location.clone(), inline.locations());
// combine literals if necessary
if matches!(inline, Inline::InlineLiteral(_)) {
if let Some(Inline::InlineLiteral(prior_literal)) = self.inlines.last_mut() {
prior_literal.add_text_from_inline_literal(inline);
return;
if let Some(Inline::InlineSpan(last_span)) = self.inlines.last_mut() {
if last_span.open {
last_span.add_inline(inline);
} else {
self.inlines.push(inline);
}
} else {
// combine literals if necessary
if matches!(inline, Inline::InlineLiteral(_)) {
if let Some(Inline::InlineLiteral(prior_literal)) = self.inlines.last_mut() {
prior_literal.add_text_from_inline_literal(inline);
return;
}
}
self.inlines.push(inline);
}
self.inlines.push(inline);
}

fn new_footnote_ref(footnote_ref: InlineRef) -> Self {
Expand All @@ -406,6 +414,34 @@ impl InlineSpan {
footnote
}

// extracts the inlines inside the span, closing any open (dangling) spans that may be
// inside of it
//
// Returns a flattened queue of the span's children. A closed child is copied over
// verbatim; an open (unterminated) child span is dissolved: its marker text is
// recovered as a literal and glued onto the front of its first child, and the
// children are spliced into the result in place of the span itself.
fn extract_span_inlines(&self) -> VecDeque<Inline> {
    let mut children = VecDeque::new();
    for inline in self.inlines.iter() {
        // handle any open spans
        if inline.is_open() {
            // Clone so we can mutate the working copy; `self` is only borrowed immutably.
            let mut working_inline = inline.clone();
            // presumably this recovers the span's opening marker as literal text
            // (e.g. a dangling "*") — TODO confirm against produce_literal_from_self
            let open_span_literal = working_inline.produce_literal_from_self();
            let mut inline_children = working_inline.extract_child_inlines();
            // Re-attach the recovered marker text in front of the first child,
            // reusing that child's location for the prepended text.
            if let Some(inline) = inline_children.front_mut() {
                match inline {
                    Inline::InlineLiteral(literal) => {
                        literal.prepend_to_value(open_span_literal, literal.location.clone());
                    }
                    // NOTE(review): a non-literal first child is not handled yet.
                    _ => todo!(),
                }
            } else {
                // NOTE(review): an open span with no children is not handled yet.
                todo!()
            }
            children.extend(inline_children);
        } else {
            // Closed inlines pass through unchanged.
            children.push_back(inline.clone());
        }
    }
    children
}

/// Deconstructs a footnote span into the relevant footnote definition ID (to be applied to
/// the leafblock that contains the footnote text), an InlineSpan `Sup<InlineRef>` that replaces the footnote
/// with a link to said leafblock, and the vector of inlines that will be inserted into
Expand Down
42 changes: 28 additions & 14 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,18 @@ impl Parser {
last_inline.close();
self.in_inline_span = false;
return Ok(());
} else if let Inline::InlineSpan(last_span) = last_inline {
if let Some(last_internal_inline) = last_span.inlines.last_mut() {
if inline == *last_internal_inline {
last_internal_inline.reconcile_locations(inline.locations());
last_internal_inline.close();
} else {
last_span.add_inline(inline);
}
} else {
last_span.add_inline(inline);
}
return Ok(());
}
}
// handle newline tokens prior to constrained spans
Expand Down Expand Up @@ -1291,26 +1303,28 @@ impl Parser {
_ => todo!(),
}
// put any appended inlines into the stack at the relevant position
for child_inline in children {
self.inline_stack.insert(open_span_idx, child_inline)
while children.len() > 0 {
if let Some(child) = children.pop_back() {
self.inline_stack.insert(open_span_idx, child);
}
}

// consolidate any resultant or remaining adjacent literals (this should be extracted to a function)
let mut temp_stack: VecDeque<Inline> = VecDeque::new();
let mut inline_stack_iter = self.inline_stack.iter_mut().peekable();
while inline_stack_iter.peek().is_some() {
if let Some(current) = inline_stack_iter.next() {
if let Inline::InlineLiteral(current_literal) = current {
if let Some(Inline::InlineLiteral(next_literal)) =
inline_stack_iter.next()
{
current_literal.combine_literals(next_literal.clone());
}
let mut temp_stack: Vec<Inline> = vec![];
while let Some(mut inline) = self.inline_stack.pop_front() {
if temp_stack.len() == 0 {
temp_stack.push(inline);
} else if inline.is_literal() {
if let Some(Inline::InlineLiteral(last_in_stack)) = temp_stack.last_mut() {
last_in_stack.combine_literals(inline.extract_literal());
} else {
temp_stack.push(inline);
}
temp_stack.push_back(current.clone());
} else {
temp_stack.push(inline);
}
}
self.inline_stack = temp_stack;
self.inline_stack = temp_stack.into();
} else {
// ... or if there are no children, add the token to the back of the last one; this
// is a little hacky, but it is cleaner compared to the rest of the code just to
Expand Down
68 changes: 59 additions & 9 deletions src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -715,21 +715,29 @@ impl<'a> Scanner<'a> {
constrained: TokenType,
unconstrained: TokenType,
) -> Result<Token, ScannerError> {
let inline_markup_chars = ['*', '_', '`', '+', '^', '~', '#'];
let mut end_of_inline_markers = vec![
' ', '\0', '.', ',', ';', ':', '\n', ')', '"', '!', '?', '\'', ']', '…', '“', '”', '‘',
'’',
];
let mut beginning_of_inline_markers = vec![' ', '\n', '\0', ']', '(', '"', '['];
end_of_inline_markers.extend_from_slice(&inline_markup_chars);
beginning_of_inline_markers.extend_from_slice(&inline_markup_chars);
// guard clause against dangling markup
if self.peek() == ' ' && self.peek_back() == ' ' {
self.add_text_until_next_markup()
} else if [
' ', '\0', '.', ',', ';', ':', '\n', ')', '"', '!', '?', '\'', ']', '…', '“', '”', '‘',
'’',
]
.contains(&self.peek())
|| [' ', '\n', '\0', ']', '(', '"', '['].contains(&self.peek_back()) && self.peek() != c
{
self.add_token(constrained, false, 0)
} else if self.peek() == c {
// we've got an unconstrained version
// the next character is the same
// we've got an unconstrained (i.e., "**foo**bar") version
self.current += 1;
self.add_token(unconstrained, false, 0)
}
// we're at the end of a span, or are butted up against another inline marker
else if end_of_inline_markers.contains(&self.peek()) ||
// or we're at the beginning, or butted up against another inline marker
beginning_of_inline_markers.contains(&self.peek_back()) && self.peek() != c
{
self.add_token(constrained, false, 0)
} else {
self.add_text_until_next_markup()
}
Expand Down Expand Up @@ -1282,6 +1290,48 @@ mod tests {
scan_and_assert_eq(&markup, expected_tokens);
}

// Verifies that a constrained emphasis span ("_bar_") butted directly against
// another inline-format marker (e.g. "*_bar_*") is scanned as separate marker
// tokens rather than swallowed into plain text. Parameterized over every
// single-character marker except '_' itself.
#[rstest]
#[case('*', TokenType::Strong)]
#[case('`', TokenType::Monospace)]
#[case('+', TokenType::Literal)]
#[case('^', TokenType::Superscript)]
#[case('~', TokenType::Subscript)]
#[case('#', TokenType::Mark)]
fn inline_formatting_by_other(#[case] markup_char: char, #[case] expected_token: TokenType) {
    // "Somx" (not "Some") keeps the leading word neutral; e.g. "Somx *_bar_* bar."
    let markup = format!("Somx {}_bar_{} bar.", markup_char, markup_char);
    let expected_tokens = vec![
        // cols 1-5: leading text "Somx "
        Token::new_default(
            TokenType::Text,
            "Somx ".to_string(),
            Some("Somx ".to_string()),
            1,
            1,
            5,
        ),
        // col 6: opening outer marker (the parameterized char)
        Token::new_default(expected_token, markup_char.to_string(), None, 1, 6, 6),
        // col 7: opening emphasis marker
        Token::new_default(TokenType::Emphasis, "_".to_string(), None, 1, 7, 7),
        // cols 8-10: the emphasized text
        Token::new_default(
            TokenType::Text,
            "bar".to_string(),
            Some("bar".to_string()),
            1,
            8,
            10,
        ),
        // col 11: closing emphasis marker
        Token::new_default(TokenType::Emphasis, "_".to_string(), None, 1, 11, 11),
        // col 12: closing outer marker
        Token::new_default(expected_token, markup_char.to_string(), None, 1, 12, 12),
        // cols 13-end: trailing text " bar."
        Token::new_default(
            TokenType::Text,
            " bar.".to_string(),
            Some(" bar.".to_string()),
            1,
            13,
            markup.len(),
        ),
    ];
    scan_and_assert_eq(&markup, expected_tokens);
}

#[rstest]
#[case('*')]
#[case('_')]
Expand Down
2 changes: 1 addition & 1 deletion src/scanner/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ pub enum TokenType {

// garden-variety text
Hyperlink, // http://whatever.txt
Email, // cats@dogs.foo
Email, // cats@dogs.foo
Text,

// character reference, such as "&mdash;"
Expand Down
1 change: 1 addition & 0 deletions tests/data/inlines/span-inside-spans-mixed.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*_This_ shouldn't be a problem!*
113 changes: 113 additions & 0 deletions tests/data/inlines/span-inside-spans-mixed.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
"name": "document",
"type": "block",
"blocks": [
{
"name": "paragraph",
"type": "block",
"inlines": [
{
"name": "span",
"type": "inline",
"variant": "strong",
"form": "constrained",
"inlines": [
{
"name": "span",
"type": "inline",
"variant": "emphasis",
"form": "constrained",
"inlines": [
{
"name": "text",
"type": "string",
"value": "This",
"location": [
{
"line": 1,
"col": 3
},
{
"line": 1,
"col": 6
}
]
}
],
"location": [
{
"line": 1,
"col": 2
},
{
"line": 1,
"col": 7
}
]
},
{
"name": "text",
"type": "string",
"value": " shouldn't be a problem!",
"location": [
{
"line": 1,
"col": 8
},
{
"line": 1,
"col": 31
}
]
}
],
"location": [
{
"line": 1,
"col": 1
},
{
"line": 1,
"col": 32
}
]
},
{
"name": "text",
"type": "string",
"value": " ",
"location": [
{
"line": 1,
"col": 33
},
{
"line": 1,
"col": 33
}
]
}
],
"location": [
{
"line": 1,
"col": 1
},
{
"line": 1,
"col": 33
}
]
}
],
"location": [
{
"line": 1,
"col": 1
},
{
"line": 1,
"col": 33
}
]
}
5 changes: 5 additions & 0 deletions tests/data/inlines/span-inside-spans.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*_foo_*

**_foo_**

**__foo__**
Loading