Skip to content

Commit

Permalink
Add support for serializing math nodes as markdown
Browse files Browse the repository at this point in the history
Closes GH-148.

Reviewed-by: Titus Wormer <tituswormer@gmail.com>
  • Loading branch information
bnchi authored Oct 11, 2024
1 parent e7ed526 commit 9ed45b4
Show file tree
Hide file tree
Showing 9 changed files with 505 additions and 14 deletions.
6 changes: 6 additions & 0 deletions mdast_util_to_markdown/src/configure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ pub struct Options {
/// Setext headings cannot be used for empty headings or headings with a
/// rank of three or more.
pub setext: bool,
/// Whether to support math (text) with a single dollar (`bool`, default: `true`).
/// Single dollars work in Pandoc and many other places, but often interfere with “normal”
/// dollars in text.
/// If you turn this off, you can still use two or more dollars for text math.
pub single_dollar_text_math: bool,
/// Marker to use for strong (`'*'` or `'_'`, default: `'*'`).
pub strong: char,
/// Whether to join definitions without a blank line (`bool`, default:
Expand All @@ -90,6 +95,7 @@ impl Default for Options {
rule_repetition: 3,
rule_spaces: false,
setext: false,
single_dollar_text_math: true,
strong: '*',
tight_definitions: false,
}
Expand Down
34 changes: 27 additions & 7 deletions mdast_util_to_markdown/src/construct_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@ pub enum ConstructName {
/// ^
/// ```
Blockquote,
/// Whole code (indented).
///
/// ```markdown
/// ␠␠␠␠console.log(1)
/// ^^^^^^^^^^^^^^^^^^
/// ```
CodeIndented,
/// Whole code (fenced).
///
/// ````markdown
Expand Down Expand Up @@ -74,6 +67,13 @@ pub enum ConstructName {
/// | ~~~
/// ````
CodeFencedMetaTilde,
/// Whole code (indented).
///
/// ```markdown
/// ␠␠␠␠console.log(1)
/// ^^^^^^^^^^^^^^^^^^
/// ```
CodeIndented,
/// Whole definition.
///
/// ```markdown
Expand Down Expand Up @@ -186,6 +186,26 @@ pub enum ConstructName {
/// ^^^^
/// ```
ListItem,
/// Math (flow).
///
/// ```markdown
/// > | $$
/// ^^
/// > | a
/// ^
/// > | $$
/// ^^
/// ```
MathFlow,
/// Math (flow) meta flag.
///
/// ```markdown
/// > | $$a
/// ^
/// | b
/// | $$
/// ```
MathFlowMeta,
/// Paragraph.
///
/// ```markdown
Expand Down
82 changes: 82 additions & 0 deletions mdast_util_to_markdown/src/handle/inline_math.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
//! JS equivalent: https://github.com/syntax-tree/mdast-util-math/blob/main/lib/index.js#L241
use super::Handle;
use crate::state::{Info, State};
use alloc::format;
use markdown::{
mdast::{InlineMath, Node},
message::Message,
};
use regex::Regex;

impl Handle for InlineMath {
    /// Serialize an inline math node as `$`-delimited markdown.
    ///
    /// The result is `fence + value + fence`, where:
    ///
    /// * `fence` starts at one dollar (two when
    ///   `options.single_dollar_text_math` is off) and grows until the value
    ///   contains no run of exactly that many dollars;
    /// * the value is padded with one space on each side when it contains
    ///   non-whitespace and either both starts and ends with whitespace, or
    ///   starts or ends with a dollar;
    /// * for every unsafe pattern flagged `at_break`, the character at the
    ///   start of each match (together with a preceding `\r` when that
    ///   character is `\n`) is replaced by a single space.
    ///
    /// This handler itself always returns `Ok`.
    fn handle(
        &self,
        state: &mut State,
        _info: &Info,
        _parent: Option<&Node>,
        _node: &Node,
    ) -> Result<alloc::string::String, Message> {
        // Matches a run of exactly `len` dollars: the run must be bounded on
        // both sides by a non-dollar character or by the start/end of the text.
        let exact_dollar_run = |len: usize| {
            Regex::new(&format!("(^|[^$]){}([^$]|$)", "\\$".repeat(len)))
                .expect("pattern consists only of fixed groups and escaped dollars")
        };

        let mut size = if state.options.single_dollar_text_math {
            1
        } else {
            2
        };
        while exact_dollar_run(size).is_match(&self.value) {
            size += 1;
        }
        let sequence = "$".repeat(size);

        let has_non_whitespace = !self.value.chars().all(char::is_whitespace);
        let padded_by_whitespace = self.value.starts_with(char::is_whitespace)
            && self.value.ends_with(char::is_whitespace);
        let touches_dollar = self.value.starts_with('$') || self.value.ends_with('$');

        let mut value = if has_non_whitespace && (padded_by_whitespace || touches_dollar) {
            format!(" {} ", self.value)
        } else {
            self.value.clone()
        };

        for pattern in &mut state.r#unsafe {
            if !pattern.at_break {
                continue;
            }

            State::compile_pattern(pattern);

            if let Some(regex) = &pattern.compiled {
                while let Some(m) = regex.find(&value) {
                    let start = m.start();

                    // Support CRLF: when the match starts on `\n` and a `\r`
                    // precedes it, replace both. Bytes are compared rather
                    // than string slices so that a multi-byte character in
                    // front of the line ending cannot cause a char-boundary
                    // panic, and the check looks at the byte at `start`
                    // itself (the match length is not an end offset).
                    let bytes = value.as_bytes();
                    let is_crlf = start > 0
                        && bytes.get(start) == Some(&b'\n')
                        && bytes[start - 1] == b'\r';
                    let position = if is_crlf { start - 1 } else { start };

                    // NOTE(review): assumes the compiled at-break pattern
                    // begins on a one-byte line ending, as in the JS
                    // equivalent — confirm against `State::compile_pattern`.
                    value.replace_range(position..start + 1, " ");
                }
            }
        }

        Ok(format!("{}{}{}", sequence, value, sequence))
    }
}

/// Character that serialized inline math starts with: a dollar sign.
pub fn peek_inline_math() -> char {
    const MARKER: char = '$';
    MARKER
}
46 changes: 46 additions & 0 deletions mdast_util_to_markdown/src/handle/math.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//! JS equivalent: https://github.com/syntax-tree/mdast-util-math/blob/main/lib/index.js#L204
use super::Handle;
use crate::{
construct_name::ConstructName,
state::{Info, State},
util::{longest_char_streak::longest_char_streak, safe::SafeConfig},
};
use alloc::string::String;
use markdown::{
mdast::{Math, Node},
message::Message,
};

impl Handle for Math {
    /// Serialize a flow math node as a `$`-fenced block.
    ///
    /// Output layout: opening fence, optional meta (made safe with `$`
    /// encoded), a line ending, the value followed by a line ending when the
    /// value is non-empty, and the closing fence.
    ///
    /// The fence is at least two dollars long, and one longer than whatever
    /// `longest_char_streak` reports for `$` in the value (presumably the
    /// longest run of dollars — confirm against that helper).
    fn handle(
        &self,
        state: &mut State,
        _info: &Info,
        _parent: Option<&Node>,
        _node: &Node,
    ) -> Result<alloc::string::String, Message> {
        let fence_len = (longest_char_streak(&self.value, '$') + 1).max(2);
        let fence = "$".repeat(fence_len);

        state.enter(ConstructName::MathFlow);

        // The output starts with the opening fence.
        let mut output = fence.clone();

        if let Some(meta) = &self.meta {
            state.enter(ConstructName::MathFlowMeta);
            // The text produced so far is handed to `safe` as the "before"
            // context, and a line ending as the "after" context.
            let escaped_meta = state.safe(meta, &SafeConfig::new(&output, "\n", Some('$')));
            output.push_str(&escaped_meta);
            state.exit();
        }

        output += "\n";

        // An empty value contributes no content line at all.
        if !self.value.is_empty() {
            output += &self.value;
            output += "\n";
        }

        output += &fence;
        state.exit();

        Ok(output)
    }
}
2 changes: 2 additions & 0 deletions mdast_util_to_markdown/src/handle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ pub mod html;
pub mod image;
pub mod image_reference;
pub mod inline_code;
pub mod inline_math;
pub mod link;
pub mod link_reference;
mod list;
mod list_item;
mod math;
mod paragraph;
mod root;
pub mod strong;
Expand Down
5 changes: 3 additions & 2 deletions mdast_util_to_markdown/src/handle/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ fn phrasing(child: &Node) -> bool {
*child,
Node::Break(_)
| Node::Emphasis(_)
| Node::ImageReference(_)
| Node::Image(_)
| Node::ImageReference(_)
| Node::InlineCode(_)
| Node::LinkReference(_)
| Node::InlineMath(_)
| Node::Link(_)
| Node::LinkReference(_)
| Node::Strong(_)
| Node::Text(_)
)
Expand Down
10 changes: 7 additions & 3 deletions mdast_util_to_markdown/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ use crate::{
construct_name::ConstructName,
handle::{
emphasis::peek_emphasis, html::peek_html, image::peek_image,
image_reference::peek_image_reference, inline_code::peek_inline_code, link::peek_link,
link_reference::peek_link_reference, strong::peek_strong, Handle,
image_reference::peek_image_reference, inline_code::peek_inline_code,
inline_math::peek_inline_math, link::peek_link, link_reference::peek_link_reference,
strong::peek_strong, Handle,
},
r#unsafe::Unsafe,
util::{
Expand Down Expand Up @@ -322,6 +323,8 @@ impl<'a> State<'a> {
Node::Strong(strong) => strong.handle(self, info, parent, node),
Node::Text(text) => text.handle(self, info, parent, node),
Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info, parent, node),
Node::Math(math) => math.handle(self, info, parent, node),
Node::InlineMath(inline_math) => inline_math.handle(self, info, parent, node),
_ => Err(Message {
place: None,
reason: format!("Unexpected node type `{:?}`", node),
Expand Down Expand Up @@ -409,7 +412,7 @@ impl<'a> State<'a> {
index_stack: Vec::new(),
options,
stack: Vec::new(),
r#unsafe: Unsafe::get_default_unsafe(),
r#unsafe: Unsafe::get_default_unsafe(options),
}
}

Expand All @@ -424,6 +427,7 @@ impl<'a> State<'a> {
Node::LinkReference(_) => Some(peek_link_reference()),
Node::Link(link) => Some(peek_link(link, node, self)),
Node::Strong(_) => Some(peek_strong(self)),
Node::InlineMath(_) => Some(peek_inline_math()),
_ => None,
}
}
Expand Down
27 changes: 25 additions & 2 deletions mdast_util_to_markdown/src/unsafe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! JS equivalent: <https://github.com/syntax-tree/mdast-util-to-markdown/blob/main/lib/unsafe.js>.
//! Also: <https://github.com/syntax-tree/mdast-util-to-markdown/blob/fd6a508/lib/types.js#L287-L305>.
use crate::construct_name::ConstructName;
use crate::{construct_name::ConstructName, Options};
use alloc::{vec, vec::Vec};
use regex::Regex;

Expand Down Expand Up @@ -38,7 +38,7 @@ impl<'a> Unsafe<'a> {
}
}

pub fn get_default_unsafe() -> Vec<Self> {
pub fn get_default_unsafe(options: &Options) -> Vec<Self> {
let full_phrasing_spans = vec![
ConstructName::Autolink,
ConstructName::DestinationLiteral,
Expand Down Expand Up @@ -87,6 +87,7 @@ impl<'a> Unsafe<'a> {
ConstructName::CodeFencedMetaTilde,
ConstructName::DestinationLiteral,
ConstructName::HeadingAtx,
ConstructName::MathFlowMeta,
],
vec![],
false,
Expand All @@ -102,6 +103,7 @@ impl<'a> Unsafe<'a> {
ConstructName::CodeFencedMetaTilde,
ConstructName::DestinationLiteral,
ConstructName::HeadingAtx,
ConstructName::MathFlowMeta,
],
vec![],
false,
Expand Down Expand Up @@ -308,6 +310,27 @@ impl<'a> Unsafe<'a> {
false,
),
Self::new('~', None, None, vec![], vec![], true),
Self::new(
'$',
None,
if options.single_dollar_text_math {
None
} else {
"\\$".into()
},
vec![ConstructName::Phrasing],
vec![],
false,
),
Self::new(
'$',
None,
None,
vec![ConstructName::MathFlowMeta],
vec![],
false,
),
Self::new('$', None, "\\$".into(), vec![], vec![], true),
]
}

Expand Down
Loading

0 comments on commit 9ed45b4

Please sign in to comment.