Skip to content

Commit

Permalink
fix(latex-renderer): special chars in raw text
Browse files Browse the repository at this point in the history
Escape additional characters in raw text that have special meaning in
LaTeX (`~`, `^` and `\`) and that the `render_raw_text` method of the
LaTeX renderer previously left unescaped.

Refactor the corresponding test case along the lines of the other
parameterized test cases to improve the code's consistency.
  • Loading branch information
choeppler committed Oct 2, 2024
1 parent d29c5f4 commit 1c04508
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 11 deletions.
20 changes: 15 additions & 5 deletions mistletoe/latex_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
LaTeX renderer for mistletoe.
"""

import re
import string
from itertools import chain
from urllib.parse import quote
Expand Down Expand Up @@ -78,11 +79,20 @@ def render_escape_sequence(self, token):
return self.render_inner(token)

def render_raw_text(self, token, escape=True):
    r"""Render raw text, escaping the characters that are special in LaTeX.

    The characters ``$ # & % _ { }`` are prefixed with a backslash, while
    ``~``, ``^`` and the backslash itself are replaced by the commands
    ``\textasciitilde{}``, ``\textasciicircum{}`` and ``\textbackslash{}``.

    Args:
        token: token whose ``content`` string is rendered.
        escape: when false, ``token.content`` is returned unchanged.

    Returns:
        The (optionally escaped) text.
    """
    if not escape:
        return token.content

    # Every character is translated in one pass, so the backslashes and
    # braces introduced by one replacement can never be escaped a second
    # time by another replacement.
    replacements = {char: '\\' + char for char in '$#&%_{}'}
    # The \text* commands gobble up whitespace behind them -> {} to prevent that.
    replacements['\\'] = '\\textbackslash{}'
    replacements['~'] = '\\textasciitilde{}'
    replacements['^'] = '\\textasciicircum{}'
    return token.content.translate(str.maketrans(replacements))

def render_heading(self, token):
inner = self.render_inner(token)
Expand Down
28 changes: 22 additions & 6 deletions test/test_latex_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import mistletoe.latex_renderer
from mistletoe.latex_renderer import LaTeXRenderer
from mistletoe import markdown
import markdown


class TestLaTeXRenderer(TestCase):
Expand All @@ -13,14 +14,14 @@ def setUp(self):
self.addCleanup(self.renderer.__exit__, None, None, None)

def _test_token(self, token_name, expected_output, children=True,
                without_attrs=None, render_func_kwargs=None, **kwargs):
    """Render a mocked token and compare the result with the expectation.

    Args:
        token_name: key into ``self.renderer.render_map`` selecting the
            render function under test.
        expected_output: value the render function is expected to return.
        children: when true the mock token gets a list-like ``MagicMock``
            as ``children``; otherwise ``children`` is ``None``.
        without_attrs: optional names of attributes to delete from the
            mock token before rendering.
        render_func_kwargs: optional keyword arguments forwarded to the
            render function (e.g. ``{'escape': False}``).
        **kwargs: attributes to set on the mock token.
    """
    render_func = self.renderer.render_map[token_name]
    children = mock.MagicMock(spec=list) if children else None
    mock_token = mock.Mock(children=children, **kwargs)
    for attr in without_attrs or ():
        delattr(mock_token, attr)
    # None instead of a shared ``{}`` default: a mutable default argument
    # would be one dict reused by every call.
    actual = render_func(mock_token, **(render_func_kwargs or {}))
    self.assertEqual(actual, expected_output)

def test_strong(self):
self._test_token('Strong', '\\textbf{inner}')
Expand Down Expand Up @@ -72,10 +73,25 @@ def test_math(self):
self._test_token('Math', expected,
children=False, content='$ 1 + 2 = 3 $')

@parameterized.expand([
    # Characters that are escaped with a leading backslash.
    ('$', '\\$'),
    ('&', '\\&'),
    ('#', '\\#'),
    ('%', '\\%'),
    ('_', '\\_'),
    ('{', '\\{'),
    ('}', '\\}'),
    # Characters that are replaced by a \text* command.
    ('~', '\\textasciitilde{}'),
    ('^', '\\textasciicircum{}'),
    ('\\', '\\textbackslash{}'),
    # Ordinary text must pass through unchanged.
    ('plain text', 'plain text'),
    # Combinations: the output of one replacement must not be escaped
    # again by another one.
    ('\\{', '\\textbackslash{}\\{'),
    ('a~b^c', 'a\\textasciitilde{}b\\textasciicircum{}c'),
])
def test_raw_text(self, target, expected):
    # Each (target, expected) pair is rendered as the content of a
    # RawText token without children.
    self._test_token('RawText', expected, children=False, content=target)

def test_raw_text_no_escape(self):
expected = '$&#%_{}~^\\'
self._test_token('RawText', expected, children=False, content=expected,
render_func_kwargs={'escape': False})

def test_heading(self):
expected = '\n\\section{inner}\n'
Expand Down

0 comments on commit 1c04508

Please sign in to comment.