From 1c04508ed739d22ec674e664d048787fd6138d31 Mon Sep 17 00:00:00 2001
From: Chris Hoeppler
Date: Tue, 1 Oct 2024 16:31:28 +0200
Subject: [PATCH] fix(latex-renderer): special chars in raw text

Escape some additional characters in raw text which have special meaning
in LaTeX and which previously hadn't been dealt with by
`latex-renderer`'s `render_raw_text`-method.

Refactor the corresponding test case along the lines of the other
parameterized test cases to improve the code's consistency.
---
Review notes (below the "---" marker, so not part of the commit message):
- Line breaks and indentation of this patch were restored from a
  whitespace-collapsed copy; the indentation of context and removed lines
  is reconstructed and should be checked against the target files.
- `_test_token` now defaults `render_func_kwargs` to None rather than a
  shared `{}` literal; hunk line counts are unchanged, but the post-image
  blob id on the test file's `index` line was not recomputed.
- NOTE(review): the added `import markdown` rebinds the name bound by
  `from mistletoe import markdown` on the preceding line; left as is,
  confirm it is intended.

 mistletoe/latex_renderer.py | 20 +++++++++++++++-----
 test/test_latex_renderer.py | 28 ++++++++++++++++++++++------
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/mistletoe/latex_renderer.py b/mistletoe/latex_renderer.py
index ec76d76..3230392 100644
--- a/mistletoe/latex_renderer.py
+++ b/mistletoe/latex_renderer.py
@@ -2,6 +2,7 @@
 LaTeX renderer for mistletoe.
 """
 
+import re
 import string
 from itertools import chain
 from urllib.parse import quote
@@ -78,11 +79,20 @@ def render_escape_sequence(self, token):
         return self.render_inner(token)
 
     def render_raw_text(self, token, escape=True):
-        return (token.content.replace('$', '\\$').replace('#', '\\#')
-                             .replace('{', '\\{').replace('}', '\\}')
-                             .replace('&', '\\&').replace('_', '\\_')
-                             .replace('%', '\\%')
-               ) if escape else token.content
+        """Escape all latex special characters $#&%_{}^~\\ within `token.content`.
+        """
+        if not escape:
+            return token.content
+
+        if not hasattr(self, 'raw_escape_chars'):
+            self.raw_escape_chars = re.compile('([$#&%_{}])')
+
+        content = token.content.replace('\\', '\\textbackslash')
+        content = self.raw_escape_chars.sub(r'\\\1', content)
+        # The \text* commands gobble up whitespace behind them -> {} to prevent that.
+        return content.replace('~', '\\textasciitilde{}') \
+                      .replace('^', '\\textasciicircum{}') \
+                      .replace('\\textbackslash', '\\textbackslash{}')
 
     def render_heading(self, token):
         inner = self.render_inner(token)
diff --git a/test/test_latex_renderer.py b/test/test_latex_renderer.py
index c9b3760..e0c6407 100644
--- a/test/test_latex_renderer.py
+++ b/test/test_latex_renderer.py
@@ -3,6 +3,7 @@
 import mistletoe.latex_renderer
 from mistletoe.latex_renderer import LaTeXRenderer
 from mistletoe import markdown
+import markdown
 
 
 class TestLaTeXRenderer(TestCase):
@@ -13,14 +14,14 @@ def setUp(self):
         self.addCleanup(self.renderer.__exit__, None, None, None)
 
     def _test_token(self, token_name, expected_output, children=True,
-                    without_attrs=None, **kwargs):
+                    without_attrs=None, render_func_kwargs=None, **kwargs):
         render_func = self.renderer.render_map[token_name]
         children = mock.MagicMock(spec=list) if children else None
         mock_token = mock.Mock(children=children, **kwargs)
         without_attrs = without_attrs or []
         for attr in without_attrs:
             delattr(mock_token, attr)
-        self.assertEqual(render_func(mock_token), expected_output)
+        self.assertEqual(render_func(mock_token, **(render_func_kwargs or {})), expected_output)
 
     def test_strong(self):
         self._test_token('Strong', '\\textbf{inner}')
@@ -72,10 +73,25 @@ def test_math(self):
         self._test_token('Math', expected,
                          children=False, content='$ 1 + 2 = 3 $')
 
-    def test_raw_text(self):
-        expected = '\\$\\&\\#\\{\\}'
-        self._test_token('RawText', expected,
-                         children=False, content='$&#{}')
+    @parameterized.expand([
+        ('$', '\\$'),
+        ('&', '\\&'),
+        ('#', '\\#'),
+        ('%', '\\%'),
+        ('_', '\\_'),
+        ('{', '\\{'),
+        ('}', '\\}'),
+        ('~', '\\textasciitilde{}'),
+        ('^', '\\textasciicircum{}'),
+        ('\\', '\\textbackslash{}'),
+    ])
+    def test_raw_text(self, target, expected):
+        self._test_token('RawText', expected, children=False, content=target)
+
+    def test_raw_text_no_escape(self):
+        expected = '$&#%_{}~^\\'
+        self._test_token('RawText', expected,
+                         children=False, content=expected,
+                         render_func_kwargs={'escape': False})
 
     def test_heading(self):
         expected = '\n\\section{inner}\n'