Skip to content

Commit

Permalink
feat: show math in plain text in library cards (#36055)
Browse files Browse the repository at this point in the history
Converts mathjax equations to unicode to be rendered as plain text in library card previews
  • Loading branch information
navinkarkera authored Jan 13, 2025
1 parent f4c2b46 commit cd9b90f
Show file tree
Hide file tree
Showing 8 changed files with 292 additions and 1 deletion.
3 changes: 2 additions & 1 deletion openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from rest_framework.exceptions import NotFound

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content.search.plain_text_math import process_mathjax
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
Expand Down Expand Up @@ -220,7 +221,7 @@ class implementation returns only:
# Generate description from the content
description = _get_description_from_block_content(block_type, content_data)
if description:
block_data[Fields.description] = description
block_data[Fields.description] = process_mathjax(description)

except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
Expand Down
161 changes: 161 additions & 0 deletions openedx/core/djangoapps/content/search/plain_text_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""
Helper class to convert mathjax equations to plain text.
"""

import re

import unicodeit


class InvalidMathEquation(Exception):
    """Raised when mathjax equation is invalid. This is used to skip all transformations."""


class EqnPatternNotFound(Exception):
    """Raised when a pattern is not found in equation. This is used to skip a specific transformation."""


class PlainTextMath:
    """
    Converts mathjax equations to plain text using unicodeit and some preprocessing.

    The preprocessing strips markup that has no plain text meaning (bold
    wrappers, ``\\left``/``\\right`` delimiter sizing), removes the leading
    backslash of trigonometric function names and rewrites ``\\frac{x}{y}`` as
    ``(x/y)``. The result is then handed to ``unicodeit.replace``.
    """
    # Each alternative captures the equation body of one supported delimiter
    # pair, so exactly one group of a match can be non-empty.
    equation_pattern = re.compile(
        r'\[mathjaxinline\](.*?)\[\/mathjaxinline\]|\[mathjax\](.*?)\[\/mathjax\]|\\\((.*?)\\\)|\\\[(.*?)\\\]'
    )
    eqn_replacements = (
        # just remove prefix `\`
        ("\\sin", "sin"),
        ("\\cos", "cos"),
        ("\\tan", "tan"),
        ("\\arcsin", "arcsin"),
        ("\\arccos", "arccos"),
        ("\\arctan", "arctan"),
        ("\\cot", "cot"),
        ("\\sec", "sec"),
        ("\\csc", "csc"),
    )
    regex_replacements = (
        # Makes text bold, so not required in plain text.
        (re.compile(r'{\\bf (.*?)}'), r"\1"),
        # `\left` and `\right` are used for matching brackets in mathjax and are
        # not required in plain text. The lookahead keeps longer macros such as
        # `\leftarrow`, `\rightarrow` or `\leftrightarrow` intact.
        (re.compile(r'\\(?:left|right)(?![A-Za-z])'), ""),
    )
    extract_inner_texts = (
        # Replaces any eqn: `\name{inner_text}` with `inner_text`
        "\\mathbf{",
        "\\bm{",
    )
    # Closing bracket of a numerator directly followed (whitespace allowed) by
    # the opening bracket of its denominator.
    frac_open_close_pattern = re.compile(r"}\s*{")

    @staticmethod
    def _nested_bracket_matcher(equation: str, opening_pattern: str) -> tuple[int, int, int, int]:
        r"""
        Matches opening and closing brackets in given string.

        Args:
            equation: string
            opening_pattern: for example, `\mathbf{`

        Returns:
            Tuple of four indexes into `equation`: start of `opening_pattern`,
            start of the inner text, position of the matching `}` and the
            position right after it.

        Raises:
            EqnPatternNotFound: `opening_pattern` does not occur in `equation`.
            InvalidMathEquation: the opening bracket is never closed.
        """
        start = equation.find(opening_pattern)
        if start == -1:
            raise EqnPatternNotFound()
        inner_start = start + len(opening_pattern)
        depth = 0
        for index, char in enumerate(equation[inner_start:], inner_start):
            if char == "{":
                depth += 1
            elif char == "}":
                if depth == 0:
                    # In below example `|` symbol is used to denote index position
                    # |\mathbf{, \mathbf{|, \mathbf{some_text|}, \mathbf{some_text}|
                    return (start, inner_start, index, index + 1)
                depth -= 1
        raise InvalidMathEquation()

    def _fraction_handler(self, equation: str) -> str:
        r"""
        Converts `\frac{x}{y}` to `(x/y)` while handling nested `{}`.

        For example: `\frac{2}{\sqrt{1+y}}` is converted to `(2/\sqrt{1+y})`.
        Every fraction in the equation is converted, including nested ones and
        several fractions next to each other. Conversion stops at the first
        `\frac{..}` that is not directly followed by a `{..}` denominator; that
        fraction and everything after it is left as it is.

        Args:
            equation: string

        Returns:
            String with `\frac` replaced by normal `/` symbol.

        Raises:
            InvalidMathEquation: a numerator or denominator bracket is never closed.
        """
        converted = []
        remaining = equation
        while True:
            try:
                n_start, n_inner_start, n_inner_end, _ = self._nested_bracket_matcher(remaining, "\\frac{")
            except EqnPatternNotFound:
                break

            # The denominator must open right after the numerator closes,
            # otherwise unrelated text in between would be swallowed.
            boundary = self.frac_open_close_pattern.match(remaining, n_inner_end)
            if boundary is None:
                break
            d_open = boundary.end() - 1
            _, d_inner_start, d_inner_end, d_end = self._nested_bracket_matcher(remaining[d_open:], "{")

            # Handle nested fractions
            numerator = self._fraction_handler(remaining[n_inner_start:n_inner_end])
            denominator = self._fraction_handler(remaining[d_open + d_inner_start:d_open + d_inner_end])

            # Now re-create the equation with `(numerator / denominator)`
            converted.append(remaining[:n_start])
            converted.append(f"({numerator}/{denominator})")
            remaining = remaining[d_open + d_end:]
        converted.append(remaining)
        return "".join(converted)

    def _nested_text_extractor(self, equation: str, pattern: str) -> str:
        """
        Replaces every `pattern...}` wrapper in equation by its inner text.

        Nested as well as consecutive wrappers are unwrapped.

        Raises:
            InvalidMathEquation: a wrapper is opened but never closed.
        """
        while True:
            try:
                start, inner_start, inner_end, end = self._nested_bracket_matcher(equation, pattern)
            except EqnPatternNotFound:
                return equation
            # Each pass removes one wrapper, so the loop always terminates.
            equation = equation[:start] + equation[inner_start:inner_end] + equation[end:]

    def _handle_replacements(self, equation: str) -> str:
        """
        Makes a bunch of replacements in equation string.

        Applies, in order, `eqn_replacements`, `extract_inner_texts` and
        `regex_replacements`.
        """
        for q, replacement in self.eqn_replacements:
            equation = equation.replace(q, replacement)
        for wrapper in self.extract_inner_texts:
            equation = self._nested_text_extractor(equation, wrapper)
        for pattern, replacement in self.regex_replacements:
            equation = pattern.sub(replacement, equation)
        return equation

    def run(self, eqn_matches: re.Match) -> str:
        """
        Takes re.Match object and runs conversion process on the matched equation.

        Returns:
            The converted equation, the untouched equation text if any step of
            the conversion fails, or an empty string if the equation is empty.
        """
        equation = next((group for group in eqn_matches.groups() if group), None)
        if equation is None:
            return ""
        try:
            converted = self._handle_replacements(equation)
            converted = self._fraction_handler(converted)
            return unicodeit.replace(converted)
        except Exception:  # pylint: disable=broad-except
            # Best effort: an equation that cannot be converted is kept as is
            # (without its delimiters).
            return equation


# Single shared converter; created once at import time and reused by every call.
processor = PlainTextMath()


def process_mathjax(content: str) -> str:
    """
    Return `content` with each mathjax equation replaced by the output of `processor.run`.

    Equations are located with `processor.equation_pattern`; text outside of
    the matches is returned unchanged.
    """
    equation_regex = processor.equation_pattern
    return equation_regex.sub(processor.run, content)
118 changes: 118 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,3 +477,121 @@ def test_collection_with_published_library(self):
"num_children": 1
}
}

def test_mathjax_plain_text_conversion_for_search(self):
    """
    Test how an HTML block with mathjax equations gets converted to plain text in search description.

    All inputs are joined with `|||` into a single HTML block; the resulting
    description is split on the same separator and compared segment by segment.
    """
    # pylint: disable=line-too-long
    eqns = [
        # (input, expected output)
        ('Simple addition: \\( 2 + 3 \\)', 'Simple addition: 2 + 3'),
        ('Simple subtraction: \\( 5 - 2 \\)', 'Simple subtraction: 5 − 2'),
        ('Simple multiplication: \\( 4 * 6 \\)', 'Simple multiplication: 4 * 6'),
        ('Simple division: \\( 8 / 2 \\)', 'Simple division: 8 / 2'),
        ('Mixed arithmetic: \\( 2 + 3 4 \\)', 'Mixed arithmetic: 2 + 3 4'),
        ('Simple exponentiation: \\[ 2^3 \\]', 'Simple exponentiation: 2³'),
        ('Root extraction: \\[ 16^{1/2} \\]', 'Root extraction: 16¹^/²'),
        ('Exponent with multiple terms: \\[ (2 + 3)^2 \\]', 'Exponent with multiple terms: (2 + 3)²'),
        ('Nested exponents: \\[ 2^(3^2) \\]', 'Nested exponents: 2⁽3²)'),
        ('Mixed roots: \\[ 8^{1/2} 3^2 \\]', 'Mixed roots: 8¹^/² 3²'),
        ('Simple fraction: [mathjaxinline] 3/4 [/mathjaxinline]', 'Simple fraction: 3/4'),
        (
            'Decimal to fraction conversion: [mathjaxinline] 0.75 = 3/4 [/mathjaxinline]',
            'Decimal to fraction conversion: 0.75 = 3/4',
        ),
        ('Mixed fractions: [mathjaxinline] 1 1/2 = 3/2 [/mathjaxinline]', 'Mixed fractions: 1 1/2 = 3/2'),
        (
            'Converting decimals to mixed fractions: [mathjaxinline] 2.5 = 5/2 [/mathjaxinline]',
            'Converting decimals to mixed fractions: 2.5 = 5/2',
        ),
        (
            'Trig identities: [mathjaxinline] \\sin(x + y) = \\sin(x) \\cos(y) + \\cos(x) \\sin(y) [/mathjaxinline]',
            'Trig identities: sin(x + y) = sin(x) cos(y) + cos(x) sin(y)',
        ),
        (
            'Sine, cosine, and tangent: [mathjaxinline] \\sin(x) [/mathjaxinline] [mathjaxinline] \\cos(x) [/mathjaxinline] [mathjaxinline] \\tan(x) [/mathjaxinline]',
            'Sine, cosine, and tangent: sin(x) cos(x) tan(x)',
        ),
        (
            'Hyperbolic trig functions: [mathjaxinline] \\sinh(x) [/mathjaxinline] [mathjaxinline] \\cosh(x) [/mathjaxinline]',
            'Hyperbolic trig functions: sinh(x) cosh(x)',
        ),
        (
            "Simple derivative: [mathjax] f(x) = x^2, f'(x) = 2x [/mathjax]",
            "Simple derivative: f(x) = x², f'(x) = 2x",
        ),
        ('Double integral: [mathjax] int\\int (x + y) dxdy [/mathjax]', 'Double integral: int∫ (x + y) dxdy'),
        (
            'Partial derivatives: [mathjax] f(x,y) = xy, \\frac{\\partial f}{\\partial x} = y [/mathjax] [mathjax] \\frac{\\partial f}{\\partial y} = x [/mathjax]',
            'Partial derivatives: f(x,y) = xy, (∂ f/∂ x) = y (∂ f/∂ y) = x',
        ),
        (
            'Mean and standard deviation: [mathjax] mu = 2, \\sigma = 1 [/mathjax]',
            'Mean and standard deviation: mu = 2, σ = 1',
        ),
        (
            'Binomial probability: [mathjax] P(X = k) = (\\binom{n}{k} p^k (1-p)^{n-k}) [/mathjax]',
            'Binomial probability: P(X = k) = (\\binom{n}{k} pᵏ (1−p)ⁿ⁻ᵏ)',
        ),
        ('Gaussian distribution: [mathjax] N(\\mu, \\sigma^2) [/mathjax]', 'Gaussian distribution: N(μ, σ²)'),
        (
            'Greek letters: [mathjaxinline] \\alpha [/mathjaxinline] [mathjaxinline] \\beta [/mathjaxinline] [mathjaxinline] \\gamma [/mathjaxinline]',
            'Greek letters: α β γ',
        ),
        (
            'Subscripted variables: [mathjaxinline] x_i [/mathjaxinline] [mathjaxinline] y_j [/mathjaxinline]',
            'Subscripted variables: xᵢ yⱼ',
        ),
        ('Superscripted variables: [mathjaxinline] x^{i} [/mathjaxinline]', 'Superscripted variables: xⁱ'),
        (
            'Not supported: \\( \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I \\)',
            'Not supported: \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I',
        ),
        (
            'Bold text: \\( {\\bf a} \\cdot {\\bf b} = |{\\bf a}| |{\\bf b}| \\cos(\\theta) \\)',
            'Bold text: a ⋅ b = |a| |b| cos(θ)',
        ),
        ('Bold text: \\( \\frac{\\sqrt{\\mathbf{2}+3}}{\\sqrt{4}} \\)', 'Bold text: (√{2+3}/√{4})'),
        ('Nested Bold text 1: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text 1: (1/2)'),
        (
            'Nested Bold text 2: \\( \\mathbf{a \\cdot (a \\mathbf{\\times} b)} \\)',
            'Nested Bold text 2: a ⋅ (a × b)'
        ),
        (
            'Nested Bold text 3: \\( \\mathbf{a \\cdot (a \\bm{\\times} b)} \\)',
            'Nested Bold text 3: a ⋅ (a × b)'
        ),
        ('Sqrt test 1: \\(\\sqrt\\)', 'Sqrt test 1: √'),
        ('Sqrt test 2: \\(x^2 + \\sqrt(y)\\)', 'Sqrt test 2: x² + √(y)'),
        ('Sqrt test 3: [mathjaxinline]x^2 + \\sqrt(y)[/mathjaxinline]', 'Sqrt test 3: x² + √(y)'),
        ('Fraction test 1: \\( \\frac{2} {3} \\)', 'Fraction test 1: (2/3)'),
        ('Fraction test 2: \\( \\frac{2}{3} \\)', 'Fraction test 2: (2/3)'),
        ('Fraction test 3: \\( \\frac{\\frac{2}{3}}{4} \\)', 'Fraction test 3: ((2/3)/4)'),
        ('Fraction test 4: \\( \\frac{\\frac{2} {3}}{4} \\)', 'Fraction test 4: ((2/3)/4)'),
        ('Fraction test 5: \\( \\frac{\\frac{2} {3}}{\\frac{4}{3}} \\)', 'Fraction test 5: ((2/3)/(4/3))'),
        # Invalid equations.
        ('Fraction error: \\( \\frac{2} \\)', 'Fraction error: \\frac{2}'),
        ('Fraction error 2: \\( \\frac{\\frac{2}{3}{4} \\)', 'Fraction error 2: \\frac{\\frac{2}{3}{4}'),
        ('Unclosed: [mathjaxinline]x^2', 'Unclosed: [mathjaxinline]x^2'),
        (
            'Missing closing bracket: \\( \\frac{\\frac{2} {3}{\\frac{4}{3}} \\)',
            'Missing closing bracket: \\frac{\\frac{2} {3}{\\frac{4}{3}}'
        ),
        ('No equation: normal text', 'No equation: normal text'),
    ]
    # pylint: enable=line-too-long
    block = BlockFactory.create(
        parent_location=self.toy_course.location,
        category="html",
        display_name="Non-default HTML Block",
        editor="raw",
        use_latex_compiler=True,
        data="|||".join(e[0] for e in eqns),
    )
    doc = {}
    doc.update(searchable_doc_for_course_block(block))
    doc.update(searchable_doc_tags(block.usage_key))
    result = doc['description'].split('|||')
    # Without this check a description that lost trailing segments would pass
    # unnoticed, because only the segments that are present get compared.
    assert len(result) == len(eqns)
    for (source, expected), actual in zip(eqns, result):
        assert actual.strip() == expected, f"Unexpected conversion of {source!r}"
2 changes: 2 additions & 0 deletions requirements/edx/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/kernel.in
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/kernel.in
uritemplate==4.1.1
# via
# drf-spectacular
Expand Down
4 changes: 4 additions & 0 deletions requirements/edx/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2160,6 +2160,10 @@ unicodecsv==0.14.1
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# edx-enterprise
unicodeit==0.7.5
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
unidiff==0.7.5
# via -r requirements/edx/testing.txt
uritemplate==4.1.1
Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1521,6 +1521,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/base.txt
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/base.txt
uritemplate==4.1.1
# via
# -r requirements/edx/base.txt
Expand Down
1 change: 1 addition & 0 deletions requirements/edx/kernel.in
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,4 @@ web-fragments # Provides the ability to render fragments o
wrapt # Better functools.wrapped. TODO: functools has since improved, maybe we can switch?
XBlock[django] # Courseware component architecture
xss-utils # https://github.com/openedx/edx-platform/pull/20633 Fix XSS via Translations
unicodeit # Converts mathjax equation to plain text by using unicode symbols
2 changes: 2 additions & 0 deletions requirements/edx/testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1605,6 +1605,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/base.txt
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/base.txt
unidiff==0.7.5
# via -r requirements/edx/testing.in
uritemplate==4.1.1
Expand Down

0 comments on commit cd9b90f

Please sign in to comment.