Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse units correctly #42

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
- Suggest some good initial configuration for Jupyter notebook, e.g. `print_auto=True` and `ignore_result_overwrite=True`.
- Naming: we call it "uncertainty". Give a hint that others might also call it "error" interchangeably.
- Jupyter Notebook tip to avoid
- Add a warning that users should not rely on the stringified console output for further processing, since it might change more often in the future (e.g. better formatting of units or whitespace). Only rely on the final LaTeX output written to the external file.


```
<api.printable_result.PrintableResult at 0x7f35beb20510>
Expand Down
95 changes: 82 additions & 13 deletions src/api/console_stringifier.py
Splines marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from domain.result import Result
from application.stringifier import Stringifier

def _modify_unit(self, unit: str) -> str:
    r"""
    Returns the modified unit.

    Turns a unit written with LaTeX-style macros into a plain-text
    representation of the form `numerator/denominator`.

    Rules implemented here:

    - All whitespace in the input is ignored.
    - `\squared` and `\cubed` become `^2` and `^3`.
    - `\percent` and `\degree` become `%` and `°`.
    - `\per` or a literal `/` moves only the *next* part into the
      denominator; later parts go back to the numerator.
    - A side with more than one part is wrapped in parentheses when both
      sides are present; a missing numerator is written as `1`.

    Examples: `\m\per\s` gives `m/s`, `\per\mm\cubed` gives `1/mm^3`,
    and an empty unit gives an empty string.

    Args:
        unit: The raw unit string.

    Returns:
        The plain-text unit, or `""` if the unit contains no parts.
    """
    # Remove all whitespace characters (space, tab, newline etc.)
    unit = "".join(unit.split())

    # Detect "\squared" etc.
    unit = unit.replace(r"\squared", "^2").replace(r"\cubed", "^3")

    # Detect special units. Done before splitting so that "\percent" is
    # never mistaken for "\per" followed by "cent".
    unit = unit.replace(r"\percent", r"\%").replace(r"\degree", r"\°")

    # Surround "/" with spaces so that it becomes a token of its own
    unit = unit.replace("/", " / ")

    numerator_parts = []
    denominator_parts = []
    is_next_part_in_denominator = False

    # Split on backslashes and whitespace.
    # NOTE: "|" inside a character class is a literal pipe, so a pipe in the
    # input also acts as a separator.
    for unit_part in re.split(r"[\\|\s]+", unit):
        # Skip empty parts (e.g. the one before the leading backslash)
        if not unit_part:
            continue

        # "/" and "per" only mark the next part as a denominator part
        if unit_part in ("/", "per"):
            is_next_part_in_denominator = True
            continue

        if is_next_part_in_denominator:
            denominator_parts.append(unit_part)
            is_next_part_in_denominator = False
        else:
            numerator_parts.append(unit_part)

    # Handle empty unit
    if not numerator_parts and not denominator_parts:
        return ""

    # Numerator
    if not numerator_parts:
        modified_unit = "1"
    elif len(numerator_parts) == 1 or not denominator_parts:
        modified_unit = " ".join(numerator_parts)
    else:
        modified_unit = f"({' '.join(numerator_parts)})"

    # Denominator
    if denominator_parts:
        modified_unit += "/"
        if len(denominator_parts) == 1:
            modified_unit += denominator_parts[0]
        else:
            modified_unit += f"({' '.join(denominator_parts)})"

    # Tokens such as "per(" are not recognized by the loop above and are
    # cleaned up by these two post-processing steps.
    modified_unit = self.strip_whitespaces_around_parentheses(modified_unit)
    modified_unit = self.replace_per_by_symbol(modified_unit)

    return modified_unit

def strip_whitespaces_around_parentheses(self, string: str) -> str:
    """
    Removes a single space directly next to a parenthesis.

    The replacements are applied one after another, in this order:
    space before `(`, space after `(`, space before `)`, space after `)`.
    """
    replacements = (
        (" (", "("),
        ("( ", "("),
        (" )", ")"),
        (") ", ")"),
    )

    result = string
    for padded, bare in replacements:
        result = result.replace(padded, bare)
    return result

def replace_per_by_symbol(self, string: str) -> str:
    """
    Replaces every stand-alone `per` with ` / `.

    This might be necessary due to limitations of the unit parsing method
    where `per(` is recognized as a single token. For a proper parser, we
    would have to deal with parentheses in a more sophisticated way. As this
    is not the scope of this project for now, we just do a simple replacement
    of the `per` that slipped through that logic.

    Only a `per` that is not part of a longer alphabetic name is replaced,
    so unit names that merely contain these letters (e.g. `ampere`) stay
    intact. A `per` next to a parenthesis, digit, space or the string
    boundary is still replaced.

    Args:
        string: The already assembled unit string.

    Returns:
        The string with every stand-alone `per` replaced by ` / `.
    """
    # Lookarounds reject matches with a letter on either side, so
    # "per(" matches while "ampere" does not.
    return re.sub(r"(?<![A-Za-z])per(?![A-Za-z])", " / ", string)
9 changes: 8 additions & 1 deletion tests/playground.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@

# Manual examples with differently shaped unit strings.

# `\per` followed by two units, without parentheses
wiz.res("a911", 1.05, unit=r"\mm\s\per\N\kg")
# wiz.res("a911", "1.052", 0.25, r"\mm\s\per\N\kg")
# `\per` followed by a parenthesized group
wiz.res("a911_2", 1.05, unit=r"\mm\s\per(\N\kg)")
# Doubled parentheses around the group
wiz.res("more parentheses", 1.05, unit=r"\mm\s\per((\N\kg))")
# Unbalanced parentheses (three opening, one closing)
wiz.res("wrong parentheses", 1.05, unit=r"\mm\s\per(((\N\kg)\T")
# `\per` directly followed by `\percent`, which shares the prefix "per"
wiz.res("a_unit_parsing", "1.0", unit=r"\per\percent")
# No `\per` and no `/` at all
wiz.res("a_unit_parsing_only_numerator", "1.0", unit=r"\m\N\kg")
# Whitespace between a unit and `\squared`
wiz.res("a_unit_squared", "1.0", unit=r"\m \squared")
# Literal `/` instead of `\per`
wiz.res("a_unit_custom_slash", "1.0", unit=r"\m\squared/\s")

# Leading `\per`, i.e. nothing in front of it
wiz.res("1 b", 1.0, 0.01, unit=r"\per\mm\cubed")

Expand All @@ -54,7 +61,7 @@
Decimal("42.0e-30"),
sys=Decimal("0.1e-31"),
stat=Decimal("0.05e-31"),
unit=r"\m\per\s\squared",
unit=r"\m\per\s\squared\newton\per\kg",
)
wiz.res("j", 0.009, None, "", 2) # really bad, but this is valid
# wiz.res("k", 1.55, 0.0, unit=r"\tesla") # -> uncertainty must be positive
Expand Down