From eea62d4836733f5d45ee63daffa096200ea42f5f Mon Sep 17 00:00:00 2001 From: Marcus Boerger Date: Fri, 6 Sep 2024 21:18:01 +0200 Subject: [PATCH] Add size and flag parsing. (#14) * Add size (flag) parsing: * `ParseByteSize`: Parse a size (defaults to byte sizes). * `ActionByteSize`: Argument parsing `Action` for size values (default for bytes). * The `ActionByteSize` does not allow an empty string. That should lead to `argument flag: value cannot be empty.` covered in test: 'Unit only is invalid.'. * Even if the unit is `0` it is not allowed. --- mbo/app/commands.py | 4 +- mbo/app/flags.py | 162 ++++++++++++++++++++++- mbo/app/flags_test.py | 301 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 457 insertions(+), 10 deletions(-) diff --git a/mbo/app/commands.py b/mbo/app/commands.py index e73d19e..0939795 100644 --- a/mbo/app/commands.py +++ b/mbo/app/commands.py @@ -57,7 +57,7 @@ def Main(self): from argparse_formatter import ParagraphFormatter -from mbo.app.flags import EnumAction +from mbo.app.flags import ActionEnum if TYPE_CHECKING: from _typeshed import OpenTextMode @@ -335,7 +335,7 @@ def Run(argv: list[str] = sys.argv): "--help-output-mode", dest="help_output_mode", type=HelpOutputMode, - action=EnumAction, + action=ActionEnum, help="Output mode for help.", ) command.Prepare(argv) diff --git a/mbo/app/flags.py b/mbo/app/flags.py index 711ef5c..5115a4b 100644 --- a/mbo/app/flags.py +++ b/mbo/app/flags.py @@ -13,10 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""A flag support library.""" +"""A flag support library. + +* `ActionEnum`: Argument parsing `Action` for enum values. +* `ParseEnumList`: Parses lists of enum values. +* `ActionEnumList`: Argument parsing `Action` for enum lists. +* `ParseDateTimeOrTimeDelta`: Parses `datetime.datetime` and time delta values. +* `ActionDateTimeOrTimeDelta`: Argument parsing `Action` for `datetime` and time deltas. 
+* `ParseByteSize`: Parse a size (defaults to byte sizes). +* `ActionByteSize`: Argument parsing `Action` for size values (default for bytes). +""" import argparse import collections +import re from datetime import datetime, time, timedelta, timezone, tzinfo from enum import Enum from typing import Any, Callable, Iterable, Optional, cast @@ -24,7 +34,7 @@ from pytimeparse.timeparse import timeparse -class EnumAction(argparse.Action): +class ActionEnum(argparse.Action): """Argparse action that handles single Enum values.""" def __init__(self, **kwargs): @@ -33,7 +43,7 @@ def __init__(self, **kwargs): raise ValueError(f"Type must be an Enum, provided type is '{enum_type}'.") kwargs.setdefault("choices", tuple(e.value for e in enum_type)) - super(EnumAction, self).__init__(**kwargs) + super(ActionEnum, self).__init__(**kwargs) self._enum = enum_type def __call__(self, parser, namespace, values, option_string=None): @@ -41,7 +51,7 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, value) -def EnumListParser(enum_type: type[Enum]) -> Callable[[str], list[Enum]]: +def ParseEnumList(enum_type: type[Enum]) -> Callable[[str], list[Enum]]: """Implements flags comma separate lists of enum values. In the argument definition default values can be specified as a list of the actual enum values. 
@@ -54,7 +64,7 @@ def EnumListParser(enum_type: type[Enum]) -> Callable[[str], list[Enum]]: parser.add_argument( "--myenum", default=[MyEnum.MY_DEFAULT], - type=EnumListParser(enum_type=MyEnum), + type=ParseEnumList(enum_type=MyEnum), help="Comma separated list of MyEnum {}.".format(set(MyEnum.__members__.keys())), ) args=parser.parse_args(["--nyenum", "my_default,my_other"]) @@ -394,8 +404,10 @@ def _parse(self, value: str) -> datetime: raise argparse.ArgumentError(self, f"{error}") def __call__(self, parser, namespace, values, option_string=None) -> None: + if values is None: + return setattr(namespace, self.dest, None) result: Any - if values is list: + if isinstance(values, list): result = [self._parse(v) for v in values] elif isinstance(values, str): result = self._parse(values) @@ -407,3 +419,141 @@ def __call__(self, parser, namespace, values, option_string=None) -> None: setattr(namespace, self.dest, str(values)) else: setattr(namespace, self.dest, result) + + +def ParseByteSize( + value: str, + *, + suffix_case_sensitive: bool | None = None, + unit: str = "B", + unit_case_sensitive: bool = False, + unit_required: bool = False, +) -> int: + """Parses size values (default bytes). + + Args: + * value: The value string to parse. + * suffix_case_sensitive: Whether the suffix (e.g. `Ki`) is case sensitive (defaults to `unit_case_sensitive`). + * unit_case_sensitive: Whether the unit is case-sensitive (default is False). + * unit_required: Whether the unit is required, this defaults to `True` if `unit` is specified. + * unit: The expected unit (defaults to `B`). + """ + if not value: + raise ValueError("value must not be empty.") + original = value + value = str(value) # If not mypy. 
+ if not unit_case_sensitive and value.lower().endswith(unit.lower()): + value = value[: -len(unit)] + elif value.endswith(unit): + value = value.removesuffix(unit) + elif unit_required: + if value.lower().endswith(unit.lower()): + lower_case = " (found via case insensitive search)" + else: + lower_case = "" + raise ValueError(f"value does not have required unit `{unit}`{lower_case}.") + _SUFFIXES: dict[str, int] = { + "": 1, + "K": 1000**1, # kilo + "M": 1000**2, # mega + "G": 1000**3, # giga + "T": 1000**4, # tera + "P": 1000**5, # peta + "E": 1000**6, # exa + "Z": 1000**7, # zetta + "Y": 1000**8, # yotta + "X": 1000**9, # xona" + "Ki": 1024**1, + "Mi": 1024**2, + "Gi": 1024**3, + "Ti": 1024**4, + "Pi": 1024**5, + "Ei": 1024**6, + "Zi": 1024**7, + "Yi": 1024**8, + "Xi": 1024**9, + } + _REGEX = re.compile(f"([0-9]*[.]?[0-9]*)[ ]?({'|'.join(_SUFFIXES.keys())})?") + value_str = value + if value.endswith(("i", "I")): + value_nc_str = value_str[:-1].upper() + "i" + else: + value_nc_str = value_str.upper() + if value_str and not suffix_case_sensitive: + value_str = value_nc_str + match = _REGEX.fullmatch(value_str) + if not match: + if _REGEX.fullmatch(value_nc_str) or value_str.endswith(("i", "I")): + raise ValueError(f"value has bad suffix case, got '{original}'.") + raise ValueError( + f"value does not match pattern (not a valid byte size), got '{original}'." + ) + number = match.group(1) + if number == "": + raise ValueError("value cannot be empty.") + if number == ".": + raise ValueError("value cannot be '.'.") + result = float(number) if number.find(".") > -1 else int(number) + if match.group(2): + factor = _SUFFIXES.get(match.group(2) or "", 0) + if not factor: + raise ValueError( + f"value has unsupported suffix ({match.group(2)}), got '{original}'." + ) + else: + factor = 1 + result *= factor + return int(result) + + +class ActionByteSize(argparse.Action): + """Parses arguments as bytes, supporting KB, MB, GB, TB, PB, EB as well as KiB etc. suffixes. 
+ + This action has the additional config: + * default: Can be a number of type `int` or `float`. + * unit: The expected unit (defaults to `B`). + * unit_required: Whether the unit is required, this defaults to `True` if `unit` is specified. + * unit_case_sensitive: Whether the unit is case-sensitive (default is False). + * suffix_case_sensitive: Whether the suffix (e.g. `Ki`) is case sensitive (defaults to `unit_case_sensitive`). + """ + + def __init__(self, **kwargs) -> None: + self.unit = kwargs.pop("unit", "B") + self.unit_required = bool(kwargs.pop("unit_required", self.unit)) + self.unit_case_sensitive = bool(kwargs.pop("unit_case_sensitive", False)) + self.suffix_case_sensitive = bool( + kwargs.pop("suffix_case_sensitive", self.unit_case_sensitive) + ) + default: Any = kwargs.pop("default", None) + if default is None or default == "": + default = None + elif isinstance(default, int) or isinstance(default, float): + default = f"{default}{self.unit}" + super(ActionByteSize, self).__init__(default=default, **kwargs) + if default is None: + self.default = None + + def _parse(self, value: Any) -> int | None: + if value is int: + return value + else: + try: + return ParseByteSize( + value=str(value), + suffix_case_sensitive=self.suffix_case_sensitive, + unit=self.unit, + unit_case_sensitive=self.unit_case_sensitive, + unit_required=self.unit_required, + ) + except ValueError as error: + raise argparse.ArgumentError(self, str(error)) + except Exception as error: + raise error + + def __call__(self, parser, namespace, values, option_string=None) -> None: + if values is None: + setattr(namespace, self.dest, None) + elif isinstance(values, list): + setattr(namespace, self.dest, [self._parse(str(v)) for v in values]) + else: + setattr(namespace, self.dest, self._parse(str(values))) diff --git a/mbo/app/flags_test.py b/mbo/app/flags_test.py index ce39aac..172d92c 100644 --- a/mbo/app/flags_test.py +++ b/mbo/app/flags_test.py @@ -423,7 +423,7 @@ def FlagTest(self, test: 
@parameterized.expand(
    [
        FlagTestData(
            test="Parse default None.",
            expected=None,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                nargs="?",
            ),
            input=[],
        ),
        FlagTestData(
            test="Parse empty.",
            expected="argument flag: value must not be empty.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
            ),
            input=[""],
        ),
        FlagTestData(
            test="Parse default value.",
            expected=25,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                default=25,
                nargs="?",
            ),
            input=[],
        ),
        FlagTestData(
            test="Parse zero.",
            expected="argument flag: value does not have required unit `B`.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
            ),
            input=["0"],
        ),
        FlagTestData(
            test="Parse zero, no unit required.",
            expected=0,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit_required=False,
            ),
            input=["0"],
        ),
        FlagTestData(
            test="Unit only is invalid.",
            expected="argument flag: value cannot be empty.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
            ),
            input=["B"],
        ),
        # Even a unit that looks like a number (`0`) is stripped before the number is read.
        FlagTestData(
            test="Unit only is invalid (unit `0`).",
            expected="argument flag: value cannot be empty.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="0",
            ),
            input=["0"],
        ),
        FlagTestData(
            test="Parse zero bytes.",
            expected=[0, 0, 0, 0, 0],
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                nargs="+",
            ),
            input=["0B", "0b", "0.b", ".0b", "0.0b"],
        ),
        FlagTestData(
            test="Parse '.' is invalid.",
            expected="argument flag: value cannot be '.'.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
            ),
            input=[".B"],
        ),
        FlagTestData(
            test="Parse '.ib' is invalid.",
            expected="argument flag: value has bad suffix case, got '.ib'.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit_case_sensitive=False,
                unit_required=True,
                suffix_case_sensitive=False,
            ),
            input=[".ib"],
        ),
        FlagTestData(
            test="Parse zero X.",
            expected=0,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
            ),
            input=["0x"],
        ),
        FlagTestData(
            test="Parse zero X, case sensitive, finding z.",
            expected="argument flag: value does not have required unit `X`.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
                unit_case_sensitive=True,
            ),
            input=["0z"],
        ),
        FlagTestData(
            test="Parse zero X, case sensitive.",
            expected="argument flag: value does not have required unit `X` (found via case insensitive search).",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
                unit_case_sensitive=True,
            ),
            input=["0x"],
        ),
        FlagTestData(
            test="Parse zero X, case sensitive with suffix.",
            expected="argument flag: value has bad suffix case, got '0kX'.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
                unit_case_sensitive=True,
            ),
            input=["0kX"],
        ),
        FlagTestData(
            test="Parse zero X, case sensitive with i-suffix.",
            expected="argument flag: value has bad suffix case, got '0KIX'.",
            expected_error=argparse.ArgumentError,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
                unit_case_sensitive=True,
            ),
            input=["0KIX"],
        ),
        FlagTestData(
            test="Parse zero X, case sensitive with i-suffix (correct case).",
            expected=0,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
                unit_case_sensitive=True,
            ),
            input=["0KiX"],
        ),
        FlagTestData(
            test="Parse zero X, case sensitive with suffix (ignore-case).",
            expected=0,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
                unit_case_sensitive=True,
                suffix_case_sensitive=False,
            ),
            input=["0kX"],
        ),
        FlagTestData(
            test="Parse zero X, case insensitive, correct case.",
            expected=0,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                unit="X",
                unit_case_sensitive=False,
            ),
            input=["0X"],
        ),
        FlagTestData(
            test="Parse 2 kilo-bytes.",
            expected=2000,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
            ),
            input=["2kb"],
        ),
        FlagTestData(
            test="Parse 2 kibi-bytes.",
            expected=2048,
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
            ),
            input=["2kib"],
        ),
        FlagTestData(
            test="Parse list of bytes.",
            expected=[max(1, f) * 1000**f for f in range(10)],
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                nargs="+",
            ),
            input=[
                "1b",
                "1kb",
                "2mb",
                "3gb",
                "4tb",
                "5pb",
                "6eb",
                "7zb",
                "8yb",
                "9xb",
            ],
        ),
        FlagTestData(
            test="Parse list of bytes (caps).",
            expected=[max(1, f) * 1000**f for f in range(10)],
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                nargs="+",
                unit_case_sensitive=True,
                suffix_case_sensitive=True,
            ),
            input=[
                "1B",
                "1KB",
                "2MB",
                "3GB",
                "4TB",
                "5PB",
                "6EB",
                "7ZB",
                "8YB",
                "9XB",
            ],
        ),
        FlagTestData(
            test="Parse list of i-bytes.",
            expected=[max(1, f) * 1024**f for f in range(10)],
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                nargs="+",
            ),
            input=[
                "1b",
                "1kib",
                "2mib",
                "3gib",
                "4tib",
                "5pib",
                "6eib",
                "7zib",
                "8yib",
                "9xib",
            ],
        ),
        FlagTestData(
            test="Parse list of i-bytes (caps).",
            expected=[max(1, f) * 1024**f for f in range(10)],
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                nargs="+",
                unit_case_sensitive=True,
                suffix_case_sensitive=True,
            ),
            input=[
                "1B",
                "1KiB",
                "2MiB",
                "3GiB",
                "4TiB",
                "5PiB",
                "6EiB",
                "7ZiB",
                "8YiB",
                "9XiB",
            ],
        ),
        # Fractions are truncated after scaling; one space before the suffix is allowed.
        FlagTestData(
            test="Parse fraction and space.",
            expected=[0, 1234, 1325606222],
            action=ActionArgs(
                action=mbo.app.flags.ActionByteSize,
                nargs="+",
                unit_case_sensitive=True,
                suffix_case_sensitive=True,
            ),
            input=["0.7 B", "1.2345 KB", "1.234567 GiB"],
        ),
    ]
)
def test_ActionByteSize(self, test: FlagTestData):
    """Runs every `ActionByteSize` case through the shared `FlagTest` driver."""
    self.FlagTest(test)