Remove dependency on regex

kdeldycke · Jul 25, 2024 · e81e67f · e81e67f
1 parent 741b629
commit e81e67f
Show file tree

Hide file tree

Showing 5 changed files with 97 additions and 99 deletions.
diff --git a/changelog.md b/changelog.md
@@ -8,6 +8,7 @@
 - Switch from Poetry to `uv`.
 - Drop support for Python 3.8.
 - Mark Python 3.13-dev tests as stable.
+- Remove dependency on `regex`.
 
 ## [4.8.3 (2024-05-25)](https://github.com/kdeldycke/click-extra/compare/v4.8.2...v4.8.3)
 

diff --git a/click_extra/colorize.py b/click_extra/colorize.py
@@ -29,7 +29,6 @@
 
 import click
 import cloup
-import regex as re3
 from boltons.strutils import complement_int_list, int_ranges_from_int_list
 from cloup._util import identity
 from cloup.styling import Color, IStyle
@@ -805,37 +804,55 @@ def getvalue(self) -> str:
 
 
 def highlight(
-    string: str,
+    content: str,
     substrings: Iterable[str],
     styling_method: Callable,
     ignore_case: bool = False,
 ) -> str:
     """Highlights parts of the ``string`` that matches ``substrings``.
 
     Takes care of overlapping parts within the ``string``.
+
+    .. todo::
+        Same as the ``ignore_case`` parameter, should we support case-folding?
+        As in "Straße" => "Strasse"? Beware, it messes with string length and
+        characters index...
     """
     # Ranges of character indices flagged for highlighting.
     ranges = set()
 
+    # Search for occurrences of query parts in original string.
     for part in set(substrings):
-        # Search for occurrences of query parts in original string.
-        flags = re3.IGNORECASE if ignore_case else 0
+        # Reduce the matching space to the lower-case realm.
+        searched_content = content
+        if ignore_case:
+            lower_part = part.lower()
+            assert len(part) == len(
+                lower_part
+            ), "Lowering case is messing with string length"
+            part = lower_part
+            searched_content = content.lower()
+            assert len(content) == len(
+                searched_content
+            ), "Lowering case is messing with string length"
+        # Lookahead assertion which is going to give the starting position of each overlapping match.
+        pattern = rf"(?={re.escape(part)})"
         ranges |= {
-            f"{match.start()}-{match.end() - 1}"
-            for match in re3.finditer(part, string, flags=flags, overlapped=True)
+            f"{match.start()}-{match.start() + len(part) - 1}"
+            for match in re.finditer(pattern, searched_content)
         }
 
     # Reduce ranges, compute complement ranges, transform them to list of integers.
     range_arg = ",".join(ranges)
     highlight_ranges = int_ranges_from_int_list(range_arg)
     untouched_ranges = int_ranges_from_int_list(
-        complement_int_list(range_arg, range_end=len(string)),
+        complement_int_list(range_arg, range_end=len(content)),
     )
 
     # Apply style to range of characters flagged as matching.
     styled_str = ""
     for i, j in sorted(highlight_ranges + untouched_ranges):
-        segment = getitem(string, slice(i, j + 1))
+        segment = getitem(content, slice(i, j + 1))
         if (i, j) in highlight_ranges:
             segment = styling_method(segment)
         styled_str += str(segment)

diff --git a/pyproject.toml b/pyproject.toml
@@ -76,9 +76,6 @@ dependencies = [
     "commentjson ~= 0.9.0",
     "mergedeep ~= 1.3.4",
     "pyyaml ~= 6.0.0",
-    # regex is required for case-insensitive matches in Unicode.
-    # v2023.3.22 is the first to drop Python 3.7.
-    "regex ~= 2024.4.16",
     # requests 2.28.2 is the first version to support charset_normalizer 3.x.
     "requests ~= 2.32.3",
     # tabulate 0.9 is the first to add `*grid` and `*outline` formats.

diff --git a/tests/test_colorize.py b/tests/test_colorize.py
@@ -696,67 +696,121 @@ def command1(ctx):
 
 
 @pytest.mark.parametrize(
-    ("substrings", "expected", "ignore_case"),
+    ("original", "substrings", "expected", "ignore_case"),
     (
         # Function input types.
-        (["hey"], "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
-        (("hey",), "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
-        ({"hey"}, "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
         (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
+            ["hey"],
+            "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
+            False,
+        ),
+        (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
+            ("hey",),
+            "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
+            False,
+        ),
+        (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
+            {"hey"},
+            "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
+            False,
+        ),
+        (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
             "hey",
             "H\x1b[32mey\x1b[0m-xx-xxx-\x1b[32mhe\x1b[0mY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
             False,
         ),
         # Duplicate substrings.
-        (["hey", "hey"], "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
-        (("hey", "hey"), "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
-        ({"hey", "hey"}, "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
         (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
+            ["hey", "hey"],
+            "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
+            False,
+        ),
+        (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
+            ("hey", "hey"),
+            "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
+            False,
+        ),
+        (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
+            {"hey", "hey"},
+            "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
+            False,
+        ),
+        (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
             "heyhey",
             "H\x1b[32mey\x1b[0m-xx-xxx-\x1b[32mhe\x1b[0mY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
             False,
         ),
         # Case-sensitivity and multiple matches.
-        (["hey"], "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
         (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
+            ["hey"],
+            "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
+            False,
+        ),
+        (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
             ["Hey"],
             "\x1b[32mHey\x1b[0m-xx-xxx-\x1b[32mheY\x1b[0m-xXxXxxxxx-\x1b[32mhey\x1b[0m",
             True,
         ),
         (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
             "x",
             "Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-\x1b[32mx\x1b[0mX\x1b[32mx\x1b[0mX\x1b[32mxxxxx\x1b[0m-hey",
             False,
         ),
         (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
             "x",
             "Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-\x1b[32mxXxXxxxxx\x1b[0m-hey",
             True,
         ),
         # Overlaps.
         (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
             ["xx"],
             "Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-\x1b[32mxXxXxxxxx\x1b[0m-hey",
             True,
         ),
         (
+            "Hey-xx-xxx-heY-xXxXxxxxx-hey",
             ["xx"],
             "Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-xXxX\x1b[32mxxxxx\x1b[0m-hey",
             False,
         ),
         # No match.
-        ("z", "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
-        (["XX"], "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
+        ("Hey-xx-xxx-heY-xXxXxxxxx-hey", "z", "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
+        ("Hey-xx-xxx-heY-xXxXxxxxx-hey", ["XX"], "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
+        # Special characters.
+        (
+            "(?P<quote>[']).*?(?P=quote)",
+            "[",
+            "(?P<quote>\x1b[32m[\x1b[0m']).*?(?P=quote)",
+            False,
+        ),
+        # Unicode normalization.
+        ("Straße", "ß", "Stra\x1b[32mß\x1b[0me", False),
+        # ("Straße", ["SS"], "Stra\x1b[32mß\x1b[0me", True),
     ),
 )
-def test_substring_highlighting(substrings, expected, ignore_case):
-    result = highlight(
-        "Hey-xx-xxx-heY-xXxXxxxxx-hey",
-        substrings,
-        styling_method=theme.success,
-        ignore_case=ignore_case,
+def test_substring_highlighting(original, substrings, expected, ignore_case):
+    assert (
+        highlight(
+            original,
+            substrings,
+            styling_method=theme.success,
+            ignore_case=ignore_case,
+        )
+        == expected
     )
-    assert result == expected
 
 
 @parametrize(