Skip to content

Commit

Permalink
Remove dependency on regex
Browse files Browse the repository at this point in the history
  • Loading branch information
kdeldycke committed Jul 25, 2024
1 parent 741b629 commit e81e67f
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 99 deletions.
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Switch from Poetry to `uv`.
- Drop support for Python 3.8.
- Mark Python 3.13-dev tests as stable.
- Remove dependency on `regex`.

## [4.8.3 (2024-05-25)](https://github.com/kdeldycke/click-extra/compare/v4.8.2...v4.8.3)

Expand Down
33 changes: 25 additions & 8 deletions click_extra/colorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

import click
import cloup
import regex as re3
from boltons.strutils import complement_int_list, int_ranges_from_int_list
from cloup._util import identity
from cloup.styling import Color, IStyle
Expand Down Expand Up @@ -805,37 +804,55 @@ def getvalue(self) -> str:


def highlight(
string: str,
content: str,
substrings: Iterable[str],
styling_method: Callable,
ignore_case: bool = False,
) -> str:
"""Highlights parts of the ``string`` that matches ``substrings``.
Takes care of overlapping parts within the ``string``.
.. todo::
Same as the ``ignore_case`` parameter, should we support case-folding?
As in "Straße" => "Strasse"? Beware, it messes with string length and
characters index...
"""
# Ranges of character indices flagged for highlighting.
ranges = set()

# Search for occurrences of query parts in original string.
for part in set(substrings):
# Search for occurrences of query parts in original string.
flags = re3.IGNORECASE if ignore_case else 0
# Reduce the matching space to the lower-case realm.
searched_content = content
if ignore_case:
lower_part = part.lower()
assert len(part) == len(
lower_part
), "Lowering case is messing with string length"
part = lower_part
searched_content = content.lower()
assert len(content) == len(
searched_content
), "Lowering case is messing with string length"
# Zero-width lookahead assertion: it consumes no characters, so finditer() reports the starting position of every occurrence, including overlapping ones.
pattern = rf"(?={re.escape(part)})"
ranges |= {
f"{match.start()}-{match.end() - 1}"
for match in re3.finditer(part, string, flags=flags, overlapped=True)
f"{match.start()}-{match.start() + len(part) - 1}"
for match in re.finditer(pattern, searched_content)
}

# Reduce ranges, compute complement ranges, transform them to list of integers.
range_arg = ",".join(ranges)
highlight_ranges = int_ranges_from_int_list(range_arg)
untouched_ranges = int_ranges_from_int_list(
complement_int_list(range_arg, range_end=len(string)),
complement_int_list(range_arg, range_end=len(content)),
)

# Apply style to range of characters flagged as matching.
styled_str = ""
for i, j in sorted(highlight_ranges + untouched_ranges):
segment = getitem(string, slice(i, j + 1))
segment = getitem(content, slice(i, j + 1))
if (i, j) in highlight_ranges:
segment = styling_method(segment)
styled_str += str(segment)
Expand Down
3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,6 @@ dependencies = [
"commentjson ~= 0.9.0",
"mergedeep ~= 1.3.4",
"pyyaml ~= 6.0.0",
# regex is required for case-insensitive matches in Unicode.
# v2023.3.22 is the first to drop Python 3.7.
"regex ~= 2024.4.16",
# requests 2.28.2 is the first version to support charset_normalizer 3.x.
"requests ~= 2.32.3",
# tabulate 0.9 is the first to add `*grid` and `*outline` formats.
Expand Down
88 changes: 71 additions & 17 deletions tests/test_colorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,67 +696,121 @@ def command1(ctx):


@pytest.mark.parametrize(
("substrings", "expected", "ignore_case"),
("original", "substrings", "expected", "ignore_case"),
(
# Function input types.
(["hey"], "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
(("hey",), "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
({"hey"}, "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
["hey"],
"Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
("hey",),
"Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
{"hey"},
"Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
"hey",
"H\x1b[32mey\x1b[0m-xx-xxx-\x1b[32mhe\x1b[0mY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
# Duplicate substrings.
(["hey", "hey"], "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
(("hey", "hey"), "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
({"hey", "hey"}, "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
["hey", "hey"],
"Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
("hey", "hey"),
"Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
{"hey", "hey"},
"Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
"heyhey",
"H\x1b[32mey\x1b[0m-xx-xxx-\x1b[32mhe\x1b[0mY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
# Case-sensitivity and multiple matches.
(["hey"], "Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m", False),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
["hey"],
"Hey-xx-xxx-heY-xXxXxxxxx-\x1b[32mhey\x1b[0m",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
["Hey"],
"\x1b[32mHey\x1b[0m-xx-xxx-\x1b[32mheY\x1b[0m-xXxXxxxxx-\x1b[32mhey\x1b[0m",
True,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
"x",
"Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-\x1b[32mx\x1b[0mX\x1b[32mx\x1b[0mX\x1b[32mxxxxx\x1b[0m-hey",
False,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
"x",
"Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-\x1b[32mxXxXxxxxx\x1b[0m-hey",
True,
),
# Overlaps.
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
["xx"],
"Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-\x1b[32mxXxXxxxxx\x1b[0m-hey",
True,
),
(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
["xx"],
"Hey-\x1b[32mxx\x1b[0m-\x1b[32mxxx\x1b[0m-heY-xXxX\x1b[32mxxxxx\x1b[0m-hey",
False,
),
# No match.
("z", "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
(["XX"], "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
("Hey-xx-xxx-heY-xXxXxxxxx-hey", "z", "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
("Hey-xx-xxx-heY-xXxXxxxxx-hey", ["XX"], "Hey-xx-xxx-heY-xXxXxxxxx-hey", False),
# Special characters.
(
"(?P<quote>[']).*?(?P=quote)",
"[",
"(?P<quote>\x1b[32m[\x1b[0m']).*?(?P=quote)",
False,
),
# Unicode normalization.
("Straße", "ß", "Stra\x1b[32mß\x1b[0me", False),
# ("Straße", ["SS"], "Stra\x1b[32mß\x1b[0me", True),
),
)
def test_substring_highlighting(substrings, expected, ignore_case):
result = highlight(
"Hey-xx-xxx-heY-xXxXxxxxx-hey",
substrings,
styling_method=theme.success,
ignore_case=ignore_case,
def test_substring_highlighting(original, substrings, expected, ignore_case):
assert (
highlight(
original,
substrings,
styling_method=theme.success,
ignore_case=ignore_case,
)
== expected
)
assert result == expected


@parametrize(
Expand Down
Loading

0 comments on commit e81e67f

Please sign in to comment.