Skip to content

Commit

Permalink
Add surrogates to filter replacement words
Browse files Browse the repository at this point in the history
Fix replacement length calculation

For example, an emoji counts as 2 symbols
  • Loading branch information
khoben committed Sep 30, 2024
1 parent 6cac995 commit c21a5c9
Showing 1 changed file with 13 additions and 19 deletions.
32 changes: 13 additions & 19 deletions telemirror/messagefilters/messagefilters.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def __init__(
filter_mention: Union[bool, Set[str]] = False,
filter_by_id_mention: bool = False,
) -> None:
self._placeholder = placeholder
self._placeholder = utils.add_surrogate(placeholder)
self._placeholder_len = len(self._placeholder)

self._url_matcher = UrlMatcher(blacklist, whitelist)

Expand All @@ -130,31 +131,24 @@ async def _process_message(
update_pos = False

if (
(
isinstance(entity, types.MessageEntityUrl)
and self._url_matcher.match(
filtered_text[entity.offset : entity.offset + entity.length]
)
isinstance(entity, types.MessageEntityUrl)
and self._url_matcher.match(
filtered_text[entity.offset : entity.offset + entity.length]
)
or (
isinstance(entity, types.MessageEntityMention)
and self._match_mention(
filtered_text[entity.offset : entity.offset + entity.length]
)
) or (
isinstance(
entity, (types.MessageEntityMention, types.MessageEntityTextUrl)
)
or (
isinstance(entity, types.MessageEntityTextUrl)
and self._match_mention(
filtered_text[entity.offset : entity.offset + entity.length]
)
and self._match_mention(
filtered_text[entity.offset : entity.offset + entity.length]
)
):
filtered_text = (
filtered_text[: entity.offset]
+ self._placeholder
+ filtered_text[entity.offset + entity.length :]
)
entity_len_diff = len(self._placeholder) - entity.length
entity_len_diff = self._placeholder_len - entity.length
update_pos = True
drop_entity = True
elif (
Expand Down Expand Up @@ -189,7 +183,7 @@ async def _process_message(
+ filtered_text[actual_end:]
)

diff = len(self._placeholder) - (end - start)
diff = self._placeholder_len - (end - start)
offset_error += diff

self.update_entities_params(
Expand Down Expand Up @@ -420,7 +414,7 @@ async def _process_message(

def repl(match: re.Match[str]) -> str:
group = match.group()
replacement = self._keywords_mapping.get(group.lower())
replacement = utils.add_surrogate(self._keywords_mapping.get(group.lower()))

nonlocal entities_offset_error
match_start, match_end = match.span()
Expand Down

0 comments on commit c21a5c9

Please sign in to comment.