From 0c43a79f236828ba3082c91a00e62dcf4ed6c622 Mon Sep 17 00:00:00 2001 From: Daniel Lienert Date: Mon, 18 Jan 2021 22:14:28 +0100 Subject: [PATCH 1/2] TASK: Use more fine-grained sanitation The previously used `[[:^alnum:]]` strips German umlauts and valid characters of other languages, which leads to strange query results. This is a more fine-grained approach to sanitize the search word based on Elasticsearch recommendations. --- Classes/Controller/SuggestController.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Classes/Controller/SuggestController.php b/Classes/Controller/SuggestController.php index cf8eb4f..3268892 100644 --- a/Classes/Controller/SuggestController.php +++ b/Classes/Controller/SuggestController.php @@ -114,8 +114,8 @@ protected function buildRequestForTerm(string $term, string $contextNodeIdentifi $term = strtolower($term); // The suggest function only works well with one word - // and the term is trimmed to alnum characters to avoid errors - $suggestTerm = preg_replace('/[[:^alnum:]]/', '', explode(' ', $term)[0]); + // special search characters are escaped + $suggestTerm = str_replace(['=', '>', '<', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\', '/'], ['', '', '', '(', '\)', '\{', '\}', '[', '\]', '\^', '\"', '\~', '\*', '\?', '\:', '\\\\', '\/'], explode(' ', $term)[0]); if (!$this->elasticSearchQueryTemplateCache->has($cacheKey)) { $contentContext = $this->createContentContext('live', $dimensionCombination ? 
json_decode($dimensionCombination, true) : []); From 0902359d81889aaa4fe8cacf60e22e536c946834 Mon Sep 17 00:00:00 2001 From: Daniel Lienert Date: Tue, 26 Jan 2021 10:35:47 +0100 Subject: [PATCH 2/2] TASK: Extract sanitation rule and use it in suggestions --- Classes/Controller/SuggestController.php | 3 ++- Classes/EelHelper/SuggestionIndexHelper.php | 4 +++- Classes/Utility/Sanitation.php | 24 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 Classes/Utility/Sanitation.php diff --git a/Classes/Controller/SuggestController.php b/Classes/Controller/SuggestController.php index 3268892..7fd356c 100644 --- a/Classes/Controller/SuggestController.php +++ b/Classes/Controller/SuggestController.php @@ -14,6 +14,7 @@ use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Eel\ElasticSearchQueryBuilder; use Flowpack\ElasticSearch\ContentRepositoryAdaptor\ElasticSearchClient; use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Exception\QueryBuildingException; +use Flowpack\SearchPlugin\Utility\Sanitation; use Neos\Cache\Frontend\VariableFrontend; use Neos\Flow\Annotations as Flow; use Neos\Flow\Mvc\Controller\ActionController; @@ -115,7 +116,7 @@ protected function buildRequestForTerm(string $term, string $contextNodeIdentifi // The suggest function only works well with one word // special search characters are escaped - $suggestTerm = str_replace(['=', '>', '<', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\', '/'], ['', '', '', '(', '\)', '\{', '\}', '[', '\]', '\^', '\"', '\~', '\*', '\?', '\:', '\\\\', '\/'], explode(' ', $term)[0]); + $suggestTerm = Sanitation::sanitizeSearchInput(explode(' ', $term)[0]); if (!$this->elasticSearchQueryTemplateCache->has($cacheKey)) { $contentContext = $this->createContentContext('live', $dimensionCombination ? 
json_decode($dimensionCombination, true) : []); diff --git a/Classes/EelHelper/SuggestionIndexHelper.php b/Classes/EelHelper/SuggestionIndexHelper.php index 40bd0f4..6ac959c 100644 --- a/Classes/EelHelper/SuggestionIndexHelper.php +++ b/Classes/EelHelper/SuggestionIndexHelper.php @@ -14,6 +14,7 @@ */ use Flowpack\SearchPlugin\Exception; +use Flowpack\SearchPlugin\Utility\Sanitation; use Neos\Eel\ProtectedContextAwareInterface; use Neos\Flow\Annotations as Flow; @@ -47,8 +48,9 @@ protected function prepareInput($input): ?array { $process = static function (?string $input) { $input = preg_replace("/\r|\n/", '', $input); - return array_values(array_filter(explode(' ', preg_replace("/[^[:alnum:][:space:]]/u", ' ', strip_tags($input))))); + return array_values(array_filter(explode(' ', Sanitation::sanitizeSearchInput(strip_tags($input))))); }; + if (\is_string($input)) { return $process($input); } elseif (\is_array($input)) { diff --git a/Classes/Utility/Sanitation.php b/Classes/Utility/Sanitation.php new file mode 100644 index 0000000..e94ae47 --- /dev/null +++ b/Classes/Utility/Sanitation.php @@ -0,0 +1,24 @@ +', '<', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\', '/'], ['', '', '', '(', '\)', '\{', '\}', '[', '\]', '\^', '\"', '\~', '\*', '\?', '\:', '\\\\', '\/'], $input); + } + +}