From 1852b54e7f5c9d5dcdf81370741c5f54524bca13 Mon Sep 17 00:00:00 2001 From: Holger Veltrup Date: Mon, 16 Dec 2024 09:41:19 +0100 Subject: [PATCH] feat: content-matcher also requires the resource --- src/Service/Indexer/ContentCollector.php | 11 ++++++----- src/Service/Indexer/SiteKit/ContentMatcher.php | 4 +++- .../SiteKit/DefaultSchema2xDocumentEnricher.php | 1 + src/Service/Indexer/SiteKit/HeadlineMatcher.php | 4 +++- .../Indexer/SiteKit/QuoteSectionMatcher.php | 4 +++- src/Service/Indexer/SiteKit/RichtTextMatcher.php | 4 +++- test/Service/Indexer/ContentCollectorTest.php | 7 +++++-- .../Indexer/SiteKit/HeadlineMatcherTest.php | 10 +++++++--- .../Indexer/SiteKit/QuoteSectionMatcherTest.php | 16 +++++++++++----- .../Indexer/SiteKit/RichtTextMatcherTest.php | 10 +++++++--- 10 files changed, 49 insertions(+), 22 deletions(-) diff --git a/src/Service/Indexer/ContentCollector.php b/src/Service/Indexer/ContentCollector.php index 310202c..2759c08 100644 --- a/src/Service/Indexer/ContentCollector.php +++ b/src/Service/Indexer/ContentCollector.php @@ -4,6 +4,7 @@ namespace Atoolo\Search\Service\Indexer; +use Atoolo\Resource\Resource; use Atoolo\Search\Service\Indexer\SiteKit\ContentMatcher; class ContentCollector @@ -16,9 +17,9 @@ public function __construct(private readonly iterable $matchers) {} /** * @param array $data */ - public function collect(array $data): string + public function collect(array $data, Resource $resource): string { - $content = $this->walk([], $data); + $content = $this->walk([], $data, $resource); return implode(' ', $content); } @@ -27,7 +28,7 @@ public function collect(array $data): string * @param array $data * @return string[] */ - private function walk(array $path, array $data): array + private function walk(array $path, array $data, Resource $resource): array { $contentCollections = []; foreach ($data as $key => $value) { @@ -41,14 +42,14 @@ private function walk(array $path, array $data): array $matcherContent = []; foreach ($this->matchers as $matcher) { - $content = $matcher->match($path, $value); + $content = $matcher->match($path, $value, $resource); if (!is_string($content)) { continue; } $matcherContent[] = $content; } $contentCollections[] = $matcherContent; - $contentCollections[] = $this->walk($path, $value); + $contentCollections[] = $this->walk($path, $value, $resource); if (is_string($key)) { array_pop($path); diff --git a/src/Service/Indexer/SiteKit/ContentMatcher.php b/src/Service/Indexer/SiteKit/ContentMatcher.php index 6e60acd..8a20cdd 100644 --- a/src/Service/Indexer/SiteKit/ContentMatcher.php +++ b/src/Service/Indexer/SiteKit/ContentMatcher.php @@ -4,6 +4,8 @@ namespace Atoolo\Search\Service\Indexer\SiteKit; +use Atoolo\Resource\Resource; + /** * The `ContentMatcher` interface is implemented in order to extract from the * content structure of resources the content that is relevant for the `content` @@ -31,5 +33,5 @@ interface ContentMatcher * @return string|false The extracted content or `false` if the * content is not relevant for the search index. */ - public function match(array $path, array $value): string|false; + public function match(array $path, array $value, Resource $resource): string|false; } diff --git a/src/Service/Indexer/SiteKit/DefaultSchema2xDocumentEnricher.php b/src/Service/Indexer/SiteKit/DefaultSchema2xDocumentEnricher.php index 5e51e1c..73ff705 100644 --- a/src/Service/Indexer/SiteKit/DefaultSchema2xDocumentEnricher.php +++ b/src/Service/Indexer/SiteKit/DefaultSchema2xDocumentEnricher.php @@ -284,6 +284,7 @@ private function enrichContent( $content[] = $this->contentCollector->collect( $resource->data->getArray('content'), + $resource, ); /** @var ContactPoint $contactPoint */ diff --git a/src/Service/Indexer/SiteKit/HeadlineMatcher.php b/src/Service/Indexer/SiteKit/HeadlineMatcher.php index 4b18549..e0043b3 100644 --- a/src/Service/Indexer/SiteKit/HeadlineMatcher.php +++ b/src/Service/Indexer/SiteKit/HeadlineMatcher.php @@ -4,12 +4,14 @@ namespace Atoolo\Search\Service\Indexer\SiteKit; +use Atoolo\Resource\Resource; + class HeadlineMatcher implements ContentMatcher { /** * @inheritDoc */ - public function match(array $path, array $value): string|false + public function match(array $path, array $value, Resource $resource): string|false { $len = count($path); if ($len < 2) { diff --git a/src/Service/Indexer/SiteKit/QuoteSectionMatcher.php b/src/Service/Indexer/SiteKit/QuoteSectionMatcher.php index c8e64b2..7649321 100644 --- a/src/Service/Indexer/SiteKit/QuoteSectionMatcher.php +++ b/src/Service/Indexer/SiteKit/QuoteSectionMatcher.php @@ -4,6 +4,8 @@ namespace Atoolo\Search\Service\Indexer\SiteKit; +use Atoolo\Resource\Resource; + /** * @phpstan-type Model array{quote?: ?string, citation?: ?string} */ @@ -12,7 +14,7 @@ class QuoteSectionMatcher implements ContentMatcher /** * @inheritDoc */ - public function match(array $path, array $value): string|false + public function match(array $path, array $value, Resource $resource): string|false { $len = count($path); if ($len < 1) { diff --git a/src/Service/Indexer/SiteKit/RichtTextMatcher.php b/src/Service/Indexer/SiteKit/RichtTextMatcher.php index 614c962..5f74887 100644 --- a/src/Service/Indexer/SiteKit/RichtTextMatcher.php +++ b/src/Service/Indexer/SiteKit/RichtTextMatcher.php @@ -4,12 +4,14 @@ namespace Atoolo\Search\Service\Indexer\SiteKit; +use Atoolo\Resource\Resource; + class RichtTextMatcher implements ContentMatcher { /** * @inheritDoc */ - public function match(array $path, array $value): string|false + public function match(array $path, array $value, Resource $resource): string|false { $modelType = $value['modelType'] ?? false; if ($modelType !== 'html.richText') { diff --git a/test/Service/Indexer/ContentCollectorTest.php b/test/Service/Indexer/ContentCollectorTest.php index 96264de..a5e2471 100644 --- a/test/Service/Indexer/ContentCollectorTest.php +++ b/test/Service/Indexer/ContentCollectorTest.php @@ -4,6 +4,7 @@ namespace Atoolo\Search\Test\Service\Indexer; +use Atoolo\Resource\Resource; use Atoolo\Search\Service\Indexer\ContentCollector; use Atoolo\Search\Service\Indexer\SiteKit\ContentMatcher; use PHPUnit\Framework\Attributes\CoversClass; @@ -15,7 +16,7 @@ class ContentCollectorTest extends TestCase public function testCollect(): void { $matcher = (new class implements ContentMatcher { - public function match(array $path, array $value): string|false + public function match(array $path, array $value, Resource $resource): string|false { $modelType = $value['modelType'] ?? false; if ($modelType !== 'html.richText') { @@ -41,7 +42,9 @@ public function match(array $path, array $value): string|false ], ], ]; - $content = $collector->collect($data); + + $resource = $this->createStub(Resource::class); + $content = $collector->collect($data, $resource); $this->assertEquals('

Ein Text

', $content, 'unexpected content'); } diff --git a/test/Service/Indexer/SiteKit/HeadlineMatcherTest.php b/test/Service/Indexer/SiteKit/HeadlineMatcherTest.php index 3cfd828..aa9c989 100644 --- a/test/Service/Indexer/SiteKit/HeadlineMatcherTest.php +++ b/test/Service/Indexer/SiteKit/HeadlineMatcherTest.php @@ -4,6 +4,7 @@ namespace Atoolo\Search\Test\Service\Indexer\SiteKit; +use Atoolo\Resource\Resource; use Atoolo\Search\Service\Indexer\SiteKit\HeadlineMatcher; use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\TestCase; @@ -19,7 +20,8 @@ public function testMatcher(): void "headline" => "Überschrift", ]; - $content = $matcher->match(['items', 'model'], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match(['items', 'model'], $value, $resource); $this->assertEquals('Überschrift', $content, 'unexpected headline'); } @@ -32,7 +34,8 @@ public function testMatcherNotMachedPathToShort(): void "headline" => "Überschrift", ]; - $content = $matcher->match(['model'], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match(['model'], $value, $resource); $this->assertEmpty( $content, @@ -48,7 +51,8 @@ public function testMatcherNotMachedNoModel(): void "headline" => "Überschrift", ]; - $content = $matcher->match(['items', 'modelX'], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match(['items', 'modelX'], $value, $resource); $this->assertEmpty( $content, diff --git a/test/Service/Indexer/SiteKit/QuoteSectionMatcherTest.php b/test/Service/Indexer/SiteKit/QuoteSectionMatcherTest.php index b34adba..f17b8bb 100644 --- a/test/Service/Indexer/SiteKit/QuoteSectionMatcherTest.php +++ b/test/Service/Indexer/SiteKit/QuoteSectionMatcherTest.php @@ -4,6 +4,7 @@ namespace Atoolo\Search\Test\Service\Indexer\SiteKit; +use Atoolo\Resource\Resource; use Atoolo\Search\Service\Indexer\SiteKit\QuoteSectionMatcher; use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\TestCase; @@ -23,7 +24,8 @@ public function testMatcher(): void ], ]; - $content = $matcher->match(['items'], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match(['items'], $value, $resource); $this->assertEquals( 'Quote-Text Citation', @@ -43,7 +45,8 @@ public function testMatcherNoMatchPathToShort(): void ], ]; - $content = $matcher->match([], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match([], $value, $resource); $this->assertEmpty( $content, @@ -63,7 +66,8 @@ public function testMatcherNoMatchNoItems(): void ], ]; - $content = $matcher->match(['itemsX'], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match(['itemsX'], $value, $resource); $this->assertEmpty( $content, @@ -83,7 +87,8 @@ public function testMatcherNoMatchInvalidType(): void ], ]; - $content = $matcher->match(['items'], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match(['items'], $value, $resource); $this->assertEmpty( $content, @@ -103,7 +108,8 @@ public function testMatcherNoMatchMissingModel(): void ], ]; - $content = $matcher->match(['items'], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match(['items'], $value, $resource); $this->assertEmpty( $content, diff --git a/test/Service/Indexer/SiteKit/RichtTextMatcherTest.php b/test/Service/Indexer/SiteKit/RichtTextMatcherTest.php index 03a4b32..0c1879c 100644 --- a/test/Service/Indexer/SiteKit/RichtTextMatcherTest.php +++ b/test/Service/Indexer/SiteKit/RichtTextMatcherTest.php @@ -4,6 +4,7 @@ namespace Atoolo\Search\Test\Service\Indexer\SiteKit; +use Atoolo\Resource\Resource; use Atoolo\Search\Service\Indexer\SiteKit\RichtTextMatcher; use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\TestCase; @@ -21,7 +22,8 @@ public function testMatcher(): void "text" => "

Ein Text

", ]; - $content = $matcher->match([], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match([], $value, $resource); $this->assertEquals('Ein Text', $content, 'unexpected content'); } @@ -36,7 +38,8 @@ public function testMatcherNotMatchedInvalidType(): void "text" => "

Ein Text

", ]; - $content = $matcher->match([], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match([], $value, $resource); $this->assertEmpty( $content, @@ -54,7 +57,8 @@ public function testMatcherNotMatchedTextMissing(): void "textX" => "

Ein Text

", ]; - $content = $matcher->match([], $value); + $resource = $this->createStub(Resource::class); + $content = $matcher->match([], $value, $resource); $this->assertEmpty( $content,