diff --git a/bin/console b/bin/console new file mode 100755 index 0000000..089d452 --- /dev/null +++ b/bin/console @@ -0,0 +1,20 @@ +#!/usr/bin/env php +load('services.yml'); +$container->compile(); + +$application = $container->get(Application::class); + +$application->run(); \ No newline at end of file diff --git a/config/services.yml b/config/services.yml new file mode 100644 index 0000000..1b29748 --- /dev/null +++ b/config/services.yml @@ -0,0 +1,15 @@ +services: + _defaults: + autowire: true + autoconfigure: true + _instanceof: + Symfony\Component\Console\Command\Command: + tags: ['command'] + + Atoolo\Search\Console\: + resource: '../src/Console' + + Atoolo\Search\Console\Application: + public: true + arguments: + - !tagged command \ No newline at end of file diff --git a/src/Console/Application.php b/src/Console/Application.php new file mode 100644 index 0000000..0fcfbbe --- /dev/null +++ b/src/Console/Application.php @@ -0,0 +1,18 @@ +add($command); + } + } +} diff --git a/src/Console/Command/Indexer.php b/src/Console/Command/Indexer.php new file mode 100644 index 0000000..7ef089b --- /dev/null +++ b/src/Console/Command/Indexer.php @@ -0,0 +1,132 @@ +setHelp('Command to fill a search index') + ->addArgument( + 'solr-core', + InputArgument::REQUIRED, + 'Solr core to be used.' + ) + ->addArgument( + 'resource-dir', + InputArgument::REQUIRED, + 'Resource directory whose data is to be indexed.' + ) + ->addArgument( + 'directories', + InputArgument::OPTIONAL | InputArgument::IS_ARRAY, + 'Resources or directories of the resource to be indexed.' + ) + ->addOption( + 'cleanup-threshold', + null, + InputArgument::OPTIONAL, + 'Specifies the number of indexed documents from ' . + 'which indexing is considered successful and old entries ' . + 'can be deleted. 
Is only used for full indexing.', + 0 + ) + ; + } + + /** + * Runs the indexing for the given Solr core and resource directory. + * + * Without the optional "directories" argument all resources are indexed + * and the "cleanup-threshold" option is applied; with directories the + * threshold is forced to 0. + * + * @return int Command::SUCCESS once indexing and error reporting are done + */ + protected function execute( + InputInterface $input, + OutputInterface $output + ): int { + + $this->io = new SymfonyStyle($input, $output); + $this->progressBar = new IndexerProgressProgressBar($output); + $this->resourceDir = $input->getArgument('resource-dir'); + $directories = $input->getArgument('directories'); + + /* + * "cleanup-threshold" is registered with addOption(), so it must be + * read with getOption(); getArgument() rejects names that were not + * declared as arguments. The cast normalises the raw CLI string. + */ + $cleanupThreshold = empty($directories) + ? (int) $input->getOption('cleanup-threshold') + : 0; + + if (empty($directories)) { + $this->io->title('Index all resources'); + } else { + $this->io->title('Index resources subdirectories'); + $this->io->listing($directories); + } + + $parameter = new IndexerParameter( + $input->getArgument('solr-core'), + $this->resourceDir, + $cleanupThreshold, + $directories + ); + + $indexer = $this->createIndexer(); + $indexer->index($parameter); + + $this->errorReport(); + + return Command::SUCCESS; + } + + /** + * Prints every error collected by the progress bar. Errors of type + * InvalidResourceException are prefixed with the resource location. + */ + protected function errorReport(): void + { + foreach ($this->progressBar->getErrors() as $error) { + if ($error instanceof InvalidResourceException) { + $this->io->error( + $error->getLocation() . ': ' . 
+ $error->getMessage() + ); + } else { + $this->io->error($error->getMessage()); + } + } + } + + protected function createIndexer(): SolrIndexer + { + $resourceLoader = new SiteKitLoader($this->resourceDir); + $navigationLoader = new SiteKitNavigationHierarchyLoader( + $resourceLoader + ); + $schema21 = new DefaultSchema21DocumentEnricher( + $navigationLoader + ); + + $clientFactory = new SolrParameterClientFactory(); + return new SolrIndexer( + [$schema21], + $this->progressBar, + $resourceLoader, + $clientFactory, + 'internal' + ); + } +} diff --git a/src/Console/Command/Io/IndexerProgressProgressBar.php b/src/Console/Command/Io/IndexerProgressProgressBar.php new file mode 100644 index 0000000..13b156c --- /dev/null +++ b/src/Console/Command/Io/IndexerProgressProgressBar.php @@ -0,0 +1,64 @@ +output = $output; + } + + public function start(int $total): void + { + $this->progressBar = new ProgressBar($this->output, $total); + $this->formatProgressBar('green'); + } + + public function advance(int $step): void + { + $this->progressBar->advance($step); + } + + private function formatProgressBar(string $color): void + { + $this->progressBar->setBarCharacter('•'); + $this->progressBar->setEmptyBarCharacter('⚬'); + $this->progressBar->setProgressCharacter('➤'); + $this->progressBar->setFormat( + "%current%/%max% [%bar%] %percent:3s%%\n" . 
+ " %estimated:-20s% %memory:20s%" + ); + } + + public function error(Exception $exception): void + { + $this->formatProgressBar('red'); + $this->errors[] = $exception; + } + + public function finish(): void + { + $this->progressBar->finish(); + } + + /** + * @return array + */ + public function getErrors(): array + { + return $this->errors; + } +} diff --git a/src/Console/Command/MoreLikeThis.php b/src/Console/Command/MoreLikeThis.php new file mode 100644 index 0000000..ab8cc33 --- /dev/null +++ b/src/Console/Command/MoreLikeThis.php @@ -0,0 +1,114 @@ +setHelp('Command to performs a more-like-this search') + ->addArgument( + 'solr-core', + InputArgument::REQUIRED, + 'Solr core to be used.' + ) + ->addArgument( + 'resource-dir', + InputArgument::REQUIRED, + 'Resource directory whose data is to be indexed.' + ) + ->addArgument( + 'location', + InputArgument::REQUIRED, + 'Resource directory whose data is to be indexed.' + ) + ; + } + + protected function execute( + InputInterface $input, + OutputInterface $output + ): int { + + $this->io = new SymfonyStyle($input, $output); + + $this->solrCore = $input->getArgument('solr-core'); + $this->resourceDir = $input->getArgument('resource-dir'); + $location = $input->getArgument('location'); + + $searcher = $this->createSearcher(); + $query = $this->buildQuery($location); + $result = $searcher->moreLikeThis($query); + $this->outputResult($result); + + return Command::SUCCESS; + } + + protected function createSearcher(): SolrMoreLikeThis + { + $resourceLoader = new SiteKitLoader($this->resourceDir); + $clientFactory = new SolrParameterClientFactory(); + $resourceFactoryList = [ + new ExternalResourceFactory(), + new InternalResourceFactory($resourceLoader), + new InternalMediaResourceFactory($resourceLoader) + ]; + $solrResultToResourceResolver = new SolrResultToResourceResolver( + $resourceFactoryList + ); + + return new SolrMoreLikeThis( + $clientFactory, + $solrResultToResourceResolver + ); + } + + protected function 
buildQuery(string $location): MoreLikeThisQuery + { + $filterList = []; + return new MoreLikeThisQuery( + $this->solrCore, + $location, + $filterList, + 5, + ['content'] + ); + } + + protected function outputResult(ResourceSearchResult $result): void + { + $this->io->text($result->getTotal() . " Results:"); + foreach ($result as $resource) { + $this->io->text($resource->getLocation()); + } + $this->io->text('Query-Time: ' . $result->getQueryTime() . 'ms'); + } +} diff --git a/src/Console/Command/Search.php b/src/Console/Command/Search.php new file mode 100644 index 0000000..999715d --- /dev/null +++ b/src/Console/Command/Search.php @@ -0,0 +1,140 @@ +setHelp('Command to performs a search') + ->addArgument( + 'solr-core', + InputArgument::REQUIRED, + 'Solr core to be used.' + ) + ->addArgument( + 'resource-dir', + InputArgument::REQUIRED, + 'Resource directory whose data is to be indexed.' + ) + ->addArgument( + 'text', + InputArgument::IS_ARRAY, + 'Text with which to search.' + ) + ; + } + + protected function execute( + InputInterface $input, + OutputInterface $output + ): int { + + $this->io = new SymfonyStyle($input, $output); + $this->resourceDir = $input->getArgument('resource-dir'); + $this->solrCore = $input->getArgument('solr-core'); + + $searcher = $this->createSearch(); + $query = $this->buildQuery($input); + + $result = $searcher->select($query); + + $this->outputResult($result); + + return Command::SUCCESS; + } + + protected function createSearch(): SolrSelect + { + $resourceLoader = new SiteKitLoader($this->resourceDir); + $clientFactory = new SolrParameterClientFactory(); + $defaultBoosting = new DefaultBoostModifier(); + + $resourceFactoryList = [ + new ExternalResourceFactory(), + new InternalResourceFactory($resourceLoader), + new InternalMediaResourceFactory($resourceLoader) + ]; + + $solrResultToResourceResolver = new SolrResultToResourceResolver( + $resourceFactoryList + ); + + return new SolrSelect( + $clientFactory, + [$defaultBoosting], + 
$solrResultToResourceResolver + ); + } + + protected function buildQuery(InputInterface $input): SelectQuery + { + $builder = SelectQuery::builder(); + $builder->core($this->solrCore); + + $text = $input->getArgument('text'); + if (is_array($text)) { + $builder->text(implode(' ', $text)); + } + + // TODO: filter + + // TODO: facet + + return $builder->build(); + } + + protected function outputResult( + ResourceSearchResult $result + ) { + $this->io->title('Results (' . $result->getTotal() . ')'); + foreach ($result as $resource) { + $this->io->text($resource->getLocation()); + } + + if (count($result->getFacetGroupList()) > 0) { + $this->io->title('Facets'); + foreach ($result->getFacetGroupList() as $facetGroup) { + $this->io->section($facetGroup->getKey()); + $listing = []; + foreach ($facetGroup->getFacetList() as $facet) { + $listing[] = + $facet->getKey() . + ' (' . $facet->getHits() . ')'; + } + $this->io->listing($listing); + } + } + + $this->io->text('Query-Time: ' . $result->getQueryTime() . 'ms'); + } +} diff --git a/src/Console/Command/Suggest.php b/src/Console/Command/Suggest.php new file mode 100644 index 0000000..a431467 --- /dev/null +++ b/src/Console/Command/Suggest.php @@ -0,0 +1,95 @@ +setHelp('Command to performs a suggest search') + ->addArgument( + 'solr-core', + InputArgument::REQUIRED, + 'Solr core to be used.' + ) + ->addArgument( + 'terms', + InputArgument::REQUIRED | InputArgument::IS_ARRAY, + 'Suggest terms.' 
+ ) + ; + } + + protected function execute( + InputInterface $input, + OutputInterface $output + ): int { + + $this->io = new SymfonyStyle($input, $output); + $this->solrCore = $input->getArgument('solr-core'); + $terms = $input->getArgument('terms'); + + $search = $this->createSearcher(); + $query = $this->buildQuery($terms); + + $result = $search->suggest($query); + + $this->outputResult($result); + + return Command::SUCCESS; + } + + protected function createSearcher(): SolrSuggest + { + $clientFactory = new SolrParameterClientFactory(); + return new SolrSuggest($clientFactory); + } + + protected function buildQuery(array $terms): SuggestQuery + { + $excludeMedia = new ObjectTypeFilter('media', 'media'); + $excludeMedia = $excludeMedia->exclude(); + return new SuggestQuery( + $this->solrCore, + $terms, + [ + new ArchiveFilter(), + $excludeMedia + ] + ); + } + + protected function outputResult(SuggestResult $result): void + { + foreach ($result as $suggest) { + $this->io->text( + $suggest->getTerm() . + ' (' . $suggest->getHits() . ')' + ); + } + $this->io->text('Query-Time: ' . $result->getQueryTime() . 
'ms'); + } +} diff --git a/src/Dto/Indexer/IndexerParameter.php b/src/Dto/Indexer/IndexerParameter.php new file mode 100644 index 0000000..b1ce85e --- /dev/null +++ b/src/Dto/Indexer/IndexerParameter.php @@ -0,0 +1,16 @@ +key; + } + + public function getField(): string + { + return $this->field; + } + + /** + * @return string[] + */ + public function getTerms(): array + { + return $this->terms; + } +} diff --git a/src/Dto/Search/Query/Facet/FacetMultiQuery.php b/src/Dto/Search/Query/Facet/FacetMultiQuery.php new file mode 100644 index 0000000..e93d210 --- /dev/null +++ b/src/Dto/Search/Query/Facet/FacetMultiQuery.php @@ -0,0 +1,29 @@ +key; + } + /** + * @return FacetQuery[] + */ + public function getQueryList(): array + { + return $this->queryList; + } +} diff --git a/src/Dto/Search/Query/Facet/FacetQuery.php b/src/Dto/Search/Query/Facet/FacetQuery.php new file mode 100644 index 0000000..8c02fe3 --- /dev/null +++ b/src/Dto/Search/Query/Facet/FacetQuery.php @@ -0,0 +1,22 @@ +key; + } + public function getQuery(): string + { + return $this->query; + } +} diff --git a/src/Dto/Search/Query/Facet/GroupFacet.php b/src/Dto/Search/Query/Facet/GroupFacet.php new file mode 100644 index 0000000..39dbcdb --- /dev/null +++ b/src/Dto/Search/Query/Facet/GroupFacet.php @@ -0,0 +1,13 @@ +values = $values; + parent::__construct( + $key, + $this->toQuery(), + [$key] + ); + } + + /** + * @param string[] $values + */ + private function toQuery(): string + { + $filterValue = count($this->values) === 1 + ? $this->values[0] + : '(' . implode(' ', $this->values) . ')'; + return $this->field . ':' . $filterValue; + } + + public function exclude(): FieldFilter + { + $field = $this->field; + if (!str_starts_with($field, '-')) { + $field = '-' . 
$field; + } + return new FieldFilter( + $this->getKey(), + $field, + ...$this->values + ); + } +} diff --git a/src/Dto/Search/Query/Filter/Filter.php b/src/Dto/Search/Query/Filter/Filter.php new file mode 100644 index 0000000..6d26936 --- /dev/null +++ b/src/Dto/Search/Query/Filter/Filter.php @@ -0,0 +1,36 @@ +key; + } + + public function getQuery(): string + { + return $this->query; + } + + /** + * @return string[] + */ + public function getTags(): array + { + return $this->tags; + } +} diff --git a/src/Dto/Search/Query/Filter/GroupFilter.php b/src/Dto/Search/Query/Filter/GroupFilter.php new file mode 100644 index 0000000..e5d22e6 --- /dev/null +++ b/src/Dto/Search/Query/Filter/GroupFilter.php @@ -0,0 +1,17 @@ +core; + } + + public function getLocation(): string + { + return $this->location; + } + + /** + * @return Filter[] + */ + public function getFilterList(): array + { + return $this->filterList; + } + + public function getLimit(): int + { + return $this->limit; + } + + /** + * @return array + */ + public function getFieldList(): array + { + return $this->fieldList; + } +} diff --git a/src/Dto/Search/Query/QueryDefaultOperator.php b/src/Dto/Search/Query/QueryDefaultOperator.php new file mode 100644 index 0000000..b485d5d --- /dev/null +++ b/src/Dto/Search/Query/QueryDefaultOperator.php @@ -0,0 +1,9 @@ +core = $builder->getCore(); + $this->text = $builder->getText(); + $this->offset = $builder->getOffset(); + $this->limit = $builder->getLimit(); + $this->filterList = $builder->getFilterList(); + $this->facetList = $builder->getFacetList(); + $this->queryDefaultOperator = $builder->getQueryDefaultOperator(); + } + + public static function builder(): SelectQueryBuilder + { + return new SelectQueryBuilder(); + } + + public function getCore(): string + { + return $this->core; + } + + public function getText(): string + { + return $this->text; + } + + public function getOffset(): int + { + return $this->offset; + } + + public function getLimit(): int + { + return 
$this->limit; + } + + /** + * @return Filter[] + */ + public function getFilterList(): array + { + return $this->filterList; + } + /** + * @return Facet[] + */ + public function getFacetList(): array + { + return $this->facetList; + } + public function getQueryDefaultOperator(): QueryDefaultOperator + { + return $this->queryDefaultOperator; + } +} diff --git a/src/Dto/Search/Query/SelectQueryBuilder.php b/src/Dto/Search/Query/SelectQueryBuilder.php new file mode 100644 index 0000000..38033b5 --- /dev/null +++ b/src/Dto/Search/Query/SelectQueryBuilder.php @@ -0,0 +1,171 @@ + + */ + private array $filterList = []; + + /** + * @var array + */ + private array $facetList = []; + + private QueryDefaultOperator $queryDefaultOperator = QueryDefaultOperator::AND; + + /** + * @internal + */ + public function __construct() + { + } + + /** + * Sets the core the query is executed against. + * + * @throws \InvalidArgumentException if $core is empty in the sense of empty() + */ + public function core(string $core): SelectQueryBuilder + { + if (empty($core)) { + throw new \InvalidArgumentException('core is empty'); + } + $this->core = $core; + return $this; + } + + /** + * @internal + */ + public function getCore(): string + { + return $this->core; + } + + /** + * Sets the search text. + */ + public function text(string $text): SelectQueryBuilder + { + $this->text = $text; + return $this; + } + + /** + * @internal + */ + public function getText(): string + { + return $this->text; + } + + /** + * Sets the offset of the first result to return. + * + * @throws \InvalidArgumentException if $offset is negative + */ + public function offset(int $offset): SelectQueryBuilder + { + if ($offset < 0) { + throw new \InvalidArgumentException('offset is lower then 0'); + } + $this->offset = $offset; + return $this; + } + + /** + * @internal + */ + public function getOffset(): int + { + return $this->offset; + } + + /** + * Sets the maximum number of results to return. + * + * @throws \InvalidArgumentException if $limit is negative + */ + public function limit(int $limit): SelectQueryBuilder + { + if ($limit < 0) { + /* the message names the limit, not the offset, so callers see which value was rejected */ + throw new \InvalidArgumentException('limit is lower then 0'); + } + $this->limit = $limit; + return $this; + } + + /** + * @internal + */ + public function getLimit(): int + { + return $this->limit; + } + + /** + * @param Filter[] $filterList + */ + public function filter(Filter ...$filterList): 
SelectQueryBuilder + { + foreach ($filterList as $filter) { + if (isset($this->filterList[$filter->getKey()])) { + throw new \InvalidArgumentException( + 'filter key "' . $filter->getKey() . + '" already exists' + ); + } + $this->filterList[$filter->getKey()] = $filter; + } + return $this; + } + + /** + * Returns the registered filters as a plain list; the filter keys used + * internally for duplicate detection are dropped by array_values(). + * + * @internal + * @return Filter[] + */ + public function getFilterList(): array + { + return array_values($this->filterList); + } + + /** + * Registers one or more facets, each under its own key. + * + * @param Facet[] $facetList + * @throws \InvalidArgumentException if a facet key was already registered + */ + public function facet(Facet ...$facetList): SelectQueryBuilder + { + foreach ($facetList as $facet) { + if (isset($this->facetList[$facet->getKey()])) { + throw new \InvalidArgumentException( + 'facet key "' . $facet->getKey() . + '" already exists' + ); + } + $this->facetList[$facet->getKey()] = $facet; + } + return $this; + } + + /** + * Returns the registered facets as a plain list; the facet keys used + * internally for duplicate detection are dropped by array_values(). + * + * @internal + * @return Facet[] + */ + public function getFacetList(): array + { + return array_values($this->facetList); + } + + public function queryDefaultOperator( + QueryDefaultOperator $queryDefaultOperator + ): SelectQueryBuilder { + $this->queryDefaultOperator = $queryDefaultOperator; + return $this; + } + + public function getQueryDefaultOperator(): QueryDefaultOperator + { + return $this->queryDefaultOperator; + } + + public function build(): SelectQuery + { + if (empty($this->core)) { + throw new \InvalidArgumentException('core is not set'); + } + return new SelectQuery($this); + } +} diff --git a/src/Dto/Search/Query/SuggestQuery.php b/src/Dto/Search/Query/SuggestQuery.php new file mode 100644 index 0000000..144c168 --- /dev/null +++ b/src/Dto/Search/Query/SuggestQuery.php @@ -0,0 +1,52 @@ +core; + } + /** + * @return string[] + */ + public function getTermList(): array + { + return $this->termList; + } + + public function getLimit(): int + { + return $this->limit; + } + + /** + * @return Filter[] + */ + public function getFilterList(): array + { + return $this->filterList; + } + public function getField(): string + { + return $this->field; + } +} 
diff --git a/src/Dto/Search/Result/Facet.php b/src/Dto/Search/Result/Facet.php new file mode 100644 index 0000000..8e19cd0 --- /dev/null +++ b/src/Dto/Search/Result/Facet.php @@ -0,0 +1,24 @@ +key; + } + + public function getHits(): int + { + return $this->hits; + } +} \ No newline at end of file diff --git a/src/Dto/Search/Result/FacetGroup.php b/src/Dto/Search/Result/FacetGroup.php new file mode 100644 index 0000000..300b2ca --- /dev/null +++ b/src/Dto/Search/Result/FacetGroup.php @@ -0,0 +1,30 @@ +key; + } + + /** + * @return Facet[] + */ + public function getFacetList(): array + { + return $this->facetList; + } +} diff --git a/src/Dto/Search/Result/ResourceSearchResult.php b/src/Dto/Search/Result/ResourceSearchResult.php new file mode 100644 index 0000000..9f3cd68 --- /dev/null +++ b/src/Dto/Search/Result/ResourceSearchResult.php @@ -0,0 +1,65 @@ + + */ +class ResourceSearchResult implements IteratorAggregate +{ + /** + * @param Resource[] $resourceList + * @param FacetGroup[] $facetGroupList + */ + public function __construct( + private readonly int $total, + private readonly int $offset, + private readonly array $resourceList, + private readonly array $facetGroupList, + private readonly int $queryTime + ) { + } + + public function getIterator(): Traversable + { + return new ArrayIterator($this->resourceList); + } + + /** + * @return Resource[] + */ + public function getResourceList(): array + { + return $this->resourceList; + } + + public function getTotal(): int + { + return $this->total; + } + + public function getOffset(): int + { + return $this->offset; + } + + /** + * @return FacetGroup[] + */ + public function getFacetGroupList(): array + { + return $this->facetGroupList; + } + + public function getQueryTime(): int + { + return $this->queryTime; + } +} diff --git a/src/Dto/Search/Result/SuggestResult.php b/src/Dto/Search/Result/SuggestResult.php new file mode 100644 index 0000000..9b4770a --- /dev/null +++ b/src/Dto/Search/Result/SuggestResult.php @@ 
-0,0 +1,30 @@ + + */ +class SuggestResult implements IteratorAggregate +{ + public function __construct( + private readonly array $suggestions, + private readonly int $queryTime + ) { + } + + public function getIterator(): ArrayIterator + { + return new ArrayIterator($this->suggestions); + } + + public function getQueryTime(): int + { + return $this->queryTime; + } +} diff --git a/src/Dto/Search/Result/Suggestion.php b/src/Dto/Search/Result/Suggestion.php new file mode 100644 index 0000000..bfe9293 --- /dev/null +++ b/src/Dto/Search/Result/Suggestion.php @@ -0,0 +1,24 @@ +term; + } + + public function getHits(): int + { + return $this->hits; + } +} diff --git a/src/Exception/MissMatchingResourceFactoryException.php b/src/Exception/MissMatchingResourceFactoryException.php new file mode 100644 index 0000000..e01833a --- /dev/null +++ b/src/Exception/MissMatchingResourceFactoryException.php @@ -0,0 +1,28 @@ +location; + } +} diff --git a/src/Exception/UnexpectedResultException.php b/src/Exception/UnexpectedResultException.php new file mode 100644 index 0000000..0a5ffda --- /dev/null +++ b/src/Exception/UnexpectedResultException.php @@ -0,0 +1,28 @@ +result, + $code, + $previous + ); + } + + public function getResult(): string + { + return $this->result; + } +} diff --git a/src/Indexer.php b/src/Indexer.php new file mode 100644 index 0000000..55d4f58 --- /dev/null +++ b/src/Indexer.php @@ -0,0 +1,21 @@ +basePath = rtrim($basePath, '/'); + } + + /** + * @return string[] + */ + public function findAll(): array + { + + $finder = new Finder(); + $finder->in($this->basePath); + $finder->name('*.php'); + $finder->files(); + + $pathList = []; + foreach ($finder as $file) { + $pathList[] = $this->toRelativePath($file->getPathname()); + } + + return $pathList; + } + + /** + * @param string[] $directories + */ + public function findInSubdirectories(array $directories): array + { + $finder = new Finder(); + foreach ($directories as $directory) { + $finder->in($this->basePath . 
'/' . $directory); + } + $finder->name('*.php'); + $finder->files(); + + $pathList = []; + foreach ($finder as $file) { + $pathList[] = $this->toRelativePath($file->getPathname()); + } + + return $pathList; + } + + private function toRelativePath(string $path): string + { + return substr($path, strlen($this->basePath)); + } +} diff --git a/src/Service/Indexer/SiteKit/DefaultSchema21DocumentEnricher.php b/src/Service/Indexer/SiteKit/DefaultSchema21DocumentEnricher.php new file mode 100644 index 0000000..129e367 --- /dev/null +++ b/src/Service/Indexer/SiteKit/DefaultSchema21DocumentEnricher.php @@ -0,0 +1,290 @@ +sp_id = $resource->getId(); + $doc->sp_name = $resource->getName(); + $doc->sp_anchor = $resource->getData('init.anchor'); + $doc->title = $resource->getData('base.title'); + $doc->description = $resource->getData('metadata.description'); + $doc->sp_objecttype = $resource->getObjectType(); + $doc->sp_canonical = true; + $doc->crawl_process_id = $processId; + + $mediaUrl = $resource->getData('init.mediaUrl'); + if ($mediaUrl !== null) { + $doc->id = $mediaUrl; + $doc->url = $mediaUrl; + } else { + $doc->id = $resource->getLocation(); + $doc->url = $resource->getLocation(); + } + + $spContentType = [$resource->getObjectType()]; + if ($resource->getData('init.media') !== true) { + $spContentType[] = 'article'; + } + $contentSectionTypes = $resource->getData('init.contentSectionTypes'); + if (is_array($contentSectionTypes)) { + $spContentType = array_merge($spContentType, $contentSectionTypes); + } + if ($resource->getData('base.teaser.image') !== null) { + $spContentType[] = 'teaserImage'; + } + if ($resource->getData('base.teaser.image.copyright') !== null) { + $spContentType[] = 'teaserImageCopyright'; + } + if ($resource->getData('base.teaser.headline') !== null) { + $spContentType[] = 'teaserHeadline'; + } + if ($resource->getData('base.teaser.text') !== null) { + $spContentType[] = 'teaserText'; + } + $doc->sp_contenttype = $spContentType; + + $locale = 
$this->getLocaleFromResource($resource); + $lang = $this->toLangFromLocale($locale); + $doc->sp_language = $lang; + $doc->meta_content_language = $lang; + + $doc->sp_changed = $this->toDateTime( + $resource->getData('init.changed') + ); + $doc->sp_date = $this->toDateTime( + $resource->getData('base.date') + ); + + $doc->sp_archive = $resource->getData('base.archive') ?? false; + + $headline = $resource->getData('metadata.headline'); + if (empty($headline)) { + $headline = $resource->getData('base.teaser.headline'); + } + if (empty($headline)) { + $headline = $resource->getData('base.title'); + } + $doc->sp_title = $headline; + + // However, the teaser heading, if specified, must be used for sorting + $sortHeadline = $resource->getData('base.teaser.headline'); + if (empty($sortHeadline)) { + $sortHeadline = $resource->getData('metadata.headline'); + } + if (empty($sortHeadline)) { + $sortHeadline = $resource->getData('base.title'); + } + $doc->sp_sortvalue = $sortHeadline; + + $doc->sp_boost_keywords = $resource->getData('metadata.boostKeywords'); + + $sites = $this->getParentSiteGroupIdList($resource); + + $navigationRoot = $this->navigationLoader->loadRoot( + $resource->getLocation() + ); + $siteGroupId = $navigationRoot->getData('init.siteGroup.id'); + if ($siteGroupId !== null) { + $sites[] = $siteGroupId; + } + $doc->sp_site = array_unique($sites); + + $wktPrimaryList = $resource->getData('base.geo.wkt.primary'); + if (is_array($wktPrimaryList)) { + $allWkt = []; + foreach ($wktPrimaryList as $wkt) { + $allWkt[] = $wkt; + } + if (count($allWkt) > 0) { + $doc->sp_geo_points = $allWkt; + } + } + + $categoryList = $resource->getData('metadata.categories'); + if (is_array($categoryList)) { + $categoryIdList = []; + foreach ($categoryList as $category) { + $categoryIdList[] = $category['id']; + } + $doc->sp_category = $categoryIdList; + } + + $categoryPath = $resource->getData('metadata.categoriesPath'); + if (is_array($categoryPath)) { + $categoryIdPath = []; + 
foreach ($categoryPath as $category) { + $categoryIdPath[] = $category['id']; + } + $doc->sp_category_path = $categoryIdPath; + } + + $groupPath = $resource->getData('init.groupPath'); + $groupPathAsIdList = []; + if (is_array($groupPath)) { + foreach ($groupPath as $group) { + $groupPathAsIdList[] = $group['id']; + } + } + /* + * sp_group is the second-to-last entry of the group path. Paths with + * fewer than two entries have no such entry, so the field is only + * set when it exists instead of reading an undefined array key. + * NOTE(review): assumes an unset sp_group is equivalent to the null + * the old code assigned - confirm against the Solr document class. + */ + $groupCount = count($groupPathAsIdList); + if ($groupCount >= 2) { + $doc->sp_group = $groupPathAsIdList[$groupCount - 2]; + } + $doc->sp_group_path = $groupPathAsIdList; + + $schedulingList = $resource->getData('metadata.scheduling'); + if (is_array($schedulingList) && count($schedulingList) > 0) { + /* the first scheduling entry overrides the date taken from 'base.date' */ + $doc->sp_date = $this->toDateTime($schedulingList[0]['from']); + $dateList = []; + $contentTypeList = []; + foreach ($schedulingList as $scheduling) { + $contentTypeList[] = explode(' ', $scheduling['contentType']); + $dateList[] = $this->toDateTime($scheduling['from']); + } + $doc->sp_contenttype = array_merge( + $doc->sp_contenttype, + ...$contentTypeList + ); + $doc->sp_contenttype = array_unique($doc->sp_contenttype); + + $doc->sp_date_list = $dateList; + } + + $contentType = $resource->getData('base.mime'); + if ($contentType === null) { + $contentType = 'text/html; charset=UTF-8'; + } + $doc->meta_content_type = $contentType; + $doc->content = $resource->getData('searchindexdata.content'); + + $accessType = $resource->getData('init.access.type'); + $groups = $resource->getData('init.access.groups'); + + + if ($accessType === 'allow' && is_array($groups)) { + $doc->include_groups = array_map( + fn($id): int => $this->idWithoutSignature($id), + $groups + ); + } elseif ($accessType === 'deny' && is_array($groups)) { + $doc->exclude_groups = array_map( + fn($id): int => $this->idWithoutSignature($id), + $groups + ); + } else { + $doc->exclude_groups = ['none']; + $doc->include_groups = ['all']; + } + + $doc->sp_source = ['internal']; + + return $doc; + } + + /** + * Returns the last 11 characters of the given id cast to int. + */ + private function idWithoutSignature(string $id): int + { + $s = substr($id, -11); + return (int)$s; + } + + /* Customization + * - 
https://gitlab.sitepark.com/customer-projects/fhdo/blob/develop/fhdo-module/src/publish/php/SP/Fhdo/Component/Content/DetailPage/StartletterIndexSupplier.php#L31 + * - https://gitlab.sitepark.com/apis/sitekit-php/blob/develop/php/SP/SiteKit/Component/Content/NewsdeskRss.php#L235 + * - https://gitlab.sitepark.com/customer-projects/fhdo/blob/develop/fhdo-module/src/publish/php/SP/Fhdo/Component/SearchMetadataExtension.php#L41 + * - https://gitlab.sitepark.com/customer-projects/paderborn/blob/develop/paderborn-module/src/publish/php/SP/Paderborn/Component/FscEntity.php#L67 + * - https://gitlab.sitepark.com/customer-projects/paderborn/blob/develop/paderborn-module/src/publish/php/SP/Paderborn/Component/FscContactPerson.php#L24 + * - https://gitlab.sitepark.com/customer-projects/stadtundland/blob/develop/stadtundland-module/src/publish/php/SP/Stadtundland/Component/ParkingSpaceExpose.php#L38 + * - https://gitlab.sitepark.com/customer-projects/stadtundland/blob/develop/stadtundland-module/src/publish/php/SP/Stadtundland/Component/Expose.php#L38 + * - https://gitlab.sitepark.com/customer-projects/stadtundland/blob/develop/stadtundland-module/src/publish/php/SP/Stadtundland/Component/PurchaseExpose.php#L38 + * - https://gitlab.sitepark.com/customer-projects/stuttgart/blob/develop/stuttgart-module/src/publish/php/SP/Stuttgart/Component/JobOffer.php#L29 + * - https://gitlab.sitepark.com/customer-projects/stuttgart/blob/develop/stuttgart-module/src/publish/php/SP/Stuttgart/Component/EventsCalendarExtension.php#L124 + * - https://gitlab.sitepark.com/ies-modules/citycall/blob/develop/citycall-module/src/main/php/src/SP/CityCall/Component/Intro.php#L51 + * - https://gitlab.sitepark.com/ies-modules/citycall/blob/develop/citycall-module/src/main/php/src/SP/CityCall/Controller/Environment.php#L76 + * - https://gitlab.sitepark.com/ies-modules/sitekit-real-estate/blob/develop/src/publish/php/SP/RealEstate/Component/Expose.php#L47 + */ + + private function 
getLocaleFromResource(Resource $resource): string + { + /* + * Resolution order: the resource's own 'init.locale', then the + * closest entry of 'init.groupPath' that declares a locale + * (searched from the end of the path), finally 'de_DE'. + */ + $locale = $resource->getData('init.locale'); + if ($locale !== null) { + return $locale; + } + $groupPath = $resource->getData('init.groupPath'); + /* + * count() must only run after the is_array() check: when the + * resource has no group path the value may not be an array, and + * count() throws a TypeError for such values on PHP 8. + */ + if (is_array($groupPath)) { + for ($i = count($groupPath) - 1; $i >= 0; $i--) { + $group = $groupPath[$i]; + if (isset($group['locale'])) { + return $group['locale']; + } + } + } + + return 'de_DE'; + } + + /** + * Returns the part of the locale before the first underscore, or the + * locale itself if it contains no underscore. + */ + private function toLangFromLocale(string $locale): string + { + if (str_contains($locale, '_')) { + $parts = explode('_', $locale); + return $parts[0]; + } + return $locale; + } + + /** + * Converts a timestamp into a DateTime via setTimestamp(). + * + * @return DateTime|null null if the timestamp is null or not positive + */ + private function toDateTime(?int $timestamp): ?DateTime + { + if ($timestamp === null) { + return null; + } + if ($timestamp <= 0) { + return null; + } + + $dateTime = new DateTime(); + $dateTime->setTimestamp($timestamp); + return $dateTime; + } + + /** + * Collects the 'siteGroup' id of every navigation parent that has one. + * + * @return array empty if the resource has no navigation parents + */ + private function getParentSiteGroupIdList(Resource $resource): array + { + $parents = $this->getNavigationParents($resource); + if (empty($parents)) { + return []; + } + + $siteGroupIdList = []; + foreach ($parents as $parent) { + if (isset($parent['siteGroup']['id'])) { + $siteGroupIdList[] = $parent['siteGroup']['id']; + } + } + + return $siteGroupIdList; + } + + /** + * @return array + */ + private function getNavigationParents(Resource $resource): array + { + $parents = $resource->getData('base.trees.navigation.parents'); + return $parents ?? 
[];
+    }
+}
diff --git a/src/Service/Indexer/SolrIndexer.php b/src/Service/Indexer/SolrIndexer.php
new file mode 100644
index 0000000..4bf37ee
--- /dev/null
+++ b/src/Service/Indexer/SolrIndexer.php
@@ -0,0 +1,215 @@
+ $documentEnricherList
+     */
+    public function __construct(
+        private readonly iterable $documentEnricherList,
+        private readonly IndexerProgressHandler $indexerProgressHandler,
+        private readonly ResourceLoader $resourceLoader,
+        private readonly SolrClientFactory $clientFactory,
+        private readonly string $source
+    ) {
+    }
+
+    /**
+     * Indexes the resources found below the base path of the parameter.
+     *
+     * All locations are collected if no directories are given, otherwise
+     * only the locations found in the listed subdirectories.
+     */
+    public function index(IndexerParameter $parameter): void
+    {
+        $finder = new LocationFinder($parameter->basePath);
+        if (empty($parameter->directories)) {
+            $pathList = $finder->findAll();
+        } else {
+            $pathList = $finder->findInSubdirectories($parameter->directories);
+        }
+        $this->indexResources($parameter, $pathList);
+    }
+
+    /**
+     * Indexes the paths in chunks of 500 and commits at the end.
+     *
+     * Documents of other process ids are only deleted if a cleanup
+     * threshold greater than 0 is set and at least that many resources
+     * were indexed successfully. The progress handler is always finished,
+     * even if indexing throws.
+     *
+     * @param string[] $pathList
+     */
+    private function indexResources(
+        IndexerParameter $parameter,
+        array $pathList
+    ): void {
+        if (count($pathList) === 0) {
+            return;
+        }
+
+        $total = count($pathList);
+        $this->indexerProgressHandler->start($total);
+
+        // marks all documents written by this run
+        $processId = uniqid('', true);
+        $offset = 0;
+        $chunkSize = 500;
+        $successCount = 0;
+
+        try {
+            while (true) {
+                $indexedCount = $this->indexChunks(
+                    $processId,
+                    $parameter->coreId,
+                    $pathList,
+                    $offset,
+                    $chunkSize
+                );
+                if ($indexedCount === false) {
+                    break;
+                }
+                $successCount += $indexedCount;
+                $offset += $chunkSize;
+            }
+
+            if (
+                $parameter->cleanupThreshold > 0 &&
+                $successCount >= $parameter->cleanupThreshold
+            ) {
+                $this->deleteByProcessId($parameter->coreId, $processId);
+            }
+            $this->commit($parameter->coreId);
+        } finally {
+            $this->indexerProgressHandler->finish();
+        }
+    }
+
+    /**
+     * Loads one chunk of resources and adds it to the Solr core.
+     *
+     * @param string[] $pathList
+     * @return int|false number of indexed resources (0 if the update
+     *  failed or nothing could be loaded), false if the offset is
+     *  beyond the end of the path list
+     */
+    private function indexChunks(
+        string $processId,
+        string $solrCore,
+        array $pathList,
+        int $offset,
+        int $length
+    ): int|false {
+        $resourceList = $this->loadResources(
+            $pathList,
+            $offset,
+            $length
+        );
+        if ($resourceList === false) {
+            return false;
+        }
+        $this->indexerProgressHandler->advance(count($resourceList));
+        if ($resourceList === []) {
+            // every resource of this chunk failed to load, so there is
+            // nothing to send to Solr
+            return 0;
+        }
+        $result = $this->add($solrCore, $processId, $resourceList);
+
+        if ($result->getStatus() !== 0) {
+            $this->indexerProgressHandler->error(new Exception(
+                $result->getResponse()->getStatusMessage()
+            ));
+            return 0;
+        }
+
+        return count($resourceList);
+    }
+
+    /**
+     * Loads the resources of one chunk of the path list.
+     *
+     * Paths that cannot be loaded are reported to the progress handler
+     * and left out of the returned list.
+     *
+     * @param string[] $pathList
+     * @return Resource[]|false false if the offset is beyond the end of
+     *  the path list
+     */
+    private function loadResources(
+        array $pathList,
+        int $offset,
+        int $length
+    ): array | false {
+
+        // array_slice() clamps the chunk to the end of the list
+        $chunk = array_slice($pathList, $offset, $length);
+        if ($chunk === []) {
+            return false;
+        }
+
+        $resourceList = [];
+        foreach ($chunk as $path) {
+            try {
+                $resourceList[] = $this->resourceLoader->load($path);
+            } catch (InvalidResourceException $e) {
+                $this->indexerProgressHandler->error($e);
+            }
+        }
+        return $resourceList;
+    }
+
+    /**
+     * Creates one document per resource, passes it through all document
+     * enrichers and sends the documents to the Solr core.
+     *
+     * @param string $solrCore
+     * @param string $processId
+     * @param Resource[] $resources
+     * @return ResultInterface|Result
+     */
+    private function add(
+        string $solrCore,
+        string $processId,
+        array $resources
+    ): ResultInterface|Result {
+        $client = $this->clientFactory->create($solrCore);
+
+        $update = $client->createUpdate();
+
+        $documents = [];
+        foreach ($resources as $resource) {
+            $doc = $update->createDocument();
+            foreach ($this->documentEnricherList as $enricher) {
+                $doc = $enricher->enrichDocument(
+                    $resource,
+                    $doc,
+                    $processId
+                );
+            }
+            $documents[] = $doc;
+        }
+
+        // add the documents to the update query; no commit is added
+        // here, it is sent separately by commit()
+        $update->addDocuments($documents);
+
+        // this executes the query and returns the result
+        return $client->update($update);
+    }
+
+    /**
+     * Deletes the documents of this source whose crawl_process_id is
+     * not the given process id.
+     */
+    private function deleteByProcessId(string $core, string $processId): void
+    {
+        $this->deleteByQuery(
+            $core,
+            '-crawl_process_id:' . $processId . ' AND ' .
+            ' sp_source:' . $this->source
+        );
+    }
+
+    /**
+     * Sends a delete-by-query update to the given core.
+     */
+    private function deleteByQuery(string $core, string $query): void
+    {
+        $client = $this->clientFactory->create($core);
+        $update = $client->createUpdate();
+        $update->addDeleteQuery($query);
+        $client->update($update);
+    }
+
+    /**
+     * Sends a commit and an optimize command to the given core.
+     */
+    private function commit(string $core): void
+    {
+        $client = $this->clientFactory->create($core);
+        $update = $client->createUpdate();
+        $update->addCommit();
+        $update->addOptimize();
+        $client->update($update);
+    }
+}
diff --git a/src/Service/Search/ExternalResourceFactory.php b/src/Service/Search/ExternalResourceFactory.php
new file mode 100644
index 0000000..9d09cf9
--- /dev/null
+++ b/src/Service/Search/ExternalResourceFactory.php
@@ -0,0 +1,38 @@
+url;
+        return (
+            str_starts_with($location, 'http://') ||
+            str_starts_with($location, 'https://')
+        );
+    }
+
+    /**
+     * Builds a resource of the object type 'external' from the url and
+     * title of the document.
+     */
+    public function create(Document $document): Resource
+    {
+        return new Resource(
+            $document->url,
+            '',
+            $document->title,
+            'external',
+            []
+        );
+    }
+}
diff --git a/src/Service/Search/InternalMediaResourceFactory.php b/src/Service/Search/InternalMediaResourceFactory.php
new file mode 100644
index 0000000..753e138
--- /dev/null
+++ b/src/Service/Search/InternalMediaResourceFactory.php
@@ -0,0 +1,42 @@
+getMetaLocation($document);
+        return $this->resourceLoader->exists($metaLocation);
+    }
+
+    /**
+     * Loads the resource from the meta location of the document.
+     */
+    public function create(Document $document): Resource
+    {
+        $metaLocation = $this->getMetaLocation($document);
+        return $this->resourceLoader->load($metaLocation);
+    }
+
+    /**
+     * The meta location is the document url with '.meta.php' appended.
+     */
+    private function getMetaLocation(Document $document): string
+    {
+        return $document->url . '.meta.php';
+    }
+}
diff --git a/src/Service/Search/InternalResourceFactory.php b/src/Service/Search/InternalResourceFactory.php
new file mode 100644
index 0000000..0217adc
--- /dev/null
+++ b/src/Service/Search/InternalResourceFactory.php
@@ -0,0 +1,33 @@
+url, '.php');
+    }
+
+    /**
+     * Loads the resource from the url of the document.
+     */
+    public function create(Document $document): Resource
+    {
+        return $this->resourceLoader->load($document->url);
+    }
+}
diff --git a/src/Service/Search/ResourceFactory.php b/src/Service/Search/ResourceFactory.php
new file mode 100644
index 0000000..6fbaea0
--- /dev/null
+++ b/src/Service/Search/ResourceFactory.php
@@ -0,0 +1,27 @@
+getEDisMax();
+        $edismax->setQueryFields(implode(' ', [
+            'sp_title^1.4',
+            'keywords^1.2',
+            'description^1.0',
+            'title^1.0',
+            'url^0.9',
+            'content^0.8'
+        ]));
+        $edismax->setPhraseFields(implode(' ', [
+            'sp_title^1.5',
+            'description^1',
+            'content^0.8'
+        ]));
+        $edismax->setBoostQuery('sp_objecttype:searchTip^100');
+
+        return $query;
+    }
+}
diff --git a/src/Service/Search/SolrMoreLikeThis.php b/src/Service/Search/SolrMoreLikeThis.php
new file mode 100644
index 0000000..11e87b8
--- /dev/null
+++ b/src/Service/Search/SolrMoreLikeThis.php
@@ -0,0 +1,77 @@
+ $resourceFactoryList
+     */
+    public function __construct(
+        private readonly SolrClientFactory $clientFactory,
+        private readonly SolrResultToResourceResolver $resultToResourceResolver
+    ) {
+    }
+
+    /**
+     * Executes a more-like-this query on the core named by the query
+     * and resolves the result documents to resources.
+     */
+    public function moreLikeThis(MoreLikeThisQuery $query): ResourceSearchResult
+    {
+        $client = $this->clientFactory->create($query->getCore());
+        $solrQuery = $this->buildSolrQuery($client, $query);
+        $result = $client->execute($solrQuery);
+        return $this->buildResult($result);
+    }
+
+    /**
+     * Builds the Solr more-like-this query for the location of the
+     * given query. Media documents are always filtered out.
+     */
+    private function buildSolrQuery(
+        Client $client,
+        MoreLikeThisQuery $query
+    ): SolrMoreLikeThisQuery {
+
+        $solrQuery = $client->createMoreLikeThis();
+        $solrQuery->setOmitHeader(false);
+        // escapePhrase() quotes the location and escapes embedded quotes
+        // and backslashes, so that the location cannot break the query
+        $solrQuery->setQuery(
+            'url:' . $solrQuery->getHelper()->escapePhrase(
+                $query->getLocation()
+            )
+        );
+        $solrQuery->setMltFields($query->getFieldList());
+        $solrQuery->setRows($query->getLimit());
+        $solrQuery->setMinimumTermFrequency(2);
+        $solrQuery->setMatchInclude(true);
+        $solrQuery->createFilterQuery('nomedia')
+            ->setQuery('-sp_objecttype:media');
+
+        // Filter
+        foreach ($query->getFilterList() as $filter) {
+            $solrQuery->createFilterQuery($filter->getKey())
+                ->setQuery($filter->getQuery())
+                ->setTags($filter->getTags());
+        }
+
+        return $solrQuery;
+    }
+
+    /**
+     * Maps the Solr result to a search result with offset 0 and
+     * without facet groups.
+     */
+    private function buildResult(
+        ResultInterface $result
+    ): ResourceSearchResult {
+
+        $resourceList = $this->resultToResourceResolver
+            ->loadResourceList($result);
+
+        return new ResourceSearchResult(
+            $result->getNumFound(),
+            0,
+            $resourceList,
+            [],
+            $result->getQueryTime()
+        );
+    }
+}
diff --git a/src/Service/Search/SolrQueryModifier.php b/src/Service/Search/SolrQueryModifier.php
new file mode 100644
index 0000000..ab51430
--- /dev/null
+++ b/src/Service/Search/SolrQueryModifier.php
@@ -0,0 +1,17 @@
+ $resourceFactoryList
+     */
+    public function __construct(
+        private readonly iterable $resourceFactoryList,
+        private readonly LoggerInterface $logger = new NullLogger()
+    ) {
+    }
+
+    /**
+     * Resolves every document of the result to a resource. Documents
+     * whose resource cannot be created are logged and skipped.
+     *
+     * @return Resource[]
+     */
+    public function loadResourceList(ResultInterface $result): array
+    {
+        $resourceList = [];
+        foreach ($result as $document) {
+            try {
+                $resourceList[] = $this->loadResource($document);
+            } catch (\Exception $e) {
+                $this->logger->error($e->getMessage(), ['exception' => $e]);
+            }
+        }
+        return $resourceList;
+    }
+
+    /**
+     * Creates the resource with the first factory accepting the document.
+     *
+     * @throws MissMatchingResourceFactoryException if no factory accepts
+     *  the document
+     */
+    private function loadResource(Document $document): Resource
+    {
+
+        foreach ($this->resourceFactoryList as $resourceFactory) {
+            if ($resourceFactory->accept($document)) {
+                return $resourceFactory->create($document);
+            }
+        }
+
+        throw new MissMatchingResourceFactoryException($document->url);
+    }
+}
diff --git a/src/Service/Search/SolrSelect.php b/src/Service/Search/SolrSelect.php
new file mode 100644
index 0000000..b5b289a
--- /dev/null
+++
b/src/Service/Search/SolrSelect.php
@@ -0,0 +1,260 @@
+ $solrQueryModifierList
+     */
+    public function __construct(
+        private readonly SolrClientFactory $clientFactory,
+        private readonly iterable $solrQueryModifierList,
+        private readonly SolrResultToResourceResolver $resultToResourceResolver
+    ) {
+    }
+
+    /**
+     * Executes the select query on the core named by the query and
+     * resolves the result documents to resources.
+     */
+    public function select(SelectQuery $query): ResourceSearchResult
+    {
+        $client = $this->clientFactory->create($query->getCore());
+
+        $solrQuery = $this->buildSolrQuery($client, $query);
+        $result = $client->execute($solrQuery);
+        return $this->buildResult($query, $result);
+    }
+
+    /**
+     * Builds the Solr select query: query modifiers first, then paging,
+     * fields, text, default operator, filters and facets.
+     */
+    private function buildSolrQuery(
+        Client $client,
+        SelectQuery $query
+    ): SolrSelectQuery {
+
+        $solrQuery = $client->createSelect();
+
+        // supplements the query with standard values, e.g. for boosting
+        foreach ($this->solrQueryModifierList as $solrQueryModifier) {
+            $solrQuery = $solrQueryModifier->modify($solrQuery);
+        }
+
+        $solrQuery->setStart($query->getOffset());
+        $solrQuery->setRows($query->getLimit());
+
+        // to get query-time
+        $solrQuery->setOmitHeader(false);
+
+        $this->addRequiredFieldListToSolrQuery($solrQuery);
+        $this->addTextFilterToSolrQuery($solrQuery, $query->getText());
+        $this->addQueryDefaultOperatorToSolrQuery(
+            $solrQuery,
+            $query->getQueryDefaultOperator()
+        );
+        $this->addFilterQueriesToSolrQuery(
+            $solrQuery,
+            $query->getFilterList()
+        );
+        $this->addFacetListToSolrQuery(
+            $solrQuery,
+            $query->getFacetList()
+        );
+
+        return $solrQuery;
+    }
+
+    /**
+     * Makes sure that the field 'url' is part of the field list.
+     */
+    private function addRequiredFieldListToSolrQuery(
+        SolrSelectQuery $solrQuery
+    ): void {
+        $fields = $solrQuery->getFields();
+        if (in_array('url', $fields, true)) {
+            return;
+        }
+        $fields[] = 'url';
+        $solrQuery->setFields($fields);
+    }
+
+    /**
+     * Sets the main query from the whitespace separated search text.
+     * Every term is escaped; nothing is set for a blank text.
+     */
+    private function addTextFilterToSolrQuery(
+        SolrSelectQuery $solrQuery,
+        string $text
+    ): void {
+        // no empty() check: the text "0" is a valid search term.
+        // Consecutive blanks must not produce empty terms.
+        $terms = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
+        if ($terms === false || $terms === []) {
+            return;
+        }
+        $helper = $solrQuery->getHelper();
+        $terms = array_map(
+            static fn (string $term): string => $helper->escapeTerm($term),
+            $terms
+        );
+        $solrQuery->setQuery(implode(' ', $terms));
+    }
+
+    /**
+     * Sets the default operator to OR if requested, otherwise to AND.
+     */
+    private function addQueryDefaultOperatorToSolrQuery(
+        SolrSelectQuery $solrQuery,
+        QueryDefaultOperator $operator
+    ): void {
+        if ($operator === QueryDefaultOperator::OR) {
+            $solrQuery->setQueryDefaultOperator(
+                SolrSelectQuery::QUERY_OPERATOR_OR
+            );
+        } else {
+            $solrQuery->setQueryDefaultOperator(
+                SolrSelectQuery::QUERY_OPERATOR_AND
+            );
+        }
+    }
+
+    /**
+     * Adds one tagged filter query per filter.
+     *
+     * @param Filter[] $filterList
+     */
+    private function addFilterQueriesToSolrQuery(
+        SolrSelectQuery $solrQuery,
+        array $filterList
+    ): void {
+
+        foreach ($filterList as $filter) {
+            $solrQuery->createFilterQuery($filter->getKey())
+                ->setQuery($filter->getQuery())
+                ->setTags($filter->getTags());
+        }
+    }
+
+    /**
+     * Adds every facet to the facet set of the Solr query.
+     *
+     * @param \Atoolo\Search\Dto\Search\Query\Facet\Facet[] $facetList
+     * @throws \InvalidArgumentException for an unsupported facet class
+     */
+    private function addFacetListToSolrQuery(
+        SolrSelectQuery $solrQuery,
+        array $facetList
+    ): void {
+        foreach ($facetList as $facet) {
+            if ($facet instanceof FacetField) {
+                $this->addFacetFieldToSolrQuery($solrQuery, $facet);
+            } elseif ($facet instanceof FacetQuery) {
+                $this->addFacetQueryToSolrQuery($solrQuery, $facet);
+            } elseif ($facet instanceof FacetMultiQuery) {
+                $this->addFacetMultiQueryToSolrQuery($solrQuery, $facet);
+            } else {
+                throw new \InvalidArgumentException(
+                    'Unsupported facet-class ' . get_class($facet)
+                );
+            }
+        }
+    }
+
+    /**
+     * https://solarium.readthedocs.io/en/stable/queries/select-query/building-a-select-query/components/facetset-component/facet-field/
+     */
+    private function addFacetFieldToSolrQuery(
+        SolrSelectQuery $solrQuery,
+        FacetField $facet
+    ): void {
+        $facetSet = $solrQuery->getFacetSet();
+        // https://solr.apache.org/guide/solr/latest/query-guide/faceting.html#tagging-and-excluding-filters
+        $fieldWithExclude = '{!ex=' . $facet->getKey() . '}' .
+            $facet->getField();
+        $facetSet->createFacetField($facet->getKey())
+            ->setField($fieldWithExclude)
+            ->setTerms($facet->getTerms());
+    }
+
+    /**
+     * https://solarium.readthedocs.io/en/stable/queries/select-query/building-a-select-query/components/facetset-component/facet-query/
+     */
+    private function addFacetQueryToSolrQuery(
+        SolrSelectQuery $solrQuery,
+        FacetQuery $facet
+    ): void {
+        $facetSet = $solrQuery->getFacetSet();
+        $facetSet->createFacetQuery($facet->getKey())
+            ->setQuery($facet->getQuery());
+    }
+
+    /**
+     * https://solarium.readthedocs.io/en/stable/queries/select-query/building-a-select-query/components/facetset-component/facet-multiquery/
+     */
+    private function addFacetMultiQueryToSolrQuery(
+        SolrSelectQuery $solrQuery,
+        FacetMultiQuery $facet
+    ): void {
+        $facetSet = $solrQuery->getFacetSet();
+        $solrFacet = $facetSet->createFacetMultiQuery($facet->getKey());
+        foreach ($facet->getQueryList() as $facetQuery) {
+            $solrFacet->createQuery(
+                $facetQuery->getKey(),
+                $facetQuery->getQuery()
+            );
+        }
+    }
+
+    /**
+     * Maps the Solr result to a search result with the resolved
+     * resources and the facet groups of the requested facets.
+     */
+    private function buildResult(
+        SelectQuery $query,
+        ResultInterface $result
+    ): ResourceSearchResult {
+
+        $resourceList = $this->resultToResourceResolver
+            ->loadResourceList($result);
+        $facetGroupList = $this->buildFacetGroupList($query, $result);
+
+        return new ResourceSearchResult(
+            $result->getNumFound(),
+            $query->getOffset(),
+            $resourceList,
+            $facetGroupList,
+            $result->getQueryTime()
+        );
+    }
+
+    /**
+     * Builds one facet group per requested facet.
+     *
+     * @param ResultInterface $result
+     * @return FacetGroup[]
+     */
+    private function buildFacetGroupList(
+        SelectQuery $query,
+        ResultInterface $result
+    ): array {
+
+        $facetSet = $result->getFacetSet();
+        if ($facetSet === null) {
+            return [];
+        }
+
+        $facetGroupList = [];
+        foreach ($query->getFacetList() as $facet) {
+            $key = $facet->getKey();
+            $solrFacet = $facetSet->getFacet($key);
+            // the facet set may hold no result for a requested key; an
+            // empty group keeps one group per requested facet
+            $facetGroupList[] = $solrFacet === null
+                ? new FacetGroup($key, [])
+                : $this->buildFacetGroup($key, $solrFacet);
+        }
+        return $facetGroupList;
+    }
+
+    /**
+     * Converts the value/count pairs of a Solr facet to a facet group.
+     */
+    private function buildFacetGroup(
+        string $key,
+        FacetResultInterface $solrFacet
+    ): FacetGroup {
+        $facetList = [];
+        foreach ($solrFacet as $value => $count) {
+            $facetList[] = new Facet((string)$value, $count);
+        }
+        return new FacetGroup($key, $facetList);
+    }
+}
diff --git a/src/Service/Search/SolrSuggest.php b/src/Service/Search/SolrSuggest.php
new file mode 100644
index 0000000..17b9a48
--- /dev/null
+++ b/src/Service/Search/SolrSuggest.php
@@ -0,0 +1,127 @@
+clientFactory->create($query->getCore());
+
+        $solrQuery = $this->buildSolrQuery($client, $query);
+        $solrResult = $client->select($solrQuery);
+        return $this->buildResult($solrResult, $query->getField());
+    }
+
+    /**
+     * Builds a select query without result rows that only requests
+     * spellcheck data and the facet terms of the suggest field which
+     * start with the given terms.
+     */
+    private function buildSolrQuery(
+        Client $client,
+        SuggestQuery $query
+    ): SolrSelectQuery {
+        $solrQuery = $client->createSelect();
+        $solrQuery->addParam("spellcheck", "true");
+        $solrQuery->addParam("spellcheck.accuracy", "0.6");
+        $solrQuery->addParam("spellcheck.onlyMorePopular", "false");
+        $solrQuery->addParam("spellcheck.count", "15");
+        $solrQuery->addParam("spellcheck.maxCollations", "5");
+        $solrQuery->addParam("spellcheck.maxCollationTries", "15");
+        $solrQuery->addParam("spellcheck.collate", "true");
+        $solrQuery->addParam("spellcheck.collateExtendedResults", "true");
+        $solrQuery->addParam("spellcheck.extendedResults", "true");
+        $solrQuery->addParam("facet", "true");
+        $solrQuery->addParam("facet.sort", "count");
+        $solrQuery->addParam("facet.method", "enum");
+        $solrQuery->addParam(
+            "facet.prefix",
+            implode(' ', $query->getTermList())
+        );
+        $solrQuery->addParam("facet.limit", $query->getLimit());
+        $solrQuery->addParam("facet.field", $query->getField());
+
+        $solrQuery->setOmitHeader(false);
+        $solrQuery->setStart(0);
+        $solrQuery->setRows(0);
+
+        // Filter
+        foreach ($query->getFilterList() as $filter) {
+            $solrQuery->createFilterQuery($filter->getKey())
+                ->setQuery($filter->getQuery())
+                ->setTags($filter->getTags());
+        }
+
+        return $solrQuery;
+    }
+
+    /**
+     * Parses the raw response body into a suggest result.
+     */
+    private function buildResult(
+        SolrSelectResult $solrResult,
+        string $resultField
+    ): SuggestResult {
+        $suggestions = $this->parseSuggestion(
+            $solrResult->getResponse()->getBody(),
+            $resultField
+        );
+        return new SuggestResult($suggestions, $solrResult->getQueryTime());
+    }
+
+    /**
+     * Reads the suggestions from the facet counts of the given field.
+     *
+     * @throws UnexpectedResultException if the body is not valid JSON
+     * @return Suggestion[]
+     */
+    private function parseSuggestion(
+        string $responseBody,
+        string $facetField
+    ): array {
+        try {
+            // default depth: the extended spellcheck section requested
+            // by the query can be nested deeper than the facet counts
+            $json = json_decode(
+                $responseBody,
+                true,
+                512,
+                JSON_THROW_ON_ERROR
+            );
+            $facets =
+                $json['facet_counts']['facet_fields'][$facetField]
+                ?? [];
+
+            // flat list of alternating term and count entries; a
+            // trailing term without a count is ignored
+            $len = count($facets) - 1;
+
+            $suggestions = [];
+            for ($i = 0; $i < $len; $i += 2) {
+                $term = $facets[$i];
+                $hits = $facets[$i + 1];
+                $suggestions[] = new Suggestion($term, $hits);
+            }
+
+            return $suggestions;
+        } catch (JsonException $e) {
+            throw new UnexpectedResultException(
+                $responseBody,
+                "Invalid JSON for suggest result",
+                0,
+                $e
+            );
+        }
+    }
+}
diff --git a/src/Service/SolrClientFactory.php b/src/Service/SolrClientFactory.php
new file mode 100644
index 0000000..ce26af6
--- /dev/null
+++ b/src/Service/SolrClientFactory.php
@@ -0,0 +1,16 @@
+setTimeout(30);
+        //$adapter->setProxy('http://localhost:8889');
+        $eventDispatcher = new EventDispatcher();
+        $config = [
+            'endpoint' => [
+                $host => [
+                    'scheme' => 'https',
+                    'host' => $host,
+                    'port' => 443,
+                    'path' => '',
+                    'core' => $core,
+                ]
+            ]
+        ];
+
+        // create a client instance
+        return new Client(
+            $adapter,
+            $eventDispatcher,
+            $config
+        );
+    }
+}
diff --git a/src/SuggestSearcher.php b/src/SuggestSearcher.php
new file mode 100644
index 0000000..e5ff274
--- /dev/null
+++ b/src/SuggestSearcher.php
@@ -0,0 +1,19 @@
+get('atoolo:indexer');
+        $this->assertInstanceOf(
+            Indexer::class,
+            $command,
+            'unexpected indexer command'
+        );
+    }
+}
diff --git a/test/Console/Command/IndexerTest.php b/test/Console/Command/IndexerTest.php
new file mode 100644
index 0000000..855b898
--- /dev/null
+++ b/test/Console/Command/IndexerTest.php
@@ -0,0 +1,41 @@
+find('atoolo:indexer');
+        $commandTester = new CommandTester($command);
+        $commandTester->execute([
+            // pass arguments to the helper; the command declares both
+            // 'solr-core' and 'resource-dir' as required arguments
+            'solr-core' => 'test',
+            'resource-dir' => 'abc',
+
+            // prefix the key with two dashes when passing options,
+            // e.g: '--some-option' => 'option_value',
+            // use brackets for testing array value,
+            // e.g: '--some-option' => ['option_value'],
+        ]);
+
+        // NOTE(review): execute() runs the indexer of the command, so
+        // this presumably needs a stubbed indexer to pass - confirm
+        $commandTester->assertCommandIsSuccessful();
+
+        // the output of the command in the console; without directories
+        // the command prints the title 'Index all resources'
+        $output = $commandTester->getDisplay();
+        $this->assertStringContainsString('Index all resources', $output);
+
+        // ...
+    }
+
+}
diff --git a/test/Service/IndexerTest.php b/test/Service/IndexerTest.php
new file mode 100644
index 0000000..ae1a00d
--- /dev/null
+++ b/test/Service/IndexerTest.php
@@ -0,0 +1,19 @@
+index();
+         */
+    }
+}