diff --git a/composer.json b/composer.json index cb78212..aef39f8 100644 --- a/composer.json +++ b/composer.json @@ -21,14 +21,15 @@ "prefer-stable": true, "require": { "php": ">=8.1 <8.4.0", - "atoolo/resource": "dev-feature/hierarchy-loader", + "atoolo/resource": "dev-feature/resource-base-locator", "solarium/solarium": "^6.3", - "symfony/config": "^6.3", - "symfony/console": "^6.3", - "symfony/dependency-injection": "^6.3", - "symfony/event-dispatcher": "^6.3", - "symfony/finder": "^6.3", - "symfony/yaml": "^6.3" + "symfony/config": "^6.3 | ^7.0", + "symfony/console": "^6.3 | ^7.0", + "symfony/dependency-injection": "^6.3 | ^7.0", + "symfony/event-dispatcher": "^6.3 | ^7.0", + "symfony/finder": "^6.3 | ^7.0", + "symfony/yaml": "^6.3 | ^7.0", + "symfony/lock": "^6.3 | ^7.0" }, "require-dev": { "dealerdirect/phpcodesniffer-composer-installer": "^1.0", @@ -77,5 +78,19 @@ "*": "dist" }, "sort-packages": true + }, + "extra": { + "composer-link": { + "atoolo/resource": { + "dev": false, + "version": "dev-feature/hierarchy-loader" + } + } + }, + "repositories": { + "atoolo/resource": { + "type": "path", + "url": "/home/veltrup/.cache/composer/link/atoolo/resource" + } } } diff --git a/src/Console/Command/Indexer.php b/src/Console/Command/Indexer.php index 491a400..8629b38 100644 --- a/src/Console/Command/Indexer.php +++ b/src/Console/Command/Indexer.php @@ -7,6 +7,7 @@ use Atoolo\Resource\Exception\InvalidResourceException; use Atoolo\Resource\Loader\SiteKitLoader; use Atoolo\Resource\Loader\SiteKitNavigationHierarchyLoader; +use Atoolo\Resource\Loader\StaticResourceBaseLocator; use Atoolo\Search\Console\Command\Io\IndexerProgressProgressBar; use Atoolo\Search\Dto\Indexer\IndexerParameter; use Atoolo\Search\Service\Indexer\SiteKit\DefaultSchema21DocumentEnricher; @@ -28,12 +29,19 @@ class Indexer extends Command { private IndexerProgressProgressBar $progressBar; private SymfonyStyle $io; + + private InputInterface $input; private string $resourceDir; protected function 
configure(): void { $this ->setHelp('Command to fill a search index') + ->addArgument( + 'solr-connection-url', + InputArgument::REQUIRED, + 'Solr connection url.' + ) ->addArgument( 'solr-core', InputArgument::REQUIRED, @@ -66,16 +74,14 @@ protected function execute( OutputInterface $output ): int { + $this->input = $input; $this->io = new SymfonyStyle($input, $output); $this->progressBar = new IndexerProgressProgressBar($output); - $this->resourceDir = $this->getStringArgument( - $input, - 'resource-dir' - ); + $this->resourceDir = $this->getStringArgument('resource-dir'); $directories = (array)$input->getArgument('directories'); $cleanupThreshold = empty($directories) - ? $this->getIntArgument($input, 'cleanup-threshold') + ? $this->getIntArgument('cleanup-threshold', 0) : 0; if (empty($directories)) { @@ -86,8 +92,7 @@ protected function execute( } $parameter = new IndexerParameter( - $this->getStringArgument($input, 'solr-core'), - $this->resourceDir, + $this->getStringArgument('solr-core'), $cleanupThreshold, $directories ); @@ -100,25 +105,23 @@ protected function execute( return Command::SUCCESS; } - private function getStringArgument( - InputInterface $input, - string $name - ): string { - $value = $input->getArgument($name); + private function getStringArgument(string $name): string + { + $value = $this->input->getArgument($name); if (!is_string($value)) { throw new InvalidArgumentException( $name . ' must be a string' ); } - return strval($value); - return (string)$value; + return $value; } - private function getIntArgument( - InputInterface $input, - string $name - ): int { - $value = $input->getArgument($name); + private function getIntArgument(string $name, int $default): int + { + if (!$this->input->hasArgument($name)) { + return $default; + } + $value = $this->input->getArgument($name); if (!is_int($value)) { throw new InvalidArgumentException( $name . 
' must be a integer' @@ -143,7 +146,10 @@ protected function errorReport(): void protected function createIndexer(): SolrIndexer { - $resourceLoader = new SiteKitLoader($this->resourceDir); + $resourceBaseLocator = new StaticResourceBaseLocator( + $this->resourceDir + ); + $resourceLoader = new SiteKitLoader($resourceBaseLocator); $navigationLoader = new SiteKitNavigationHierarchyLoader( $resourceLoader ); @@ -151,10 +157,21 @@ protected function createIndexer(): SolrIndexer $navigationLoader ); - $clientFactory = new SolrParameterClientFactory(); + $url = parse_url($this->getStringArgument('solr-connection-url')); + + $clientFactory = new SolrParameterClientFactory( + $url['scheme'], + $url['host'], + $url['port'] ?? ($url['scheme'] === 'https' ? 443 : 8983), + $url['path'] ?? '', + null, + 0 + ); + return new SolrIndexer( [$schema21], $this->progressBar, + $resourceBaseLocator, $resourceLoader, $clientFactory, 'internal' diff --git a/src/Console/Command/MoreLikeThis.php b/src/Console/Command/MoreLikeThis.php index ab8cc33..475a3fb 100644 --- a/src/Console/Command/MoreLikeThis.php +++ b/src/Console/Command/MoreLikeThis.php @@ -5,6 +5,7 @@ namespace Atoolo\Search\Console\Command; use Atoolo\Resource\Loader\SiteKitLoader; +use Atoolo\Resource\Loader\StaticResourceBaseLocator; use Atoolo\Search\Dto\Search\Query\MoreLikeThisQuery; use Atoolo\Search\Dto\Search\Result\ResourceSearchResult; use Atoolo\Search\Service\Search\ExternalResourceFactory; @@ -28,6 +29,7 @@ class MoreLikeThis extends Command { private SymfonyStyle $io; + private InputInterface $input; private string $solrCore; private string $resourceDir; @@ -35,6 +37,11 @@ protected function configure(): void { $this ->setHelp('Command to performs a more-like-this search') + ->addArgument( + 'solr-connection-url', + InputArgument::REQUIRED, + 'Solr connection url.' 
+ ) ->addArgument( 'solr-core', InputArgument::REQUIRED, @@ -58,6 +65,7 @@ protected function execute( OutputInterface $output ): int { + $this->input = $input; $this->io = new SymfonyStyle($input, $output); $this->solrCore = $input->getArgument('solr-core'); @@ -74,8 +82,19 @@ protected function execute( protected function createSearcher(): SolrMoreLikeThis { - $resourceLoader = new SiteKitLoader($this->resourceDir); - $clientFactory = new SolrParameterClientFactory(); + $resourceBaseLocator = new StaticResourceBaseLocator( + $this->resourceDir + ); + $resourceLoader = new SiteKitLoader($resourceBaseLocator); + $url = parse_url($this->input->getArgument('solr-connection-url')); + $clientFactory = new SolrParameterClientFactory( + $url['scheme'], + $url['host'], + $url['port'] ?? ($url['scheme'] === 'https' ? 443 : 8983), + $url['path'] ?? '', + null, + 0 + ); $resourceFactoryList = [ new ExternalResourceFactory(), new InternalResourceFactory($resourceLoader), diff --git a/src/Console/Command/Search.php b/src/Console/Command/Search.php index cf8b0e2..49fd4fe 100644 --- a/src/Console/Command/Search.php +++ b/src/Console/Command/Search.php @@ -5,6 +5,7 @@ namespace Atoolo\Search\Console\Command; use Atoolo\Resource\Loader\SiteKitLoader; +use Atoolo\Resource\Loader\StaticResourceBaseLocator; use Atoolo\Search\Dto\Search\Query\SelectQuery; use Atoolo\Search\Dto\Search\Result\ResourceSearchResult; use Atoolo\Search\Service\Search\ExternalResourceFactory; @@ -28,6 +29,7 @@ class Search extends Command { private SymfonyStyle $io; + private InputInterface $input; private string $index; private string $resourceDir; @@ -59,6 +61,7 @@ protected function execute( OutputInterface $output ): int { + $this->input = $input; $this->io = new SymfonyStyle($input, $output); $this->resourceDir = $input->getArgument('resource-dir'); $this->index = $input->getArgument('index'); @@ -75,8 +78,19 @@ protected function execute( protected function createSearch(): SolrSelect { - 
$resourceLoader = new SiteKitLoader($this->resourceDir); - $clientFactory = new SolrParameterClientFactory(); + $resourceBaseLocator = new StaticResourceBaseLocator( + $this->resourceDir + ); + $resourceLoader = new SiteKitLoader($resourceBaseLocator); + $url = parse_url($this->input->getArgument('solr-connection-url')); + $clientFactory = new SolrParameterClientFactory( + $url['scheme'], + $url['host'], + $url['port'] ?? ($url['scheme'] === 'https' ? 443 : 8983), + $url['path'] ?? '', + null, + 0 + ); $defaultBoosting = new DefaultBoostModifier(); $resourceFactoryList = [ diff --git a/src/Console/Command/Suggest.php b/src/Console/Command/Suggest.php index a431467..e40638b 100644 --- a/src/Console/Command/Suggest.php +++ b/src/Console/Command/Suggest.php @@ -23,6 +23,7 @@ )] class Suggest extends Command { + private InputInterface $input; private SymfonyStyle $io; private string $solrCore; @@ -47,7 +48,7 @@ protected function execute( InputInterface $input, OutputInterface $output ): int { - + $this->input = $input; $this->io = new SymfonyStyle($input, $output); $this->solrCore = $input->getArgument('solr-core'); $terms = $input->getArgument('terms'); @@ -65,6 +66,14 @@ protected function execute( protected function createSearcher(): SolrSuggest { - $clientFactory = new SolrParameterClientFactory(); + $url = parse_url($this->input->getArgument('solr-connection-url')); + $clientFactory = new SolrParameterClientFactory( + $url['scheme'], + $url['host'], + $url['port'] ?? ($url['scheme'] === 'https' ? 443 : 8983), + $url['path'] ?? 
'', + null, + 0 + ); return new SolrSuggest($clientFactory); } @@ -74,7 +84,7 @@ protected function buildQuery(array $terms): SuggestQuery $excludeMedia = $excludeMedia->exclude(); return new SuggestQuery( $this->solrCore, - $terms, + implode(' ', $terms), [ new ArchiveFilter(), $excludeMedia diff --git a/src/Dto/Indexer/IndexerParameter.php b/src/Dto/Indexer/IndexerParameter.php index b1ce85e..e343f40 100644 --- a/src/Dto/Indexer/IndexerParameter.php +++ b/src/Dto/Indexer/IndexerParameter.php @@ -7,8 +7,7 @@ class IndexerParameter { public function __construct( - public readonly string $coreId, - public readonly string $basePath, + public readonly string $index, public readonly int $cleanupThreshold = 0, public readonly array $directories = [] ) { diff --git a/src/Indexer.php b/src/Indexer.php index 55d4f58..54a38ff 100644 --- a/src/Indexer.php +++ b/src/Indexer.php @@ -17,5 +17,8 @@ */ interface Indexer { - public function index(IndexerParameter $parameter): void; + /** + * @return string process id + */ + public function index(IndexerParameter $parameter): string; } diff --git a/src/Service/Indexer/BackgroundIndexer.php b/src/Service/Indexer/BackgroundIndexer.php new file mode 100644 index 0000000..52c9465 --- /dev/null +++ b/src/Service/Indexer/BackgroundIndexer.php @@ -0,0 +1,82 @@ + $documentEnricherList + */ + public function __construct( + private readonly iterable $documentEnricherList, + private readonly ResourceBaseLocator $resourceBaseLocator, + private readonly ResourceLoader $resourceLoader, + private readonly SolrClientFactory $clientFactory, + private readonly string $source, + private readonly string $statusCacheDir + ) { + $this->lockFactory = new LockFactory(new SemaphoreStore()); + if ( + !is_dir($concurrentDirectory = $this->statusCacheDir) && + !mkdir($concurrentDirectory) && + !is_dir($concurrentDirectory) + ) { + throw new \RuntimeException(sprintf( + 'Directory "%s" was not created', + $concurrentDirectory + )); + } + } + + public function 
index(IndexerParameter $parameter): string + { + $lock = $this->lockFactory->createLock($parameter->index); + if (!$lock->acquire()) { + return ''; + } + try { + return $this->getIndexer($parameter->index)->index($parameter); + } finally { + $lock->release(); + } + } + + public function getStatus(string $index): ?BackgroundIndexerStatus + { + $file = $this->getStatusFile($index); + return BackgroundIndexerStatus::load($file); + } + + private function getIndexer(string $index): SolrIndexer + { + $progressHandler = new BackgroundIndexerProgressState( + $this->getStatusFile($index) + ); + return new SolrIndexer( + $this->documentEnricherList, + $progressHandler, + $this->resourceBaseLocator, + $this->resourceLoader, + $this->clientFactory, + $this->source, + ); + } + + private function getStatusFile(string $index): string + { + return $this->statusCacheDir . + '/atoolo.search.index.' . $index . ".status.json"; + } +} diff --git a/src/Service/Indexer/BackgroundIndexerProgressState.php b/src/Service/Indexer/BackgroundIndexerProgressState.php new file mode 100644 index 0000000..a500ab9 --- /dev/null +++ b/src/Service/Indexer/BackgroundIndexerProgressState.php @@ -0,0 +1,59 @@ +status = new BackgroundIndexerStatus( + new \DateTime(), + null, + $total, + 0, + 0 + ); + } + + public function advance(int $step): void + { + $this->status->processed += $step; + $this->status->store($this->file); + } + + public function error(Exception $exception): void + { + $this->status->errors++; + } + + public function finish(): void + { + $this->status->endTime = new DateTime(); + $this->status->store($this->file); + } + + /** + * @return array + */ + public function getErrors(): array + { + return []; + } + + private function getStatusLine(): string + { + return $this->status->getStatusLine(); + } +} diff --git a/src/Service/Indexer/BackgroundIndexerStatus.php b/src/Service/Indexer/BackgroundIndexerStatus.php new file mode 100644 index 0000000..a14fe89 --- /dev/null +++ 
b/src/Service/Indexer/BackgroundIndexerStatus.php @@ -0,0 +1,83 @@ +endTime; + if ($endTime === null) { + $endTime = new DateTime(); + } + $duration = $this->startTime->diff($endTime); + return + 'start: ' . $this->startTime->format('d.m.Y H:i') . ', ' . + 'time: ' . $duration->format('%Hh %Im %Ss') . ', ' . + 'processed: ' . $this->processed . "/" . $this->total . ', ' . + 'errors: ' . $this->errors; + } + + /** + * @throws \JsonException + */ + public static function load(string $file): ?BackgroundIndexerStatus + { + if (!file_exists($file)) { + return null; + } + $content = file_get_contents($file); + $data = json_decode( + $content, + true, + 512, + JSON_THROW_ON_ERROR + ); + + $startTime = new DateTime(); + $startTime->setTimestamp($data['startTime']); + + $endTime = null; + if ($data['endTime'] !== null) { + $endTime = new DateTime(); + $endTime->setTimestamp($data['endTime']); + } + + return new BackgroundIndexerStatus( + $startTime, + $endTime, + $data['total'], + $data['processed'], + $data['errors'] + ); + } + + /** + * @throws \JsonException + */ + public function store(string $file): void + { + $jsonString = json_encode([ + 'statusline' => $this->getStatusLine(), + 'startTime' => $this->startTime->getTimestamp(), + 'endTime' => $this->endTime?->getTimestamp(), + 'total' => $this->total, + 'processed' => $this->processed, + 'errors' => $this->errors + ], JSON_THROW_ON_ERROR); + file_put_contents($file, $jsonString); + } +} diff --git a/src/Service/Indexer/SolrIndexer.php b/src/Service/Indexer/SolrIndexer.php index 4bf37ee..c9888c2 100644 --- a/src/Service/Indexer/SolrIndexer.php +++ b/src/Service/Indexer/SolrIndexer.php @@ -6,6 +6,7 @@ use Atoolo\Resource\Exception\InvalidResourceException; use Atoolo\Resource\Resource; +use Atoolo\Resource\ResourceBaseLocator; use Atoolo\Resource\ResourceLoader; use Atoolo\Search\Dto\Indexer\IndexerParameter; use Atoolo\Search\Indexer; @@ -25,21 +26,22 @@ class SolrIndexer implements Indexer public function __construct( 
private readonly iterable $documentEnricherList, private readonly IndexerProgressHandler $indexerProgressHandler, + private readonly ResourceBaseLocator $resourceBaseLocator, private readonly ResourceLoader $resourceLoader, private readonly SolrClientFactory $clientFactory, private readonly string $source ) { } - public function index(IndexerParameter $parameter): void + public function index(IndexerParameter $parameter): string { - $finder = new LocationFinder($parameter->basePath); + $finder = new LocationFinder($this->resourceBaseLocator->locate()); if (empty($parameter->directories)) { $pathList = $finder->findAll(); } else { $pathList = $finder->findInSubdirectories($parameter->directories); } - $this->indexResources($parameter, $pathList); + return $this->indexResources($parameter, $pathList); } /** @@ -48,9 +50,9 @@ public function index(IndexerParameter $parameter): void private function indexResources( IndexerParameter $parameter, array $pathList - ): void { + ): string { if (count($pathList) === 0) { - return; + return ''; } $total = count($pathList); @@ -65,7 +67,7 @@ private function indexResources( while (true) { $indexedCount = $this->indexChunks( $processId, - $parameter->coreId, + $parameter->index, $pathList, $offset, $chunkSize @@ -81,9 +83,11 @@ private function indexResources( $parameter->cleanupThreshold > 0 && $successCount >= $parameter->cleanupThreshold ) { - $this->deleteByProcessId($parameter->coreId, $processId); + $this->deleteByProcessId($parameter->index, $processId); } - $this->commit($parameter->coreId); + $this->commit($parameter->index); + + return $processId; } finally { $this->indexerProgressHandler->finish(); } diff --git a/src/Service/Search/SolrSelect.php b/src/Service/Search/SolrSelect.php index cc9ed8f..d00be49 100644 --- a/src/Service/Search/SolrSelect.php +++ b/src/Service/Search/SolrSelect.php @@ -127,7 +127,8 @@ private function addFilterQueriesToSolrQuery( ): void { foreach ($filterList as $filter) { - 
$solrQuery->createFilterQuery($filter->getKey()) + $key = $filter->getKey() ?? uniqid('', true); + $solrQuery->createFilterQuery($key) ->setQuery($filter->getQuery()) ->setTags($filter->getTags()); } diff --git a/src/Service/SolrParameterClientFactory.php b/src/Service/SolrParameterClientFactory.php index 207c0b0..38902fd 100644 --- a/src/Service/SolrParameterClientFactory.php +++ b/src/Service/SolrParameterClientFactory.php @@ -16,21 +16,31 @@ */ class SolrParameterClientFactory implements SolrClientFactory { + public function __construct( + private readonly string $scheme, + private readonly string $host, + private readonly int $port, + private readonly string $path = '', + private readonly ?string $proxy = null, + private readonly ?int $timeout = 0 + ) { + } + public function create(string $core): Client { - $host = 'solr-neu-isenburg-whinchat.veltrup.sitepark.de'; + $host = $this->host; $adapter = new Curl(); - $adapter->setTimeout(30); - //$adapter->setProxy('http://localhost:8889'); + $adapter->setTimeout($this->timeout ?? 0); + $adapter->setProxy($this->proxy); $eventDispatcher = new EventDispatcher(); $config = [ 'endpoint' => [ $host => [ - 'scheme' => 'https', - 'host' => $host, - 'port' => 443, - 'path' => '', + 'scheme' => $this->scheme, + 'host' => $this->host, + 'port' => $this->port, + 'path' => $this->path, 'core' => $core, ] ]