From f8f56a109b63b92668d67d1cb32254714dd21e1c Mon Sep 17 00:00:00 2001 From: Jesse Leite Date: Sat, 23 Dec 2023 02:29:03 -0500 Subject: [PATCH] Report generation performance improvements (#310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Clean up controller logic by extracting to `data()` function. * Add basic test coverage. * Check all files instead of directories, because of how they're generated into a random structure. * Actual implementation has to be using carbon too. * Windows, yuck. * This stuff is required on Page. * Chunking overhaul, almost there. * Defer validating of pages until after all yaml pages are generated. * Test chunking. * Suppress pending page results errors when running queue workers. * Ensure we only trigger generation once, even with async queue workers. * These aren’t being used anywhere. They are defined on report. * Massively improve performance around validating unique titles and descriptions. * Cleanup and more performance improvements. * Cache key cleanup. * Only cache `toArray()` when generated and WITH pages. * Make queue chunk size configurable. * Fix resource deleter refs issue. * Ensure caches are cleared when creating and deleting, in case IDs get reused. * Update test. --- config/seo-pro.php | 4 + resources/views/reports/index.blade.php | 4 +- src/Http/Controllers/ReportController.php | 13 +- src/Reporting/Chunk.php | 91 ++++++ src/Reporting/Page.php | 46 ++- src/Reporting/Report.php | 261 ++++++++++++------ src/Reporting/Rules/UniqueMetaDescription.php | 17 +- src/Reporting/Rules/UniqueTitleTag.php | 17 +- tests/ReportTest.php | 200 ++++++++++++++ 9 files changed, 512 insertions(+), 141 deletions(-) create mode 100644 src/Reporting/Chunk.php create mode 100644 tests/ReportTest.php diff --git a/config/seo-pro.php b/config/seo-pro.php index 04b86103..fc443d03 100644 --- a/config/seo-pro.php +++ b/config/seo-pro.php @@ -43,4 +43,8 @@ 'excluded_sites' => [], ], + 'reports' => [ + 'queue_chunk_size' => 1000, + ], + ]; diff --git a/resources/views/reports/index.blade.php b/resources/views/reports/index.blade.php index c4c7c946..c41f0641 100644 --- a/resources/views/reports/index.blade.php +++ b/resources/views/reports/index.blade.php @@ -28,9 +28,9 @@ @can('delete seo reports') - + ajax()) { - return $this->showOrGenerateReport($report); + return $report->data(); } return view('seo-pro::reports.show', ['report' => $report]); @@ -48,15 +48,4 @@ public function destroy($id) return Report::find($id)->delete(); } - - private function showOrGenerateReport($report) - { - if ($report->status() === 'pending') { - $report = $report->queueGenerate(); - } elseif ($report->isLegacyReport()) { - $report = $report->updateLegacyReport(); - } - - return $report->withPages(); - } } diff --git a/src/Reporting/Chunk.php b/src/Reporting/Chunk.php new file mode 100644 index 00000000..a908fbd2 --- /dev/null +++ b/src/Reporting/Chunk.php @@ -0,0 +1,91 @@ +id = $id; + $this->contentIds = $contentIds; + $this->report = $report; + } + + protected function folderPath() + { + return $this->report->chunksFolder()."/{$this->id}"; + } + + protected function yamlPath() + { + return $this->folderPath().'/chunk.yaml'; + } + + public function save() + { + File::put($this->yamlPath(), YAML::dump([ + 'ids' => $this->contentIds, + ])); + + return $this; + } + + public function queueGenerate() + { + $ids = $this->contentIds; + + dispatch(function () use ($ids) { + $this->generate($ids); + }); + } + + protected function generate($ids) + { + $content = Cache::get($this->report->cacheKey(Report::CONTENT_CACHE_KEY_SUFFIX)); + + foreach ($ids as $id) { + $this->createPage($content[$id] ?? Data::find($id))->save(); + } + + File::delete($this->folderPath()); + + if ($this->wasLastChunk()) { + $this->report->finalize(); + } + } + + protected function wasLastChunk() + { + return File::getFolders($this->report->chunksFolder())->isEmpty(); + } + + protected function createPage($content) + { + if ($content->value('seo') === false || is_null($content->uri())) { + return; + } + + $data = (new Cascade) + ->with(SiteDefaults::load()->augmented()) + ->with($this->getAugmentedSectionDefaults($content)) + ->with($content->augmentedValue('seo')->value()) + ->withCurrent($content) + ->get(); + + return new Page($content->id(), $data, $this->report); + } +} diff --git a/src/Reporting/Page.php b/src/Reporting/Page.php index 4c419da9..0536f1ac 100644 --- a/src/Reporting/Page.php +++ b/src/Reporting/Page.php @@ -5,26 +5,20 @@ use Statamic\Facades\Data; use Statamic\Facades\File; use Statamic\Facades\YAML; +use Statamic\Support\Arr; class Page { - protected $report; + protected $id; protected $data; + protected $report; protected $results; - protected $id; - protected $rules = [ - Rules\UniqueTitleTag::class, - Rules\UniqueMetaDescription::class, - Rules\NoUnderscoresInUrl::class, - Rules\ThreeSegmentUrls::class, - ]; - - public function setData($data) + public function __construct($id, $data, Report $report) { - $this->data = collect($data); - - return $this; + $this->id = $id; + $this->data = $data; + $this->report = $report; } public function setResults($results) @@ -34,13 +28,6 @@ public function setResults($results) return $this; } - public function setReport(Report $report) - { - $this->report = $report; - - return $this; - } - public function report() { return $this->report; @@ -75,11 +62,15 @@ public function validate() public function get($key) { - return $this->data->get($key); + return Arr::get($this->data, $key); } public function status() { + if (! $this->results) { + return 'pending'; + } + $status = 'pass'; foreach ($this->getRuleResults() as $result) { @@ -99,6 +90,10 @@ public function getRuleResults() { $results = collect(); + if (! $this->results) { + return $results; + } + foreach ($this->results as $class => $array) { $class = "Statamic\\SeoPro\\Reporting\\Rules\\$class"; $rule = new $class; @@ -129,18 +124,11 @@ public function id() return $this->id; } - public function setId($id) - { - $this->id = $id; - - return $this; - } - public function save() { $data = [ 'id' => $this->id, - 'data' => $this->data->all(), + 'data' => $this->data, 'results' => $this->results, ]; diff --git a/src/Reporting/Report.php b/src/Reporting/Report.php index bbcf108f..b1eb4964 100644 --- a/src/Reporting/Report.php +++ b/src/Reporting/Report.php @@ -6,26 +6,27 @@ use Illuminate\Contracts\Support\Arrayable; use Illuminate\Contracts\Support\Jsonable; use Illuminate\Support\Facades\Artisan; +use Illuminate\Support\Facades\Cache; use Statamic\Facades\Entry; use Statamic\Facades\File; use Statamic\Facades\Folder; use Statamic\Facades\Term; use Statamic\Facades\YAML; use Statamic\SeoPro\Cascade; -use Statamic\SeoPro\GetsSectionDefaults; use Statamic\SeoPro\SiteDefaults; class Report implements Arrayable, Jsonable { - use GetsSectionDefaults; + const GENERATING_CACHE_KEY_SUFFIX = 'generating'; + const CONTENT_CACHE_KEY_SUFFIX = 'content'; + const PAGES_CACHE_KEY_SUFFIX = 'pages'; + const TO_ARRAY_CACHE_KEY_SUFFIX = 'to-array'; protected $id; protected $raw; protected $pages; protected $pagesCrawled; protected $results; - protected $generating = false; - protected $generatePages = false; protected $date; protected $score; @@ -41,9 +42,9 @@ public static function create($id = null) { $report = new static; - $report->setId($id ?: static::nextId()); - - return $report; + return $report + ->clearCaches() + ->setId($id ?: static::nextId()); } public function setId($id) @@ -72,26 +73,94 @@ public function date() return Carbon::parse($this->date); } + public function queueGenerate() + { + if ($this->isGenerating()) { + return $this; + } + + $this->clearCaches(); + + Cache::put($this->cacheKey(static::GENERATING_CACHE_KEY_SUFFIX), true); + + Artisan::queue('statamic:seo-pro:generate-report', ['--report' => $this->id()]); + + return $this; + } + + protected function isPending() + { + return $this->status() === 'pending'; + } + + protected function isGenerating() + { + return Cache::has($this->cacheKey(static::GENERATING_CACHE_KEY_SUFFIX)); + } + public function generate() { - $this->generating = true; + // The last chunk will trigger a `finalize()` call on our report, + // which is important when chunks are being run asyncronously. + $this + ->chunks() + ->each(fn ($chunk) => $chunk->queueGenerate()); + + return $this; + } + + protected function hasRemainingChunks() + { + return File::exists($this->chunksFolder()) + && File::getFolders($this->chunksFolder())->isNotEmpty(); + } + + protected function chunks() + { + $chunks = $this + ->allContent() + ->chunk(config('statamic.seo-pro.reports.queue_chunk_size')) + ->map(function ($chunk, $id) { + return app() + ->makeWith(Chunk::class, [ + 'id' => $id + 1, + 'contentIds' => $chunk->map->id()->values()->all(), + 'report' => $this, + ]) + ->save(); + }); + // Save to update report status now that we have chunks in storage $this->save(); - $this->pages()->each(function ($page) { - $page->validate(); - }); + return $chunks; + } + + public function finalize() + { + if (File::exists($dir = $this->chunksFolder())) { + File::delete($dir); + } + + $this + ->withPages() + ->validatePages() + ->validateSite(); - $this->generating = false; + // Cache the pages for first load, since we already have them in memory here. + Cache::put($this->cacheKey(static::PAGES_CACHE_KEY_SUFFIX), $this->pages()); - $this->validateSite()->save(); + // Clear generating status before saving! + Cache::forget($this->cacheKey(static::GENERATING_CACHE_KEY_SUFFIX)); - return $this; + return $this->save(); } - public function queueGenerate() + protected function validatePages() { - Artisan::queue('statamic:seo-pro:generate-report', ['--report' => $this->id()]); + $this + ->pages() + ->each(fn ($page) => $page->validate()); return $this; } @@ -113,68 +182,13 @@ protected function validateSite() return $this; } - public function pages() - { - if ($this->pages) { - return $this->pages; - } - - return $this->createPages(); - } - - protected function createPages() - { - return $this->pages = $this->pagesFromContent(); - } - public function pagesCrawled() { if ($this->pagesCrawled) { return $this->pagesCrawled; } - return $this->pagesCrawled = $this->pages?->count(); - } - - protected function pagesFromContent() - { - return $this->allContent() - ->map(function ($content) { - if ($content->value('seo') === false || is_null($content->uri())) { - return; - } - - $data = (new Cascade) - ->with(SiteDefaults::load()->augmented()) - ->with($this->getAugmentedSectionDefaults($content)) - ->with($content->augmentedValue('seo')->value()) - ->withCurrent($content) - ->get(); - - return (new Page) - ->setId($content->id()) - ->setData($data) - ->setReport($this); - }) - ->filter(); - } - - public function loadPages() - { - $dir = storage_path(sprintf('statamic/seopro/reports/%s/pages', $this->id)); - $files = Folder::getFilesRecursively($dir); - - $this->pages = collect($files)->map(function ($file) { - $yaml = YAML::parse(File::get($file)); - - return (new Page) - ->setId($yaml['id']) - ->setData($yaml['data']) - ->setResults($yaml['results']) - ->setReport($this); - }); - - return $this; + return $this->pagesCrawled = $this->pages()?->count(); } public function results() @@ -184,6 +198,10 @@ public function results() public function toArray() { + if ($this->isGenerated() && $array = Cache::get($this->cacheKey(static::TO_ARRAY_CACHE_KEY_SUFFIX))) { + return $array; + } + $array = [ 'id' => $this->id(), 'date' => $this->date()->timestamp, @@ -193,8 +211,8 @@ public function toArray() 'results' => $this->resultsToArray(), ]; - if ($this->generatePages) { - $array['pages'] = $this->pages()->map(function ($page) { + if ($this->isGenerated() && $pages = $this->pages()) { + $array['pages'] = $pages->map(function ($page) { return [ 'status' => $page->status(), 'url' => $page->url(), @@ -203,6 +221,8 @@ public function toArray() 'results' => $page->getRuleResults(), ]; }); + + Cache::put($this->cacheKey(static::TO_ARRAY_CACHE_KEY_SUFFIX), $array); } return $array; @@ -216,7 +236,7 @@ protected function resultsToArray() $array = []; - foreach ($this->results() as $class => $result) { + foreach ($results as $class => $result) { $class = "Statamic\\SeoPro\\Reporting\\Rules\\$class"; $rule = (new $class)->setReport($this); @@ -238,15 +258,49 @@ public function toJson($options = 0) return json_encode($this->toArray()); } - public function withPages() + public function pages() + { + if ($this->isGenerated() && $pages = Cache::get($this->cacheKey(static::PAGES_CACHE_KEY_SUFFIX))) { + return $pages; + } + + return $this->pages; + } + + public function withPages($preferCache = false) { - $this->generatePages = true; + if ($preferCache && $this->pages() && $this->pages()->isNotEmpty()) { + return $this; + } + + $dir = storage_path(sprintf('statamic/seopro/reports/%s/pages', $this->id)); + + $files = Folder::getFilesRecursively($dir); + + $this->pages = collect($files) + ->map(fn ($file) => YAML::parse(File::get($file))) + ->map(fn ($yaml) => (new Page($yaml['id'], $yaml['data'], $this))->setResults($yaml['results'])); - $this->loadPages(); + if ($this->isGenerated()) { + Cache::put($this->cacheKey(static::PAGES_CACHE_KEY_SUFFIX), $this->pages); + } return $this; } + public function data() + { + if ($this->isGenerating()) { + return $this; + } elseif ($this->isPending()) { + return $this->queueGenerate(); + } elseif ($this->isLegacyReport()) { + return $this->updateLegacyReport(); + } + + return $this->withPages(true); + } + public static function all() { $folders = collect(Folder::getFolders(static::preparePath())); @@ -256,12 +310,8 @@ public static function all() } return $folders - ->map(function ($path) { - return (int) pathinfo($path)['filename']; - }) - ->map(function ($id) { - return static::find($id); - }) + ->map(fn ($path) => (int) pathinfo($path)['filename']) + ->map(fn ($id) => static::find($id)) ->filter() ->sortByDesc ->id() @@ -292,7 +342,7 @@ public function fresh() public function save() { File::put($this->path(), YAML::dump($this->raw = [ - 'date' => $this->date ?? time(), + 'date' => $this->date ?? now()->timestamp, 'status' => $this->status(), 'score' => $this->score(), 'pages_crawled' => $this->pagesCrawled(), @@ -306,14 +356,19 @@ public function delete() { File::delete($this->parentFolder()); - return $this; + return $this->clearCaches(); } - private function parentFolder() + public function parentFolder() { return storage_path('statamic/seopro/reports/'.$this->id); } + public function chunksFolder() + { + return storage_path('statamic/seopro/reports/'.$this->id.'/chunks'); + } + public function path() { return storage_path('statamic/seopro/reports/'.$this->id.'/report.yaml'); @@ -338,7 +393,7 @@ public function load() public function status() { - if ($this->generating) { + if ($this->isGenerating()) { return 'generating'; } @@ -432,10 +487,31 @@ public static function preparePath($path = null) protected function allContent() { - return collect() + $content = collect() ->merge(Entry::all()) ->merge(Term::all()) - ->values(); + ->keyBy + ->id(); + + // Cache this for use when generating chunks of pages in the queue by other processes. + Cache::put($this->cacheKey(static::CONTENT_CACHE_KEY_SUFFIX), $content); + + return $content; + } + + public function cacheKey($name) + { + return 'seo-pro-report-'.$this->id().'-'.$name; + } + + public function clearCaches() + { + Cache::forget($this->cacheKey(static::GENERATING_CACHE_KEY_SUFFIX)); + Cache::forget($this->cacheKey(static::CONTENT_CACHE_KEY_SUFFIX)); + Cache::forget($this->cacheKey(static::PAGES_CACHE_KEY_SUFFIX)); + Cache::forget($this->cacheKey(static::TO_ARRAY_CACHE_KEY_SUFFIX)); + + return $this; } public function isLegacyReport() @@ -446,9 +522,10 @@ public function isLegacyReport() public function updateLegacyReport() { return $this - ->withPages() + ->withPages(false) ->generateScore() ->save() - ->fresh(); + ->fresh() + ->withPages(); } } diff --git a/src/Reporting/Rules/UniqueMetaDescription.php b/src/Reporting/Rules/UniqueMetaDescription.php index 4e959556..8aa2bc59 100644 --- a/src/Reporting/Rules/UniqueMetaDescription.php +++ b/src/Reporting/Rules/UniqueMetaDescription.php @@ -2,6 +2,7 @@ namespace Statamic\SeoPro\Reporting\Rules; +use Statamic\Facades\Blink; use Statamic\SeoPro\Reporting\Rule; class UniqueMetaDescription extends Rule @@ -43,9 +44,19 @@ public function pagePassingComment() public function processPage() { - $this->count = $this->page->report()->pages()->filter(function ($page) { - return $page->get('description') === $this->metaDescription(); - })->count(); + $this->count = $this + ->groupAllPagesByDescription() + ->get($this->metaDescription()) + ->count(); + } + + protected function groupAllPagesByDescription() + { + return Blink::once('seo-pro-report-'.$this->report->id().'-page-descriptions-grouped', function () { + return $this->page->report()->pages()->mapToGroups(function ($page) { + return [$page->get('description') => $page->id()]; + }); + }); } public function savePage() diff --git a/src/Reporting/Rules/UniqueTitleTag.php b/src/Reporting/Rules/UniqueTitleTag.php index 90b42082..4356879a 100644 --- a/src/Reporting/Rules/UniqueTitleTag.php +++ b/src/Reporting/Rules/UniqueTitleTag.php @@ -2,6 +2,7 @@ namespace Statamic\SeoPro\Reporting\Rules; +use Statamic\Facades\Blink; use Statamic\SeoPro\Reporting\Rule; class UniqueTitleTag extends Rule @@ -43,9 +44,19 @@ public function pagePassingComment() public function processPage() { - $this->count = $this->page->report()->pages()->filter(function ($page) { - return $page->get('title') === $this->title(); - })->count(); + $this->count = $this + ->groupAllPagesByTitle() + ->get($this->title()) + ->count(); + } + + protected function groupAllPagesByTitle() + { + return Blink::once('seo-pro-report-'.$this->report->id().'-page-titles-grouped', function () { + return $this->page->report()->pages()->mapToGroups(function ($page) { + return [$page->get('title') => $page->id()]; + }); + }); } public function savePage() diff --git a/tests/ReportTest.php b/tests/ReportTest.php new file mode 100644 index 00000000..f9c86d17 --- /dev/null +++ b/tests/ReportTest.php @@ -0,0 +1,200 @@ +each->delete(); + Term::all()->each->delete(); + + if ($this->files->exists($path = $this->reportsPath())) { + $this->files->deleteDirectory($path); + } + } + + /** @test */ + public function it_can_save_pending_report() + { + $this->assertFileNotExists($this->reportsPath()); + + Carbon::setTestNow($now = now()); + Report::create()->save(); + + $this->assertCount(1, $this->files->directories($this->reportsPath())); + $this->assertFileExists($this->reportsPath('1')); + + $expected = <<<"EXPECTED" +date: $now->timestamp +status: pending +score: null +pages_crawled: null +results: null + +EXPECTED; + + $this->assertCount(1, $this->files->allFiles($this->reportsPath('1'))); + $this->assertEquals($expected, $this->files->get($this->reportsPath('1/report.yaml'))); + } + + /** @test */ + public function it_increments_report_folder_numbers() + { + $this->assertFileNotExists($this->reportsPath()); + + Report::create()->save(); + Report::create()->save(); + Report::create()->save(); + + $this->assertCount(3, $this->files->directories($this->reportsPath())); + $this->assertFileExists($this->reportsPath('1')); + $this->assertFileExists($this->reportsPath('2')); + $this->assertFileExists($this->reportsPath('3')); + } + + /** @test */ + public function it_can_generate_a_report() + { + $this + ->generateEntries(5) + ->generateTerms(5); + + $this->assertFileNotExists($this->reportsPath()); + + Carbon::setTestNow($now = now()); + Report::create()->save()->generate(); + + $expected = <<<"EXPECTED" +date: $now->timestamp +status: fail +score: 75.0 +pages_crawled: 10 +results: + SiteName: true + UniqueTitleTag: 0 + UniqueMetaDescription: 10 + NoUnderscoresInUrl: 0 + ThreeSegmentUrls: 0 + +EXPECTED; + + $this->assertCount(1, $this->files->files($this->reportsPath('1'))); + $this->assertEquals($expected, $this->files->get($this->reportsPath('1/report.yaml'))); + + $this->assertFileExists($this->reportsPath('1/pages')); + $this->assertCount(10, $this->files->allFiles($this->reportsPath('1/pages'))); + } + + /** @test */ + public function it_can_generate_a_large_report_with_multiple_chunked_jobs() + { + Config::set('statamic.seo-pro.reports.queue_chunk_size', 3); + + $this + ->generateEntries(5) + ->generateTerms(5); + + $this->assertFileNotExists($this->reportsPath()); + + // Bind our test chunk class so we can detect how many are generated. + app()->bind(Chunk::class, TestChunk::class); + + Carbon::setTestNow($now = now()); + Report::create()->save()->generate(); + + // Assert we saved exactly four chunks, based on our above config, and the number of pages being generated. + $this->assertEquals(4, Blink::get('saving-chunk')); + + $expected = <<<"EXPECTED" +date: $now->timestamp +status: fail +score: 75.0 +pages_crawled: 10 +results: + SiteName: true + UniqueTitleTag: 0 + UniqueMetaDescription: 10 + NoUnderscoresInUrl: 0 + ThreeSegmentUrls: 0 + +EXPECTED; + + $this->assertFileNotExists($this->reportsPath('1/chunks')); + + $this->assertCount(1, $this->files->files($this->reportsPath('1'))); + $this->assertEquals($expected, $this->files->get($this->reportsPath('1/report.yaml'))); + + $this->assertFileExists($this->reportsPath('1/pages')); + $this->assertCount(10, $this->files->allFiles($this->reportsPath('1/pages'))); + } + + public function reportsPath($path = null) + { + if ($path) { + $path = Str::ensureLeft($path, '/'); + } + + return storage_path('statamic/seopro/reports').$path; + } + + protected function generateEntries($count) + { + collect(range(1, $count))->each(function ($i) { + Entry::make() + ->collection('articles') + ->blueprint('article') + ->slug('test-entry-'.$i) + ->set('title', 'Test Entry '.$i) + ->save(); + }); + + return $this; + } + + protected function generateTerms($count) + { + collect(range(1, $count))->each(function ($i) { + Term::make() + ->slug($slug = 'test-term-'.$i) + ->taxonomy('topics') + ->set('title', 'Test Term '.$i) + ->save(); + }); + + return $this; + } + + /** + * Normalize line endings before performing assertion in windows. + */ + public static function assertEquals($needle, $haystack, $message = ''): void + { + parent::assertEquals( + is_string($needle) ? static::normalizeMultilineString($needle) : $needle, + is_string($haystack) ? static::normalizeMultilineString($haystack) : $haystack, + $message + ); + } +} + +class TestChunk extends Chunk +{ + public function save() + { + Blink::increment('saving-chunk'); + + return parent::save(); + } +}