From 7a7d347b69a8fa2d918648d5d658a7ebdb1171dc Mon Sep 17 00:00:00 2001 From: Tomas Norre Mikkelsen Date: Sat, 17 Oct 2020 14:46:03 +0200 Subject: [PATCH] [BUGFIX] Ensure that DataHandlerHook will not add pages to queue that does not exist (#635) Resolves #634 --- CHANGELOG.md | 1 + Classes/Hooks/DataHandlerHook.php | 9 +++- Tests/Unit/Hooks/DataHandlerHookTest.php | 55 ++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f1d9511f..0679781fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ * Frontend User initialization with UserGroups for crawling protected pages * Making sure PageUid added with ExcludeString is kept as integers * Instatiation of ProcessRepository and QueueRepository change to GeneralUtility::makeInstance +* Ensure that DataHandlerHook will not add pages that do not exist to the queue ## Crawler 9.1.0 Crawler 9.1.0 was released on August 2nd, 2020 diff --git a/Classes/Hooks/DataHandlerHook.php b/Classes/Hooks/DataHandlerHook.php index c920d0359..ead8197be 100644 --- a/Classes/Hooks/DataHandlerHook.php +++ b/Classes/Hooks/DataHandlerHook.php @@ -23,6 +23,7 @@ use AOE\Crawler\Domain\Repository\QueueRepository; use TYPO3\CMS\Core\Utility\GeneralUtility; use TYPO3\CMS\Extbase\Object\ObjectManager; +use TYPO3\CMS\Frontend\Page\PageRepository; class DataHandlerHook { @@ -34,7 +35,7 @@ public function addFlushedPagesToCrawlerQueue(array $parameters, \TYPO3\CMS\Core } foreach ($pageIdsToBeFlushedFromCache as $pageId) { $pageId = (int) $pageId; - if ($pageId < 1) { + if ($pageId < 1 || empty($this->getPageRepository()->getPage($pageId))) { continue; } if ($this->getQueueRepository()->isPageInQueue($pageId)) { @@ -53,4 +54,10 @@ private function getCrawlerApi(): CrawlerApi { return GeneralUtility::makeInstance(ObjectManager::class)->get(CrawlerApi::class); } + + private function getPageRepository(): PageRepository + { + // Todo: Switch to TYPO3\CMS\Core\Domain\Repository\PageRepository 
when dropping support for TYPO3 9LTS + return GeneralUtility::makeInstance(ObjectManager::class)->get(PageRepository::class); + } } diff --git a/Tests/Unit/Hooks/DataHandlerHookTest.php b/Tests/Unit/Hooks/DataHandlerHookTest.php index 33ec11887..60a5c66ec 100644 --- a/Tests/Unit/Hooks/DataHandlerHookTest.php +++ b/Tests/Unit/Hooks/DataHandlerHookTest.php @@ -29,6 +29,7 @@ use TYPO3\CMS\Core\DataHandling\DataHandler; use TYPO3\CMS\Core\Utility\GeneralUtility; use TYPO3\CMS\Extbase\Object\ObjectManager; +use TYPO3\CMS\Frontend\Page\PageRepository; class DataHandlerHookTest extends UnitTestCase { @@ -49,9 +50,14 @@ public function itShouldAddPageToQueue(): void $queueRepository->isPageInQueue(1)->willReturn(false); $queueRepository->isPageInQueue(2)->willReturn(true); + $pageRepository = $this->prophesize(PageRepository::class); + $pageRepository->getPage(1)->willReturn(['Faking that page exists as not empty array']); + $pageRepository->getPage(2)->willReturn(['Faking that page exists as not empty array']); + $objectManager = $this->prophesize(ObjectManager::class); $objectManager->get(QueueRepository::class)->willReturn($queueRepository->reveal()); $objectManager->get(CrawlerApi::class)->willReturn($crawlerApi->reveal()); + $objectManager->get(PageRepository::class)->willReturn($pageRepository->reveal()); $cacheManager = $this->prophesize(CacheManager::class); $cacheManager->getCache(Argument::any())->willReturn($this->prophesize(FrontendInterface::class)->reveal()); @@ -89,9 +95,15 @@ public function itShouldAddPageToQueueWithMorePages(): void $queueRepository->isPageInQueue(2)->willReturn(true); $queueRepository->isPageInQueue(3)->willReturn(false); + $pageRepository = $this->prophesize(PageRepository::class); + $pageRepository->getPage(1)->willReturn(['Faking that page exists as not empty array']); + $pageRepository->getPage(2)->willReturn(['Faking that page exists as not empty array']); + $pageRepository->getPage(3)->willReturn(['Faking that page exists as not 
empty array']); + $objectManager = $this->prophesize(ObjectManager::class); $objectManager->get(QueueRepository::class)->willReturn($queueRepository->reveal()); $objectManager->get(CrawlerApi::class)->willReturn($crawlerApi->reveal()); + $objectManager->get(PageRepository::class)->willReturn($pageRepository->reveal()); $cacheManager = $this->prophesize(CacheManager::class); $cacheManager->getCache(Argument::any())->willReturn($this->prophesize(FrontendInterface::class)->reveal()); @@ -109,4 +121,47 @@ public function itShouldAddPageToQueueWithMorePages(): void $dataHandler ); } + + /** + * Page with ID 0 is not a valid page ID, should NOT be added + * Page with ID 1 exists and is not in queue, should be added + * Page with ID 3000 does not exist, should NOT be added + * + * @test + */ + public function nothingToBeAddedAsPageDoNotExists(): void + { + $dataHandlerHook = new DataHandlerHook(); + $crawlerApi = $this->prophesize(CrawlerApi::class); + $crawlerApi->addPageToQueue(1)->shouldBeCalled(); + + $queueRepository = $this->prophesize(QueueRepository::class); + $queueRepository->isPageInQueue(1)->willReturn(false); + + $pageRepository = $this->prophesize(PageRepository::class); + $pageRepository->getPage(1)->willReturn(['Faking that page exists as not empty array']); + // Empty array to act like the page doesn't exist + $pageRepository->getPage(3000)->willReturn([]); + + $objectManager = $this->prophesize(ObjectManager::class); + $objectManager->get(QueueRepository::class)->willReturn($queueRepository->reveal()); + $objectManager->get(CrawlerApi::class)->willReturn($crawlerApi->reveal()); + $objectManager->get(PageRepository::class)->willReturn($pageRepository->reveal()); + + $cacheManager = $this->prophesize(CacheManager::class); + $cacheManager->getCache(Argument::any())->willReturn($this->prophesize(FrontendInterface::class)->reveal()); + + GeneralUtility::setSingletonInstance(CacheManager::class, $cacheManager->reveal()); + GeneralUtility::setSingletonInstance(ObjectManager::class, 
$objectManager->reveal()); + + $dataHandler = new DataHandler(); + + $dataHandlerHook->addFlushedPagesToCrawlerQueue( + [ + 'table' => 'tt_content', + 'pageIdArray' => [0, 1, 3000], + ], + $dataHandler + ); + } }