Skip to content

Commit

Permalink
[BUGFIX] Ensure that DataHandlerHook will not add pages to queue that…
Browse files Browse the repository at this point in the history
… does not exist (#635)

Resolves #634
  • Loading branch information
tomasnorre authored Oct 17, 2020
1 parent 11798cb commit 7a7d347
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Frontend User initialization with UserGroups for crawling protected pages
* Making sure PageUid added with ExcludeString is kept as integers
* Instantiation of ProcessRepository and QueueRepository changed to GeneralUtility::makeInstance
* Ensure that DataHandlerHook will not add pages that do not exist to the queue

## Crawler 9.1.0
Crawler 9.1.0 was released on August 2nd, 2020
Expand Down
9 changes: 8 additions & 1 deletion Classes/Hooks/DataHandlerHook.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
use AOE\Crawler\Domain\Repository\QueueRepository;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Extbase\Object\ObjectManager;
use TYPO3\CMS\Frontend\Page\PageRepository;

class DataHandlerHook
{
Expand All @@ -34,7 +35,7 @@ public function addFlushedPagesToCrawlerQueue(array $parameters, \TYPO3\CMS\Core
}
foreach ($pageIdsToBeFlushedFromCache as $pageId) {
$pageId = (int) $pageId;
if ($pageId < 1) {
if ($pageId < 1 || empty($this->getPageRepository()->getPage($pageId))) {
continue;
}
if ($this->getQueueRepository()->isPageInQueue($pageId)) {
Expand All @@ -53,4 +54,10 @@ private function getCrawlerApi(): CrawlerApi
{
return GeneralUtility::makeInstance(ObjectManager::class)->get(CrawlerApi::class);
}

/**
 * Resolves the PageRepository via the Extbase ObjectManager, mirroring how
 * getCrawlerApi() obtains its dependency.
 */
private function getPageRepository(): PageRepository
{
    // Todo: Switch to TYPO3\CMS\Core\Domain\Repository\PageRepository when dropping support for TYPO3 9LTS
    $objectManager = GeneralUtility::makeInstance(ObjectManager::class);

    return $objectManager->get(PageRepository::class);
}
}
55 changes: 55 additions & 0 deletions Tests/Unit/Hooks/DataHandlerHookTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
use TYPO3\CMS\Core\DataHandling\DataHandler;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Extbase\Object\ObjectManager;
use TYPO3\CMS\Frontend\Page\PageRepository;

class DataHandlerHookTest extends UnitTestCase
{
Expand All @@ -49,9 +50,14 @@ public function itShouldAddPageToQueue(): void
$queueRepository->isPageInQueue(1)->willReturn(false);
$queueRepository->isPageInQueue(2)->willReturn(true);

$pageRepository = $this->prophesize(PageRepository::class);
$pageRepository->getPage(1)->willReturn(['Faking that page exists as not empty array']);
$pageRepository->getPage(2)->willReturn(['Faking that page exists as not empty array']);

$objectManager = $this->prophesize(ObjectManager::class);
$objectManager->get(QueueRepository::class)->willReturn($queueRepository->reveal());
$objectManager->get(CrawlerApi::class)->willReturn($crawlerApi->reveal());
$objectManager->get(PageRepository::class)->willReturn($pageRepository->reveal());

$cacheManager = $this->prophesize(CacheManager::class);
$cacheManager->getCache(Argument::any())->willReturn($this->prophesize(FrontendInterface::class)->reveal());
Expand Down Expand Up @@ -89,9 +95,15 @@ public function itShouldAddPageToQueueWithMorePages(): void
$queueRepository->isPageInQueue(2)->willReturn(true);
$queueRepository->isPageInQueue(3)->willReturn(false);

$pageRepository = $this->prophesize(PageRepository::class);
$pageRepository->getPage(1)->willReturn(['Faking that page exists as not empty array']);
$pageRepository->getPage(2)->willReturn(['Faking that page exists as not empty array']);
$pageRepository->getPage(3)->willReturn(['Faking that page exists as not empty array']);

$objectManager = $this->prophesize(ObjectManager::class);
$objectManager->get(QueueRepository::class)->willReturn($queueRepository->reveal());
$objectManager->get(CrawlerApi::class)->willReturn($crawlerApi->reveal());
$objectManager->get(PageRepository::class)->willReturn($pageRepository->reveal());

$cacheManager = $this->prophesize(CacheManager::class);
$cacheManager->getCache(Argument::any())->willReturn($this->prophesize(FrontendInterface::class)->reveal());
Expand All @@ -109,4 +121,47 @@ public function itShouldAddPageToQueueWithMorePages(): void
$dataHandler
);
}

/**
 * Page with ID 0 is not a valid page ID, should NOT be added.
 * Page with ID 1 exists and is not in queue, should be added.
 * Page with ID 3000 does not exist (PageRepository returns an empty array), should NOT be added.
 *
 * @test
 */
public function nothingToBeAddedAsPageDoNotExists(): void
{
    $dataHandlerHook = new DataHandlerHook();

    $crawlerApi = $this->prophesize(CrawlerApi::class);
    $crawlerApi->addPageToQueue(1)->shouldBeCalled();
    // State the purpose of this test explicitly: neither the invalid ID
    // nor the non-existing page may ever reach the crawler queue.
    $crawlerApi->addPageToQueue(0)->shouldNotBeCalled();
    $crawlerApi->addPageToQueue(3000)->shouldNotBeCalled();

    $queueRepository = $this->prophesize(QueueRepository::class);
    $queueRepository->isPageInQueue(1)->willReturn(false);

    $pageRepository = $this->prophesize(PageRepository::class);
    $pageRepository->getPage(1)->willReturn(['Faking that page exists as not empty array']);
    // Empty array to act as if the page doesn't exist
    $pageRepository->getPage(3000)->willReturn([]);

    $objectManager = $this->prophesize(ObjectManager::class);
    $objectManager->get(QueueRepository::class)->willReturn($queueRepository->reveal());
    $objectManager->get(CrawlerApi::class)->willReturn($crawlerApi->reveal());
    $objectManager->get(PageRepository::class)->willReturn($pageRepository->reveal());

    $cacheManager = $this->prophesize(CacheManager::class);
    $cacheManager->getCache(Argument::any())->willReturn($this->prophesize(FrontendInterface::class)->reveal());

    // The hook resolves its collaborators through GeneralUtility, so the
    // prophesized instances are registered as singletons.
    GeneralUtility::setSingletonInstance(CacheManager::class, $cacheManager->reveal());
    GeneralUtility::setSingletonInstance(ObjectManager::class, $objectManager->reveal());

    $dataHandler = new DataHandler();

    $dataHandlerHook->addFlushedPagesToCrawlerQueue(
        [
            'table' => 'tt_content',
            'pageIdArray' => [0, 1, 3000],
        ],
        $dataHandler
    );
}
}

0 comments on commit 7a7d347

Please sign in to comment.