diff --git a/CHANGELOG.md b/CHANGELOG.md index f29f10610..afb7f13f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Added * Adds light red background color + icons to crawler log rows with errors * Crawler processing by page priority +* Automatically adds pages to the crawler queue when they are edited or their page cache is cleared ## Crawler 9.0.3 Crawler 9.0.2 was released on July 20th, 2020 diff --git a/Classes/Api/CrawlerApi.php b/Classes/Api/CrawlerApi.php index 986a1691e..cb20970f4 100644 --- a/Classes/Api/CrawlerApi.php +++ b/Classes/Api/CrawlerApi.php @@ -143,7 +143,12 @@ public function addPageToQueueTimed($uid, $time): void $time = intval($time); $crawler = $this->findCrawler(); - $pageData = GeneralUtility::makeInstance(PageRepository::class)->getPage($uid); + /** + * Todo: Switch back to getPage(); when dropping support for TYPO3 9 LTS - TNM + * This switch to getPage_noCheck() is needed as TYPO3 9 LTS doesn't return dokType < 200, therefore automatically + * adding pages to crawler queue when editing page-titles from the page tree directly was not working. + */ + $pageData = GeneralUtility::makeInstance(PageRepository::class)->getPage_noCheck($uid, true); $configurations = $crawler->getUrlsForPageRow($pageData); $configurations = $this->filterUnallowedConfigurations($configurations); $downloadUrls = []; diff --git a/Classes/Hooks/DataHandlerHook.php b/Classes/Hooks/DataHandlerHook.php new file mode 100644 index 000000000..c920d0359 --- /dev/null +++ b/Classes/Hooks/DataHandlerHook.php @@ -0,0 +1,77 @@ + + * + * This file is part of the TYPO3 Crawler Extension. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! 
+ */
+
+use AOE\Crawler\Api\CrawlerApi;
+use AOE\Crawler\Domain\Repository\QueueRepository;
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+use TYPO3\CMS\Extbase\Object\ObjectManager;
+
+/**
+ * DataHandler hook that puts pages on the crawler queue when their page cache is flushed.
+ */
+class DataHandlerHook
+{
+    /**
+     * Adds every flushed page that is not yet queued to the crawler queue.
+     *
+     * Registered for the DataHandler "clearPageCacheEval" hook.
+     *
+     * @param array $parameters Hook parameters; only the "pageIdArray" entry (list of page uids) is read
+     * @param \TYPO3\CMS\Core\DataHandling\DataHandler $dataHandler Calling DataHandler; unused, but part of the hook signature
+     */
+    public function addFlushedPagesToCrawlerQueue(array $parameters, \TYPO3\CMS\Core\DataHandling\DataHandler $dataHandler): void
+    {
+        // The key may be missing or hold a non-array value; both mean there is nothing to queue.
+        $pageIdsToBeFlushedFromCache = $parameters['pageIdArray'] ?? [];
+        if (! is_array($pageIdsToBeFlushedFromCache) || $pageIdsToBeFlushedFromCache === []) {
+            return;
+        }
+
+        // Normalise to integers and drop duplicates so each page is checked and queued only once.
+        $pageIds = array_unique(array_map('intval', $pageIdsToBeFlushedFromCache));
+
+        // Resolve the repository once instead of on every iteration.
+        $queueRepository = $this->getQueueRepository();
+        foreach ($pageIds as $pageId) {
+            // Skip non-positive uids and pages the repository reports as already queued.
+            if ($pageId < 1 || $queueRepository->isPageInQueue($pageId)) {
+                continue;
+            }
+            $this->getCrawlerApi()->addPageToQueue($pageId);
+        }
+    }
+
+    /**
+     * Fetches the queue repository through the Extbase ObjectManager.
+     */
+    private function getQueueRepository(): QueueRepository
+    {
+        return GeneralUtility::makeInstance(ObjectManager::class)->get(QueueRepository::class);
+    }
+
+    /**
+     * Fetches the crawler API through the Extbase ObjectManager.
+     */
+    private function getCrawlerApi(): CrawlerApi
+    {
+        return GeneralUtility::makeInstance(ObjectManager::class)->get(CrawlerApi::class);
+    }
+}
diff --git a/Classes/Utility/HookUtility.php b/Classes/Utility/HookUtility.php index f40a2d72d..787f71344 100644 --- a/Classes/Utility/HookUtility.php +++ b/Classes/Utility/HookUtility.php @@ -42,5 +42,17 @@ public static function registerHooks($extKey): void // Activating refresh hooks $GLOBALS['TYPO3_CONF_VARS']['EXTCONF'][$extKey]['refresh_hooks'][] = ProcessCleanUpHook::class; + + // Env-dependent + if (TYPO3_MODE === 'BE') { + self::registerBackendHooks($extKey); + } + } + + private static function registerBackendHooks(string $extKey): void + { + // DataHandler clear page cache pre-processing + $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_tcemain.php']['clearPageCacheEval'][] = + \AOE\Crawler\Hooks\DataHandlerHook::class . '->addFlushedPagesToCrawlerQueue'; } } diff --git a/Documentation/Features/AutomaticAddPagesToQueue/Index.rst b/Documentation/Features/AutomaticAddPagesToQueue/Index.rst new file mode 100644 index 000000000..57765c839 --- /dev/null +++ 
b/Documentation/Features/AutomaticAddPagesToQueue/Index.rst @@ -0,0 +1,38 @@ +.. include:: /Includes.txt + +============================ +Automatic add pages to Queue +============================ + +Since 9.1.0 + +Edit Pages +---------- + +With this feature, pages are automatically added to the crawler queue +when you edit content on the page, unless it's within a workspace; in that case +the page will not be added to the queue before it's published. + +This functionality gives you the advantage that you do not need to keep track +of which pages you have edited; they will automatically be handled by the next crawler +process task, see :ref:`executing-the-queue-label`. This ensures that +your cache or e.g. Search Index is always up to date and the end-users will see +the most current content as soon as possible. + +Clear Page Single Cache +----------------------- + +As the edit and clear page cache functions use the same DataHandler hook, +we get an additional feature for free. When you clear the page cache for a specific +page, it will also be added automatically to the crawler queue. Again, this will +be processed during the next crawler process. + +.. figure:: /Images/backend_clear_cache.png + :alt: Clearing the page cache + + Clearing the page cache + +.. 
figure:: /Images/backend_clear_cache_queue.png + :alt: Page is added to the crawler queue + + Page is added to the crawler queue diff --git a/Documentation/Features/Hooks/Index.rst b/Documentation/Features/Hooks/Index.rst index b5e5e09e9..cbdd14d78 100644 --- a/Documentation/Features/Hooks/Index.rst +++ b/Documentation/Features/Hooks/Index.rst @@ -47,5 +47,4 @@ Example:: return false; } - } - + } \ No newline at end of file diff --git a/Documentation/Features/Index.rst b/Documentation/Features/Index.rst index 79116e7ba..0ba4aea1a 100644 --- a/Documentation/Features/Index.rst +++ b/Documentation/Features/Index.rst @@ -10,6 +10,7 @@ Features :titlesonly: :glob: + AutomaticAddPagesToQueue/Index PollableProcessingInstructions/Index MultiprocessSupport/Index Hooks/Index diff --git a/Documentation/Images/backend_clear_cache.png b/Documentation/Images/backend_clear_cache.png new file mode 100644 index 000000000..8003922dc Binary files /dev/null and b/Documentation/Images/backend_clear_cache.png differ diff --git a/Documentation/Images/backend_clear_cache_queue.png b/Documentation/Images/backend_clear_cache_queue.png new file mode 100644 index 000000000..b52c8390d Binary files /dev/null and b/Documentation/Images/backend_clear_cache_queue.png differ diff --git a/Tests/Unit/Hooks/DataHandlerHookTest.php b/Tests/Unit/Hooks/DataHandlerHookTest.php new file mode 100644 index 000000000..33ec11887 --- /dev/null +++ b/Tests/Unit/Hooks/DataHandlerHookTest.php @@ -0,0 +1,123 @@ + + * + * This file is part of the TYPO3 Crawler Extension. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! 
+ */
+
+use AOE\Crawler\Api\CrawlerApi;
+use AOE\Crawler\Domain\Repository\QueueRepository;
+use AOE\Crawler\Hooks\DataHandlerHook;
+use Nimut\TestingFramework\TestCase\UnitTestCase;
+use Prophecy\Argument;
+use Prophecy\Prophecy\ObjectProphecy;
+use TYPO3\CMS\Core\Cache\CacheManager;
+use TYPO3\CMS\Core\Cache\Frontend\FrontendInterface;
+use TYPO3\CMS\Core\DataHandling\DataHandler;
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+use TYPO3\CMS\Extbase\Object\ObjectManager;
+
+/**
+ * Unit tests for the DataHandler hook that queues flushed pages for crawling.
+ */
+class DataHandlerHookTest extends UnitTestCase
+{
+    /**
+     * Removes the singleton doubles registered during a test so they cannot leak into other tests.
+     */
+    protected function tearDown(): void
+    {
+        GeneralUtility::purgeInstances();
+        parent::tearDown();
+    }
+
+    /**
+     * Page with ID 0 must be skipped without asking the queue repository.
+     * Page with ID 1 is not in queue, should be added.
+     * Page with ID 2 is already in queue, should NOT be added.
+     *
+     * @test
+     */
+    public function itShouldAddPageToQueue(): void
+    {
+        $crawlerApi = $this->prophesize(CrawlerApi::class);
+        $crawlerApi->addPageToQueue(1)->shouldBeCalledTimes(1);
+        $crawlerApi->addPageToQueue(2)->shouldNotBeCalled();
+
+        $this->registerCollaborators($crawlerApi, [1 => false, 2 => true]);
+
+        (new DataHandlerHook())->addFlushedPagesToCrawlerQueue(
+            [
+                'table' => 'pages',
+                'pageIdArray' => [0, 1, 2],
+            ],
+            new DataHandler()
+        );
+    }
+
+    /**
+     * Page with ID 0 must be skipped without asking the queue repository.
+     * Page with ID 1 is not in queue, should be added.
+     * Page with ID 2 is already in queue, should NOT be added.
+     * Page with ID 3 is not in queue, should be added.
+     *
+     * @test
+     */
+    public function itShouldAddPageToQueueWithMorePages(): void
+    {
+        $crawlerApi = $this->prophesize(CrawlerApi::class);
+        $crawlerApi->addPageToQueue(1)->shouldBeCalledTimes(1);
+        $crawlerApi->addPageToQueue(2)->shouldNotBeCalled();
+        $crawlerApi->addPageToQueue(3)->shouldBeCalledTimes(1);
+
+        $this->registerCollaborators($crawlerApi, [1 => false, 2 => true, 3 => false]);
+
+        (new DataHandlerHook())->addFlushedPagesToCrawlerQueue(
+            [
+                'table' => 'tt_content',
+                'pageIdArray' => [0, 1, 2, 3],
+            ],
+            new DataHandler()
+        );
+    }
+
+    /**
+     * Registers prophecy doubles as singletons: an ObjectManager handing out the queue repository
+     * and the given crawler API double, and a CacheManager (presumably required when instantiating
+     * DataHandler - TODO confirm). Must run before the DataHandler is created.
+     *
+     * @param ObjectProphecy $crawlerApi Crawler API prophecy carrying the expectations of the test
+     * @param bool[] $queuedByPageId Return value of isPageInQueue(), keyed by page uid
+     */
+    private function registerCollaborators(ObjectProphecy $crawlerApi, array $queuedByPageId): void
+    {
+        $queueRepository = $this->prophesize(QueueRepository::class);
+        foreach ($queuedByPageId as $pageId => $isQueued) {
+            $queueRepository->isPageInQueue($pageId)->willReturn($isQueued);
+        }
+
+        $objectManager = $this->prophesize(ObjectManager::class);
+        $objectManager->get(QueueRepository::class)->willReturn($queueRepository->reveal());
+        $objectManager->get(CrawlerApi::class)->willReturn($crawlerApi->reveal());
+
+        $cacheManager = $this->prophesize(CacheManager::class);
+        $cacheManager->getCache(Argument::any())->willReturn($this->prophesize(FrontendInterface::class)->reveal());
+
+        GeneralUtility::setSingletonInstance(CacheManager::class, $cacheManager->reveal());
+        GeneralUtility::setSingletonInstance(ObjectManager::class, $objectManager->reveal());
+    }
+}
diff --git a/Tests/Unit/Hook/IndexedSearchCrawlerHookTest.php b/Tests/Unit/Hooks/IndexedSearchCrawlerHookTest.php similarity index 99% rename from Tests/Unit/Hook/IndexedSearchCrawlerHookTest.php rename to Tests/Unit/Hooks/IndexedSearchCrawlerHookTest.php index 763684df7..978d8a4e9 100644 --- a/Tests/Unit/Hook/IndexedSearchCrawlerHookTest.php +++ b/Tests/Unit/Hooks/IndexedSearchCrawlerHookTest.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace AOE\Crawler\Tests\Unit\Hook; +namespace AOE\Crawler\Tests\Unit\Hooks; /* * (c) 2020 AOE GmbH diff --git a/ext_localconf.php b/ext_localconf.php index 8c1a77e26..d8061c9bc 100644 --- a/ext_localconf.php +++ 
b/ext_localconf.php @@ -1,7 +1,7 @@