Skip to content

Latest commit

 

History

History
51 lines (38 loc) · 1.5 KB

README.md

File metadata and controls

51 lines (38 loc) · 1.5 KB

# yolo crawler

| Status Label | Status Value |
| --- | --- |
| Build | Build Status |
| Code Quality | Scrutinizer Code Quality |

Example: finding broken links

require 'bootstrap/autoload.php';

use WP\Crawler\LinkFinder;
use WP\Crawler\DomainCrawler;
use WP\Crawler\Queue\QueueManager;
use WP\Crawler\Queue\ArrayQueue;
use WP\Crawler\Queue\Store\ArrayStore;
use WP\Crawler\Queue\Validator\ValidFileExtension;
use WP\Crawler\Queue\Validator\NoPseudoUrl;
use WP\Crawler\Event\LogSubscriber;
use WP\Crawler\Event\BrokenLinkFinderSubscriber;
use Symfony\Component\EventDispatcher\EventDispatcher;

// Command-line entry point.
//   $argv[1]  domain handed to DomainCrawler::crawl()   (required)
//   $argv[2]  value handed to DomainCrawler::setWaitTime() (optional)
if (!isset($argv[1])) {
    // No domain supplied: print a blank line followed by the usage hint.
    echo "\n" . 'Usage ' . $argv[0] . ' {domain} {time to wait}' . "\n";
} else {
    $targetDomain = $argv[1];

    // Queue manager backed by in-memory queue/store, with the NoPseudoUrl
    // and ValidFileExtension validators registered.
    $queueManager = new QueueManager(new ArrayQueue(), new ArrayStore());
    $queueManager->addValidator(new NoPseudoUrl())
        ->addValidator(new ValidFileExtension());

    $domainCrawler = new DomainCrawler($queueManager, new LinkFinder());

    // The wait time is only overridden when a second argument is present;
    // it is forwarded exactly as received from the command line.
    if (isset($argv[2])) {
        $domainCrawler->setWaitTime($argv[2]);
    }

    // Attach the logging and broken-link subscribers to the crawler's
    // own event dispatcher before starting the crawl.
    $events = $domainCrawler->getEventDispatcher();
    $events->addSubscriber(new LogSubscriber());
    $events->addSubscriber(new BrokenLinkFinderSubscriber());

    $domainCrawler->crawl($targetDomain);
}