From d7651fd96dcfa2829519504e4c8ec1ce511cd57f Mon Sep 17 00:00:00 2001 From: Christian Weiske Date: Mon, 7 Nov 2016 21:41:36 +0100 Subject: Big patch merging crawling+indexing into one command, new json document structure --- bin/crawl.php | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100755 bin/crawl.php (limited to 'bin/crawl.php') diff --git a/bin/crawl.php b/bin/crawl.php deleted file mode 100755 index 0d57bb3..0000000 --- a/bin/crawl.php +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env php -description = 'phinde URL crawler'; -$cc->version = '0.0.1'; -$cc->addOption( - 'showLinksOnly', - array( - 'short_name' => '-s', - 'long_name' => '--show-links', - 'description' => 'Only show which URLs were found', - 'action' => 'StoreTrue', - 'default' => false - ) -); -$cc->addArgument( - 'url', - array( - 'description' => 'URL to crawl', - 'multiple' => false - ) -); -try { - $res = $cc->parse(); -} catch (\Exception $e) { - $cc->displayError($e->getMessage()); -} - -$url = $res->args['url']; -$url = Helper::addSchema($url); -if (!Helper::isUrlAllowed($url)) { - echo "Domain is not allowed; not crawling\n"; - exit(2); -} - -try { - $crawler = new Crawler(); - $crawler->setShowLinksOnly($res->options['showLinksOnly']); - $crawler->crawl($url); -} catch (\Exception $e) { - echo $e->getMessage() . "\n"; - exit(10); -} -?> \ No newline at end of file -- cgit v1.2.3