diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2016-02-11 08:43:01 +0100 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-02-11 08:43:01 +0100 |
| commit | a2e7177d78911d219bc5be86c1cc86989b36983f (patch) | |
| tree | 7de453175d25983fca3ebd54f5520dd8c314cd65 /bin/crawl.php | |
| parent | 87670eb07d1b0e82e0a8b4c1f9b9d20e3cafdb42 (diff) | |
| download | phinde-a2e7177d78911d219bc5be86c1cc86989b36983f.tar.gz phinde-a2e7177d78911d219bc5be86c1cc86989b36983f.zip | |
debug option for crawler
Diffstat (limited to 'bin/crawl.php')
| -rwxr-xr-x | bin/crawl.php | 30 |
1 files changed, 26 insertions, 4 deletions
```diff
diff --git a/bin/crawl.php b/bin/crawl.php
index e9a6218..0d57bb3 100755
--- a/bin/crawl.php
+++ b/bin/crawl.php
@@ -3,12 +3,33 @@
 namespace phinde;
 require_once __DIR__ . '/../src/init.php';
 
-if ($argc < 2) {
-    echo "No URL given\n";
-    exit(1);
+$cc = new \Console_CommandLine();
+$cc->description = 'phinde URL crawler';
+$cc->version = '0.0.1';
+$cc->addOption(
+    'showLinksOnly',
+    array(
+        'short_name' => '-s',
+        'long_name' => '--show-links',
+        'description' => 'Only show which URLs were found',
+        'action' => 'StoreTrue',
+        'default' => false
+    )
+);
+$cc->addArgument(
+    'url',
+    array(
+        'description' => 'URL to crawl',
+        'multiple' => false
+    )
+);
+try {
+    $res = $cc->parse();
+} catch (\Exception $e) {
+    $cc->displayError($e->getMessage());
 }
 
-$url = $argv[1];
+$url = $res->args['url'];
 $url = Helper::addSchema($url);
 if (!Helper::isUrlAllowed($url)) {
     echo "Domain is not allowed; not crawling\n";
@@ -17,6 +38,7 @@ if (!Helper::isUrlAllowed($url)) {
 
 try {
     $crawler = new Crawler();
+    $crawler->setShowLinksOnly($res->options['showLinksOnly']);
     $crawler->crawl($url);
 } catch (\Exception $e) {
     echo $e->getMessage() . "\n";
```
