diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2016-11-07 21:41:36 +0100 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-11-07 21:41:36 +0100 |
| commit | d7651fd96dcfa2829519504e4c8ec1ce511cd57f (patch) | |
| tree | e24d7a9f90060b0fee5a652de43bd0627f1c5bde /bin/crawl.php | |
| parent | f90790c6b2a54c9b1c8a0aeaf1f23e6aa67d7aca (diff) | |
| download | phinde-d7651fd96dcfa2829519504e4c8ec1ce511cd57f.tar.gz phinde-d7651fd96dcfa2829519504e4c8ec1ce511cd57f.zip | |
Big patch merging crawling+indexing into one command, new json document structure
Diffstat (limited to 'bin/crawl.php')
| -rwxr-xr-x | bin/crawl.php | 47 |
1 file changed, 0 insertions, 47 deletions
```diff
diff --git a/bin/crawl.php b/bin/crawl.php
deleted file mode 100755
index 0d57bb3..0000000
--- a/bin/crawl.php
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env php
-<?php
-namespace phinde;
-require_once __DIR__ . '/../src/init.php';
-
-$cc = new \Console_CommandLine();
-$cc->description = 'phinde URL crawler';
-$cc->version = '0.0.1';
-$cc->addOption(
-    'showLinksOnly',
-    array(
-        'short_name' => '-s',
-        'long_name' => '--show-links',
-        'description' => 'Only show which URLs were found',
-        'action' => 'StoreTrue',
-        'default' => false
-    )
-);
-$cc->addArgument(
-    'url',
-    array(
-        'description' => 'URL to crawl',
-        'multiple' => false
-    )
-);
-try {
-    $res = $cc->parse();
-} catch (\Exception $e) {
-    $cc->displayError($e->getMessage());
-}
-
-$url = $res->args['url'];
-$url = Helper::addSchema($url);
-if (!Helper::isUrlAllowed($url)) {
-    echo "Domain is not allowed; not crawling\n";
-    exit(2);
-}
-
-try {
-    $crawler = new Crawler();
-    $crawler->setShowLinksOnly($res->options['showLinksOnly']);
-    $crawler->crawl($url);
-} catch (\Exception $e) {
-    echo $e->getMessage() . "\n";
-    exit(10);
-}
-?>
\ No newline at end of file
```
