aboutsummaryrefslogtreecommitdiff
path: root/bin/crawl.php
diff options
context:
space:
mode:
author Christian Weiske <cweiske@cweiske.de> 2016-02-11 08:43:01 +0100
committer Christian Weiske <cweiske@cweiske.de> 2016-02-11 08:43:01 +0100
commit a2e7177d78911d219bc5be86c1cc86989b36983f (patch)
tree 7de453175d25983fca3ebd54f5520dd8c314cd65 /bin/crawl.php
parent 87670eb07d1b0e82e0a8b4c1f9b9d20e3cafdb42 (diff)
download phinde-a2e7177d78911d219bc5be86c1cc86989b36983f.tar.gz
phinde-a2e7177d78911d219bc5be86c1cc86989b36983f.zip
debug option for crawler
Diffstat (limited to 'bin/crawl.php')
-rwxr-xr-x bin/crawl.php 30
1 files changed, 26 insertions, 4 deletions
diff --git a/bin/crawl.php b/bin/crawl.php
index e9a6218..0d57bb3 100755
--- a/bin/crawl.php
+++ b/bin/crawl.php
@@ -3,12 +3,33 @@
namespace phinde;
require_once __DIR__ . '/../src/init.php';
-if ($argc < 2) {
- echo "No URL given\n";
- exit(1);
+$cc = new \Console_CommandLine();
+$cc->description = 'phinde URL crawler';
+$cc->version = '0.0.1';
+$cc->addOption(
+ 'showLinksOnly',
+ array(
+ 'short_name' => '-s',
+ 'long_name' => '--show-links',
+ 'description' => 'Only show which URLs were found',
+ 'action' => 'StoreTrue',
+ 'default' => false
+ )
+);
+$cc->addArgument(
+ 'url',
+ array(
+ 'description' => 'URL to crawl',
+ 'multiple' => false
+ )
+);
+try {
+ $res = $cc->parse();
+} catch (\Exception $e) {
+ $cc->displayError($e->getMessage());
}
-$url = $argv[1];
+$url = $res->args['url'];
$url = Helper::addSchema($url);
if (!Helper::isUrlAllowed($url)) {
echo "Domain is not allowed; not crawling\n";
@@ -17,6 +38,7 @@ if (!Helper::isUrlAllowed($url)) {
try {
$crawler = new Crawler();
+ $crawler->setShowLinksOnly($res->options['showLinksOnly']);
$crawler->crawl($url);
} catch (\Exception $e) {
echo $e->getMessage() . "\n";