about | summary | refs | log | tree | commit | diff
path: root/bin/crawl.php
diff options
context:
space:
mode:
author: Christian Weiske <cweiske@cweiske.de> 2016-11-07 21:41:36 +0100
committer: Christian Weiske <cweiske@cweiske.de> 2016-11-07 21:41:36 +0100
commit: d7651fd96dcfa2829519504e4c8ec1ce511cd57f (patch)
tree: e24d7a9f90060b0fee5a652de43bd0627f1c5bde /bin/crawl.php
parent: f90790c6b2a54c9b1c8a0aeaf1f23e6aa67d7aca (diff)
download: phinde-d7651fd96dcfa2829519504e4c8ec1ce511cd57f.tar.gz
phinde-d7651fd96dcfa2829519504e4c8ec1ce511cd57f.zip
Big patch merging crawling+indexing into one command, new json document structure
Diffstat (limited to 'bin/crawl.php')
-rwxr-xr-x bin/crawl.php 47
1 files changed, 0 insertions, 47 deletions
diff --git a/bin/crawl.php b/bin/crawl.php
deleted file mode 100755
index 0d57bb3..0000000
--- a/bin/crawl.php
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env php
-<?php
-namespace phinde;
-require_once __DIR__ . '/../src/init.php';
-
-$cc = new \Console_CommandLine();
-$cc->description = 'phinde URL crawler';
-$cc->version = '0.0.1';
-$cc->addOption(
- 'showLinksOnly',
- array(
- 'short_name' => '-s',
- 'long_name' => '--show-links',
- 'description' => 'Only show which URLs were found',
- 'action' => 'StoreTrue',
- 'default' => false
- )
-);
-$cc->addArgument(
- 'url',
- array(
- 'description' => 'URL to crawl',
- 'multiple' => false
- )
-);
-try {
- $res = $cc->parse();
-} catch (\Exception $e) {
- $cc->displayError($e->getMessage());
-}
-
-$url = $res->args['url'];
-$url = Helper::addSchema($url);
-if (!Helper::isUrlAllowed($url)) {
- echo "Domain is not allowed; not crawling\n";
- exit(2);
-}
-
-try {
- $crawler = new Crawler();
- $crawler->setShowLinksOnly($res->options['showLinksOnly']);
- $crawler->crawl($url);
-} catch (\Exception $e) {
- echo $e->getMessage() . "\n";
- exit(10);
-}
-?> \ No newline at end of file