diff options
Diffstat (limited to 'bin')
| -rwxr-xr-x | bin/crawl.php | 11 |
| -rwxr-xr-x | bin/index.php | 9 |
2 files changed, 4 insertions, 16 deletions
diff --git a/bin/crawl.php b/bin/crawl.php
index 26cf994..17b1fc3 100755
--- a/bin/crawl.php
+++ b/bin/crawl.php
@@ -1,14 +1,7 @@
 #!/usr/bin/env php
 <?php
 namespace phinde;
-
-set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path());
-require_once __DIR__ . '/../data/config.php';
-require_once 'HTTP/Request2.php';
-require_once 'Elasticsearch.php';
-require_once 'Elasticsearch/Request.php';
-require_once 'Net/URL2.php';
-require_once 'functions.php';
+require_once __DIR__ . '/../src/init.php';
 
 $supportedCrawlTypes = array(
     'text/html', 'application/xhtml+xml'
@@ -23,7 +16,7 @@ if ($argc < 2) {
 $es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
 
 $url = $argv[1];
-if (!isUrlAllowed($url)) {
+if (!Helper::isUrlAllowed($url)) {
     echo "Domain is not allowed; not crawling\n";
     exit(2);
 }
diff --git a/bin/index.php b/bin/index.php
index 6a13afd..eb82df2 100755
--- a/bin/index.php
+++ b/bin/index.php
@@ -2,12 +2,7 @@
 <?php
 namespace phinde;
 // index a given URL
-set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path());
-require_once __DIR__ . '/../data/config.php';
-require_once 'HTTP/Request2.php';
-require_once 'Elasticsearch.php';
-require_once 'Elasticsearch/Request.php';
-require_once 'functions.php';
+require_once __DIR__ . '/../src/init.php';
 
 $supportedIndexTypes = array(
     'application/xhtml+xml',
@@ -143,7 +138,7 @@ if (count($arSxdates)) {
 
 //language
 //there may be "en-US" and "de-DE"
-$indexDoc->language = substr((string) $sx['lang'], 0, 2);
+$indexDoc->language = strtolower(substr((string) $sx['lang'], 0, 2));
 //FIXME: fallback, autodetection
 //FIXME: check noindex
 
