diff options
| | | |
|---|---|---|
| author | Christian Weiske <cweiske@cweiske.de> | 2016-02-03 22:37:15 +0100 |
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-02-03 22:37:15 +0100 |
| commit | f156497be5637d7815ae57370e8b175ce6960a19 (patch) | |
| tree | 6363a52913e006e90b223f17f6aa39206f153ffd /bin | |
| parent | 915b66fe6ca517610a41acec0a71597e7cee0807 (diff) | |
| download | phinde-f156497be5637d7815ae57370e8b175ce6960a19.tar.gz phinde-f156497be5637d7815ae57370e8b175ce6960a19.zip | |
fix indexing, boost config
Diffstat (limited to 'bin')
| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | bin/crawl.php | 2 |
| -rwxr-xr-x | bin/index.php | 10 |
| -rwxr-xr-x | bin/setup.php | 5 |
3 files changed, 6 insertions, 11 deletions
```diff
diff --git a/bin/crawl.php b/bin/crawl.php
index 17b1fc3..e39a622 100755
--- a/bin/crawl.php
+++ b/bin/crawl.php
@@ -86,7 +86,7 @@ foreach ($links as $link) {
     //var_dump($linkTitle, $linkUrl);
     $es->markQueued($linkUrl);
     addToIndex($linkUrl, $linkTitle, $url);
-    if (isUrlAllowed($linkUrl)) {
+    if (Helper::isUrlAllowed($linkUrl)) {
         addToCrawl($linkUrl);
     }
     $alreadySeen[$linkUrl] = true;
diff --git a/bin/index.php b/bin/index.php
index c6de5a9..374923c 100755
--- a/bin/index.php
+++ b/bin/index.php
@@ -182,12 +182,10 @@ if ($arXpdates->length) {
 
 //language
 //there may be "en-US" and "de-DE"
-$indexDoc->language = strtolower(
-    substr(
-        $doc->documentElement->attributes->getNamedItem('lang')->textContent,
-        0, 2
-    )
-);
+$xlang = $doc->documentElement->attributes->getNamedItem('lang');
+if ($xlang) {
+    $indexDoc->language = strtolower(substr($xlang->textContent, 0, 2));
+}
 //FIXME: fallback, autodetection
 
 //FIXME: check noindex
diff --git a/bin/setup.php b/bin/setup.php
index 7dacedd..1e6c66d 100755
--- a/bin/setup.php
+++ b/bin/setup.php
@@ -2,10 +2,7 @@
 <?php
 namespace phinde;
 //configure the elasticsearch index
-set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path());
-require_once __DIR__ . '/../data/config.php';
-require_once 'HTTP/Request2.php';
-require_once 'Elasticsearch/Request.php';
+require_once __DIR__ . '/../src/init.php';
 
 //delete old index
 $r = new Elasticsearch_Request(
```
