From 31f0bc4f5a980b40ab8d6ebc6cf682e97f59f647 Mon Sep 17 00:00:00 2001
From: Christian Weiske
Date: Wed, 9 Nov 2016 21:46:05 +0100
Subject: properly handle noindex pages

---
 src/phinde/Indexer.php | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'src/phinde/Indexer.php')

diff --git a/src/phinde/Indexer.php b/src/phinde/Indexer.php
index 98b52c3..2e40ba9 100644
--- a/src/phinde/Indexer.php
+++ b/src/phinde/Indexer.php
@@ -21,10 +21,8 @@ class Indexer
         }
 
         if ($esDoc === null) {
-            $esDoc = new \stdClass();
-        }
-        if (!isset($esDoc->status)) {
-            $esDoc->status = new \stdClass();
+            $esDoc = Helper::baseDoc($url);
+            $retrieved->esDoc = $esDoc;
         }
 
         //FIXME: update index only if changed since last index time
@@ -52,8 +50,8 @@ class Indexer
             $robots = $meta->attributes->getNamedItem('content')->textContent;
             foreach (explode(',', $robots) as $value) {
                 if (trim($value) == 'noindex') {
-                    echo "URL does not want to be indexed: $url\n";
-                    exit(0);
+                    $esDoc->status->findable = false;
+                    return true;
                 }
             }
         }
@@ -188,7 +186,6 @@ class Indexer
 
         //var_dump($esDoc);die();
 
-        $retrieved->esDoc = $esDoc;
         return true;
     }
 
-- 
cgit v1.2.3