);
}
- $effUrl = $res->getEffectiveUrl();
+ $effUrl = Helper::removeAnchor($res->getEffectiveUrl());
if ($effUrl != $url) {
$this->storeRedirect($url, $effUrl);
$url = $effUrl;
protected function storeRedirect($url, $target) // Store a document recording that $url redirects to $target.
{
- $esDoc = new \stdClass();
+ $esDoc = Helper::baseDoc($url); // start from the base document built for $url instead of a bare stdClass
$esDoc->status = (object) array( // assigns a fresh status object built from this array
- 'location' => $target
+ 'location' => $target, // the redirect target passed in by the caller
+ 'findable' => false, // NOTE(review): presumably keeps the redirect source out of search results - confirm against the index queries
);
- $esDoc->url = $url;
$this->storeDoc($url, $esDoc); // hand the document to storeDoc() under the original (pre-redirect) URL
}
$diff = microtime(true) - static::$timer[$timer];
echo '+timer: ' . number_format($diff, 3) . 'ms ' . $timer . "\n";
}
+
+ public static function baseDoc($url) // Build and return a new stdClass document pre-filled with the fields derived from $url.
+ {
+ $esDoc = new \stdClass();
+ $esDoc->status = new \stdClass(); // empty status object, so callers can assign status properties without creating it first
+ $esDoc->url = $url;
+ $esDoc->schemalessUrl = Helper::noSchema($url); // NOTE(review): presumably $url with its scheme removed - confirm against Helper::noSchema()
+ return $esDoc;
+ }
}
?>
}
if ($esDoc === null) {
- $esDoc = new \stdClass();
- }
- if (!isset($esDoc->status)) {
- $esDoc->status = new \stdClass();
+ $esDoc = Helper::baseDoc($url);
+ $retrieved->esDoc = $esDoc;
}
//FIXME: update index only if changed since last index time
$robots = $meta->attributes->getNamedItem('content')->textContent;
foreach (explode(',', $robots) as $value) {
if (trim($value) == 'noindex') {
- echo "URL does not want to be indexed: $url\n";
- exit(0);
+ $esDoc->status->findable = false;
+ return true;
}
}
}
//var_dump($esDoc);die();
- $retrieved->esDoc = $esDoc;
return true;
}