$res = $req->send();
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
- echo "Not modified since last fetch\n";
+ Log::info("Not modified since last fetch");
return false;
} else if ($res->getStatus() !== 200) {
throw new \Exception(
);
}
- $effUrl = $res->getEffectiveUrl();
+ $effUrl = Helper::removeAnchor($res->getEffectiveUrl());
if ($effUrl != $url) {
$this->storeRedirect($url, $effUrl);
$url = $effUrl;
/**
 * Store a document recording that $url redirects to $target.
 *
 * NOTE(review): this block is shown as a diff; lines starting with "-" are
 * the old code and lines starting with "+" are their replacement.
 *
 * A status object carrying the redirect target under "location" (and, on
 * the "+" side, "findable" => false) is attached to the document, which is
 * then passed to storeDoc() under the originally requested URL.
 *
 * @param string $url    URL that was requested (presumably a string; confirm against callers)
 * @param string $target URL the request was redirected to
 *
 * @return void
 */
protected function storeRedirect($url, $target)
{
// Helper::baseDoc() is not visible here; presumably it already sets ->url,
// which would explain why the explicit assignment further down is removed
// on the "+" side. TODO confirm.
- $esDoc = new \stdClass();
+ $esDoc = Helper::baseDoc($url);
$esDoc->status = (object) array(
- 'location' => $target
+ 'location' => $target,
+ 'findable' => false,
);
- $esDoc->url = $url;
$this->storeDoc($url, $esDoc);
}
public function storeDoc($url, $esDoc)
{
- echo "Store $url\n";
+ Log::info("Store $url");
$esDoc->status->processed = gmdate('c');
$r = new Elasticsearch_Request(
$GLOBALS['phinde']['elasticsearch'] . 'document/'