*/
public function fetch($url, $actions, $force = false)
{
+ $url = Helper::rewriteUrl($url);
+
$esDoc = $this->es->get($url);
if (isset($esDoc->status->location)
&& $esDoc->status->location != ''
) {
//TODO: what if location redirects change?
$url = $esDoc->status->location;
+ $url = Helper::rewriteUrl($url);
$esDoc = $this->es->get($url);
}
$res = $req->send();
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
- echo "Not modified since last fetch\n";
+ Log::info("Not modified since last fetch");
return false;
} else if ($res->getStatus() !== 200) {
throw new \Exception(
}
$effUrl = Helper::removeAnchor($res->getEffectiveUrl());
+ $effUrl = Helper::rewriteUrl($effUrl);
if ($effUrl != $url) {
$this->storeRedirect($url, $effUrl);
$url = $effUrl;
public function storeDoc($url, $esDoc)
{
- echo "Store $url\n";
+ Log::info("Store $url");
$esDoc->status->processed = gmdate('c');
$r = new Elasticsearch_Request(
$GLOBALS['phinde']['elasticsearch'] . 'document/'