X-Git-Url: https://git.cweiske.de/phinde.git/blobdiff_plain/31f0bc4f5a980b40ab8d6ebc6cf682e97f59f647..c32d1b6ffe81afb36fdcaebe0254ad191b72bff6:/src/phinde/Fetcher.php diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php index 5ea0cf2..7cf11b7 100644 --- a/src/phinde/Fetcher.php +++ b/src/phinde/Fetcher.php @@ -15,12 +15,15 @@ class Fetcher */ public function fetch($url, $actions, $force = false) { + $url = Helper::rewriteUrl($url); + $esDoc = $this->es->get($url); if (isset($esDoc->status->location) && $esDoc->status->location != '' ) { //TODO: what if location redirects change? $url = $esDoc->status->location; + $url = Helper::rewriteUrl($url); $esDoc = $this->es->get($url); } @@ -43,7 +46,7 @@ class Fetcher $res = $req->send(); if ($res->getStatus() === 304) { //not modified since last time, so don't crawl again - echo "Not modified since last fetch\n"; + Log::info("Not modified since last fetch"); return false; } else if ($res->getStatus() !== 200) { throw new \Exception( @@ -53,6 +56,7 @@ class Fetcher } $effUrl = Helper::removeAnchor($res->getEffectiveUrl()); + $effUrl = Helper::rewriteUrl($effUrl); if ($effUrl != $url) { $this->storeRedirect($url, $effUrl); $url = $effUrl; @@ -79,7 +83,7 @@ class Fetcher public function storeDoc($url, $esDoc) { - echo "Store $url\n"; + Log::info("Store $url"); $esDoc->status->processed = gmdate('c'); $r = new Elasticsearch_Request( $GLOBALS['phinde']['elasticsearch'] . 'document/'