X-Git-Url: https://git.cweiske.de/phinde.git/blobdiff_plain/d7651fd96dcfa2829519504e4c8ec1ce511cd57f..c32d1b6ffe81afb36fdcaebe0254ad191b72bff6:/src/phinde/Fetcher.php diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php index b5644af..7cf11b7 100644 --- a/src/phinde/Fetcher.php +++ b/src/phinde/Fetcher.php @@ -15,12 +15,15 @@ class Fetcher */ public function fetch($url, $actions, $force = false) { + $url = Helper::rewriteUrl($url); + $esDoc = $this->es->get($url); if (isset($esDoc->status->location) && $esDoc->status->location != '' ) { //TODO: what if location redirects change? $url = $esDoc->status->location; + $url = Helper::rewriteUrl($url); $esDoc = $this->es->get($url); } @@ -43,7 +46,7 @@ class Fetcher $res = $req->send(); if ($res->getStatus() === 304) { //not modified since last time, so don't crawl again - echo "Not modified since last fetch\n"; + Log::info("Not modified since last fetch"); return false; } else if ($res->getStatus() !== 200) { throw new \Exception( @@ -52,7 +55,8 @@ class Fetcher ); } - $effUrl = $res->getEffectiveUrl(); + $effUrl = Helper::removeAnchor($res->getEffectiveUrl()); + $effUrl = Helper::rewriteUrl($effUrl); if ($effUrl != $url) { $this->storeRedirect($url, $effUrl); $url = $effUrl; @@ -69,17 +73,17 @@ class Fetcher protected function storeRedirect($url, $target) { - $esDoc = new \stdClass(); + $esDoc = Helper::baseDoc($url); $esDoc->status = (object) array( - 'location' => $target + 'location' => $target, + 'findable' => false, ); - $esDoc->url = $url; $this->storeDoc($url, $esDoc); } public function storeDoc($url, $esDoc) { - echo "Store $url\n"; + Log::info("Store $url"); $esDoc->status->processed = gmdate('c'); $r = new Elasticsearch_Request( $GLOBALS['phinde']['elasticsearch'] . 'document/'