X-Git-Url: https://git.cweiske.de/phinde.git/blobdiff_plain/d7651fd96dcfa2829519504e4c8ec1ce511cd57f..d3cdabcac7feb8c62451ac12a22256c0eff16873:/src/phinde/Crawler.php diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php index 38e3c3f..4d596b4 100644 --- a/src/phinde/Crawler.php +++ b/src/phinde/Crawler.php @@ -40,7 +40,7 @@ class Crawler { $mimetype = explode(';', $res->getHeader('content-type'))[0]; if (!isset(static::$supportedTypes[$mimetype])) { - echo "MIME type not supported for indexing: $mimetype\n"; + Log::info("MIME type not supported for crawling: $mimetype"); return array(); } @@ -53,6 +53,7 @@ class Crawler { $filteredLinkInfos = array(); foreach ($linkInfos as $linkInfo) { + $linkInfo->url = Helper::rewriteUrl($linkInfo->url); $allowed = Helper::isUrlAllowed($linkInfo->url); $crawl = $allowed; $index = $GLOBALS['phinde']['indexNonAllowed'] || $allowed; @@ -98,13 +99,15 @@ class Crawler protected function showLinks($linkInfos) { foreach ($linkInfos as $linkInfo) { - echo $linkInfo->url . "\n"; + Log::msg($linkInfo->url); if ($linkInfo->title) { - echo ' title: ' . $linkInfo->title . "\n"; - echo ' source: ' . $linkInfo->source . "\n"; - echo ' known: ' . intval($linkInfo->known) + Log::msg(' title: ' . $linkInfo->title); + Log::msg(' source: ' . $linkInfo->source); + Log::msg( + ' known: ' . intval($linkInfo->known) . ', crawl: ' . intval($linkInfo->crawl) - . ', index: ' . intval($linkInfo->index) . "\n"; + . ', index: ' . intval($linkInfo->index) + ); } } }