From: Christian Weiske Date: Thu, 10 Nov 2016 19:52:35 +0000 (+0100) Subject: add log class X-Git-Tag: v0.2.0~12 X-Git-Url: https://git.cweiske.de/phinde.git/commitdiff_plain/696fcd7ce4d495e356667019493bf312e2a6e47b add log class --- diff --git a/bin/phinde-worker.php b/bin/phinde-worker.php index 9f8c35a..9b65e32 100755 --- a/bin/phinde-worker.php +++ b/bin/phinde-worker.php @@ -13,9 +13,10 @@ $gmworker->addFunction( $GLOBALS['phinde']['queuePrefix'] . 'phinde_process', function(\GearmanJob $job) { $data = unserialize($job->workload()); - echo "-- Processing " . $data['url'] + Log::info( + "-- Processing " . $data['url'] . ' (' . implode(',', $data['actions']) . ')' - . "\n"; + ); passthru( './process.php ' . escapeshellarg($data['url']) . ' ' . implode(' ', $data['actions']) @@ -26,7 +27,7 @@ $gmworker->addFunction( $gmworker->addFunction( $GLOBALS['phinde']['queuePrefix'] . 'phinde_quit', function(\GearmanJob $job) { - echo "Got exit job\n"; + Log::info('Got exit job'); $job->sendComplete(''); exit(0); } @@ -34,7 +35,7 @@ $gmworker->addFunction( while ($gmworker->work()) { if ($gmworker->returnCode() != GEARMAN_SUCCESS) { - echo 'Error running job: ' . $gmworker->returnCode() . "\n"; + Log::error('Error running job: ' . $gmworker->returnCode()); break; } } diff --git a/bin/process.php b/bin/process.php index ababb03..1bae7c4 100755 --- a/bin/process.php +++ b/bin/process.php @@ -54,7 +54,7 @@ $url = Helper::addSchema($url); $urlObj = new \Net_URL2($url); $url = $urlObj->getNormalizedURL(); if (!Helper::isUrlAllowed($url)) { - echo "Domain is not allowed; not crawling\n"; + Log::error("Domain is not allowed; not crawling"); exit(2); } @@ -78,7 +78,7 @@ try { $update = false; foreach ($actions as $key => $action) { - echo "step: $key\n"; + Log::info("step: $key"); $update |= $action->run($retrieved); } @@ -86,10 +86,10 @@ try { //FIXME: update index if it exists already $fetcher->storeDoc($retrieved->url, $retrieved->esDoc); } else { - echo "Not updating\n"; + Log::info("Not updating"); } } catch (\Exception $e) { - echo $e->getMessage() . "\n"; + Log::error($e->getMessage()); exit(10); } ?> \ No newline at end of file diff --git a/bin/setup.php b/bin/setup.php index 27c5c46..ba97493 100755 --- a/bin/setup.php +++ b/bin/setup.php @@ -9,7 +9,7 @@ require_once __DIR__ . '/../src/init.php'; $json = file_get_contents(__DIR__ . '/../data/elasticsearch-mapping.json'); if (json_decode($json) === null) { - echo "Error: Schema JSON is broken\n"; + Log::error("Error: Schema JSON is broken"); chdir(__DIR__ . '/../'); passthru('json_pp -t null < data/elasticsearch-mapping.json'); exit(1); diff --git a/data/config.php.dist b/data/config.php.dist index 5c095ca..9c432f9 100644 --- a/data/config.php.dist +++ b/data/config.php.dist @@ -19,6 +19,8 @@ $GLOBALS['phinde'] = array( 'subscriptions' => array( 'http://www.example.org/feed', ), + //verbose output + 'debug' => true, //time in seconds after which URLs may be re-indexed 'refreshtime' => 86400, //if directly linked URLs shall be indexed, even if they are diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php index 38e3c3f..1f63e60 100644 --- a/src/phinde/Crawler.php +++ b/src/phinde/Crawler.php @@ -40,7 +40,7 @@ class Crawler { $mimetype = explode(';', $res->getHeader('content-type'))[0]; if (!isset(static::$supportedTypes[$mimetype])) { - echo "MIME type not supported for indexing: $mimetype\n"; + Log::info("MIME type not supported for crawling: $mimetype"); return array(); } @@ -98,13 +98,15 @@ class Crawler protected function showLinks($linkInfos) { foreach ($linkInfos as $linkInfo) { - echo $linkInfo->url . "\n"; + Log::msg($linkInfo->url); if ($linkInfo->title) { - echo ' title: ' . $linkInfo->title . "\n"; - echo ' source: ' . $linkInfo->source . "\n"; - echo ' known: ' . intval($linkInfo->known) + Log::msg(' title: ' . $linkInfo->title); + Log::msg(' source: ' . $linkInfo->source); + Log::msg( + ' known: ' . intval($linkInfo->known) . ', crawl: ' . intval($linkInfo->crawl) - . ', index: ' . intval($linkInfo->index) . "\n"; + . ', index: ' . intval($linkInfo->index) + ); } } } diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php index 5ea0cf2..dccb118 100644 --- a/src/phinde/Fetcher.php +++ b/src/phinde/Fetcher.php @@ -43,7 +43,7 @@ class Fetcher $res = $req->send(); if ($res->getStatus() === 304) { //not modified since last time, so don't crawl again - echo "Not modified since last fetch\n"; + Log::info("Not modified since last fetch"); return false; } else if ($res->getStatus() !== 200) { throw new \Exception( @@ -79,7 +79,7 @@ class Fetcher public function storeDoc($url, $esDoc) { - echo "Store $url\n"; + Log::info("Store $url"); $esDoc->status->processed = gmdate('c'); $r = new Elasticsearch_Request( $GLOBALS['phinde']['elasticsearch'] . 'document/' diff --git a/src/phinde/Indexer.php b/src/phinde/Indexer.php index 2e40ba9..bdd5236 100644 --- a/src/phinde/Indexer.php +++ b/src/phinde/Indexer.php @@ -16,7 +16,7 @@ class Indexer $mimetype = explode(';', $res->getHeader('content-type'))[0]; if (!in_array($mimetype, static::$supportedTypes)) { - echo "MIME type not supported for indexing: $mimetype\n"; + Log::info("MIME type not supported for indexing: $mimetype"); return false; } diff --git a/src/phinde/Log.php b/src/phinde/Log.php new file mode 100644 index 0000000..2369a2b --- /dev/null +++ b/src/phinde/Log.php @@ -0,0 +1,23 @@ + diff --git a/src/phinde/Queue.php b/src/phinde/Queue.php index 6c30faa..a58a257 100644 --- a/src/phinde/Queue.php +++ b/src/phinde/Queue.php @@ -13,9 +13,11 @@ class Queue public function addToProcessList($linkUrl, $actions) { - echo "Queuing for processing: $linkUrl" + Log::info( + "Queuing for processing: $linkUrl" . ' (' . implode(',', $actions) . ')' - . "\n"; + ); + $this->gmclient->doBackground( $GLOBALS['phinde']['queuePrefix'] . 'phinde_process', serialize( @@ -26,9 +28,11 @@ class Queue ) ); if ($this->gmclient->returnCode() != GEARMAN_SUCCESS) { - echo 'Error queueing URL processing for ' + Log::error( + 'Error queueing URL processing for ' . $linkUrl . "\n" - . 'Error code: ' . $this->gmclient->returnCode() . "\n"; + . 'Error code: ' . $this->gmclient->returnCode() + ); exit(2); } }