aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Weiske <cweiske@cweiske.de>2016-11-10 20:52:35 +0100
committerChristian Weiske <cweiske@cweiske.de>2016-11-10 20:52:35 +0100
commit696fcd7ce4d495e356667019493bf312e2a6e47b (patch)
tree3e8d60dad08dbe31660f2532d8ce893d35d50130
parent9ff01ebe8cde0a07ad2a480d1a45eb1a29cb113d (diff)
downloadphinde-696fcd7ce4d495e356667019493bf312e2a6e47b.tar.gz
phinde-696fcd7ce4d495e356667019493bf312e2a6e47b.zip
add log class
-rwxr-xr-xbin/phinde-worker.php9
-rwxr-xr-xbin/process.php8
-rwxr-xr-xbin/setup.php2
-rw-r--r--data/config.php.dist2
-rw-r--r--src/phinde/Crawler.php14
-rw-r--r--src/phinde/Fetcher.php4
-rw-r--r--src/phinde/Indexer.php2
-rw-r--r--src/phinde/Log.php23
-rw-r--r--src/phinde/Queue.php12
9 files changed, 54 insertions, 22 deletions
diff --git a/bin/phinde-worker.php b/bin/phinde-worker.php
index 9f8c35a..9b65e32 100755
--- a/bin/phinde-worker.php
+++ b/bin/phinde-worker.php
@@ -13,9 +13,10 @@ $gmworker->addFunction(
$GLOBALS['phinde']['queuePrefix'] . 'phinde_process',
function(\GearmanJob $job) {
$data = unserialize($job->workload());
- echo "-- Processing " . $data['url']
+ Log::info(
+ "-- Processing " . $data['url']
. ' (' . implode(',', $data['actions']) . ')'
- . "\n";
+ );
passthru(
'./process.php ' . escapeshellarg($data['url'])
. ' ' . implode(' ', $data['actions'])
@@ -26,7 +27,7 @@ $gmworker->addFunction(
$gmworker->addFunction(
$GLOBALS['phinde']['queuePrefix'] . 'phinde_quit',
function(\GearmanJob $job) {
- echo "Got exit job\n";
+ Log::info('Got exit job');
$job->sendComplete('');
exit(0);
}
@@ -34,7 +35,7 @@ $gmworker->addFunction(
while ($gmworker->work()) {
if ($gmworker->returnCode() != GEARMAN_SUCCESS) {
- echo 'Error running job: ' . $gmworker->returnCode() . "\n";
+ Log::error('Error running job: ' . $gmworker->returnCode());
break;
}
}
diff --git a/bin/process.php b/bin/process.php
index ababb03..1bae7c4 100755
--- a/bin/process.php
+++ b/bin/process.php
@@ -54,7 +54,7 @@ $url = Helper::addSchema($url);
$urlObj = new \Net_URL2($url);
$url = $urlObj->getNormalizedURL();
if (!Helper::isUrlAllowed($url)) {
- echo "Domain is not allowed; not crawling\n";
+ Log::error("Domain is not allowed; not crawling");
exit(2);
}
@@ -78,7 +78,7 @@ try {
$update = false;
foreach ($actions as $key => $action) {
- echo "step: $key\n";
+ Log::info("step: $key");
$update |= $action->run($retrieved);
}
@@ -86,10 +86,10 @@ try {
//FIXME: update index if it exists already
$fetcher->storeDoc($retrieved->url, $retrieved->esDoc);
} else {
- echo "Not updating\n";
+ Log::info("Not updating");
}
} catch (\Exception $e) {
- echo $e->getMessage() . "\n";
+ Log::error($e->getMessage());
exit(10);
}
?> \ No newline at end of file
diff --git a/bin/setup.php b/bin/setup.php
index 27c5c46..ba97493 100755
--- a/bin/setup.php
+++ b/bin/setup.php
@@ -9,7 +9,7 @@ require_once __DIR__ . '/../src/init.php';
$json = file_get_contents(__DIR__ . '/../data/elasticsearch-mapping.json');
if (json_decode($json) === null) {
- echo "Error: Schema JSON is broken\n";
+ Log::error("Error: Schema JSON is broken");
chdir(__DIR__ . '/../');
passthru('json_pp -t null < data/elasticsearch-mapping.json');
exit(1);
diff --git a/data/config.php.dist b/data/config.php.dist
index 5c095ca..9c432f9 100644
--- a/data/config.php.dist
+++ b/data/config.php.dist
@@ -19,6 +19,8 @@ $GLOBALS['phinde'] = array(
'subscriptions' => array(
'http://www.example.org/feed',
),
+ //verbose output
+ 'debug' => true,
//time in seconds after which URLs may be re-indexed
'refreshtime' => 86400,
//if directly linked URLs shall be indexed, even if they are
diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php
index 38e3c3f..1f63e60 100644
--- a/src/phinde/Crawler.php
+++ b/src/phinde/Crawler.php
@@ -40,7 +40,7 @@ class Crawler
{
$mimetype = explode(';', $res->getHeader('content-type'))[0];
if (!isset(static::$supportedTypes[$mimetype])) {
- echo "MIME type not supported for indexing: $mimetype\n";
+ Log::info("MIME type not supported for crawling: $mimetype");
return array();
}
@@ -98,13 +98,15 @@ class Crawler
protected function showLinks($linkInfos)
{
foreach ($linkInfos as $linkInfo) {
- echo $linkInfo->url . "\n";
+ Log::log($linkInfo->url); //Log::log() prints even when "debug" is off
if ($linkInfo->title) {
- echo ' title: ' . $linkInfo->title . "\n";
- echo ' source: ' . $linkInfo->source . "\n";
- echo ' known: ' . intval($linkInfo->known)
+ Log::log(' title: ' . $linkInfo->title);
+ Log::log(' source: ' . $linkInfo->source);
+ Log::log(
+ ' known: ' . intval($linkInfo->known)
. ', crawl: ' . intval($linkInfo->crawl)
- . ', index: ' . intval($linkInfo->index) . "\n";
+ . ', index: ' . intval($linkInfo->index)
+ );
}
}
}
diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php
index 5ea0cf2..dccb118 100644
--- a/src/phinde/Fetcher.php
+++ b/src/phinde/Fetcher.php
@@ -43,7 +43,7 @@ class Fetcher
$res = $req->send();
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
- echo "Not modified since last fetch\n";
+ Log::info("Not modified since last fetch");
return false;
} else if ($res->getStatus() !== 200) {
throw new \Exception(
@@ -79,7 +79,7 @@ class Fetcher
public function storeDoc($url, $esDoc)
{
- echo "Store $url\n";
+ Log::info("Store $url");
$esDoc->status->processed = gmdate('c');
$r = new Elasticsearch_Request(
$GLOBALS['phinde']['elasticsearch'] . 'document/'
diff --git a/src/phinde/Indexer.php b/src/phinde/Indexer.php
index 2e40ba9..bdd5236 100644
--- a/src/phinde/Indexer.php
+++ b/src/phinde/Indexer.php
@@ -16,7 +16,7 @@ class Indexer
$mimetype = explode(';', $res->getHeader('content-type'))[0];
if (!in_array($mimetype, static::$supportedTypes)) {
- echo "MIME type not supported for indexing: $mimetype\n";
+ Log::info("MIME type not supported for indexing: $mimetype");
return false;
}
diff --git a/src/phinde/Log.php b/src/phinde/Log.php
new file mode 100644
index 0000000..2369a2b
--- /dev/null
+++ b/src/phinde/Log.php
@@ -0,0 +1,50 @@
+<?php
+namespace phinde;
+
+/**
+ * Minimal logger that echoes messages, one per line.
+ */
+class Log
+{
+    /**
+     * Output an error message. Errors are always shown.
+     *
+     * @param string $msg Message text
+     *
+     * @return void
+     */
+    public static function error($msg)
+    {
+        static::log($msg);
+    }
+
+    /**
+     * Output an informational message, but only when the
+     * "debug" configuration option is enabled.
+     *
+     * @param string $msg Message text
+     *
+     * @return void
+     */
+    public static function info($msg)
+    {
+        //a config file without the "debug" key must not raise a notice;
+        // a missing key is treated the same as "off"
+        if (!empty($GLOBALS['phinde']['debug'])) {
+            static::log($msg);
+        }
+    }
+
+    /**
+     * Output a message unconditionally.
+     *
+     * @param string $msg Message text; a newline is appended
+     *
+     * @return void
+     */
+    public static function log($msg)
+    {
+        echo $msg . "\n";
+    }
+}
+?>
diff --git a/src/phinde/Queue.php b/src/phinde/Queue.php
index 6c30faa..a58a257 100644
--- a/src/phinde/Queue.php
+++ b/src/phinde/Queue.php
@@ -13,9 +13,11 @@ class Queue
public function addToProcessList($linkUrl, $actions)
{
- echo "Queuing for processing: $linkUrl"
+ Log::info(
+ "Queuing for processing: $linkUrl"
. ' (' . implode(',', $actions) . ')'
- . "\n";
+ );
+
$this->gmclient->doBackground(
$GLOBALS['phinde']['queuePrefix'] . 'phinde_process',
serialize(
@@ -26,9 +28,11 @@ class Queue
)
);
if ($this->gmclient->returnCode() != GEARMAN_SUCCESS) {
- echo 'Error queueing URL processing for '
+ Log::error(
+ 'Error queueing URL processing for '
. $linkUrl . "\n"
- . 'Error code: ' . $this->gmclient->returnCode() . "\n";
+ . 'Error code: ' . $this->gmclient->returnCode()
+ );
exit(2);
}
}