about | summary | refs | log | tree | commit | diff
path: root/src/phinde
diff options
context:
space:
mode:
Diffstat (limited to 'src/phinde')
-rw-r--r-- src/phinde/Crawler.php 14
-rw-r--r-- src/phinde/Fetcher.php 4
-rw-r--r-- src/phinde/Indexer.php 2
-rw-r--r-- src/phinde/Log.php 23
-rw-r--r-- src/phinde/Queue.php 12
5 files changed, 42 insertions, 13 deletions
diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php
index 38e3c3f..1f63e60 100644
--- a/src/phinde/Crawler.php
+++ b/src/phinde/Crawler.php
@@ -40,7 +40,7 @@ class Crawler
{
$mimetype = explode(';', $res->getHeader('content-type'))[0];
if (!isset(static::$supportedTypes[$mimetype])) {
- echo "MIME type not supported for indexing: $mimetype\n";
+ Log::info("MIME type not supported for crawling: $mimetype");
return array();
}
@@ -98,13 +98,17 @@ class Crawler
protected function showLinks($linkInfos)
{
+ // Link listings are regular output: print them unconditionally with
+ // Log::log() instead of the debug-only Log::info().
foreach ($linkInfos as $linkInfo) {
- echo $linkInfo->url . "\n";
+ Log::log($linkInfo->url);
if ($linkInfo->title) {
- echo ' title: ' . $linkInfo->title . "\n";
- echo ' source: ' . $linkInfo->source . "\n";
- echo ' known: ' . intval($linkInfo->known)
+ Log::log(' title: ' . $linkInfo->title);
+ Log::log(' source: ' . $linkInfo->source);
+ Log::log(
+ ' known: ' . intval($linkInfo->known)
. ', crawl: ' . intval($linkInfo->crawl)
- . ', index: ' . intval($linkInfo->index) . "\n";
+ . ', index: ' . intval($linkInfo->index)
+ );
}
}
}
diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php
index 5ea0cf2..dccb118 100644
--- a/src/phinde/Fetcher.php
+++ b/src/phinde/Fetcher.php
@@ -43,7 +43,7 @@ class Fetcher
$res = $req->send();
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
- echo "Not modified since last fetch\n";
+ Log::info("Not modified since last fetch");
return false;
} else if ($res->getStatus() !== 200) {
throw new \Exception(
@@ -79,7 +79,7 @@ class Fetcher
public function storeDoc($url, $esDoc)
{
- echo "Store $url\n";
+ Log::info("Store $url");
$esDoc->status->processed = gmdate('c');
$r = new Elasticsearch_Request(
$GLOBALS['phinde']['elasticsearch'] . 'document/'
diff --git a/src/phinde/Indexer.php b/src/phinde/Indexer.php
index 2e40ba9..bdd5236 100644
--- a/src/phinde/Indexer.php
+++ b/src/phinde/Indexer.php
@@ -16,7 +16,7 @@ class Indexer
$mimetype = explode(';', $res->getHeader('content-type'))[0];
if (!in_array($mimetype, static::$supportedTypes)) {
- echo "MIME type not supported for indexing: $mimetype\n";
+ Log::info("MIME type not supported for indexing: $mimetype");
return false;
}
diff --git a/src/phinde/Log.php b/src/phinde/Log.php
new file mode 100644
index 0000000..2369a2b
--- /dev/null
+++ b/src/phinde/Log.php
@@ -0,0 +1,61 @@
+<?php
+namespace phinde;
+
+/**
+ * Minimal logger that echoes messages, one per line.
+ */
+class Log
+{
+    /**
+     * Print an error message regardless of the debug setting.
+     *
+     * @param string $msg Message text without trailing newline
+     *
+     * @return void
+     */
+    public static function error($msg)
+    {
+        static::log($msg);
+    }
+
+    /**
+     * Print an informational message, but only when debugging is enabled.
+     *
+     * @param string $msg Message text without trailing newline
+     *
+     * @return void
+     */
+    public static function info($msg)
+    {
+        // !empty() treats a missing "debug" setting as disabled instead of
+        // raising an undefined-index notice.
+        if (!empty($GLOBALS['phinde']['debug'])) {
+            static::log($msg);
+        }
+    }
+
+    /**
+     * Print a regular message regardless of the debug setting.
+     *
+     * @param string $msg Message text without trailing newline
+     *
+     * @return void
+     */
+    public static function msg($msg)
+    {
+        static::log($msg);
+    }
+
+    /**
+     * Echo the message followed by a newline.
+     *
+     * @param string $msg Message text without trailing newline
+     *
+     * @return void
+     */
+    public static function log($msg)
+    {
+        echo $msg . "\n";
+    }
+}
+?>
diff --git a/src/phinde/Queue.php b/src/phinde/Queue.php
index 6c30faa..a58a257 100644
--- a/src/phinde/Queue.php
+++ b/src/phinde/Queue.php
@@ -13,9 +13,11 @@ class Queue
public function addToProcessList($linkUrl, $actions)
{
- echo "Queuing for processing: $linkUrl"
+ Log::info(
+ "Queuing for processing: $linkUrl"
. ' (' . implode(',', $actions) . ')'
- . "\n";
+ );
+
$this->gmclient->doBackground(
$GLOBALS['phinde']['queuePrefix'] . 'phinde_process',
serialize(
@@ -26,9 +28,11 @@ class Queue
)
);
if ($this->gmclient->returnCode() != GEARMAN_SUCCESS) {
- echo 'Error queueing URL processing for '
+ Log::error(
+ 'Error queueing URL processing for '
. $linkUrl . "\n"
- . 'Error code: ' . $this->gmclient->returnCode() . "\n";
+ . 'Error code: ' . $this->gmclient->returnCode()
+ );
exit(2);
}
}