<?php
/**
 * Gearman job queue used to schedule URLs for crawling and indexing.
 *
 * Origin: commit cd02bac646f42a0cb402ff2dc8240aa01f1f0fb8
 * ("rework crawler; add atom link extraction", 2016-02-10),
 * file src/phinde/Queue.php.
 *
 * NOTE(review): the opening of this file (PHP tag, namespace, class header,
 * property declaration and constructor signature) was lost when the patch
 * was extracted. It has been reconstructed from the file path and from the
 * surviving "gmclient = new \GearmanClient();" fragment - confirm the
 * namespace, class name and property visibility against the repository.
 *
 * @author Christian Weiske
 */
namespace phinde;

class Queue
{
    /**
     * Client used to submit background jobs to the Gearman job server.
     *
     * @var \GearmanClient
     */
    protected $gmclient;

    /**
     * Create the Gearman client and register the job server at 127.0.0.1.
     */
    public function __construct()
    {
        $this->gmclient = new \GearmanClient();
        $this->gmclient->addServer('127.0.0.1');
    }

    /**
     * Queue a URL for indexing via the "phinde_index" Gearman function.
     *
     * The workload is a serialize()d array with the keys "url", "title"
     * and "source". Terminates the process with exit code 2 when the job
     * could not be queued.
     *
     * @param string $linkUrl   URL that shall be indexed
     * @param string $linkTitle Title belonging to the link
     * @param string $sourceUrl URL the link was found on
     *
     * @return void
     */
    public function addToIndex($linkUrl, $linkTitle, $sourceUrl)
    {
        $this->queueJob(
            'phinde_index',
            'indexing',
            $linkUrl,
            array(
                'url'    => $linkUrl,
                'title'  => $linkTitle,
                'source' => $sourceUrl
            )
        );
    }

    /**
     * Queue a URL for crawling via the "phinde_crawl" Gearman function.
     *
     * The workload is a serialize()d array with the single key "url".
     * Terminates the process with exit code 2 when the job could not be
     * queued.
     *
     * @param string $linkUrl URL that shall be crawled
     *
     * @return void
     */
    public function addToCrawl($linkUrl)
    {
        $this->queueJob(
            'phinde_crawl',
            'crawling',
            $linkUrl,
            array(
                'url' => $linkUrl
            )
        );
    }

    /**
     * Submit a background job and abort the process if submission failed.
     *
     * Shared implementation of addToIndex() and addToCrawl(); the messages
     * written to standard output are identical to the ones the two methods
     * used to emit individually.
     *
     * @param string $function Gearman function name the job is sent to
     * @param string $action   Word used in the status messages
     *                         ("indexing" or "crawling")
     * @param string $linkUrl  URL the job is about (used for messages only)
     * @param array  $workload Job data; passed through serialize()
     *
     * @return void
     */
    private function queueJob($function, $action, $linkUrl, array $workload)
    {
        echo "Queuing for $action: $linkUrl\n";
        $this->gmclient->doBackground($function, serialize($workload));

        // Read the return code once so the check and the message agree.
        $returnCode = $this->gmclient->returnCode();
        if ($returnCode !== GEARMAN_SUCCESS) {
            echo 'Error queueing URL ' . $action . ' for '
                . $linkUrl . "\n"
                . 'Error code: ' . $returnCode . "\n";
            // Existing contract: a failed submission stops the whole script.
            exit(2);
        }
    }
}