From a2e7177d78911d219bc5be86c1cc86989b36983f Mon Sep 17 00:00:00 2001 From: Christian Weiske Date: Thu, 11 Feb 2016 08:43:01 +0100 Subject: debug option for crawler --- src/phinde/Crawler.php | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'src/phinde') diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php index f3158aa..9b14878 100644 --- a/src/phinde/Crawler.php +++ b/src/phinde/Crawler.php @@ -6,6 +6,11 @@ class Crawler protected $es; protected $queue; + /** + * If the links only should be shown, not queued + */ + protected $showLinksOnly = false; + static $supportedIndexTypes = array( 'application/atom+xml' => '\\phinde\\LinkExtractor\\Atom', 'application/xhtml+xml' => '\\phinde\\LinkExtractor\\Html', @@ -22,7 +27,11 @@ class Crawler { $res = $this->fetch($url); $linkInfos = $this->extractLinks($res); - $this->enqueue($linkInfos); + if ($this->showLinksOnly) { + $this->showLinks($linkInfos); + } else { + $this->enqueue($linkInfos); + } } protected function fetch($url) @@ -70,5 +79,21 @@ class Crawler } } } + + protected function showLinks($linkInfos) + { + foreach ($linkInfos as $linkInfo) { + echo $linkInfo->url . "\n"; + if ($linkInfo->title) { + echo ' title: ' . $linkInfo->title . "\n"; + echo ' source: ' . $linkInfo->source . "\n"; + } + } + } + + public function setShowLinksOnly($showLinksOnly) + { + $this->showLinksOnly = $showLinksOnly; + } } ?> -- cgit v1.2.3