diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2016-02-11 08:43:01 +0100 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-02-11 08:43:01 +0100 |
| commit | a2e7177d78911d219bc5be86c1cc86989b36983f (patch) | |
| tree | 7de453175d25983fca3ebd54f5520dd8c314cd65 /src | |
| parent | 87670eb07d1b0e82e0a8b4c1f9b9d20e3cafdb42 (diff) | |
| download | phinde-a2e7177d78911d219bc5be86c1cc86989b36983f.tar.gz phinde-a2e7177d78911d219bc5be86c1cc86989b36983f.zip | |
debug option for crawler
Diffstat (limited to 'src')
| -rw-r--r-- | src/phinde/Crawler.php | 27 |
1 file changed, 26 insertions, 1 deletion
```diff
diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php
index f3158aa..9b14878 100644
--- a/src/phinde/Crawler.php
+++ b/src/phinde/Crawler.php
@@ -6,6 +6,11 @@ class Crawler
     protected $es;
     protected $queue;
 
+    /**
+     * If the links only should be shown, not queued
+     */
+    protected $showLinksOnly = false;
+
     static $supportedIndexTypes = array(
         'application/atom+xml' => '\\phinde\\LinkExtractor\\Atom',
         'application/xhtml+xml' => '\\phinde\\LinkExtractor\\Html',
@@ -22,7 +27,11 @@ class Crawler
     {
         $res = $this->fetch($url);
         $linkInfos = $this->extractLinks($res);
-        $this->enqueue($linkInfos);
+        if ($this->showLinksOnly) {
+            $this->showLinks($linkInfos);
+        } else {
+            $this->enqueue($linkInfos);
+        }
     }
 
     protected function fetch($url)
@@ -70,5 +79,21 @@ class Crawler
             }
         }
     }
+
+    protected function showLinks($linkInfos)
+    {
+        foreach ($linkInfos as $linkInfo) {
+            echo $linkInfo->url . "\n";
+            if ($linkInfo->title) {
+                echo ' title: ' . $linkInfo->title . "\n";
+                echo ' source: ' . $linkInfo->source . "\n";
+            }
+        }
+    }
+
+    public function setShowLinksOnly($showLinksOnly)
+    {
+        $this->showLinksOnly = $showLinksOnly;
+    }
 }
 ?>
```
