about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/phinde/Crawler.php 27
1 files changed, 26 insertions, 1 deletions
diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php
index f3158aa..9b14878 100644
--- a/src/phinde/Crawler.php
+++ b/src/phinde/Crawler.php
@@ -6,6 +6,11 @@ class Crawler
protected $es;
protected $queue;
+ /**
+ * Whether extracted links should only be shown instead of being queued
+ */
+ protected $showLinksOnly = false;
+
static $supportedIndexTypes = array(
'application/atom+xml' => '\\phinde\\LinkExtractor\\Atom',
'application/xhtml+xml' => '\\phinde\\LinkExtractor\\Html',
@@ -22,7 +27,11 @@ class Crawler
{
$res = $this->fetch($url);
$linkInfos = $this->extractLinks($res);
- $this->enqueue($linkInfos);
+ if ($this->showLinksOnly) {
+ $this->showLinks($linkInfos);
+ } else {
+ $this->enqueue($linkInfos);
+ }
}
protected function fetch($url)
@@ -70,5 +79,21 @@ class Crawler
}
}
}
+
+    /**
+     * Print the given links instead of queueing them.
+     *
+     * The URL of every link is written on a line of its own. When the
+     * link's title is truthy, the title and the source follow on two
+     * further lines; otherwise only the URL is printed.
+     *
+     * @param iterable $linkInfos Objects exposing the properties
+     *                            url, title and source
+     *
+     * @return void
+     */
+    protected function showLinks($linkInfos)
+    {
+        foreach ($linkInfos as $info) {
+            echo $info->url . "\n";
+            if (!$info->title) {
+                // Untitled links are listed by URL only.
+                continue;
+            }
+            echo ' title: ' . $info->title . "\n";
+            echo ' source: ' . $info->source . "\n";
+        }
+    }
+
+    /**
+     * Choose between printing and queueing of extracted links.
+     *
+     * The value is stored in the showLinksOnly property. While it is
+     * truthy, extracted links are handed to showLinks() instead of
+     * enqueue().
+     *
+     * @param bool $showLinksOnly True to only print the links,
+     *                            false to queue them
+     *
+     * @return void
+     */
+    public function setShowLinksOnly($showLinksOnly)
+    {
+        $this->showLinksOnly = $showLinksOnly;
+    }
}
?>