about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/phinde/Helper.php 11
-rw-r--r-- src/phinde/LinkExtractor/Html.php 5
2 files changed, 14 insertions, 2 deletions
diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php
index 40ea751..312c5e5 100644
--- a/src/phinde/Helper.php
+++ b/src/phinde/Helper.php
@@ -30,5 +30,16 @@ class Helper
}
return 'http://' . $url;
}
+
+ public static function sanitizeTitle($str) // Cleans up whitespace in a title string: applies the replacements below, then trims both ends.
+ {
+ return trim( // trim() with its default character list removes leading/trailing whitespace from the replaced string
+ str_replace(
+ array("\r", "\n", ' ', ' '), // search strings. NOTE(review): the last two appear as single spaces here, making those pairs no-ops; presumably multi-space literals were collapsed in this rendering — confirm against the repository
+ array('', ' ', ' ', ' '), // pairwise replacements: "\r" is removed, "\n" becomes one space, the remaining entries become one space
+ $str // input text; the Html link extractor passes a link's textContent here
+ )
+ );
+ }
}
?>
diff --git a/src/phinde/LinkExtractor/Html.php b/src/phinde/LinkExtractor/Html.php
index 4acd19f..a6fa8ef 100644
--- a/src/phinde/LinkExtractor/Html.php
+++ b/src/phinde/LinkExtractor/Html.php
@@ -2,6 +2,7 @@
namespace phinde\LinkExtractor;
use phinde\LinkInfo;
+use phinde\Helper;
class Html
{
@@ -36,10 +37,10 @@ class Html
$links = $dx->evaluate('//a');
//FIXME: link rel, img, video
- $alreadySeen = array();
+ $alreadySeen = array($url => true);
foreach ($links as $link) {
- $linkTitle = $link->textContent;
+ $linkTitle = Helper::sanitizeTitle($link->textContent);
$href = '';
foreach ($link->attributes as $attribute) {
if ($attribute->name == 'href') {