aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorChristian Weiske <cweiske@cweiske.de>2016-02-11 17:37:12 +0100
committerChristian Weiske <cweiske@cweiske.de>2016-02-11 17:37:12 +0100
commitd8c39f2b0571b9734259b2f9dc218eed24412332 (patch)
tree8ef5d50aa86fe821147b578371a5c4038f1f6aaf /src
parentfd98bb30be8970309c52d3fc3a1585d7454b370a (diff)
downloadphinde-d8c39f2b0571b9734259b2f9dc218eed24412332.tar.gz
phinde-d8c39f2b0571b9734259b2f9dc218eed24412332.zip
sanitize title better
Diffstat (limited to 'src')
-rw-r--r--src/phinde/Helper.php11
-rw-r--r--src/phinde/LinkExtractor/Html.php5
2 files changed, 14 insertions, 2 deletions
diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php
index 40ea751..312c5e5 100644
--- a/src/phinde/Helper.php
+++ b/src/phinde/Helper.php
@@ -30,5 +30,16 @@ class Helper
}
return 'http://' . $url;
}
+
+ public static function sanitizeTitle($str)
+ {
+ return trim(
+ str_replace(
+ array("\r", "\n", ' ', ' '),
+ array('', ' ', ' ', ' '),
+ $str
+ )
+ );
+ }
}
?>
diff --git a/src/phinde/LinkExtractor/Html.php b/src/phinde/LinkExtractor/Html.php
index 4acd19f..a6fa8ef 100644
--- a/src/phinde/LinkExtractor/Html.php
+++ b/src/phinde/LinkExtractor/Html.php
@@ -2,6 +2,7 @@
namespace phinde\LinkExtractor;
use phinde\LinkInfo;
+use phinde\Helper;
class Html
{
@@ -36,10 +37,10 @@ class Html
$links = $dx->evaluate('//a');
//FIXME: link rel, img, video
- $alreadySeen = array();
+ $alreadySeen = array($url => true);
foreach ($links as $link) {
- $linkTitle = $link->textContent;
+ $linkTitle = Helper::sanitizeTitle($link->textContent);
$href = '';
foreach ($link->attributes as $attribute) {
if ($attribute->name == 'href') {