diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2016-02-11 17:37:12 +0100 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-02-11 17:37:12 +0100 |
| commit | d8c39f2b0571b9734259b2f9dc218eed24412332 (patch) | |
| tree | 8ef5d50aa86fe821147b578371a5c4038f1f6aaf /src | |
| parent | fd98bb30be8970309c52d3fc3a1585d7454b370a (diff) | |
| download | phinde-d8c39f2b0571b9734259b2f9dc218eed24412332.tar.gz phinde-d8c39f2b0571b9734259b2f9dc218eed24412332.zip | |
sanitize title better
Diffstat (limited to 'src')
| -rw-r--r-- | src/phinde/Helper.php | 11 | ||||
| -rw-r--r-- | src/phinde/LinkExtractor/Html.php | 5 |
2 files changed, 14 insertions, 2 deletions
```diff
diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php
index 40ea751..312c5e5 100644
--- a/src/phinde/Helper.php
+++ b/src/phinde/Helper.php
@@ -30,5 +30,16 @@ class Helper
         }
         return 'http://' . $url;
     }
+
+    public static function sanitizeTitle($str)
+    {
+        return trim(
+            str_replace(
+                array("\r", "\n", ' ', ' '),
+                array('', ' ', ' ', ' '),
+                $str
+            )
+        );
+    }
 }
 ?>
diff --git a/src/phinde/LinkExtractor/Html.php b/src/phinde/LinkExtractor/Html.php
index 4acd19f..a6fa8ef 100644
--- a/src/phinde/LinkExtractor/Html.php
+++ b/src/phinde/LinkExtractor/Html.php
@@ -2,6 +2,7 @@
 namespace phinde\LinkExtractor;
 
 use phinde\LinkInfo;
+use phinde\Helper;
 
 class Html
 {
@@ -36,10 +37,10 @@ class Html
         $links = $dx->evaluate('//a');
         //FIXME: link rel, img, video
 
-        $alreadySeen = array();
+        $alreadySeen = array($url => true);
 
         foreach ($links as $link) {
-            $linkTitle = $link->textContent;
+            $linkTitle = Helper::sanitizeTitle($link->textContent);
             $href = '';
             foreach ($link->attributes as $attribute) {
                 if ($attribute->name == 'href') {
```
