about summary refs log tree commit diff
path: root/src/phinde/LinkExtractor/Html.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/phinde/LinkExtractor/Html.php')
-rw-r--r-- src/phinde/LinkExtractor/Html.php 5
1 files changed, 3 insertions, 2 deletions
diff --git a/src/phinde/LinkExtractor/Html.php b/src/phinde/LinkExtractor/Html.php
index 4acd19f..a6fa8ef 100644
--- a/src/phinde/LinkExtractor/Html.php
+++ b/src/phinde/LinkExtractor/Html.php
@@ -2,6 +2,7 @@
namespace phinde\LinkExtractor;
use phinde\LinkInfo;
+use phinde\Helper;
class Html
{
@@ -36,10 +37,10 @@ class Html
$links = $dx->evaluate('//a');
//FIXME: link rel, img, video
- $alreadySeen = array();
+ $alreadySeen = array($url => true);
foreach ($links as $link) {
- $linkTitle = $link->textContent;
+ $linkTitle = Helper::sanitizeTitle($link->textContent);
$href = '';
foreach ($link->attributes as $attribute) {
if ($attribute->name == 'href') {