diff options
| -rwxr-xr-x | bin/index.php | 8 | ||||
| -rw-r--r-- | src/phinde/LinkExtractor/Html.php | 7 |
2 files changed, 15 insertions, 0 deletions
diff --git a/bin/index.php b/bin/index.php
index 7550ad3..d110423 100755
--- a/bin/index.php
+++ b/bin/index.php
@@ -72,6 +72,14 @@
 $doc = new \DOMDocument();
 @$doc->loadHTML($res->getBody());
 $dx = new \DOMXPath($doc);
 
+$xbase = $dx->evaluate('/html/head/base[@href]')->item(0);
+if ($xbase) {
+    $base = $base->resolve(
+        $xbase->attributes->getNamedItem('href')->textContent
+    );
+}
+
+
 //remove script tags
 removeTags($doc, 'script');
 removeTags($doc, 'style');
diff --git a/src/phinde/LinkExtractor/Html.php b/src/phinde/LinkExtractor/Html.php
index a6fa8ef..7b987e3 100644
--- a/src/phinde/LinkExtractor/Html.php
+++ b/src/phinde/LinkExtractor/Html.php
@@ -22,6 +22,13 @@ class Html
         $dx = new \DOMXPath($doc);
 
+        $xbase = $dx->evaluate('/html/head/base[@href]')->item(0);
+        if ($xbase) {
+            $base = $base->resolve(
+                $xbase->attributes->getNamedItem('href')->textContent
+            );
+        }
+
         $meta = $dx->evaluate('/html/head/meta[@name="robots" and @content]')
             ->item(0);
         if ($meta) {
