support base href
author Christian Weiske <cweiske@cweiske.de>
Thu, 11 Feb 2016 19:02:30 +0000 (20:02 +0100)
committer Christian Weiske <cweiske@cweiske.de>
Thu, 11 Feb 2016 19:02:30 +0000 (20:02 +0100)
bin/index.php
src/phinde/LinkExtractor/Html.php

index 7550ad3c0667778e15852715c828b7cf5b162939..d110423b4c6b81a5065afec5e35d90ba0c1d6b70 100755 (executable)
@@ -72,6 +72,14 @@ $doc = new \DOMDocument();
 @$doc->loadHTML($res->getBody());
 $dx = new \DOMXPath($doc);
 
+$xbase = $dx->evaluate('/html/head/base[@href]')->item(0);
+if ($xbase) {
+    $base = $base->resolve(
+        $xbase->attributes->getNamedItem('href')->textContent
+    );
+}
+
+
 //remove script tags
 removeTags($doc, 'script');
 removeTags($doc, 'style');
index a6fa8efef8fe9725722bf94bbd137671b73e3afe..7b987e3280fa0b298c7c0f9b2ef774eb15cc5a87 100644 (file)
@@ -22,6 +22,13 @@ class Html
 
         $dx = new \DOMXPath($doc);
 
+        $xbase = $dx->evaluate('/html/head/base[@href]')->item(0);
+        if ($xbase) {
+            $base = $base->resolve(
+                $xbase->attributes->getNamedItem('href')->textContent
+            );
+        }
+
         $meta = $dx->evaluate('/html/head/meta[@name="robots" and @content]')
             ->item(0);
         if ($meta) {