remove anchor from source URLs
author Christian Weiske <cweiske@cweiske.de>
Thu, 1 Sep 2016 05:38:08 +0000 (07:38 +0200)
committer Christian Weiske <cweiske@cweiske.de>
Thu, 1 Sep 2016 05:38:08 +0000 (07:38 +0200)
src/phinde/Helper.php
src/phinde/LinkExtractor/Html.php

index 43345ba..8e30a19 100644 (file)
@@ -31,6 +31,12 @@ class Helper
         return 'http://' . $url;
     }
 
+    public static function removeAnchor($url)
+    { // Returns $url with everything from the first '#' onward removed.
+        $parts = explode('#', $url, 2); // limit 2: split only at the first '#'
+        return $parts[0]; // the whole input when it contains no '#'
+    }
+
     public static function sanitizeTitle($str)
     {
         return trim(
index 7b987e3..b3a9ea6 100644 (file)
@@ -8,7 +8,7 @@ class Html
 {
     public function extract(\HTTP_Request2_Response $res)
     {
-        $url = $res->getEffectiveUrl();
+        $url = Helper::removeAnchor($res->getEffectiveUrl());
 
         $linkInfos = array();