Manually activate libxml external entity resolver
[phorkie.git] / src / phorkie / HtmlParser.php
index d613452c3050231d310d2b6360d79c26ecb34175..257dfe14e6f8a7bda71a5b2d3e384c4a515b8d4d 100644 (file)
@@ -44,7 +44,8 @@ class HtmlParser
         if ($scheme == 'https' && isset($arUrl['host'])
             && $arUrl['host'] == 'gist.github.com'
         ) {
-            //https://gist.github.com/maddy2101/5764473
+            //https://gist.github.com/cweiske/2400389
+            // clone URL: https://gist.github.com/2400389.git
             $parts = explode('/', ltrim($arUrl['path'], '/'));
             if (count($parts == 2)) {
                 //we only want the number, not the user name
@@ -54,10 +55,10 @@ class HtmlParser
             }
             $title = $this->getHtmlTitle($url);
             if ($title === null) {
-                $this->arGitUrls[][] = 'git://gist.github.com/'
+                $this->arGitUrls[][] = 'https://gist.github.com/'
                     . $path . '.git';
             } else {
-                $this->arGitUrls[$title][] = 'git://gist.github.com/'
+                $this->arGitUrls[$title][] = 'https://gist.github.com/'
                     . $path . '.git';
             }
             return true;
@@ -96,6 +97,7 @@ class HtmlParser
         } else {
             $sx = simplexml_import_dom(\DOMDocument::loadHTML($html));
         }
+        //FIXME: handle network error
 
         $elems = $sx->xpath('//*[@rel="vcs-git"]');
         $titles = $sx->xpath('/html/head/title');
@@ -172,6 +174,8 @@ class HtmlParser
     public function getHtmlTitle($url)
     {
         libxml_use_internal_errors(true);
+        //re-enable the libxml entity loader so that DOMDocument can load the URL
+        libxml_disable_entity_loader(false);
         $doc = \DOMDocument::loadHTMLFile($url);
         if ($doc === false) {
             return null;