automatically configure git paths (dir + public clone url)
[phorkie.git] / src / phorkie / HtmlParser.php
index 6b5d26ad60d869874b976a5fa14d2afa7d49e16b..b8cd1aff3dbece084445bde9617321aba0ae64bf 100644 (file)
@@ -44,7 +44,8 @@ class HtmlParser
         if ($scheme == 'https' && isset($arUrl['host'])
             && $arUrl['host'] == 'gist.github.com'
         ) {
-            //https://gist.github.com/maddy2101/5764473
+            //https://gist.github.com/cweiske/2400389
+            // clone URL: https://gist.github.com/2400389.git
             $parts = explode('/', ltrim($arUrl['path'], '/'));
             if (count($parts == 2)) {
                 //we only want the number, not the user name
@@ -52,9 +53,14 @@ class HtmlParser
             } else {
                 $path = ltrim($arUrl['path'], '/');
             }
-            //FIXME: title
-            $this->arGitUrls[][] = 'git://gist.github.com/'
-                . $path . '.git';
+            $title = $this->getHtmlTitle($url);
+            if ($title === null) {
+                $this->arGitUrls[][] = 'https://gist.github.com/'
+                    . $path . '.git';
+            } else {
+                $this->arGitUrls[$title][] = 'https://gist.github.com/'
+                    . $path . '.git';
+            }
             return true;
         }
 
@@ -157,5 +163,26 @@ class HtmlParser
             || $scheme == 'http' || $scheme == 'https';
     }
 
+    /**
+     * Extract the title from a HTML URL
+     *
+     * Loads the document with DOMDocument and returns the text of the
+     * <title> element found below <head>.
+     *
+     * @param string $url URL to a HTML page
+     *
+     * @return string|null NULL on error, title otherwise
+     */
+    public function getHtmlTitle($url)
+    {
+        // Buffer libxml errors instead of emitting PHP warnings for
+        // malformed HTML, and remember the previous setting so this
+        // process-wide switch can be restored afterwards.
+        $previous = libxml_use_internal_errors(true);
+
+        // loadHTMLFile() is an instance method; calling it statically
+        // is deprecated in PHP 5 and an error in PHP 8.
+        $doc = new \DOMDocument();
+        $loaded = $doc->loadHTMLFile($url);
+
+        // Drop the errors buffered during parsing and restore the setting.
+        libxml_clear_errors();
+        libxml_use_internal_errors($previous);
+
+        if ($loaded === false) {
+            return null;
+        }
+
+        $sx = simplexml_import_dom($doc);
+        if (!($sx instanceof \SimpleXMLElement)) {
+            // import failed, e.g. the document has no root element
+            return null;
+        }
+
+        $title = (string) $sx->head->title;
+        if ($title === '') {
+            return null;
+        }
+        return $title;
+    }
 }
 ?>