X-Git-Url: https://git.cweiske.de/phorkie.git/blobdiff_plain/c3809e6b58401ccbdcae2fb6ab67010d4e6cfb90..f92fbaf636d620a0092fff8b715be9a493547b4f:/src/phorkie/HtmlParser.php

Use the gist page title as key for extracted gist clone URLs and switch
the clone URL from git:// to https://.

Review fixes folded into this patch:
 - count($parts == 2) counted a boolean instead of the array, so the
   "else" branch for user-less gist URLs could never be reached.
 - getHtmlTitle() called DOMDocument::loadHTMLFile() statically and left
   libxml's internal error mode switched on for the rest of the request.

diff --git a/src/phorkie/HtmlParser.php b/src/phorkie/HtmlParser.php
index 6b5d26a..b8cd1af 100644
--- a/src/phorkie/HtmlParser.php
+++ b/src/phorkie/HtmlParser.php
@@ -44,7 +44,8 @@ class HtmlParser
         if ($scheme == 'https' && isset($arUrl['host'])
             && $arUrl['host'] == 'gist.github.com'
         ) {
-            //https://gist.github.com/maddy2101/5764473
+            //https://gist.github.com/cweiske/2400389
+            // clone URL: https://gist.github.com/2400389.git
             $parts = explode('/', ltrim($arUrl['path'], '/'));
-            if (count($parts == 2)) {
+            if (count($parts) == 2) {
                 //we only want the number, not the user name
@@ -52,9 +53,14 @@ class HtmlParser
             } else {
                 $path = ltrim($arUrl['path'], '/');
             }
-            //FIXME: title
-            $this->arGitUrls[][] = 'git://gist.github.com/'
-                . $path . '.git';
+            $title = $this->getHtmlTitle($url);
+            if ($title === null) {
+                $this->arGitUrls[][] = 'https://gist.github.com/'
+                    . $path . '.git';
+            } else {
+                $this->arGitUrls[$title][] = 'https://gist.github.com/'
+                    . $path . '.git';
+            }
             return true;
         }
 
@@ -157,5 +163,45 @@ class HtmlParser
             || $scheme == 'http' || $scheme == 'https';
     }
 
+    /**
+     * Extract the title from a HTML URL
+     *
+     * Loads the document with DOM (libxml complaints about broken
+     * markup are collected internally instead of being raised) and
+     * reads the <head><title> element.
+     *
+     * @param string $url URL to a HTML page
+     *
+     * @return string|null NULL on error, title otherwise
+     */
+    public function getHtmlTitle($url)
+    {
+        $previous = libxml_use_internal_errors(true);
+
+        //loadHTMLFile() is an instance method; calling it statically
+        // is deprecated and throws an Error on PHP 8
+        $doc    = new \DOMDocument();
+        $loaded = $doc->loadHTMLFile($url);
+
+        //do not let collected parse errors pile up, and give the
+        // caller back the libxml error mode it had before
+        libxml_clear_errors();
+        libxml_use_internal_errors($previous);
+
+        if ($loaded === false) {
+            return null;
+        }
+
+        $sx = simplexml_import_dom($doc);
+        if (!$sx instanceof \SimpleXMLElement) {
+            return null;
+        }
+
+        $title = (string) $sx->head->title;
+        if ($title === '') {
+            return null;
+        }
+        return $title;
+    }
 }
 ?>