X-Git-Url: https://git.cweiske.de/phorkie.git/blobdiff_plain/f47ddf0758f120dfb26f03fb36be5cd897a10f23..f92fbaf636d620a0092fff8b715be9a493547b4f:/src/phorkie/HtmlParser.php
diff --git a/src/phorkie/HtmlParser.php b/src/phorkie/HtmlParser.php
index f751074..b8cd1af 100644
--- a/src/phorkie/HtmlParser.php
+++ b/src/phorkie/HtmlParser.php
@@ -44,9 +44,24 @@ class HtmlParser
         if ($scheme == 'https' && isset($arUrl['host'])
             && $arUrl['host'] == 'gist.github.com'
         ) {
-            //FIXME: title
-            $this->arGitUrls[][] = 'git://gist.github.com/'
-                . ltrim($arUrl['path'], '/') . '.git';
+            // page URL:  https://gist.github.com/cweiske/2400389
+            // clone URL: https://gist.github.com/2400389.git
+            $parts = explode('/', ltrim($arUrl['path'], '/'));
+            if (count($parts) == 2) {
+                //we only want the number, not the user name
+                $path = $parts[1];
+            } else {
+                $path = ltrim($arUrl['path'], '/');
+            }
+            //use the page title as key for the clone URL when available
+            $title = $this->getHtmlTitle($url);
+            if ($title === null) {
+                $this->arGitUrls[][] = 'https://gist.github.com/'
+                    . $path . '.git';
+            } else {
+                $this->arGitUrls[$title][] = 'https://gist.github.com/'
+                    . $path . '.git';
+            }
             return true;
         }
 
@@ -149,5 +164,31 @@ class HtmlParser
             || $scheme == 'http'
             || $scheme == 'https';
     }
+    /**
+     * Extract the title from a HTML URL
+     *
+     * Parse errors of the fetched document are collected by libxml
+     * instead of being emitted as PHP warnings.
+     *
+     * @param string $url URL to a HTML page
+     *
+     * @return string|null NULL on error or empty title, title otherwise
+     */
+    public function getHtmlTitle($url)
+    {
+        libxml_use_internal_errors(true);
+        //loadHTMLFile() is an instance method; calling it statically
+        // fails on PHP 8
+        $doc = new \DOMDocument();
+        if (!$doc->loadHTMLFile($url)) {
+            return null;
+        }
+        $sx = simplexml_import_dom($doc);
+        $title = (string) $sx->head->title;
+        if ($title == '') {
+            return null;
+        }
+        return $title;
+    }
 }
 ?>