From: Christian Weiske
Date: Fri, 2 May 2014 16:39:17 +0000 (+0200)
Subject: extract gist titles
X-Git-Tag: v0.4.0~51
X-Git-Url: https://git.cweiske.de/phorkie.git/commitdiff_plain/c5fb118236c472237ac1fc43b0eb9a98d56b98f1

extract gist titles
---

diff --git a/src/phorkie/HtmlParser.php b/src/phorkie/HtmlParser.php
index 6b5d26a..d613452 100644
--- a/src/phorkie/HtmlParser.php
+++ b/src/phorkie/HtmlParser.php
@@ -52,9 +52,14 @@ class HtmlParser
         } else {
             $path = ltrim($arUrl['path'], '/');
         }
-        //FIXME: title
-        $this->arGitUrls[][] = 'git://gist.github.com/'
-            . $path . '.git';
+        $title = $this->getHtmlTitle($url);
+        if ($title === null) {
+            $this->arGitUrls[][] = 'git://gist.github.com/'
+                . $path . '.git';
+        } else {
+            $this->arGitUrls[$title][] = 'git://gist.github.com/'
+                . $path . '.git';
+        }
         return true;
     }
 
@@ -157,5 +162,43 @@ class HtmlParser
             || $scheme == 'http'
             || $scheme == 'https';
     }
+    /**
+     * Extract the title from a HTML URL
+     *
+     * Loads the page with DOMDocument and reads the text of the
+     * <head><title> element.
+     *
+     * @param string $url URL to a HTML page
+     *
+     * @return string|null NULL on error or when no title is present,
+     *                     title otherwise
+     */
+    public function getHtmlTitle($url)
+    {
+        if ((string) $url === '') {
+            //an empty file name can never be loaded
+            return null;
+        }
+
+        //collect parse warnings internally instead of emitting them,
+        // and restore the previous setting once loading is done
+        $previous = libxml_use_internal_errors(true);
+        //loadHTMLFile() is an instance method; calling it statically
+        // is deprecated and a fatal error since PHP 8
+        $doc    = new \DOMDocument();
+        $loaded = $doc->loadHTMLFile($url);
+        libxml_clear_errors();
+        libxml_use_internal_errors($previous);
+        if ($loaded === false || $doc->documentElement === null) {
+            return null;
+        }
+
+        $sx    = simplexml_import_dom($doc);
+        $title = (string) $sx->head->title;
+        if ($title == '') {
+            return null;
+        }
+        return $title;
+    }
 }
 ?>