diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2014-05-02 18:39:17 +0200 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2014-05-02 18:39:17 +0200 |
| commit | c5fb118236c472237ac1fc43b0eb9a98d56b98f1 (patch) | |
| tree | fafbfca02891455c14f182138586ae433056d2af /src/phorkie/HtmlParser.php | |
| parent | d06f9e8ea6cefcfae1ad28bb203a7e4e562820a8 (diff) | |
| download | phorkie-c5fb118236c472237ac1fc43b0eb9a98d56b98f1.tar.gz phorkie-c5fb118236c472237ac1fc43b0eb9a98d56b98f1.zip | |
extract gist titles
Diffstat (limited to 'src/phorkie/HtmlParser.php')
| -rw-r--r-- | src/phorkie/HtmlParser.php | 32 |
1 file changed, 29 insertions, 3 deletions
```diff
diff --git a/src/phorkie/HtmlParser.php b/src/phorkie/HtmlParser.php
index 6b5d26a..d613452 100644
--- a/src/phorkie/HtmlParser.php
+++ b/src/phorkie/HtmlParser.php
@@ -52,9 +52,14 @@ class HtmlParser
             } else {
                 $path = ltrim($arUrl['path'], '/');
             }
-            //FIXME: title
-            $this->arGitUrls[][] = 'git://gist.github.com/'
-                . $path . '.git';
+            $title = $this->getHtmlTitle($url);
+            if ($title === null) {
+                $this->arGitUrls[][] = 'git://gist.github.com/'
+                    . $path . '.git';
+            } else {
+                $this->arGitUrls[$title][] = 'git://gist.github.com/'
+                    . $path . '.git';
+            }
             return true;
         }
 
@@ -157,5 +162,26 @@ class HtmlParser
             || $scheme == 'http'
             || $scheme == 'https';
     }
+    /**
+     * Extract the title from a HTML URL
+     *
+     * @param string $url URL to a HTML page
+     *
+     * @return string|null NULL on error, title otherwise
+     */
+    public function getHtmlTitle($url)
+    {
+        libxml_use_internal_errors(true);
+        $doc = \DOMDocument::loadHTMLFile($url);
+        if ($doc === false) {
+            return null;
+        }
+        $sx = simplexml_import_dom($doc);
+        $title = (string) $sx->head->title;
+        if ($title == '') {
+            return null;
+        }
+        return $title;
+    }
 }
 ?>
```
