aboutsummaryrefslogtreecommitdiff
path: root/src/phorkie/HtmlParser.php
diff options
context:
space:
mode:
author Christian Weiske <cweiske@cweiske.de> 2014-05-02 18:39:17 +0200
committer Christian Weiske <cweiske@cweiske.de> 2014-05-02 18:39:17 +0200
commit c5fb118236c472237ac1fc43b0eb9a98d56b98f1 (patch)
tree fafbfca02891455c14f182138586ae433056d2af /src/phorkie/HtmlParser.php
parent d06f9e8ea6cefcfae1ad28bb203a7e4e562820a8 (diff)
download phorkie-c5fb118236c472237ac1fc43b0eb9a98d56b98f1.tar.gz
phorkie-c5fb118236c472237ac1fc43b0eb9a98d56b98f1.zip
extract gist titles
Diffstat (limited to 'src/phorkie/HtmlParser.php')
-rw-r--r-- src/phorkie/HtmlParser.php 32
1 files changed, 29 insertions, 3 deletions
diff --git a/src/phorkie/HtmlParser.php b/src/phorkie/HtmlParser.php
index 6b5d26a..d613452 100644
--- a/src/phorkie/HtmlParser.php
+++ b/src/phorkie/HtmlParser.php
@@ -52,9 +52,14 @@ class HtmlParser
} else {
$path = ltrim($arUrl['path'], '/');
}
- //FIXME: title
- $this->arGitUrls[][] = 'git://gist.github.com/'
- . $path . '.git';
+ $title = $this->getHtmlTitle($url);
+ if ($title === null) {
+ $this->arGitUrls[][] = 'git://gist.github.com/'
+ . $path . '.git';
+ } else {
+ $this->arGitUrls[$title][] = 'git://gist.github.com/'
+ . $path . '.git';
+ }
return true;
}
@@ -157,5 +162,26 @@ class HtmlParser
|| $scheme == 'http' || $scheme == 'https';
}
+ /**
+  * Extract the title from a HTML URL
+  *
+  * Loads the document at $url with DOMDocument and returns the text of
+  * the <title> element found below <head>. libxml parse warnings are
+  * collected internally while loading (real-world HTML is rarely
+  * well-formed); the libxml error setting that was active before the
+  * call is restored afterwards.
+  *
+  * @param string $url URL to a HTML page
+  *
+  * @return string|null NULL on error, title otherwise
+  */
+ public function getHtmlTitle($url)
+ {
+     // An empty location can never be loaded; PHP 8 would even throw
+     // a ValueError from loadHTMLFile() for it.
+     if ((string) $url === '') {
+         return null;
+     }
+
+     // Buffer libxml warnings instead of emitting them, and remember
+     // the previous setting so this method does not change it globally.
+     $previousSetting = libxml_use_internal_errors(true);
+
+     // loadHTMLFile() is an instance method: calling it statically is
+     // deprecated in PHP 5/7 and a fatal error in PHP 8.
+     $doc = new \DOMDocument();
+     $loaded = $doc->loadHTMLFile($url);
+
+     // Drop the buffered warnings so they do not pile up in memory.
+     libxml_clear_errors();
+     libxml_use_internal_errors($previousSetting);
+
+     // No root element means there is nothing to import into SimpleXML.
+     if ($loaded === false || $doc->documentElement === null) {
+         return null;
+     }
+
+     $sx = simplexml_import_dom($doc);
+     if (!$sx instanceof \SimpleXMLElement) {
+         return null;
+     }
+
+     // A missing <head> or <title> simply casts to an empty string.
+     $title = (string) $sx->head->title;
+     if ($title === '') {
+         return null;
+     }
+     return $title;
+ }
}
?>