git.cweiske.de
/
phorkie.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
d06f9e8
)
extract gist titles
author
Christian Weiske
<cweiske@cweiske.de>
Fri, 2 May 2014 16:39:17 +0000
(18:39 +0200)
committer
Christian Weiske
<cweiske@cweiske.de>
Fri, 2 May 2014 16:39:17 +0000
(18:39 +0200)
src/phorkie/HtmlParser.php
patch
|
blob
|
history
diff --git
a/src/phorkie/HtmlParser.php
b/src/phorkie/HtmlParser.php
index 6b5d26ad60d869874b976a5fa14d2afa7d49e16b..d613452c3050231d310d2b6360d79c26ecb34175 100644
(file)
--- a/
src/phorkie/HtmlParser.php
+++ b/
src/phorkie/HtmlParser.php
@@
-52,9
+52,14
@@
class HtmlParser
} else {
$path = ltrim($arUrl['path'], '/');
}
} else {
$path = ltrim($arUrl['path'], '/');
}
- //FIXME: title
- $this->arGitUrls[][] = 'git://gist.github.com/'
- . $path . '.git';
+ $title = $this->getHtmlTitle($url);
+ if ($title === null) {
+ $this->arGitUrls[][] = 'git://gist.github.com/'
+ . $path . '.git';
+ } else {
+ $this->arGitUrls[$title][] = 'git://gist.github.com/'
+ . $path . '.git';
+ }
return true;
}
return true;
}
@@
-157,5
+162,26
@@
class HtmlParser
|| $scheme == 'http' || $scheme == 'https';
}
|| $scheme == 'http' || $scheme == 'https';
}
+ /**
+ * Extract the title from an HTML URL
+ *
+ * @param string $url URL to an HTML page
+ *
+ * @return string|null NULL on load error or missing/blank title, trimmed title otherwise
+ */
+ public function getHtmlTitle($url)
+ {
+ libxml_use_internal_errors(true); // collect libxml parse errors internally instead of emitting warnings
+ $doc = new \DOMDocument(); // loadHTMLFile() is an instance method; calling it statically is an Error on PHP 8
+ if ($doc->loadHTMLFile($url) === false) {
+ return null;
+ }
+ $sx = simplexml_import_dom($doc);
+ $title = trim((string) $sx->head->title); // surrounding whitespace must not end up in the title
+ if ($title === '') {
+ return null;
+ }
+ return $title;
+ }
}
?>
}
?>