about | summary | refs | log | tree | commit | diff
path: root/src/phinde/HubUrlExtractor.php
diff options
context:
space:
mode:
author Christian Weiske <cweiske@cweiske.de> 2020-03-05 21:26:57 +0100
committer Christian Weiske <cweiske@cweiske.de> 2020-03-05 21:26:57 +0100
commit ab2ebeda104555928ef044c662b1e672c067e218 (patch)
tree 53de0965644ea38f80fb82862a051756ebb6a2f8 /src/phinde/HubUrlExtractor.php
parent b5a753ded3d10f731f8aef95281a992e723547b4 (diff)
download phinde-ab2ebeda104555928ef044c662b1e672c067e218.tar.gz
download phinde-ab2ebeda104555928ef044c662b1e672c067e218.zip
Add atom and rss feed link url extraction
Diffstat (limited to 'src/phinde/HubUrlExtractor.php')
-rw-r--r-- src/phinde/HubUrlExtractor.php 24
1 files changed, 18 insertions, 6 deletions
diff --git a/src/phinde/HubUrlExtractor.php b/src/phinde/HubUrlExtractor.php
index e2d328a..b33abfe 100644
--- a/src/phinde/HubUrlExtractor.php
+++ b/src/phinde/HubUrlExtractor.php
@@ -43,7 +43,8 @@ class HubUrlExtractor
list($type) = explode(';', $res->getHeader('Content-type'));
if ($type != 'text/html' && $type != 'text/xml'
&& $type != 'application/xhtml+xml'
- //FIXME: atom, rss
+ && $type != 'application/atom+xml'
+ && $type != 'application/rss+xml'
&& $res->getStatus() != 405//HEAD method not allowed
) {
//we will not be able to extract links from the content
@@ -65,18 +66,27 @@ class HubUrlExtractor
return $this->absolutifyUrls($urls, $base);
}
- //FIXME: atom/rss
$body = $res->getBody();
$doc = $this->loadHtml($body, $res);
$xpath = new \DOMXPath($doc);
$xpath->registerNamespace('h', 'http://www.w3.org/1999/xhtml');
+ $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');
+ if ($type === 'application/atom+xml') {
+ $tagQuery = '/atom:feed/atom:link[';
+
+ } else if ($type === 'application/rss+xml') {
+ $tagQuery = '/rss/channel/link[';
+
+ } else {
+ $tagQuery = '/*[self::html or self::h:html]'
+ . '/*[self::head or self::h:head]'
+ . '/*[(self::link or self::h:link)'
+ . ' and';
+ }
$nodeList = $xpath->query(
- '/*[self::html or self::h:html]'
- . '/*[self::head or self::h:head]'
- . '/*[(self::link or self::h:link)'
- . ' and'
+ $tagQuery
. ' ('
. ' contains(concat(" ", normalize-space(@rel), " "), " hub ")'
. ' or'
@@ -163,6 +173,8 @@ class HubUrlExtractor
if ($type == 'application/xhtml+xml'
|| $type == 'application/xml'
|| $type == 'text/xml'
+ || $type == 'application/atom+xml'
+ || $type == 'application/rss+xml'
) {
$doc->loadXML($sourceBody);
} else {