<?php
namespace phinde;
+/**
+ * Perform WebSub discovery for "hub" and "self" URLs
+ *
+ * @link https://www.w3.org/TR/websub/#discovery
+ */
class HubUrlExtractor
{
/**
*
* @param string $url Topic URL
*
- * @return array Array of URLs with keys: hub, self
+ * @return array Array of URLs with keys: hub, self.
+ * - "self" value is the URL
+ * - "hub" value is an array of URLs
+ * Keys may be present, but need not be if the URL
+ * does not advertise them.
*/
public function getUrls($url)
{
if (intval($res->getStatus() / 100) >= 4
&& $res->getStatus() != 405 //method not supported/allowed
) {
- return null;
+ return [];
}
$url = $res->getEffectiveUrl();
return $this->absolutifyUrls($urls, $base);
}
+ $urls = [];//do not mix header and content links
+
$body = $res->getBody();
$doc = $this->loadHtml($body, $res);
if ($type == 'canonical') {
$type = 'self';
}
- if ($type == 'hub' || $type == 'self'
- && !isset($urls[$type])
- ) {
- $urls[$type] = $uri;
+ if ($type == 'self' && !isset($urls['self'])) {
+ $urls['self'] = $uri;
+ } else if ($type == 'hub') {
+ $urls['hub'][] = $uri;
}
}
}
- //FIXME: base href
+ //<base href=".."> extraction is not necessary; RFC 5988 says:
+ // Note that any base IRI from the message's content is not applied.
return $this->absolutifyUrls($urls, $base);
}
$links = $http->parseLinks($res->getHeader('Link'));
foreach ($links as $link) {
if (isset($link['_uri']) && isset($link['rel'])) {
- if (!isset($urls['hub'])
- && array_search('hub', $link['rel']) !== false
- ) {
- $urls['hub'] = $link['_uri'];
+ if (array_search('hub', $link['rel']) !== false) {
+ $urls['hub'][] = $link['_uri'];
}
if (!isset($urls['self'])
&& array_search('self', $link['rel']) !== false
/**
* Make the list of urls absolute
*
- * @param array $urls Array of maybe relative URLs
+ * @param array $urls Array of maybe relative URLs; a value may also be an array of URLs
* @param object $base Base URL to resolve the relatives against
*
* @return array List of absolute URLs
protected function absolutifyUrls($urls, \Net_URL2 $base)
{
foreach ($urls as $key => $url) {
- $urls[$key] = (string) $base->resolve($url);
+ if (is_array($url)) {
+ foreach ($url as $singleKey => $singleUrl) {
+ $urls[$key][$singleKey] = (string) $base->resolve($singleUrl);
+ }
+ } else {
+ $urls[$key] = (string) $base->resolve($url);
+ }
}
return $urls;
}
$this->assertEquals(
[
- 'hub' => 'https://hub.example.com/',
+ 'hub' => ['https://hub.example.com/'],
+ 'self' => 'http://example.com/feed',
+ ],
+ $extractor->getUrls('http://example.org/')
+ );
+ }
+
+ public function testGetUrlsMultipleHubsHEAD()
+ {
+ $mock = new HTTP_Request2_Adapter_Mock();
+ $this->addResponse(
+ $mock,
+ "HTTP/1.0 200 OK\r\n"
+ . "Content-type: text/html\r\n"
+ . "Link: <https://hub.example.com/>; rel=\"hub\"\r\n"
+ . "Link: <https://hub2.example.com/>; rel=\"hub\"\r\n"
+ . "Link: <http://example.com/feed>; rel=\"self\"\r\n"
+ . "Link: <https://hub3.example.com/>; rel=\"hub\"\r\n"
+ . "\r\n",
+ 'http://example.org/'
+ );
+
+ $extractor = new phinde\HubUrlExtractor();
+ $extractor->setRequestTemplate(
+ new HTTP_Request2(null, null, ['adapter' => $mock])
+ );
+
+ $this->assertEquals(
+ [
+ 'hub' => [
+ 'https://hub.example.com/',
+ 'https://hub2.example.com/',
+ 'https://hub3.example.com/',
+ ],
'self' => 'http://example.com/feed',
],
$extractor->getUrls('http://example.org/')
$this->assertEquals(
[
- 'hub' => 'https://hub.example.com/',
+ 'hub' => ['https://hub.example.com/'],
+ 'self' => 'http://example.com/feed',
+ ],
+ $extractor->getUrls('http://example.org/')
+ );
+ }
+
+ public function testGetUrlsHtmlMultipleHubs()
+ {
+ $mock = new HTTP_Request2_Adapter_Mock();
+ //HEAD
+ $this->addResponse(
+ $mock,
+ "HTTP/1.0 200 OK\r\n"
+ . "Content-type: text/html\r\n"
+ . "\r\n",
+ 'http://example.org/'
+ );
+ //GET
+ $this->addResponse(
+ $mock,
+ "HTTP/1.0 200 OK\r\n"
+ . "Content-type: text/html\r\n"
+ . "\r\n"
+ . <<<HTM
+<html>
+ <head>
+ <link rel='hub' href='https://hub.example.com/'/>
+ <link rel='hub' href='https://hub2.example.com/'/>
+ <link rel='self' href='http://example.com/feed'/>
+ </head>
+</html>
+HTM,
+ 'http://example.org/'
+ );
+
+ $extractor = new phinde\HubUrlExtractor();
+ $extractor->setRequestTemplate(
+ new HTTP_Request2(null, null, ['adapter' => $mock])
+ );
+
+ $this->assertEquals(
+ [
+ 'hub' => [
+ 'https://hub.example.com/',
+ 'https://hub2.example.com/',
+ ],
'self' => 'http://example.com/feed',
],
$extractor->getUrls('http://example.org/')
$this->assertEquals(
[
- 'hub' => 'https://hub.example.com/',
+ 'hub' => ['https://hub.example.com/'],
'self' => 'http://example.com/feed',
],
$extractor->getUrls('http://example.org/')
$this->assertEquals(
[
- 'hub' => 'https://hub.example.com/',
+ 'hub' => ['https://hub.example.com/'],
'self' => 'http://example.com/feed',
],
$extractor->getUrls('http://example.org/')
$this->assertEquals(
[
- 'hub' => 'https://hub.example.com/',
+ 'hub' => ['https://hub.example.com/'],
'self' => 'http://example.com/feed',
],
$extractor->getUrls('http://example.org/')
$this->assertEquals(
[
- 'hub' => 'https://hub.example.com/',
+ 'hub' => ['https://hub.example.com/'],
'self' => 'http://example.com/feed',
],
$extractor->getUrls('http://example.org/')