Support multiple hub links for WebSub
author Christian Weiske <cweiske@cweiske.de>
Sat, 7 Mar 2020 20:42:45 +0000 (21:42 +0100)
committer Christian Weiske <cweiske@cweiske.de>
Sat, 7 Mar 2020 20:42:45 +0000 (21:42 +0100)
src/phinde/HubUrlExtractor.php
src/phinde/Subscriptions.php
tests/HubUrlExtractorTest.php

index 81a612c..4f1baa3 100644 (file)
@@ -1,6 +1,11 @@
 <?php
 namespace phinde;
 
+/**
+ * Perform WebSub discovery for "hub" and "self" URLs
+ *
+ * @link https://www.w3.org/TR/websub/#discovery
+ */
 class HubUrlExtractor
 {
     /**
@@ -16,7 +21,11 @@ class HubUrlExtractor
      *
      * @param string $url Topic URL
      *
-     * @return array Array of URLs with keys: hub, self
+     * @return array Array of URLs with keys: hub, self.
+     *               - "self" value is the URL
+     *               - "hub"  value is an array of URLs
+     *               Either key may be missing if the URL
+     *               does not advertise it.
      */
     public function getUrls($url)
     {
@@ -29,7 +38,7 @@ class HubUrlExtractor
         if (intval($res->getStatus() / 100) >= 4
             && $res->getStatus() != 405 //method not supported/allowed
         ) {
-            return null;
+            return [];
         }
 
         $url  = $res->getEffectiveUrl();
@@ -66,6 +75,8 @@ class HubUrlExtractor
             return $this->absolutifyUrls($urls, $base);
         }
 
+        $urls = [];//do not mix header and content links
+
         $body = $res->getBody();
         $doc = $this->loadHtml($body, $res);
 
@@ -111,15 +122,16 @@ class HubUrlExtractor
                 if ($type == 'canonical') {
                     $type = 'self';
                 }
-                if ($type == 'hub' || $type == 'self'
-                    && !isset($urls[$type])
-                ) {
-                    $urls[$type] = $uri;
+                if ($type == 'self' && !isset($urls['self'])) {
+                    $urls['self'] = $uri;
+                } else if ($type == 'hub') {
+                    $urls['hub'][] = $uri;
                 }
             }
         }
 
-        //FIXME: base href
+        //<base href=".."> extraction is not necessary; RFC 5988 says:
+        // Note that any base IRI from the message's content is not applied.
         return $this->absolutifyUrls($urls, $base);
     }
 
@@ -138,10 +150,8 @@ class HubUrlExtractor
         $links = $http->parseLinks($res->getHeader('Link'));
         foreach ($links as $link) {
             if (isset($link['_uri']) && isset($link['rel'])) {
-                if (!isset($urls['hub'])
-                    && array_search('hub', $link['rel']) !== false
-                ) {
-                    $urls['hub'] = $link['_uri'];
+                if (array_search('hub', $link['rel']) !== false) {
+                    $urls['hub'][] = $link['_uri'];
                 }
                 if (!isset($urls['self'])
                     && array_search('self', $link['rel']) !== false
@@ -221,7 +231,7 @@ class HubUrlExtractor
     /**
      * Make the list of urls absolute
      *
-     * @param array  $urls Array of maybe relative URLs
+     * @param array  $urls Array of maybe relative URLs, or of arrays of such URLs
      * @param object $base Base URL to resolve the relatives against
      *
      * @return array List of absolute URLs
@@ -229,7 +239,13 @@ class HubUrlExtractor
     protected function absolutifyUrls($urls, \Net_URL2 $base)
     {
         foreach ($urls as $key => $url) {
-            $urls[$key] = (string) $base->resolve($url);
+            if (is_array($url)) {
+                foreach ($url as $singleKey => $singleUrl) {
+                    $urls[$key][$singleKey] = (string) $base->resolve($singleUrl);
+                }
+            } else {
+                $urls[$key] = (string) $base->resolve($url);
+            }
         }
         return $urls;
     }
index 403f5d4..4650a0c 100644 (file)
@@ -262,7 +262,7 @@ class Subscriptions
         $urls = $hue->getUrls($url);
         //we violate the spec by not requiring a self URL
         $topicUrl = isset($urls['self']) ? $urls['self'] : $url;
-        $hubUrl   = isset($urls['hub'])  ? $urls['hub'] : null;
+        $hubUrl   = isset($urls['hub'][0]) ? $urls['hub'][0] : null;
 
         return array($topicUrl, $hubUrl);
     }
index 418185e..2018467 100644 (file)
@@ -21,7 +21,40 @@ class HubUrlExtractorTest extends \PHPUnit\Framework\TestCase
 
         $this->assertEquals(
             [
-                'hub'  => 'https://hub.example.com/',
+                'hub'  => ['https://hub.example.com/'],
+                'self' => 'http://example.com/feed',
+            ],
+            $extractor->getUrls('http://example.org/')
+        );
+    }
+
+    public function testGetUrlsMultipleHubsHEAD()
+    {
+        $mock = new HTTP_Request2_Adapter_Mock();
+        $this->addResponse(
+            $mock,
+            "HTTP/1.0 200 OK\r\n"
+            . "Content-type: text/html\r\n"
+            . "Link: <https://hub.example.com/>; rel=\"hub\"\r\n"
+            . "Link: <https://hub2.example.com/>; rel=\"hub\"\r\n"
+            . "Link: <http://example.com/feed>; rel=\"self\"\r\n"
+            . "Link: <https://hub3.example.com/>; rel=\"hub\"\r\n"
+            . "\r\n",
+            'http://example.org/'
+        );
+
+        $extractor = new phinde\HubUrlExtractor();
+        $extractor->setRequestTemplate(
+            new HTTP_Request2(null, null, ['adapter' => $mock])
+        );
+
+        $this->assertEquals(
+            [
+                'hub'  => [
+                    'https://hub.example.com/',
+                    'https://hub2.example.com/',
+                    'https://hub3.example.com/',
+                ],
                 'self' => 'http://example.com/feed',
             ],
             $extractor->getUrls('http://example.org/')
@@ -63,7 +96,53 @@ HTM,
 
         $this->assertEquals(
             [
-                'hub'  => 'https://hub.example.com/',
+                'hub'  => ['https://hub.example.com/'],
+                'self' => 'http://example.com/feed',
+            ],
+            $extractor->getUrls('http://example.org/')
+        );
+    }
+
+    public function testGetUrlsHtmlMultipleHubs()
+    {
+        $mock = new HTTP_Request2_Adapter_Mock();
+        //HEAD
+        $this->addResponse(
+            $mock,
+            "HTTP/1.0 200 OK\r\n"
+            . "Content-type: text/html\r\n"
+            . "\r\n",
+            'http://example.org/'
+        );
+        //GET
+        $this->addResponse(
+            $mock,
+            "HTTP/1.0 200 OK\r\n"
+            . "Content-type: text/html\r\n"
+            . "\r\n"
+            . <<<HTM
+<html>
+ <head>
+  <link rel='hub' href='https://hub.example.com/'/>
+  <link rel='hub' href='https://hub2.example.com/'/>
+  <link rel='self' href='http://example.com/feed'/>
+ </head>
+</html>
+HTM,
+            'http://example.org/'
+        );
+
+        $extractor = new phinde\HubUrlExtractor();
+        $extractor->setRequestTemplate(
+            new HTTP_Request2(null, null, ['adapter' => $mock])
+        );
+
+        $this->assertEquals(
+            [
+                'hub'  => [
+                    'https://hub.example.com/',
+                    'https://hub2.example.com/',
+                ],
                 'self' => 'http://example.com/feed',
             ],
             $extractor->getUrls('http://example.org/')
@@ -105,7 +184,7 @@ HTM,
 
         $this->assertEquals(
             [
-                'hub'  => 'https://hub.example.com/',
+                'hub'  => ['https://hub.example.com/'],
                 'self' => 'http://example.com/feed',
             ],
             $extractor->getUrls('http://example.org/')
@@ -147,7 +226,7 @@ HTM,
 
         $this->assertEquals(
             [
-                'hub'  => 'https://hub.example.com/',
+                'hub'  => ['https://hub.example.com/'],
                 'self' => 'http://example.com/feed',
             ],
             $extractor->getUrls('http://example.org/')
@@ -191,7 +270,7 @@ HTM,
 
         $this->assertEquals(
             [
-                'hub'  => 'https://hub.example.com/',
+                'hub'  => ['https://hub.example.com/'],
                 'self' => 'http://example.com/feed',
             ],
             $extractor->getUrls('http://example.org/')
@@ -235,7 +314,7 @@ HTM,
 
         $this->assertEquals(
             [
-                'hub'  => 'https://hub.example.com/',
+                'hub'  => ['https://hub.example.com/'],
                 'self' => 'http://example.com/feed',
             ],
             $extractor->getUrls('http://example.org/')