From eca6e9af6dea38f5912c881a5dc05193e1b19848 Mon Sep 17 00:00:00 2001 From: Christian Weiske Date: Sat, 7 Mar 2020 22:26:59 +0100 Subject: [PATCH] Support subscriptions to redirect URLs Resolves: https://github.com/cweiske/phinde/issues/37 --- src/phinde/HubUrlExtractor.php | 15 ++++++- tests/HubUrlExtractorTest.php | 77 ++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/src/phinde/HubUrlExtractor.php b/src/phinde/HubUrlExtractor.php index 4f1baa3..da29650 100644 --- a/src/phinde/HubUrlExtractor.php +++ b/src/phinde/HubUrlExtractor.php @@ -19,7 +19,8 @@ class HubUrlExtractor * Get the hub and self/canonical URL of a given topic URL. * Uses link headers and parses HTML link rels. * - * @param string $url Topic URL + * @param string $url Topic URL + * @param int $redirects Number of redirects that were followed * * @return array Array of URLs with keys: hub, self. * - "self" value is the URL @@ -27,12 +28,13 @@ class HubUrlExtractor * Keys may be there but most not if the URL * does not advertise them. 
*/ - public function getUrls($url) + public function getUrls($url, $redirects = 0) { //at first, try a HEAD request that does not transfer so much data $req = $this->getRequest(); $req->setUrl($url); $req->setMethod(\HTTP_Request2::METHOD_HEAD); + $req->setConfig('follow_redirects', false); $res = $req->send(); if (intval($res->getStatus() / 100) >= 4 @@ -49,6 +51,15 @@ class HubUrlExtractor return $this->absolutifyUrls($urls, $base); } + if ($res->isRedirect()) { + //we tried header links and that failed, now follow the redirect + if ($redirects > 5) { + return []; + } + $redirectUrl = (string) $base->resolve($res->getHeader('location')); + return $this->getUrls($redirectUrl, $redirects + 1); + } + list($type) = explode(';', $res->getHeader('Content-type')); if ($type != 'text/html' && $type != 'text/xml' && $type != 'application/xhtml+xml' diff --git a/tests/HubUrlExtractorTest.php b/tests/HubUrlExtractorTest.php index 2018467..a4d4651 100644 --- a/tests/HubUrlExtractorTest.php +++ b/tests/HubUrlExtractorTest.php @@ -344,5 +344,82 @@ HTM, } return $response; } + + /** + * It is possible to subscribe to URLs that redirect if + * they have a hub and self links in the HTTP headers. + * If they don't, we need to follow the redirect. + */ + public function testGetUrlsHEADLinksForRedirect() + { + $mock = new HTTP_Request2_Adapter_Mock(); + $this->addResponse( + $mock, + "HTTP/1.0 307 Temporary Redirect\r\n" + . "Content-type: text/html\r\n" + . "Location: http://example.org/redir-target\r\n" + . "Link: <https://hub.example.com/>; rel=\"hub\"\r\n" + . "Link: <http://example.com/feed>; rel=\"self\"\r\n" + . "\r\n", + 'http://example.org/' + ); + $this->addResponse( + $mock, + "HTTP/1.0 200 OK\r\n" + . "Content-type: text/html\r\n" + . "Link: <https://redir-hub.example.com/>; rel=\"hub\"\r\n" + . "Link: <http://example.com/redir-feed>; rel=\"self\"\r\n" + . 
"\r\n", + 'http://example.org/redir-target' + ); + + $extractor = new phinde\HubUrlExtractor(); + $extractor->setRequestTemplate( + new HTTP_Request2(null, null, ['adapter' => $mock]) + ); + + $this->assertEquals( + [ + 'hub' => ['https://hub.example.com/'], + 'self' => 'http://example.com/feed', + ], + $extractor->getUrls('http://example.org/') + ); + } + + public function testGetUrlsHEADLinksForRedirectNone() + { + $mock = new HTTP_Request2_Adapter_Mock(); + $this->addResponse( + $mock, + "HTTP/1.0 307 Temporary Redirect\r\n" + . "Content-type: text/html\r\n" + . "Location: http://example.org/redir-target\r\n" + . "\r\n", + 'http://example.org/' + ); + $this->addResponse( + $mock, + "HTTP/1.0 200 OK\r\n" + . "Content-type: text/html\r\n" + . "Link: <https://redir-hub.example.com/>; rel=\"hub\"\r\n" + . "Link: <http://example.com/redir-feed>; rel=\"self\"\r\n" + . "\r\n", + 'http://example.org/redir-target' + ); + + $extractor = new phinde\HubUrlExtractor(); + $extractor->setRequestTemplate( + new HTTP_Request2(null, null, ['adapter' => $mock]) + ); + + $this->assertEquals( + [ + 'hub' => ['https://redir-hub.example.com/'], + 'self' => 'http://example.com/redir-feed', + ], + $extractor->getUrls('http://example.org/') + ); + } } ?> -- 2.30.2