From d3cdabcac7feb8c62451ac12a22256c0eff16873 Mon Sep 17 00:00:00 2001 From: Christian Weiske Date: Sat, 29 Feb 2020 22:08:30 +0100 Subject: Add URL rewrites/replacements --- src/phinde/Crawler.php | 1 + src/phinde/Fetcher.php | 4 ++++ src/phinde/Helper.php | 14 ++++++++++++++ 3 files changed, 19 insertions(+) (limited to 'src') diff --git a/src/phinde/Crawler.php b/src/phinde/Crawler.php index 1f63e60..4d596b4 100644 --- a/src/phinde/Crawler.php +++ b/src/phinde/Crawler.php @@ -53,6 +53,7 @@ class Crawler { $filteredLinkInfos = array(); foreach ($linkInfos as $linkInfo) { + $linkInfo->url = Helper::rewriteUrl($linkInfo->url); $allowed = Helper::isUrlAllowed($linkInfo->url); $crawl = $allowed; $index = $GLOBALS['phinde']['indexNonAllowed'] || $allowed; diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php index dccb118..7cf11b7 100644 --- a/src/phinde/Fetcher.php +++ b/src/phinde/Fetcher.php @@ -15,12 +15,15 @@ class Fetcher */ public function fetch($url, $actions, $force = false) { + $url = Helper::rewriteUrl($url); + $esDoc = $this->es->get($url); if (isset($esDoc->status->location) && $esDoc->status->location != '' ) { //TODO: what if location redirects change? $url = $esDoc->status->location; + $url = Helper::rewriteUrl($url); $esDoc = $this->es->get($url); } @@ -53,6 +56,7 @@ class Fetcher } $effUrl = Helper::removeAnchor($res->getEffectiveUrl()); + $effUrl = Helper::rewriteUrl($effUrl); if ($effUrl != $url) { $this->storeRedirect($url, $effUrl); $url = $effUrl; diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php index aeb8ba5..d22b9c8 100644 --- a/src/phinde/Helper.php +++ b/src/phinde/Helper.php @@ -3,6 +3,20 @@ namespace phinde; class Helper { + public static function rewriteUrl($url) + { + if (!isset($GLOBALS['phinde']['urlRewrites']) + || count($GLOBALS['phinde']['urlRewrites']) == 0 + ) { + return $url; + } + + foreach ($GLOBALS['phinde']['urlRewrites'] as $pattern => $replacement) { + $url = preg_replace('#' . $pattern . '#', $replacement, $url); + } + return $url; + } + public static function isUrlAllowed($url) { $urlDomain = parse_url($url, PHP_URL_HOST); -- cgit v1.2.3