git.cweiske.de
/
phinde.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
e9a75e1
)
Support location redirect changes
author
Christian Weiske
<cweiske@cweiske.de>
Sun, 8 Mar 2020 22:54:16 +0000
(23:54 +0100)
committer
Christian Weiske
<cweiske@cweiske.de>
Sun, 8 Mar 2020 22:54:16 +0000
(23:54 +0100)
src/phinde/Fetcher.php
patch
|
blob
|
history
diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php
index 666d7eda2449a6240a14d657bca6844922ecf3da..c80032d48bf67a5fb5a0bffe8826554c2db35244 100644
(file)
--- a/src/phinde/Fetcher.php
+++ b/src/phinde/Fetcher.php
@@ -17,14 +17,15 @@ class Fetcher
{
$url = Helper::rewriteUrl($url);
{
$url = Helper::rewriteUrl($url);
- $esDoc = $this->es->get($url);
+ $esDoc = $this->es->get($url);
+ $locUrl = null;
if (isset($esDoc->status->location)
&& $esDoc->status->location != ''
) {
if (isset($esDoc->status->location)
&& $esDoc->status->location != ''
) {
- // TODO: what if location redirects change?
- $url = $esDoc->status->location;
- $url = Helper::rewriteUrl($url);
- $esDoc = $this->es->get($url);
+ // Location redirect: Use modified time of known target
+ $locUrl = $esDoc->status->location;
+ $locUrl = Helper::rewriteUrl($locUrl);
+ $esDoc = $this->es->get($locUrl);
}
$types = array();
}
$types = array();
@@ -44,8 +45,17 @@ class Fetcher
}
$res = $req->send();
}
$res = $req->send();
+ $effUrl = Helper::removeAnchor($res->getEffectiveUrl());
+ $effUrl = Helper::rewriteUrl($effUrl);
+
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
+ if ($locUrl !== null && $effUrl != $locUrl) {
+ //location URL changed, and we used the wrong crawl timestamp
+ $this->storeRedirect($url, $effUrl);
+ return $this->fetch($url, $actions, $force);
+ }
+
Log::info("Not modified since last fetch");
return false;
} else if ($res->getStatus() !== 200) {
Log::info("Not modified since last fetch");
return false;
} else if ($res->getStatus() !== 200) {
@@ -55,8 +65,6 @@ class Fetcher
);
}
);
}
- $effUrl = Helper::removeAnchor($res->getEffectiveUrl());
- $effUrl = Helper::rewriteUrl($effUrl);
if ($effUrl != $url) {
$this->storeRedirect($url, $effUrl);
$url = $effUrl;
if ($effUrl != $url) {
$this->storeRedirect($url, $effUrl);
$url = $effUrl;