git.cweiske.de
/
phinde.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Configuration for default sort order
[phinde.git]
/
src
/
phinde
/
Fetcher.php
diff --git
a/src/phinde/Fetcher.php
b/src/phinde/Fetcher.php
index b5644af1310d16f2dbe107b36e75d2b059d57868..dccb118c0ac52f0bd47fd73029b65cb7b5780a69 100644
(file)
--- a/
src/phinde/Fetcher.php
+++ b/
src/phinde/Fetcher.php
@@
-43,7
+43,7
@@
class Fetcher
$res = $req->send();
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
$res = $req->send();
if ($res->getStatus() === 304) {
//not modified since last time, so don't crawl again
-
echo "Not modified since last fetch\n"
;
+
Log::info("Not modified since last fetch")
;
return false;
} else if ($res->getStatus() !== 200) {
throw new \Exception(
return false;
} else if ($res->getStatus() !== 200) {
throw new \Exception(
@@
-52,7
+52,7
@@
class Fetcher
);
}
);
}
- $effUrl =
$res->getEffectiveUrl(
);
+ $effUrl =
Helper::removeAnchor($res->getEffectiveUrl()
);
if ($effUrl != $url) {
$this->storeRedirect($url, $effUrl);
$url = $effUrl;
if ($effUrl != $url) {
$this->storeRedirect($url, $effUrl);
$url = $effUrl;
@@
-69,17
+69,17
@@
class Fetcher
protected function storeRedirect($url, $target)
{
protected function storeRedirect($url, $target)
{
- $esDoc =
new \stdClass(
);
+ $esDoc =
Helper::baseDoc($url
);
$esDoc->status = (object) array(
$esDoc->status = (object) array(
- 'location' => $target
+ 'location' => $target,
+ 'findable' => false,
);
);
- $esDoc->url = $url;
$this->storeDoc($url, $esDoc);
}
public function storeDoc($url, $esDoc)
{
$this->storeDoc($url, $esDoc);
}
public function storeDoc($url, $esDoc)
{
-
echo "Store $url\n"
;
+
Log::info("Store $url")
;
$esDoc->status->processed = gmdate('c');
$r = new Elasticsearch_Request(
$GLOBALS['phinde']['elasticsearch'] . 'document/'
$esDoc->status->processed = gmdate('c');
$r = new Elasticsearch_Request(
$GLOBALS['phinde']['elasticsearch'] . 'document/'