aboutsummaryrefslogtreecommitdiff
path: root/src/phinde
diff options
context:
space:
mode:
author: Christian Weiske <cweiske@cweiske.de>, 2016-09-02 11:01:28 +0200
committer: Christian Weiske <cweiske@cweiske.de>, 2016-09-02 11:01:28 +0200
commit: bbf8fd9b04169a94ddba002ca8877910df4e0929 (patch)
tree: 82a44910b4b9c0ff4818324a9593ce7aee80382a /src/phinde
parent: 931c65bc8f5addd0a67764283b76b7e05104d535 (diff)
download: phinde-bbf8fd9b04169a94ddba002ca8877910df4e0929.tar.gz
download: phinde-bbf8fd9b04169a94ddba002ca8877910df4e0929.zip
massively improve crawl speed by ditching "exists" queries
Diffstat (limited to 'src/phinde')
-rw-r--r-- src/phinde/Elasticsearch.php | 26
1 files changed, 4 insertions, 22 deletions
diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php
index 43ef4f9..1732bbb 100644
--- a/src/phinde/Elasticsearch.php
+++ b/src/phinde/Elasticsearch.php
@@ -10,33 +10,15 @@ class Elasticsearch
$this->baseUrl = $baseUrl;
}
- /**
- * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
- */
public function isKnown($url)
{
$r = new Elasticsearch_Request(
- $this->baseUrl . 'document/_search/exists',
- \HTTP_Request2::METHOD_GET
+ $this->baseUrl . 'document/' . rawurlencode($url),
+ \HTTP_Request2::METHOD_HEAD
);
$r->allow404 = true;
- $r->setBody(
- json_encode(
- array(
- 'query' => array(
- 'filtered' => array(
- 'filter' => array(
- 'term' => array(
- 'url' => $url
- )
- )
- )
- )
- )
- )
- );
- $status = $r->send()->getStatus();
- return $status !== 404;
+ $res = $r->send();
+ return $res->getStatus() == 200;
}
public function get($url)