From bbf8fd9b04169a94ddba002ca8877910df4e0929 Mon Sep 17 00:00:00 2001
From: Christian Weiske
Date: Fri, 2 Sep 2016 11:01:28 +0200
Subject: massively improve crawl speed by ditching "exists" queries

---
 src/phinde/Elasticsearch.php | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

(limited to 'src/phinde')

diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php
index 43ef4f9..1732bbb 100644
--- a/src/phinde/Elasticsearch.php
+++ b/src/phinde/Elasticsearch.php
@@ -10,33 +10,15 @@ class Elasticsearch
         $this->baseUrl = $baseUrl;
     }
 
-    /**
-     * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
-     */
     public function isKnown($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/_search/exists',
-            \HTTP_Request2::METHOD_GET
+            $this->baseUrl . 'document/' . rawurlencode($url),
+            \HTTP_Request2::METHOD_HEAD
         );
         $r->allow404 = true;
-        $r->setBody(
-            json_encode(
-                array(
-                    'query' => array(
-                        'filtered' => array(
-                            'filter' => array(
-                                'term' => array(
-                                    'url' => $url
-                                )
-                            )
-                        )
-                    )
-                )
-            )
-        );
-        $status = $r->send()->getStatus();
-        return $status !== 404;
+        $res = $r->send();
+        return $res->getStatus() == 200;
     }
 
     public function get($url)
-- 
cgit v1.2.3