massively improve crawl speed by ditching "exists" queries
[phinde.git] / src / phinde / Elasticsearch.php
index c437036794c1724d19e444c12b1fd1b8802ebc4a..1732bbb824cbaf8b072379a844f808af06926b72 100644 (file)
@@ -10,33 +10,15 @@ class Elasticsearch
         $this->baseUrl = $baseUrl;
     }
 
-    /**
-     * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
-     */
     public function isKnown($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/_search/exists',
-            \HTTP_Request2::METHOD_GET
+            $this->baseUrl . 'document/' . rawurlencode($url),
+            \HTTP_Request2::METHOD_HEAD
         );
         $r->allow404 = true;
-        $r->setBody(
-            json_encode(
-                array(
-                    'query' => array(
-                        'filtered' => array(
-                            'filter' => array(
-                                'term' => array(
-                                    'url' => $url
-                                )
-                            )
-                        )
-                    )
-                )
-            )
-        );
-        $res = json_decode($r->send()->getBody());
-        return $res->exists;
+        $res = $r->send();
+        return $res->getStatus() == 200;
     }
 
     public function get($url)