diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2016-09-02 11:01:28 +0200 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-09-02 11:01:28 +0200 |
| commit | bbf8fd9b04169a94ddba002ca8877910df4e0929 (patch) | |
| tree | 82a44910b4b9c0ff4818324a9593ce7aee80382a /src/phinde/Elasticsearch.php | |
| parent | 931c65bc8f5addd0a67764283b76b7e05104d535 (diff) | |
| download | phinde-bbf8fd9b04169a94ddba002ca8877910df4e0929.tar.gz phinde-bbf8fd9b04169a94ddba002ca8877910df4e0929.zip | |
massively improve crawl speed by ditching "exists" queries
Diffstat (limited to 'src/phinde/Elasticsearch.php')
| -rw-r--r-- | src/phinde/Elasticsearch.php | 26 |
1 files changed, 4 insertions, 22 deletions
```diff
diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php
index 43ef4f9..1732bbb 100644
--- a/src/phinde/Elasticsearch.php
+++ b/src/phinde/Elasticsearch.php
@@ -10,33 +10,15 @@ class Elasticsearch
         $this->baseUrl = $baseUrl;
     }
 
-    /**
-     * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
-     */
     public function isKnown($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/_search/exists',
-            \HTTP_Request2::METHOD_GET
+            $this->baseUrl . 'document/' . rawurlencode($url),
+            \HTTP_Request2::METHOD_HEAD
         );
         $r->allow404 = true;
-        $r->setBody(
-            json_encode(
-                array(
-                    'query' => array(
-                        'filtered' => array(
-                            'filter' => array(
-                                'term' => array(
-                                    'url' => $url
-                                )
-                            )
-                        )
-                    )
-                )
-            )
-        );
-        $status = $r->send()->getStatus();
-        return $status !== 404;
+        $res = $r->send();
+        return $res->getStatus() == 200;
     }
 
     public function get($url)
```
