diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2016-11-07 21:41:36 +0100 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-11-07 21:41:36 +0100 |
| commit | d7651fd96dcfa2829519504e4c8ec1ce511cd57f (patch) | |
| tree | e24d7a9f90060b0fee5a652de43bd0627f1c5bde /src/phinde/Elasticsearch.php | |
| parent | f90790c6b2a54c9b1c8a0aeaf1f23e6aa67d7aca (diff) | |
| download | phinde-d7651fd96dcfa2829519504e4c8ec1ce511cd57f.tar.gz phinde-d7651fd96dcfa2829519504e4c8ec1ce511cd57f.zip | |
Big patch merging crawling+indexing into one command, new json document structure
Diffstat (limited to 'src/phinde/Elasticsearch.php')
| -rw-r--r-- | src/phinde/Elasticsearch.php | 26 |
1 files changed, 17 insertions, 9 deletions
```diff
diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php
index 2887beb..9babfee 100644
--- a/src/phinde/Elasticsearch.php
+++ b/src/phinde/Elasticsearch.php
@@ -10,10 +10,15 @@ class Elasticsearch
         $this->baseUrl = $baseUrl;
     }
 
+    public static function getDocId($url)
+    {
+        return hash('sha256', $url);
+    }
+
     public function isKnown($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/' . rawurlencode($url),
+            $this->baseUrl . 'document/' . static::getDocId($url),
             \HTTP_Request2::METHOD_HEAD
         );
         $r->allow404 = true;
@@ -24,7 +29,7 @@ class Elasticsearch
     public function get($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/' . rawurlencode($url),
+            $this->baseUrl . 'document/' . static::getDocId($url),
             \HTTP_Request2::METHOD_GET
         );
         $r->allow404 = true;
@@ -39,12 +44,15 @@ class Elasticsearch
     public function markQueued($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/' . rawurlencode($url),
+            $this->baseUrl . 'document/' . static::getDocId($url),
             \HTTP_Request2::METHOD_PUT
         );
-        $doc = array(
-            'status' => 'queued',
-            'url' => $url
+        $doc = (object) array(
+            'url' => $url,
+            'status' => (object) array(
+                'processed' => null,
+                'findable' => false,
+            )
         );
         $r->setBody(json_encode($doc));
         $r->send();
@@ -109,12 +117,12 @@ class Elasticsearch
         );
         $qMust[] = array(
             'term' => array(
-                'status' => 'indexed'
+                'status.findable' => true
             )
         );
 
         if ($sort == 'date') {
-            $sortCfg = array('modate' => array('order' => 'desc'));
+            $sortCfg = array('status.modate' => array('order' => 'desc'));
         } else {
             $sortCfg = array();
         }
@@ -133,7 +141,7 @@ class Elasticsearch
                 'url',
                 'title',
                 'author',
-                'modate',
+                'status.modate',
             ),
             'query' => array(
                 'bool' => array(
```
