X-Git-Url: https://git.cweiske.de/phinde.git/blobdiff_plain/bbf8fd9b04169a94ddba002ca8877910df4e0929..c32d1b6ffe81afb36fdcaebe0254ad191b72bff6:/src/phinde/Elasticsearch.php diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php index 1732bbb..fd60f95 100644 --- a/src/phinde/Elasticsearch.php +++ b/src/phinde/Elasticsearch.php @@ -10,10 +10,15 @@ class Elasticsearch $this->baseUrl = $baseUrl; } + public static function getDocId($url) + { + return hash('sha256', $url); + } + public function isKnown($url) { $r = new Elasticsearch_Request( - $this->baseUrl . 'document/' . rawurlencode($url), + $this->baseUrl . 'document/' . static::getDocId($url), \HTTP_Request2::METHOD_HEAD ); $r->allow404 = true; @@ -24,7 +29,7 @@ class Elasticsearch public function get($url) { $r = new Elasticsearch_Request( - $this->baseUrl . 'document/' . rawurlencode($url), + $this->baseUrl . 'document/' . static::getDocId($url), \HTTP_Request2::METHOD_GET ); $r->allow404 = true; @@ -39,26 +44,44 @@ class Elasticsearch public function markQueued($url) { $r = new Elasticsearch_Request( - $this->baseUrl . 'document/' . rawurlencode($url), + $this->baseUrl . 'document/' . static::getDocId($url), \HTTP_Request2::METHOD_PUT ); - $doc = array( - 'status' => 'queued', - 'url' => $url + $doc = (object) array( + 'url' => $url, + 'status' => (object) array( + 'processed' => null, + 'findable' => false, + ) ); $r->setBody(json_encode($doc)); $r->send(); } + public function getIndexStatus() + { + $r = new Elasticsearch_Request( + $this->baseUrl . '_stats/docs,store', + \HTTP_Request2::METHOD_GET + ); + $res = $r->send(); + $data = json_decode($res->getBody()); + return array( + 'documents' => $data->_all->total->docs->count, + 'size' => $data->_all->total->store->size_in_bytes, + ); + } + public function search($query, $filters, $site, $page, $perPage, $sort) { - if (preg_match('#nick:([^ ]*)#', $query, $matches)) { - $authorName = $matches[1]; - $query = str_replace( - 'nick:' . $authorName, - 'author.name:' . $authorName, - $query - ); + if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) { + foreach ($matches[1] as $authorName) { + $query = str_replace( + 'nick:' . $authorName, + 'author.name:' . $authorName, + $query + ); + } } $qMust = array();//query parts for the MUST section @@ -69,7 +92,7 @@ class Elasticsearch $query = trim(str_replace($matches[0], '', $query)); $qMust[] = array( 'range' => array( - 'modate' => array( + 'status.modate' => array( 'gt' => $dateAfter . '||/d', ) ) @@ -80,7 +103,7 @@ class Elasticsearch $query = trim(str_replace($matches[0], '', $query)); $qMust[] = array( 'range' => array( - 'modate' => array( + 'status.modate' => array( 'lt' => $dateBefore . '||/d', ) ) @@ -91,7 +114,7 @@ class Elasticsearch $query = trim(str_replace($matches[0], '', $query)); $qMust[] = array( 'range' => array( - 'modate' => array( + 'status.modate' => array( 'gte' => $dateExact . '||/d', 'lte' => $dateExact . '||/d', ) @@ -99,6 +122,11 @@ class Elasticsearch ); } + if (strpos($query, '/') !== false && strpos($query, '"') === false) { + //add quotes when there is a slash and no quotes + // https://stackoverflow.com/questions/31963643/escaping-forward-slashes-in-elasticsearch + $query = '"' . $query . '"'; + } $qMust[] = array( 'query_string' => array( 'default_field' => '_all', @@ -108,12 +136,15 @@ class Elasticsearch ); $qMust[] = array( 'term' => array( - 'status' => 'indexed' + 'status.findable' => true ) ); + if ($sort == '' && $GLOBALS['phinde']['defaultSort'] == 'date') { + $sort = 'date'; + } if ($sort == 'date') { - $sortCfg = array('modate' => array('order' => 'desc')); + $sortCfg = array('status.modate' => array('order' => 'desc')); } else { $sortCfg = array(); } @@ -132,7 +163,7 @@ class Elasticsearch 'url', 'title', 'author', - 'modate', + 'status.modate', ), 'query' => array( 'bool' => array(