X-Git-Url: https://git.cweiske.de/phinde.git/blobdiff_plain/226508cd8d3e8c147ad314a0de483e08be71c254..08fc60226f224de87d665aa7c55b6eaa9f66d768:/src/phinde/Elasticsearch.php diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php index 4bc4637..069cf1f 100644 --- a/src/phinde/Elasticsearch.php +++ b/src/phinde/Elasticsearch.php @@ -10,39 +10,26 @@ class Elasticsearch $this->baseUrl = $baseUrl; } - /** - * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html - */ + public static function getDocId($url) + { + return hash('sha256', $url); + } + public function isKnown($url) { $r = new Elasticsearch_Request( - $this->baseUrl . 'document/_search/exists', - \HTTP_Request2::METHOD_GET + $this->baseUrl . 'document/' . static::getDocId($url), + \HTTP_Request2::METHOD_HEAD ); $r->allow404 = true; - $r->setBody( - json_encode( - array( - 'query' => array( - 'filtered' => array( - 'filter' => array( - 'term' => array( - 'url' => $url - ) - ) - ) - ) - ) - ) - ); - $res = json_decode($r->send()->getBody()); - return $res->exists; + $res = $r->send(); + return $res->getStatus() == 200; } public function get($url) { $r = new Elasticsearch_Request( - $this->baseUrl . 'document/' . rawurlencode($url), + $this->baseUrl . 'document/' . static::getDocId($url), \HTTP_Request2::METHOD_GET ); $r->allow404 = true; @@ -57,19 +44,104 @@ class Elasticsearch public function markQueued($url) { $r = new Elasticsearch_Request( - $this->baseUrl . 'document/' . rawurlencode($url), + $this->baseUrl . 'document/' . static::getDocId($url), \HTTP_Request2::METHOD_PUT ); - $doc = array( - 'status' => 'queued', - 'url' => $url + $doc = (object) array( + 'url' => $url, + 'status' => (object) array( + 'processed' => null, + 'findable' => false, + ) ); $r->setBody(json_encode($doc)); $r->send(); } - public function search($query, $filters, $page, $perPage) + public function countDocuments() + { + $r = new Elasticsearch_Request( + $this->baseUrl . 'document/_count', + \HTTP_Request2::METHOD_GET + ); + $res = $r->send(); + return json_decode($res->getBody())->count; + } + + public function search($query, $filters, $site, $page, $perPage, $sort) { + if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) { + foreach ($matches[1] as $authorName) { + $query = str_replace( + 'nick:' . $authorName, + 'author.name:' . $authorName, + $query + ); + } + } + + $qMust = array();//query parts for the MUST section + + //modification date filters + if (preg_match('#after:([^ ]+)#', $query, $matches)) { + $dateAfter = $matches[1]; + $query = trim(str_replace($matches[0], '', $query)); + $qMust[] = array( + 'range' => array( + 'modate' => array( + 'gt' => $dateAfter . '||/d', + ) + ) + ); + } + if (preg_match('#before:([^ ]+)#', $query, $matches)) { + $dateBefore = $matches[1]; + $query = trim(str_replace($matches[0], '', $query)); + $qMust[] = array( + 'range' => array( + 'modate' => array( + 'lt' => $dateBefore . '||/d', + ) + ) + ); + } + if (preg_match('#date:([^ ]+)#', $query, $matches)) { + $dateExact = $matches[1]; + $query = trim(str_replace($matches[0], '', $query)); + $qMust[] = array( + 'range' => array( + 'modate' => array( + 'gte' => $dateExact . '||/d', + 'lte' => $dateExact . '||/d', + ) + ) + ); + } + + $qMust[] = array( + 'query_string' => array( + 'default_field' => '_all', + 'default_operator' => 'AND', + 'query' => $query + ) + ); + $qMust[] = array( + 'term' => array( + 'status.findable' => true + ) + ); + + if ($sort == 'date') { + $sortCfg = array('status.modate' => array('order' => 'desc')); + } else { + $sortCfg = array(); + } + + $contentMatchSize = 100; + if ($GLOBALS['phinde']['showFullContent']) { + $contentMatchSize = 999999; + } + $r = new Elasticsearch_Request( $this->baseUrl . 'document/_search', \HTTP_Request2::METHOD_GET @@ -79,23 +151,32 @@ class Elasticsearch 'url', 'title', 'author', - 'modate', + 'status.modate', ), 'query' => array( 'bool' => array( - 'must' => array( - array( - 'query_string' => array( - 'default_field' => '_all', - 'query' => $query - ) - ), - array( - 'term' => array( - 'status' => 'indexed' - ) - ), - ) + 'must' => $qMust + ) + ), + 'highlight' => array( + 'pre_tags' => array(''), + 'order' => 'score', + 'encoder' => 'html', + 'fields' => array( + 'title' => array( + 'require_field_match' => false, + 'number_of_fragments' => 0, + ), + 'url' => array( + 'require_field_match' => false, + 'number_of_fragments' => 0, + ), + 'text' => array( + 'require_field_match' => false, + 'number_of_fragments' => 1, + 'fragment_size' => $contentMatchSize, + 'no_match_size' => $contentMatchSize, + ), ) ), 'aggregations' => array( @@ -122,9 +203,7 @@ class Elasticsearch ), 'from' => $page * $perPage, 'size' => $perPage, - 'sort' => array( - //array('modate' => array('order' => 'desc')) - ) + 'sort' => $sortCfg, ); foreach ($filters as $type => $value) { $doc['query']['bool']['must'][] = array( @@ -133,11 +212,20 @@ class Elasticsearch ) ); } + if ($site != '') { + $doc['query']['bool']['must'][] = array( + 'prefix' => array( + 'schemalessUrl' => array( + 'value' => $site + ) + ) + ); + } //unset($doc['_source']); //ini_set('xdebug.var_display_max_depth', 10); - //return json_decode(json_encode($doc)); + //echo json_encode($doc);die(); $r->setBody(json_encode($doc)); $res = $r->send(); return json_decode($res->getBody());