Fix date searches
[phinde.git] / src / phinde / Elasticsearch.php
index 5ad46e74030e9d8c2c27cc05083a547fe493837d..5ca2180c8b66a936d731f8e34691af2385a95174 100644 (file)
@@ -10,39 +10,26 @@ class Elasticsearch
         $this->baseUrl = $baseUrl;
     }
 
-    /**
-     * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
-     */
+    public static function getDocId($url)
+    {
+        return hash('sha256', $url);
+    }
+
     public function isKnown($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/_search/exists',
-            \HTTP_Request2::METHOD_GET
+            $this->baseUrl . 'document/' . static::getDocId($url),
+            \HTTP_Request2::METHOD_HEAD
         );
         $r->allow404 = true;
-        $r->setBody(
-            json_encode(
-                array(
-                    'query' => array(
-                        'filtered' => array(
-                            'filter' => array(
-                                'term' => array(
-                                    'url' => $url
-                                )
-                            )
-                        )
-                    )
-                )
-            )
-        );
-        $res = json_decode($r->send()->getBody());
-        return $res->exists;
+        $res = $r->send();
+        return $res->getStatus() == 200;
     }
 
     public function get($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/' . rawurlencode($url),
+            $this->baseUrl . 'document/' . static::getDocId($url),
             \HTTP_Request2::METHOD_GET
         );
         $r->allow404 = true;
@@ -57,19 +44,111 @@ class Elasticsearch
     public function markQueued($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/' . rawurlencode($url),
+            $this->baseUrl . 'document/' . static::getDocId($url),
             \HTTP_Request2::METHOD_PUT
         );
-        $doc = array(
-            'status' => 'queued',
-            'url' => $url
+        $doc = (object) array(
+            'url' => $url,
+            'status' => (object) array(
+                'processed' => null,
+                'findable'  => false,
+            )
         );
         $r->setBody(json_encode($doc));
         $r->send();
     }
 
-    public function search($query, $filters, $site, $page, $perPage)
+    public function getIndexStatus()
     {
+        $r = new Elasticsearch_Request(
+            $this->baseUrl . '_stats/docs,store',
+            \HTTP_Request2::METHOD_GET
+        );
+        $res = $r->send();
+        $data = json_decode($res->getBody());
+        return array(
+            'documents' => $data->_all->total->docs->count,
+            'size'      => $data->_all->total->store->size_in_bytes,
+        );
+    }
+
+    public function search($query, $filters, $site, $page, $perPage, $sort)
+    {
+        if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) {
+            foreach ($matches[1] as $authorName) {
+                $query = str_replace(
+                    'nick:' . $authorName,
+                    'author.name:' . $authorName,
+                    $query
+                );
+            }
+        }
+
+        $qMust = array();//query parts for the MUST section
+
+        //modification date filters
+        if (preg_match('#after:([^ ]+)#', $query, $matches)) {
+            $dateAfter = $matches[1];
+            $query      = trim(str_replace($matches[0], '', $query));
+            $qMust[]    = array(
+                'range' => array(
+                    'status.modate' => array(
+                        'gt' => $dateAfter . '||/d',
+                    )
+                )
+            );
+        }
+        if (preg_match('#before:([^ ]+)#', $query, $matches)) {
+            $dateBefore = $matches[1];
+            $query      = trim(str_replace($matches[0], '', $query));
+            $qMust[]    = array(
+                'range' => array(
+                    'status.modate' => array(
+                        'lt' => $dateBefore . '||/d',
+                    )
+                )
+            );
+        }
+        if (preg_match('#date:([^ ]+)#', $query, $matches)) {
+            $dateExact = $matches[1];
+            $query      = trim(str_replace($matches[0], '', $query));
+            $qMust[]    = array(
+                'range' => array(
+                    'status.modate' => array(
+                        'gte' => $dateExact . '||/d',
+                        'lte' => $dateExact . '||/d',
+                    )
+                )
+            );
+        }
+
+        $qMust[] = array(
+            'query_string' => array(
+                'default_field' => '_all',
+                'default_operator' => 'AND',
+                'query' => $query
+            )
+        );
+        $qMust[] = array(
+            'term' => array(
+                'status.findable' => true
+            )
+        );
+
+        if ($sort == '' && $GLOBALS['phinde']['defaultSort'] == 'date') {
+            $sort = 'date';
+        }
+        if ($sort == 'date') {
+            $sortCfg = array('status.modate' => array('order' => 'desc'));
+        } else {
+            $sortCfg = array();
+        }
+
+        $contentMatchSize = 100;
+        if ($GLOBALS['phinde']['showFullContent']) {
+            $contentMatchSize = 999999;
+        }
+
         $r = new Elasticsearch_Request(
             $this->baseUrl . 'document/_search',
             \HTTP_Request2::METHOD_GET
@@ -79,23 +158,11 @@ class Elasticsearch
                 'url',
                 'title',
                 'author',
-                'modate',
+                'status.modate',
             ),
             'query' => array(
                 'bool' => array(
-                    'must' => array(
-                        array(
-                            'query_string' => array(
-                                'default_field' => '_all',
-                                'query' => $query
-                            )
-                        ),
-                        array(
-                            'term' => array(
-                                'status' => 'indexed'
-                            )
-                        ),
-                    )
+                    'must' => $qMust
                 )
             ),
             'highlight' => array(
@@ -114,6 +181,8 @@ class Elasticsearch
                     'text' => array(
                         'require_field_match' => false,
                         'number_of_fragments' => 1,
+                        'fragment_size' => $contentMatchSize,
+                        'no_match_size' => $contentMatchSize,
                     ),
                 )
             ),
@@ -141,9 +210,7 @@ class Elasticsearch
             ),
             'from' => $page * $perPage,
             'size' => $perPage,
-            'sort' => array(
-                //array('modate' => array('order' => 'desc'))
-            )
+            'sort' => $sortCfg,
         );
         foreach ($filters as $type => $value) {
             $doc['query']['bool']['must'][] = array(