improve status page
[phinde.git] / src / phinde / Elasticsearch.php
index 40358611edb1cb3dc69e929cd13e6fe141869991..6c90480475d152a361ea0eb645dfb842265310a7 100644 (file)
@@ -10,39 +10,26 @@ class Elasticsearch
         $this->baseUrl = $baseUrl;
     }
 
-    /**
-     * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
-     */
+    public static function getDocId($url)
+    {
+        return hash('sha256', $url);
+    }
+
     public function isKnown($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/_search/exists',
-            \HTTP_Request2::METHOD_GET
+            $this->baseUrl . 'document/' . static::getDocId($url),
+            \HTTP_Request2::METHOD_HEAD
         );
         $r->allow404 = true;
-        $r->setBody(
-            json_encode(
-                array(
-                    'query' => array(
-                        'filtered' => array(
-                            'filter' => array(
-                                'term' => array(
-                                    'url' => $url
-                                )
-                            )
-                        )
-                    )
-                )
-            )
-        );
-        $res = json_decode($r->send()->getBody());
-        return $res->exists;
+        $res = $r->send();
+        return $res->getStatus() == 200;
     }
 
     public function get($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/' . rawurlencode($url),
+            $this->baseUrl . 'document/' . static::getDocId($url),
             \HTTP_Request2::METHOD_GET
         );
         $r->allow404 = true;
@@ -57,26 +44,44 @@ class Elasticsearch
     public function markQueued($url)
     {
         $r = new Elasticsearch_Request(
-            $this->baseUrl . 'document/' . rawurlencode($url),
+            $this->baseUrl . 'document/' . static::getDocId($url),
             \HTTP_Request2::METHOD_PUT
         );
-        $doc = array(
-            'status' => 'queued',
-            'url' => $url
+        $doc = (object) array(
+            'url' => $url,
+            'status' => (object) array(
+                'processed' => null,
+                'findable'  => false,
+            )
         );
         $r->setBody(json_encode($doc));
         $r->send();
     }
 
+    public function getIndexStatus()
+    {
+        $r = new Elasticsearch_Request(
+            $this->baseUrl . '_stats/docs,store',
+            \HTTP_Request2::METHOD_GET
+        );
+        $res = $r->send();
+        $data = json_decode($res->getBody());
+        return array(
+            'documents' => $data->_all->total->docs->count,
+            'size'      => $data->_all->total->store->size_in_bytes,
+        );
+    }
+
     public function search($query, $filters, $site, $page, $perPage, $sort)
     {
-        if (preg_match('#nick:([^ ]*)#', $query, $matches)) {
-            $authorName = $matches[1];
-            $query = str_replace(
-                'nick:' . $authorName,
-                'author.name:' . $authorName,
-                $query
-            );
+        if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) {
+            foreach ($matches[1] as $authorName) {
+                $query = str_replace(
+                    'nick:' . $authorName,
+                    'author.name:' . $authorName,
+                    $query
+                );
+            }
         }
 
         $qMust = array();//query parts for the MUST section
@@ -126,16 +131,21 @@ class Elasticsearch
         );
         $qMust[] = array(
             'term' => array(
-                'status' => 'indexed'
+                'status.findable' => true
             )
         );
 
         if ($sort == 'date') {
-            $sortCfg = array('modate' => array('order' => 'desc'));
+            $sortCfg = array('status.modate' => array('order' => 'desc'));
         } else {
             $sortCfg = array();
         }
 
+        $contentMatchSize = 100;
+        if ($GLOBALS['phinde']['showFullContent']) {
+            $contentMatchSize = 999999;
+        }
+
         $r = new Elasticsearch_Request(
             $this->baseUrl . 'document/_search',
             \HTTP_Request2::METHOD_GET
@@ -145,7 +155,7 @@ class Elasticsearch
                 'url',
                 'title',
                 'author',
-                'modate',
+                'status.modate',
             ),
             'query' => array(
                 'bool' => array(
@@ -168,6 +178,8 @@ class Elasticsearch
                     'text' => array(
                         'require_field_match' => false,
                         'number_of_fragments' => 1,
+                        'fragment_size' => $contentMatchSize,
+                        'no_match_size' => $contentMatchSize,
                     ),
                 )
             ),