add site search, highlighting
author Christian Weiske <cweiske@cweiske.de>
Wed, 3 Feb 2016 20:12:17 +0000 (21:12 +0100)
committer Christian Weiske <cweiske@cweiske.de>
Wed, 3 Feb 2016 20:12:17 +0000 (21:12 +0100)
README.rst
bin/index.php
data/templates/search.htm
data/templates/search/hit.htm
data/templates/search/list.htm
src/phinde/Elasticsearch.php
src/phinde/Helper.php
www/css/phinde.css
www/index.php

index 121893c..ba8a681 100644 (file)
@@ -1,3 +1,9 @@
+Features
+========
+- Site search
+
+  - Query: ``foo bar site:www.example.org/dir/``
+
 Dependencies
 ============
 - PHP 5.5+
index eb82df2..8ac08ce 100755 (executable)
@@ -61,6 +61,7 @@ $doc = new \DOMDocument();
 $sx = simplexml_import_dom($doc);
 
 $indexDoc->url = $url;
+$indexDoc->schemalessUrl = Helper::noSchema($url);
 $indexDoc->type = 'html';
 $indexDoc->subtype = '';
 $indexDoc->mimetype = $mimetype;
index 8eb077b..4f3a252 100644 (file)
@@ -12,7 +12,8 @@
       </li>
      </ul>
      <form class="navbar-form pull-left">
-      <input type="text" name="q" placeholder="Search" value="{{query}}"/>
+      <input type="text" name="q" placeholder="Search"
+             value="{{query}}" class="input-xxlarge"/>
       <button type="submit" class="btn">Find</button>
      </form>
     </div>
index 4981017..dababcb 100644 (file)
@@ -1,7 +1,7 @@
 {% set doc = hit._source %}
 <li class="hit">
  <span class="title">
-  <a href="{{doc.url}}">{{doc.title}}</a>
+  <a href="{{doc.url}}">{{doc.htmlTitle|raw}}</a>
   {% if doc.author.name %}
   by
   <em>
@@ -19,5 +19,6 @@
   {% if doc.modate %}
   <span class="date">{{doc.extra.day|date("Y-m-d")}}</span>
   {% endif %}
+  {{doc.htmlText|raw}}
  </span>
 </li>
index ef8d454..97584d6 100644 (file)
@@ -4,7 +4,13 @@
  </p>
 {% else %}
  <p>
-  Found {{hitcount}} search results for "<tt>{{query}}</tt>" in {{queryTime}}:
+  Found {{hitcount}} search
+  {% if hitcount == 1 %}result{% else %}results{%endif%}
+  for "<tt>{{cleanQuery}}</tt>"
+  {% if site %}
+  on <tt>{{site}}</tt>
+  {% endif %}
+  in {{queryTime}}:
  </p>
  <ul class="hits">
  {% for hit in hits %}
index 4bc4637..735b64f 100644 (file)
@@ -68,7 +68,7 @@ class Elasticsearch
         $r->send();
     }
 
-    public function search($query, $filters, $page, $perPage)
+    public function search($query, $filters, $site, $page, $perPage)
     {
         $r = new Elasticsearch_Request(
             $this->baseUrl . 'document/_search',
@@ -98,6 +98,24 @@ class Elasticsearch
                     )
                 )
             ),
+            'highlight' => array(
+                'pre_tags' => array('<em class="hl">'),
+                'order' => 'score',
+                'fields' => array(
+                    'title' => array(
+                        'require_field_match' => false,
+                        'number_of_fragments' => 0,
+                    ),
+                    'url' => array(
+                        'require_field_match' => false,
+                        'number_of_fragments' => 0,
+                    ),
+                    'text' => array(
+                        'require_field_match' => false,
+                        'number_of_fragments' => 1,
+                    ),
+                )
+            ),
             'aggregations' => array(
                 'tags' => array(
                     'terms' => array(
@@ -133,11 +151,20 @@ class Elasticsearch
                 )
             );
         }
+        if ($site != '') {
+            $doc['query']['bool']['must'][] = array(
+                'prefix' => array(
+                    'schemalessUrl' => array(
+                        'value' => $site
+                    )
+                )
+            );
+        }
 
         //unset($doc['_source']);
 
         //ini_set('xdebug.var_display_max_depth', 10);
-        //return json_decode(json_encode($doc));
+        //echo json_encode($doc);die();
         $r->setBody(json_encode($doc));
         $res = $r->send();
         return json_decode($res->getBody());
index 4863961..0b98521 100644 (file)
@@ -11,5 +11,14 @@ class Helper
         }
         return true;
     }
+
+    public static function noSchema($url)
+    {
+        return str_replace(
+            array('http://', 'https://'),
+            '',
+            $url
+        );
+    }
 }
 ?>
index a3d593e..6d9ad20 100644 (file)
@@ -24,4 +24,7 @@
 }
 .hit .date {
     color: #666;
+}
+.hit em.hl {
+    font-weight: bold;
 }
\ No newline at end of file
index 498cf93..12befbc 100644 (file)
@@ -54,9 +54,18 @@ function buildLink($baseLink, $filters, $addFilterType, $addFilterValue)
     return $baseLink;
 }
 
+$site = null;
+if (preg_match('#site:([^ ]*)#', $query, $matches)) {
+    $site = $matches[1];
+    $cleanQuery = trim(str_replace('site:' . $site, '', $query));
+    $site = Helper::noSchema($site);
+} else {
+    $cleanQuery = $query;
+}
+
 $timeBegin = microtime(true);
 $es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
-$res = $es->search($query, $filters, $page, $perPage);
+$res = $es->search($cleanQuery, $filters, $site, $page, $perPage);
 $timeEnd = microtime(true);
 
 $pager = new Html_Pager(
@@ -67,8 +76,19 @@ $pager = new Html_Pager(
 foreach ($res->hits->hits as &$hit) {
     $doc = $hit->_source;
     if ($doc->title == '') {
-        $doc->title = '(no title)';
+        $doc->htmlTitle = '(no title)';
+    }
+    if (isset($hit->highlight->title[0])) {
+        $doc->htmlTitle = $hit->highlight->title[0];
+    } else {
+        $doc->htmlTitle = htmlspecialchars($doc->title);
     }
+    if (isset($hit->highlight->text[0])) {
+        $doc->htmlText = $hit->highlight->text[0];
+    } else {
+        $doc->htmlText = null;
+    }
+
     $doc->extra = new \stdClass();
     $doc->extra->cleanUrl = preg_replace('#^.*://#', '', $doc->url);
     if (isset($doc->modate)) {
@@ -81,13 +101,14 @@ foreach ($res->aggregations as $key => &$aggregation) {
         $bucket->url = buildLink($baseLink, $filters, $key, $bucket->key);
     }
 }
-//var_dump($res->aggregations);
 
 render(
     'search',
     array(
         'queryTime' => round($timeEnd - $timeBegin, 2) . 'ms',
         'query' => $query,
+        'cleanQuery' => $cleanQuery,
+        'site' => $site,
         'hitcount' => $res->hits->total,
         'hits' => $res->hits->hits,
         'aggregations' => $res->aggregations,