From: Christian Weiske
Date: Wed, 3 Feb 2016 20:12:17 +0000 (+0100)
Subject: add site search, highlighting
X-Git-Tag: v0.1.0~25
X-Git-Url: https://git.cweiske.de/phinde.git/commitdiff_plain/88c741c09b664260f826ff947bfaab071ac70d05
add site search, highlighting
---
diff --git a/README.rst b/README.rst
index 121893c..ba8a681 100644
--- a/README.rst
+++ b/README.rst
@@ -1,3 +1,9 @@
+Features
+========
+- Site search
+
+ - Query: ``foo bar site:www.example.org/dir/``
+
Dependencies
============
- PHP 5.5+
diff --git a/bin/index.php b/bin/index.php
index eb82df2..8ac08ce 100755
--- a/bin/index.php
+++ b/bin/index.php
@@ -61,6 +61,7 @@ $doc = new \DOMDocument();
$sx = simplexml_import_dom($doc);
$indexDoc->url = $url;
+$indexDoc->schemalessUrl = Helper::noSchema($url);
$indexDoc->type = 'html';
$indexDoc->subtype = '';
$indexDoc->mimetype = $mimetype;
diff --git a/data/templates/search.htm b/data/templates/search.htm
index 8eb077b..4f3a252 100644
--- a/data/templates/search.htm
+++ b/data/templates/search.htm
@@ -12,7 +12,8 @@
diff --git a/data/templates/search/hit.htm b/data/templates/search/hit.htm
index 4981017..dababcb 100644
--- a/data/templates/search/hit.htm
+++ b/data/templates/search/hit.htm
@@ -1,7 +1,7 @@
{% set doc = hit._source %}
- {{doc.title}}
+ {{doc.htmlTitle|raw}}
{% if doc.author.name %}
by
@@ -19,5 +19,6 @@
{% if doc.modate %}
{{doc.extra.day|date("Y-m-d")}}
{% endif %}
+ {{doc.htmlText|raw}}
diff --git a/data/templates/search/list.htm b/data/templates/search/list.htm
index ef8d454..97584d6 100644
--- a/data/templates/search/list.htm
+++ b/data/templates/search/list.htm
@@ -4,7 +4,13 @@
{% else %}
- Found {{hitcount}} search results for "{{query}}" in {{queryTime}}:
+ Found {{hitcount}} search
+ {% if hitcount == 1 %}result{% else %}results{%endif%}
+ for "{{cleanQuery}}"
+ {% if site %}
+ on {{site}}
+ {% endif %}
+ in {{queryTime}}:
{% for hit in hits %}
diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php
index 4bc4637..735b64f 100644
--- a/src/phinde/Elasticsearch.php
+++ b/src/phinde/Elasticsearch.php
@@ -68,7 +68,7 @@ class Elasticsearch
$r->send();
}
- public function search($query, $filters, $page, $perPage)
+ public function search($query, $filters, $site, $page, $perPage)
{
$r = new Elasticsearch_Request(
$this->baseUrl . 'document/_search',
@@ -98,6 +98,24 @@ class Elasticsearch
)
)
),
+ 'highlight' => array(
+ 'pre_tags' => array('<em class="hl">'),
+ 'order' => 'score',
+ 'fields' => array(
+ 'title' => array(
+ 'require_field_match' => false,
+ 'number_of_fragments' => 0,
+ ),
+ 'url' => array(
+ 'require_field_match' => false,
+ 'number_of_fragments' => 0,
+ ),
+ 'text' => array(
+ 'require_field_match' => false,
+ 'number_of_fragments' => 1,
+ ),
+ )
+ ),
'aggregations' => array(
'tags' => array(
'terms' => array(
@@ -133,11 +151,20 @@ class Elasticsearch
)
);
}
+ if ($site != '') {
+ $doc['query']['bool']['must'][] = array(
+ 'prefix' => array(
+ 'schemalessUrl' => array(
+ 'value' => $site
+ )
+ )
+ );
+ }
//unset($doc['_source']);
//ini_set('xdebug.var_display_max_depth', 10);
- //return json_decode(json_encode($doc));
+ //echo json_encode($doc);die();
$r->setBody(json_encode($doc));
$res = $r->send();
return json_decode($res->getBody());
diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php
index 4863961..0b98521 100644
--- a/src/phinde/Helper.php
+++ b/src/phinde/Helper.php
@@ -11,5 +11,14 @@ class Helper
}
return true;
}
+
+    /** Strip a leading "http://" or "https://" (any letter case) from $url and return the rest. */
+    public static function noSchema($url)
+    {
+        // Anchored at the start on purpose: a plain str_replace() would also
+        // remove scheme strings embedded later in the URL (for example in a
+        // "?next=http://..." query parameter) and corrupt the result.
+        return preg_replace('#^https?://#i', '', $url);
+    }
}
?>
diff --git a/www/css/phinde.css b/www/css/phinde.css
index a3d593e..6d9ad20 100644
--- a/www/css/phinde.css
+++ b/www/css/phinde.css
@@ -24,4 +24,7 @@
}
.hit .date {
color: #666;
+}
+.hit em.hl {
+ font-weight: bold;
}
\ No newline at end of file
diff --git a/www/index.php b/www/index.php
index 498cf93..12befbc 100644
--- a/www/index.php
+++ b/www/index.php
@@ -54,9 +54,18 @@ function buildLink($baseLink, $filters, $addFilterType, $addFilterValue)
return $baseLink;
}
+$site = null;
+if (preg_match('#site:([^ ]*)#', $query, $matches)) {
+ $site = $matches[1];
+ $cleanQuery = trim(str_replace('site:' . $site, '', $query));
+ $site = Helper::noSchema($site);
+} else {
+ $cleanQuery = $query;
+}
+
$timeBegin = microtime(true);
$es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
-$res = $es->search($query, $filters, $page, $perPage);
+$res = $es->search($cleanQuery, $filters, $site, $page, $perPage);
$timeEnd = microtime(true);
$pager = new Html_Pager(
@@ -67,8 +76,19 @@ $pager = new Html_Pager(
foreach ($res->hits->hits as &$hit) {
$doc = $hit->_source;
if ($doc->title == '') {
- $doc->title = '(no title)';
+ $doc->htmlTitle = '(no title)';
+ }
+ if (isset($hit->highlight->title[0])) {
+ $doc->htmlTitle = $hit->highlight->title[0];
+ } else {
+ $doc->htmlTitle = htmlspecialchars($doc->title);
}
+ if (isset($hit->highlight->text[0])) {
+ $doc->htmlText = $hit->highlight->text[0];
+ } else {
+ $doc->htmlText = null;
+ }
+
$doc->extra = new \stdClass();
$doc->extra->cleanUrl = preg_replace('#^.*://#', '', $doc->url);
if (isset($doc->modate)) {
@@ -81,13 +101,14 @@ foreach ($res->aggregations as $key => &$aggregation) {
$bucket->url = buildLink($baseLink, $filters, $key, $bucket->key);
}
}
-//var_dump($res->aggregations);
render(
'search',
array(
'queryTime' => round($timeEnd - $timeBegin, 2) . 'ms',
'query' => $query,
+ 'cleanQuery' => $cleanQuery,
+ 'site' => $site,
'hitcount' => $res->hits->total,
'hits' => $res->hits->hits,
'aggregations' => $res->aggregations,