aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Weiske <cweiske@cweiske.de>2016-02-03 21:12:17 +0100
committerChristian Weiske <cweiske@cweiske.de>2016-02-03 21:12:17 +0100
commit88c741c09b664260f826ff947bfaab071ac70d05 (patch)
treeb2f7417aee5979d4bd994b929f71d518a3604aa0
parentfab8cabe19ac99e3c8a95abcea3c0569b67045bb (diff)
downloadphinde-88c741c09b664260f826ff947bfaab071ac70d05.tar.gz
phinde-88c741c09b664260f826ff947bfaab071ac70d05.zip
add site search, highlighting
-rw-r--r--README.rst6
-rwxr-xr-xbin/index.php1
-rw-r--r--data/templates/search.htm3
-rw-r--r--data/templates/search/hit.htm3
-rw-r--r--data/templates/search/list.htm8
-rw-r--r--src/phinde/Elasticsearch.php31
-rw-r--r--src/phinde/Helper.php9
-rw-r--r--www/css/phinde.css3
-rw-r--r--www/index.php27
9 files changed, 83 insertions, 8 deletions
diff --git a/README.rst b/README.rst
index 121893c..ba8a681 100644
--- a/README.rst
+++ b/README.rst
@@ -1,3 +1,9 @@
+Features
+========
+- Site search
+
+ - Query: ``foo bar site:www.example.org/dir/``
+
Dependencies
============
- PHP 5.5+
diff --git a/bin/index.php b/bin/index.php
index eb82df2..8ac08ce 100755
--- a/bin/index.php
+++ b/bin/index.php
@@ -61,6 +61,7 @@ $doc = new \DOMDocument();
$sx = simplexml_import_dom($doc);
$indexDoc->url = $url;
+$indexDoc->schemalessUrl = Helper::noSchema($url);
$indexDoc->type = 'html';
$indexDoc->subtype = '';
$indexDoc->mimetype = $mimetype;
diff --git a/data/templates/search.htm b/data/templates/search.htm
index 8eb077b..4f3a252 100644
--- a/data/templates/search.htm
+++ b/data/templates/search.htm
@@ -12,7 +12,8 @@
</li>
</ul>
<form class="navbar-form pull-left">
- <input type="text" name="q" placeholder="Search" value="{{query}}"/>
+ <input type="text" name="q" placeholder="Search"
+ value="{{query}}" class="input-xxlarge"/>
<button type="submit" class="btn">Find</button>
</form>
</div>
diff --git a/data/templates/search/hit.htm b/data/templates/search/hit.htm
index 4981017..dababcb 100644
--- a/data/templates/search/hit.htm
+++ b/data/templates/search/hit.htm
@@ -1,7 +1,7 @@
{% set doc = hit._source %}
<li class="hit">
<span class="title">
- <a href="{{doc.url}}">{{doc.title}}</a>
+ <a href="{{doc.url}}">{{doc.htmlTitle|raw}}</a>
{% if doc.author.name %}
by
<em>
@@ -19,5 +19,6 @@
{% if doc.modate %}
<span class="date">{{doc.extra.day|date("Y-m-d")}}</span>
{% endif %}
+ {{doc.htmlText|raw}}
</span>
</li>
diff --git a/data/templates/search/list.htm b/data/templates/search/list.htm
index ef8d454..97584d6 100644
--- a/data/templates/search/list.htm
+++ b/data/templates/search/list.htm
@@ -4,7 +4,13 @@
</p>
{% else %}
<p>
- Found {{hitcount}} search results for "<tt>{{query}}</tt>" in {{queryTime}}:
+ Found {{hitcount}} search
+ {% if hitcount == 1 %}result{% else %}results{%endif%}
+ for "<tt>{{cleanQuery}}</tt>"
+ {% if site %}
+ on <tt>{{site}}</tt>
+ {% endif %}
+ in {{queryTime}}:
</p>
<ul class="hits">
{% for hit in hits %}
diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php
index 4bc4637..735b64f 100644
--- a/src/phinde/Elasticsearch.php
+++ b/src/phinde/Elasticsearch.php
@@ -68,7 +68,7 @@ class Elasticsearch
$r->send();
}
- public function search($query, $filters, $page, $perPage)
+ public function search($query, $filters, $site, $page, $perPage)
{
$r = new Elasticsearch_Request(
$this->baseUrl . 'document/_search',
@@ -98,6 +98,24 @@ class Elasticsearch
)
)
),
+ 'highlight' => array(
+ // The hit template prints highlight fragments unescaped (|raw).
+ // Let Elasticsearch HTML-escape the stored text before it inserts
+ // the highlight tags, so indexed content cannot inject markup.
+ 'encoder' => 'html',
+ 'pre_tags' => array('<em class="hl">'),
+ 'order' => 'score',
+ 'fields' => array(
+ 'title' => array(
+ 'require_field_match' => false,
+ 'number_of_fragments' => 0,
+ ),
+ 'url' => array(
+ 'require_field_match' => false,
+ 'number_of_fragments' => 0,
+ ),
+ 'text' => array(
+ 'require_field_match' => false,
+ 'number_of_fragments' => 1,
+ ),
+ )
+ ),
'aggregations' => array(
'tags' => array(
'terms' => array(
@@ -133,11 +151,20 @@ class Elasticsearch
)
);
}
+ if ($site != '') {
+ $doc['query']['bool']['must'][] = array(
+ 'prefix' => array(
+ 'schemalessUrl' => array(
+ 'value' => $site
+ )
+ )
+ );
+ }
//unset($doc['_source']);
//ini_set('xdebug.var_display_max_depth', 10);
- //return json_decode(json_encode($doc));
+ //echo json_encode($doc);die();
$r->setBody(json_encode($doc));
$res = $r->send();
return json_decode($res->getBody());
diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php
index 4863961..0b98521 100644
--- a/src/phinde/Helper.php
+++ b/src/phinde/Helper.php
@@ -11,5 +11,14 @@ class Helper
}
return true;
}
+
+ /** Remove a leading http:// or https:// from $url; returns the rest. */
+ public static function noSchema($url)
+ {
+ // str_replace() would also strip schemes embedded later in the URL
+ // (e.g. "?next=http://..."), so anchor the match at the start.
+ // Case-insensitive because URL schemes are case-insensitive.
+ return preg_replace('#^https?://#i', '', $url);
+ }
}
?>
diff --git a/www/css/phinde.css b/www/css/phinde.css
index a3d593e..6d9ad20 100644
--- a/www/css/phinde.css
+++ b/www/css/phinde.css
@@ -24,4 +24,7 @@
}
.hit .date {
color: #666;
+}
+.hit em.hl {
+ font-weight: bold;
} \ No newline at end of file
diff --git a/www/index.php b/www/index.php
index 498cf93..12befbc 100644
--- a/www/index.php
+++ b/www/index.php
@@ -54,9 +54,18 @@ function buildLink($baseLink, $filters, $addFilterType, $addFilterValue)
return $baseLink;
}
+$site = null;
+$cleanQuery = $query;
+// "site:" only counts at the start of a word ("website:x" is a plain term)
+$sitePattern = '#(?<![^ ])site:([^ ]*)#';
+if (preg_match($sitePattern, $query, $matches)) {
+ $site = Helper::noSchema($matches[1]);
+ $cleanQuery = trim(preg_replace($sitePattern, '', $query, 1));
+}
+
$timeBegin = microtime(true);
$es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
-$res = $es->search($query, $filters, $page, $perPage);
+$res = $es->search($cleanQuery, $filters, $site, $page, $perPage);
$timeEnd = microtime(true);
$pager = new Html_Pager(
@@ -67,8 +76,19 @@ $pager = new Html_Pager(
foreach ($res->hits->hits as &$hit) {
$doc = $hit->_source;
if ($doc->title == '') {
- $doc->title = '(no title)';
+ // One chain: a separate if/else would overwrite this placeholder with ''.
+ $doc->htmlTitle = '(no title)';
+ } elseif (isset($hit->highlight->title[0])) {
+ $doc->htmlTitle = $hit->highlight->title[0];
+ } else {
+ $doc->htmlTitle = htmlspecialchars($doc->title);
}
+ if (isset($hit->highlight->text[0])) {
+ $doc->htmlText = $hit->highlight->text[0];
+ } else {
+ $doc->htmlText = null;
+ }
+
$doc->extra = new \stdClass();
$doc->extra->cleanUrl = preg_replace('#^.*://#', '', $doc->url);
if (isset($doc->modate)) {
@@ -81,13 +101,14 @@ foreach ($res->aggregations as $key => &$aggregation) {
$bucket->url = buildLink($baseLink, $filters, $key, $bucket->key);
}
}
-//var_dump($res->aggregations);
render(
'search',
array(
'queryTime' => round($timeEnd - $timeBegin, 2) . 'ms',
'query' => $query,
+ 'cleanQuery' => $cleanQuery,
+ 'site' => $site,
'hitcount' => $res->hits->total,
'hits' => $res->hits->hits,
'aggregations' => $res->aggregations,