diff options
| author | Christian Weiske <cweiske@cweiske.de> | 2016-02-03 21:12:17 +0100 |
|---|---|---|
| committer | Christian Weiske <cweiske@cweiske.de> | 2016-02-03 21:12:17 +0100 |
| commit | 88c741c09b664260f826ff947bfaab071ac70d05 (patch) | |
| tree | b2f7417aee5979d4bd994b929f71d518a3604aa0 | |
| parent | fab8cabe19ac99e3c8a95abcea3c0569b67045bb (diff) | |
| download | phinde-88c741c09b664260f826ff947bfaab071ac70d05.tar.gz phinde-88c741c09b664260f826ff947bfaab071ac70d05.zip | |
add site search, highlighting
| -rw-r--r-- | README.rst | 6 | ||||
| -rwxr-xr-x | bin/index.php | 1 | ||||
| -rw-r--r-- | data/templates/search.htm | 3 | ||||
| -rw-r--r-- | data/templates/search/hit.htm | 3 | ||||
| -rw-r--r-- | data/templates/search/list.htm | 8 | ||||
| -rw-r--r-- | src/phinde/Elasticsearch.php | 31 | ||||
| -rw-r--r-- | src/phinde/Helper.php | 9 | ||||
| -rw-r--r-- | www/css/phinde.css | 3 | ||||
| -rw-r--r-- | www/index.php | 27 |
9 files changed, 83 insertions, 8 deletions
@@ -1,3 +1,9 @@ +Features +======== +- Site search + + - Query: ``foo bar site:www.example.org/dir/`` + Dependencies ============ - PHP 5.5+ diff --git a/bin/index.php b/bin/index.php index eb82df2..8ac08ce 100755 --- a/bin/index.php +++ b/bin/index.php @@ -61,6 +61,7 @@ $doc = new \DOMDocument(); $sx = simplexml_import_dom($doc); $indexDoc->url = $url; +$indexDoc->schemalessUrl = Helper::noSchema($url); $indexDoc->type = 'html'; $indexDoc->subtype = ''; $indexDoc->mimetype = $mimetype; diff --git a/data/templates/search.htm b/data/templates/search.htm index 8eb077b..4f3a252 100644 --- a/data/templates/search.htm +++ b/data/templates/search.htm @@ -12,7 +12,8 @@ </li> </ul> <form class="navbar-form pull-left"> - <input type="text" name="q" placeholder="Search" value="{{query}}"/> + <input type="text" name="q" placeholder="Search" + value="{{query}}" class="input-xxlarge"/> <button type="submit" class="btn">Find</button> </form> </div> diff --git a/data/templates/search/hit.htm b/data/templates/search/hit.htm index 4981017..dababcb 100644 --- a/data/templates/search/hit.htm +++ b/data/templates/search/hit.htm @@ -1,7 +1,7 @@ {% set doc = hit._source %} <li class="hit"> <span class="title"> - <a href="{{doc.url}}">{{doc.title}}</a> + <a href="{{doc.url}}">{{doc.htmlTitle|raw}}</a> {% if doc.author.name %} by <em> @@ -19,5 +19,6 @@ {% if doc.modate %} <span class="date">{{doc.extra.day|date("Y-m-d")}}</span> {% endif %} + {{doc.htmlText|raw}} </span> </li> diff --git a/data/templates/search/list.htm b/data/templates/search/list.htm index ef8d454..97584d6 100644 --- a/data/templates/search/list.htm +++ b/data/templates/search/list.htm @@ -4,7 +4,13 @@ </p> {% else %} <p> - Found {{hitcount}} search results for "<tt>{{query}}</tt>" in {{queryTime}}: + Found {{hitcount}} search + {% if hitcount == 1 %}result{% else %}results{%endif%} + for "<tt>{{cleanQuery}}</tt>" + {% if site %} + on <tt>{{site}}</tt> + {% endif %} + in {{queryTime}}: </p> <ul class="hits"> {% for hit in hits %} diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php index 4bc4637..735b64f 100644 --- a/src/phinde/Elasticsearch.php +++ b/src/phinde/Elasticsearch.php @@ -68,7 +68,7 @@ class Elasticsearch $r->send(); } - public function search($query, $filters, $page, $perPage) + public function search($query, $filters, $site, $page, $perPage) { $r = new Elasticsearch_Request( $this->baseUrl . 'document/_search', @@ -98,6 +98,24 @@ class Elasticsearch ) ) ), + 'highlight' => array( + 'pre_tags' => array('<em class="hl">'), + 'order' => 'score', + 'fields' => array( + 'title' => array( + 'require_field_match' => false, + 'number_of_fragments' => 0, + ), + 'url' => array( + 'require_field_match' => false, + 'number_of_fragments' => 0, + ), + 'text' => array( + 'require_field_match' => false, + 'number_of_fragments' => 1, + ), + ) + ), 'aggregations' => array( 'tags' => array( 'terms' => array( @@ -133,11 +151,20 @@ class Elasticsearch ) ); } + if ($site != '') { + $doc['query']['bool']['must'][] = array( + 'prefix' => array( + 'schemalessUrl' => array( + 'value' => $site + ) + ) + ); + } //unset($doc['_source']); //ini_set('xdebug.var_display_max_depth', 10); - //return json_decode(json_encode($doc)); + //echo json_encode($doc);die(); $r->setBody(json_encode($doc)); $res = $r->send(); return json_decode($res->getBody()); diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php index 4863961..0b98521 100644 --- a/src/phinde/Helper.php +++ b/src/phinde/Helper.php @@ -11,5 +11,14 @@ class Helper } return true; } + + public static function noSchema($url) + { + return str_replace( + array('http://', 'https://'), + '', + $url + ); + } } ?> diff --git a/www/css/phinde.css b/www/css/phinde.css index a3d593e..6d9ad20 100644 --- a/www/css/phinde.css +++ b/www/css/phinde.css @@ -24,4 +24,7 @@ } .hit .date { color: #666; +} +.hit em.hl { + font-weight: bold; }
\ No newline at end of file diff --git a/www/index.php b/www/index.php index 498cf93..12befbc 100644 --- a/www/index.php +++ b/www/index.php @@ -54,9 +54,18 @@ function buildLink($baseLink, $filters, $addFilterType, $addFilterValue) return $baseLink; } +$site = null; +if (preg_match('#site:([^ ]*)#', $query, $matches)) { + $site = $matches[1]; + $cleanQuery = trim(str_replace('site:' . $site, '', $query)); + $site = Helper::noSchema($site); +} else { + $cleanQuery = $query; +} + $timeBegin = microtime(true); $es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']); -$res = $es->search($query, $filters, $page, $perPage); +$res = $es->search($cleanQuery, $filters, $site, $page, $perPage); $timeEnd = microtime(true); $pager = new Html_Pager( @@ -67,8 +76,19 @@ $pager = new Html_Pager( foreach ($res->hits->hits as &$hit) { $doc = $hit->_source; if ($doc->title == '') { - $doc->title = '(no title)'; + $doc->htmlTitle = '(no title)'; + } + if (isset($hit->highlight->title[0])) { + $doc->htmlTitle = $hit->highlight->title[0]; + } else { + $doc->htmlTitle = htmlspecialchars($doc->title); } + if (isset($hit->highlight->text[0])) { + $doc->htmlText = $hit->highlight->text[0]; + } else { + $doc->htmlText = null; + } + $doc->extra = new \stdClass(); $doc->extra->cleanUrl = preg_replace('#^.*://#', '', $doc->url); if (isset($doc->modate)) { @@ -81,13 +101,14 @@ foreach ($res->aggregations as $key => &$aggregation) { $bucket->url = buildLink($baseLink, $filters, $key, $bucket->key); } } -//var_dump($res->aggregations); render( 'search', array( 'queryTime' => round($timeEnd - $timeBegin, 2) . 'ms', 'query' => $query, + 'cleanQuery' => $cleanQuery, + 'site' => $site, 'hitcount' => $res->hits->total, 'hits' => $res->hits->hits, 'aggregations' => $res->aggregations, |
