Support multiple "nick:" terms in search field
[phinde.git] / src / phinde / Elasticsearch.php
1 <?php
2 namespace phinde;
3
/**
 * Small client for the Elasticsearch HTTP API.
 *
 * All requests go to paths below the base URL given to the constructor
 * ("document/<id>" and "document/_search").
 */
class Elasticsearch
{
    /**
     * URL that request paths are appended to.
     * Paths are concatenated without a separator, so the value
     * needs to end with a slash.
     *
     * @var string
     */
    protected $baseUrl;

    /**
     * Remember the base URL for all later requests.
     *
     * @param string $baseUrl Elasticsearch URL the request paths get appended to
     */
    public function __construct($baseUrl)
    {
        $this->baseUrl = $baseUrl;
    }

    /**
     * Check whether a document for the given URL exists.
     *
     * @param string $url URL that is used as document ID
     *
     * @return boolean True when the HEAD request is answered with status 200
     */
    public function isKnown($url)
    {
        $r = new Elasticsearch_Request(
            $this->getDocumentUrl($url),
            \HTTP_Request2::METHOD_HEAD
        );
        // NOTE(review): presumably stops the request class from treating
        // a 404 as failure - confirm in Elasticsearch_Request
        $r->allow404 = true;
        $res = $r->send();
        return $res->getStatus() == 200;
    }

    /**
     * Fetch the stored document for the given URL.
     *
     * @param string $url URL that is used as document ID
     *
     * @return object|null The "_source" part of the response, or null when
     *                     the status is not 200 or the response body does
     *                     not contain a "_source" property
     */
    public function get($url)
    {
        $r = new Elasticsearch_Request(
            $this->getDocumentUrl($url),
            \HTTP_Request2::METHOD_GET
        );
        $r->allow404 = true;
        $res = $r->send();
        if ($res->getStatus() != 200) {
            return null;
        }

        $d = json_decode($res->getBody());
        if (!is_object($d) || !isset($d->_source)) {
            // undecodable or unexpected body: report "no document"
            // instead of dereferencing a non-object
            return null;
        }
        return $d->_source;
    }

    /**
     * Store a document for the URL that only consists of the URL itself
     * and the status "queued".
     *
     * @param string $url URL that is used as document ID
     *
     * @return void
     */
    public function markQueued($url)
    {
        $r = new Elasticsearch_Request(
            $this->getDocumentUrl($url),
            \HTTP_Request2::METHOD_PUT
        );
        $doc = array(
            'status' => 'queued',
            'url' => $url
        );
        $r->setBody(json_encode($doc));
        $r->send();
    }

    /**
     * Run a full text search on all documents with status "indexed".
     *
     * The query may contain the following special terms:
     * - "nick:NAME"   is rewritten to "author.name:NAME" (any number of times)
     * - "after:DATE"  only documents with "modate" after that day
     * - "before:DATE" only documents with "modate" before that day
     * - "date:DATE"   only documents with "modate" on that day
     * The date terms are removed from the query text and converted into
     * range clauses. They are only recognized at the beginning of a word,
     * so field names that merely end in such a keyword ("modate:",
     * "update:") are left alone.
     *
     * @param string  $query   Search query as entered by the user
     * @param array   $filters Field name => value pairs; each pair becomes
     *                         an additional "term" clause
     * @param string  $site    Prefix the "schemalessUrl" field has to begin
     *                         with; an empty string disables the restriction
     * @param integer $page    Zero-based result page number
     * @param integer $perPage Number of results per page
     * @param string  $sort    "date" sorts by "modate" descending; any other
     *                         value sends an empty sort configuration
     *
     * @return mixed Response body decoded with json_decode()
     */
    public function search($query, $filters, $site, $page, $perPage, $sort)
    {
        $query = $this->rewriteNickTerms($query);
        list($query, $qMust) = $this->extractDateFilters($query);

        // A query that consisted only of date terms is empty by now.
        // Sending an empty query_string clause would prevent the date
        // restricted documents from being found (it matches nothing or is
        // rejected, depending on the Elasticsearch version), so it is left
        // out. Without any date filter the clause is always sent, as before.
        if (trim($query) !== '' || count($qMust) == 0) {
            $qMust[] = array(
                'query_string' => array(
                    'default_field' => '_all',
                    'default_operator' => 'AND',
                    'query' => $query
                )
            );
        }
        $qMust[] = array(
            'term' => array(
                'status' => 'indexed'
            )
        );
        foreach ($filters as $type => $value) {
            $qMust[] = array(
                'term' => array(
                    $type => $value
                )
            );
        }
        if ($site != '') {
            $qMust[] = array(
                'prefix' => array(
                    'schemalessUrl' => array(
                        'value' => $site
                    )
                )
            );
        }

        if ($sort == 'date') {
            $sortCfg = array('modate' => array('order' => 'desc'));
        } else {
            $sortCfg = array();
        }

        // Length of the highlighted text excerpt. !empty() keeps a missing
        // configuration key from raising an "undefined index" notice.
        $contentMatchSize = 100;
        if (!empty($GLOBALS['phinde']['showFullContent'])) {
            $contentMatchSize = 999999;
        }

        $doc = array(
            '_source' => array(
                'url',
                'title',
                'author',
                'modate',
            ),
            'query' => array(
                'bool' => array(
                    'must' => $qMust
                )
            ),
            'highlight' => array(
                'pre_tags' => array('<em class="hl">'),
                'order' => 'score',
                'encoder' => 'html',
                'fields' => array(
                    'title' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'url' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'text' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 1,
                        'fragment_size' => $contentMatchSize,
                        'no_match_size' => $contentMatchSize,
                    ),
                )
            ),
            'aggregations' => array(
                'tags' => array(
                    'terms' => array(
                        'field' => 'tags'
                    )
                ),
                'language' => array(
                    'terms' => array(
                        'field' => 'language'
                    )
                ),
                'domain' => array(
                    'terms' => array(
                        'field' => 'domain'
                    )
                ),
                'type' => array(
                    'terms' => array(
                        'field' => 'type'
                    )
                )
            ),
            'from' => $page * $perPage,
            'size' => $perPage,
            'sort' => $sortCfg,
        );

        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_search',
            \HTTP_Request2::METHOD_GET
        );
        $r->setBody(json_encode($doc));
        $res = $r->send();
        return json_decode($res->getBody());
    }

    /**
     * Build the request URL for the document that belongs to a page URL.
     *
     * @param string $url URL that is used as document ID
     *
     * @return string Base URL + "document/" + URL-encoded ID
     */
    private function getDocumentUrl($url)
    {
        return $this->baseUrl . 'document/' . rawurlencode($url);
    }

    /**
     * Replace every "nick:NAME" term with "author.name:NAME".
     *
     * @param string $query Search query
     *
     * @return string Query with rewritten terms
     */
    private function rewriteNickTerms($query)
    {
        $rewritten = preg_replace(
            '#\bnick:([^ ]*)#', 'author.name:$1', $query
        );
        // preg_replace() returns null on PCRE errors; keep the query then
        return $rewritten === null ? $query : $rewritten;
    }

    /**
     * Cut the "after:", "before:" and "date:" terms out of the query and
     * convert them into range clauses on the "modate" field.
     *
     * Only the first term of each kind is converted. Every occurrence of
     * that exact term is removed from the query.
     *
     * @param string $query Search query
     *
     * @return array Two values: the remaining query (trimmed when a term
     *               was removed) and the list of range clauses
     */
    private function extractDateFilters($query)
    {
        // search keyword => range operators that get the date assigned
        $keywords = array(
            'after'  => array('gt'),
            'before' => array('lt'),
            'date'   => array('gte', 'lte'),
        );

        $clauses = array();
        foreach ($keywords as $keyword => $operators) {
            // \b: do not match inside longer field names like "modate:"
            $found = preg_match(
                '#\b' . $keyword . ':([^ ]+)#', $query, $matches
            );
            if (!$found) {
                continue;
            }

            // remove the term where it stands on its own, but not where
            // it is part of a longer word or value
            $stripped = preg_replace(
                '#\b' . preg_quote($matches[0], '#') . '(?![^ ])#',
                '',
                $query
            );
            if ($stripped !== null) {
                $query = trim($stripped);
            }

            $range = array();
            foreach ($operators as $operator) {
                // "||/d" is Elasticsearch date math: round to the day
                $range[$operator] = $matches[1] . '||/d';
            }
            $clauses[] = array(
                'range' => array(
                    'modate' => $range
                )
            );
        }

        return array($query, $clauses);
    }
}
216 ?>