43ef4f9eda3ca9123b419978770a40f58a54bd4d
[phinde.git] / src / phinde / Elasticsearch.php
1 <?php
2 namespace phinde;
3
/**
 * Thin client for the Elasticsearch HTTP API used to store and search
 * documents.
 *
 * All requests are built with Elasticsearch_Request and sent to paths
 * below the "document/" prefix of the configured base URL.
 */
class Elasticsearch
{
    /**
     * URL prefix that request paths such as "document/_search" are
     * appended to. The paths are concatenated without a separator, so
     * the value is expected to already end with one.
     */
    protected $baseUrl;

    /**
     * @param string $baseUrl URL prefix for all requests
     */
    public function __construct($baseUrl)
    {
        $this->baseUrl = $baseUrl;
    }

    /**
     * Check whether a document with the given URL is in the index.
     *
     * Sends a term filter on the "url" field to the "_search/exists"
     * endpoint. Every response status except 404 counts as "known".
     *
     * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
     *
     * @param string $url URL to look for
     *
     * @return boolean False when the server answered with 404, else true
     */
    public function isKnown($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_search/exists',
            \HTTP_Request2::METHOD_GET
        );
        //a 404 is an expected answer here, not a failure
        $r->allow404 = true;
        $r->setBody(
            json_encode(
                array(
                    'query' => array(
                        'filtered' => array(
                            'filter' => array(
                                'term' => array(
                                    'url' => $url
                                )
                            )
                        )
                    )
                )
            )
        );
        $status = $r->send()->getStatus();
        return $status !== 404;
    }

    /**
     * Fetch the stored document whose ID is the (raw-url-encoded) URL.
     *
     * @param string $url URL of the document
     *
     * @return mixed The "_source" value of the decoded response, or NULL
     *               when the status is not 200 or the body contains no
     *               usable "_source"
     */
    public function get($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/' . rawurlencode($url),
            \HTTP_Request2::METHOD_GET
        );
        $r->allow404 = true;
        $res = $r->send();
        if ($res->getStatus() != 200) {
            return null;
        }
        $d = json_decode($res->getBody());
        //json_decode() gives NULL for unparsable bodies; do not
        // dereference it
        if (!is_object($d) || !isset($d->_source)) {
            return null;
        }
        return $d->_source;
    }

    /**
     * Store a minimal document with status "queued" for the given URL.
     *
     * The document is PUT to the same ID that get() reads from. The
     * response is not inspected.
     *
     * @param string $url URL of the document
     *
     * @return void
     */
    public function markQueued($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/' . rawurlencode($url),
            \HTTP_Request2::METHOD_PUT
        );
        $doc = array(
            'status' => 'queued',
            'url' => $url
        );
        $r->setBody(json_encode($doc));
        $r->send();
    }

    /**
     * Run a full text search and return the decoded response.
     *
     * Special tokens in $query:
     * - "nick:NAME" is rewritten to "author.name:NAME"
     * - "after:DATE", "before:DATE" and "date:DATE" are removed from the
     *   query text and turned into range filters on the "modate" field,
     *   rounded to the day ("||/d")
     *
     * Only documents with status "indexed" are matched.
     *
     * @param string  $query   User query in query_string syntax
     * @param array   $filters Field name => value pairs; each one becomes
     *                         an additional "term" condition
     * @param string  $site    Prefix the "schemalessUrl" field has to
     *                         start with; empty string for no restriction
     * @param integer $page    Result page; "from" is $page * $perPage
     * @param integer $perPage Number of results per page
     * @param string  $sort    "date" to sort by "modate" descending;
     *                         any other value sends an empty sort list
     *
     * @return mixed Result of json_decode() on the response body
     */
    public function search($query, $filters, $site, $page, $perPage, $sort)
    {
        if (preg_match('#nick:([^ ]*)#', $query, $matches)) {
            $authorName = $matches[1];
            $query = str_replace(
                'nick:' . $authorName,
                'author.name:' . $authorName,
                $query
            );
        }

        $qMust = array();//query parts for the MUST section

        //modification date filters: keyword => range operators to set
        $dateKeywords = array(
            'after'  => array('gt'),
            'before' => array('lt'),
            'date'   => array('gte', 'lte'),
        );
        foreach ($dateKeywords as $keyword => $operators) {
            $found = $this->extractKeyword($keyword, $query);
            if ($found === null) {
                continue;
            }
            list($date, $query) = $found;

            $range = array();
            foreach ($operators as $operator) {
                $range[$operator] = $date . '||/d';
            }
            $qMust[] = array(
                'range' => array(
                    'modate' => $range
                )
            );
        }

        $qMust[] = array(
            'query_string' => array(
                'default_field' => '_all',
                'default_operator' => 'AND',
                'query' => $query
            )
        );
        $qMust[] = array(
            'term' => array(
                'status' => 'indexed'
            )
        );
        foreach ($filters as $type => $value) {
            $qMust[] = array(
                'term' => array(
                    $type => $value
                )
            );
        }
        if ($site != '') {
            $qMust[] = array(
                'prefix' => array(
                    'schemalessUrl' => array(
                        'value' => $site
                    )
                )
            );
        }

        if ($sort == 'date') {
            $sortCfg = array('modate' => array('order' => 'desc'));
        } else {
            $sortCfg = array();
        }

        //length of the highlighted text fragment; the option may be
        // missing from the configuration, so do not read it unguarded
        $contentMatchSize = 100;
        if (!empty($GLOBALS['phinde']['showFullContent'])) {
            $contentMatchSize = 999999;
        }

        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_search',
            \HTTP_Request2::METHOD_GET
        );
        $doc = array(
            '_source' => array(
                'url',
                'title',
                'author',
                'modate',
            ),
            'query' => array(
                'bool' => array(
                    'must' => $qMust
                )
            ),
            'highlight' => array(
                'pre_tags' => array('<em class="hl">'),
                'order' => 'score',
                'encoder' => 'html',
                'fields' => array(
                    'title' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'url' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'text' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 1,
                        'fragment_size' => $contentMatchSize,
                        'no_match_size' => $contentMatchSize,
                    ),
                )
            ),
            'aggregations' => array(
                'tags' => array(
                    'terms' => array(
                        'field' => 'tags'
                    )
                ),
                'language' => array(
                    'terms' => array(
                        'field' => 'language'
                    )
                ),
                'domain' => array(
                    'terms' => array(
                        'field' => 'domain'
                    )
                ),
                'type' => array(
                    'terms' => array(
                        'field' => 'type'
                    )
                )
            ),
            'from' => $page * $perPage,
            'size' => $perPage,
            'sort' => $sortCfg,
        );

        $r->setBody(json_encode($doc));
        $res = $r->send();
        return json_decode($res->getBody());
    }

    /**
     * Find the first "<keyword>:<value>" token in the query and strip it.
     *
     * The keyword has to start at a word boundary so that it is not
     * found inside longer field names ("date:" within "modate:").
     *
     * @param string $keyword Token name without the colon
     * @param string $query   Query text to inspect
     *
     * @return array|null array(value, trimmed query without the token),
     *                    or NULL when the keyword does not occur
     */
    protected function extractKeyword($keyword, $query)
    {
        $pattern = '#\b' . preg_quote($keyword, '#') . ':([^ ]+)#';
        if (!preg_match($pattern, $query, $matches)) {
            return null;
        }
        return array(
            $matches[1],
            trim(str_replace($matches[0], '', $query)),
        );
    }
}
233 ?>