1732bbb824cbaf8b072379a844f808af06926b72
[phinde.git] / src / phinde / Elasticsearch.php
1 <?php
2 namespace phinde;
3
/**
 * Small client for the "document" resources of an Elasticsearch index.
 *
 * Documents are addressed by their URL: the raw-URL-encoded URL is used
 * as the last path segment of the document's REST resource.
 */
class Elasticsearch
{
    /**
     * Prefix of all request URLs.
     *
     * "document/..." is appended without a separator, so this presumably
     * has to end with a slash - confirm against the configuration.
     *
     * @var string
     */
    protected $baseUrl;

    /**
     * @param string $baseUrl Prefix for all request URLs
     */
    public function __construct($baseUrl)
    {
        $this->baseUrl = $baseUrl;
    }

    /**
     * Check if a document for the given URL exists in the index.
     *
     * @param string $url URL of the document
     *
     * @return boolean True if the HEAD request was answered with status 200
     */
    public function isKnown($url)
    {
        $r = $this->createDocumentRequest($url, \HTTP_Request2::METHOD_HEAD);
        // NOTE(review): presumably keeps Elasticsearch_Request from treating
        // a 404 response as an error - confirm in that class.
        $r->allow404 = true;
        $res = $r->send();
        return (int) $res->getStatus() === 200;
    }

    /**
     * Fetch the stored document for the given URL.
     *
     * @param string $url URL of the document
     *
     * @return object|null The document's "_source" object, or null when the
     *                     response status is not 200 or the response body
     *                     does not contain a "_source" property
     */
    public function get($url)
    {
        $r = $this->createDocumentRequest($url, \HTTP_Request2::METHOD_GET);
        $r->allow404 = true;
        $res = $r->send();
        if ((int) $res->getStatus() !== 200) {
            return null;
        }

        $d = json_decode($res->getBody());
        // json_decode() yields null for an undecodable body; do not
        // dereference it blindly.
        if (!is_object($d) || !isset($d->_source)) {
            return null;
        }
        return $d->_source;
    }

    /**
     * Store a document for the URL that only consists of the URL itself
     * and the status "queued".
     *
     * The document is sent with PUT to the URL's document resource.
     *
     * @param string $url URL of the document
     *
     * @return void
     */
    public function markQueued($url)
    {
        $r = $this->createDocumentRequest($url, \HTTP_Request2::METHOD_PUT);
        $doc = array(
            'status' => 'queued',
            'url' => $url
        );
        $r->setBody(json_encode($doc));
        $r->send();
    }

    /**
     * Run a full-text search over all documents with status "indexed".
     *
     * The query string supports some special terms that are handled here
     * before the rest is passed on as "query_string" query:
     * - "nick:NAME" is an alias for "author.name:NAME"
     * - "after:DATE", "before:DATE" and "date:DATE" are removed from the
     *   query and applied as range filters on the "modate" field
     *
     * @param string  $query   Search query entered by the user
     * @param array   $filters Map of field name => value; each entry is
     *                         added as "term" condition
     * @param string  $site    Prefix the "schemalessUrl" field must start
     *                         with; empty string for no restriction
     * @param integer $page    Zero-based page number
     * @param integer $perPage Number of results per page
     * @param string  $sort    "date" to sort by "modate" descending; any
     *                         other value sends an empty sort configuration
     *
     * @return mixed Search response as decoded by json_decode()
     *               (null if the response body cannot be decoded)
     */
    public function search($query, $filters, $site, $page, $perPage, $sort)
    {
        // Translate every "nick:" term. The word boundary keeps tokens that
        // merely end in "nick:" untouched.
        $query = preg_replace('#\bnick:#', 'author.name:', $query);

        //query parts for the MUST section, modification date filters first
        list($query, $qMust) = $this->extractDateFilters($query);

        $qMust[] = array(
            'query_string' => array(
                'default_field' => '_all',
                'default_operator' => 'AND',
                'query' => $query
            )
        );
        $qMust[] = array(
            'term' => array(
                'status' => 'indexed'
            )
        );
        foreach ($filters as $type => $value) {
            $qMust[] = array(
                'term' => array(
                    $type => $value
                )
            );
        }
        if ($site != '') {
            $qMust[] = array(
                'prefix' => array(
                    'schemalessUrl' => array(
                        'value' => $site
                    )
                )
            );
        }

        if ($sort == 'date') {
            $sortCfg = array('modate' => array('order' => 'desc'));
        } else {
            $sortCfg = array();
        }

        // !empty() instead of a plain read: the setting may be missing.
        $contentMatchSize = 100;
        if (!empty($GLOBALS['phinde']['showFullContent'])) {
            $contentMatchSize = 999999;
        }

        $aggregations = array();
        foreach (array('tags', 'language', 'domain', 'type') as $field) {
            $aggregations[$field] = array(
                'terms' => array(
                    'field' => $field
                )
            );
        }

        $doc = array(
            '_source' => array(
                'url',
                'title',
                'author',
                'modate',
            ),
            'query' => array(
                'bool' => array(
                    'must' => $qMust
                )
            ),
            'highlight' => array(
                'pre_tags' => array('<em class="hl">'),
                'order' => 'score',
                'encoder' => 'html',
                'fields' => array(
                    'title' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'url' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'text' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 1,
                        'fragment_size' => $contentMatchSize,
                        'no_match_size' => $contentMatchSize,
                    ),
                )
            ),
            'aggregations' => $aggregations,
            'from' => $page * $perPage,
            'size' => $perPage,
            'sort' => $sortCfg,
        );

        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_search',
            \HTTP_Request2::METHOD_GET
        );
        $r->setBody(json_encode($doc));
        $res = $r->send();
        return json_decode($res->getBody());
    }

    /**
     * Create a request for the document resource of the given URL.
     *
     * @param string $url    URL of the document
     * @param string $method HTTP method, one of the
     *                       \HTTP_Request2::METHOD_* constants
     *
     * @return Elasticsearch_Request Request object that has not been sent
     */
    protected function createDocumentRequest($url, $method)
    {
        return new Elasticsearch_Request(
            $this->baseUrl . 'document/' . rawurlencode($url),
            $method
        );
    }

    /**
     * Pull the "after:", "before:" and "date:" terms out of the query and
     * convert them into range conditions on the "modate" field.
     *
     * Each keyword is handled once, using its first occurrence. The value
     * gets "||/d" appended before it is used as range boundary.
     *
     * @param string $query Search query
     *
     * @return array Two elements: the query without the handled terms, and
     *               the list of range conditions (possibly empty)
     */
    protected function extractDateFilters($query)
    {
        //keyword => range operators that get the keyword's value
        $keywords = array(
            'after'  => array('gt'),
            'before' => array('lt'),
            'date'   => array('gte', 'lte'),
        );

        $conditions = array();
        foreach ($keywords as $keyword => $operators) {
            // \b: do not match inside longer field names like "modate:"
            $pattern = '#\b' . $keyword . ':([^ ]+)#';
            if (!preg_match($pattern, $query, $matches)) {
                continue;
            }

            // Remove the term only where it is a complete token, so that
            // e.g. "after:2016" is not cut out of "after:2016-01".
            $termPattern = '#\b' . preg_quote($matches[0], '#') . '(?![^ ])#';
            $query = trim(preg_replace($termPattern, '', $query));

            $range = array();
            foreach ($operators as $operator) {
                $range[$operator] = $matches[1] . '||/d';
            }
            $conditions[] = array(
                'range' => array(
                    'modate' => $range
                )
            );
        }

        return array($query, $conditions);
    }
}
215 ?>