status page
[phinde.git] / src / phinde / Elasticsearch.php
1 <?php
2 namespace phinde;
3
/**
 * Client for the Elasticsearch index that holds the crawled documents.
 *
 * Every request goes to a URL below the base URL passed to the constructor.
 * Single documents are addressed as "document/<id>", where the ID is the
 * SHA-256 hash of the document URL (see getDocId()).
 */
class Elasticsearch
{
    /**
     * URL prefix that request paths such as "document/_search" are
     * appended to.
     *
     * @var string
     */
    protected $baseUrl;

    /**
     * @param string $baseUrl URL prefix for all requests. Paths are appended
     *                        verbatim, so the prefix must already end with
     *                        the path separator.
     */
    public function __construct($baseUrl)
    {
        $this->baseUrl = $baseUrl;
    }

    /**
     * Calculate the ID under which a URL is stored in the index.
     *
     * @param string $url Document URL
     *
     * @return string SHA-256 hash of the URL (64 lowercase hex characters)
     */
    public static function getDocId($url)
    {
        return hash('sha256', $url);
    }

    /**
     * Check if the index already contains a document for the URL.
     *
     * Sends a HEAD request for the document and looks at the status code.
     *
     * @param string $url Document URL
     *
     * @return boolean True if the request was answered with HTTP status 200
     */
    public function isKnown($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/' . static::getDocId($url),
            \HTTP_Request2::METHOD_HEAD
        );
        //NOTE(review): presumably keeps send() from treating a 404 as an
        // error - confirm in Elasticsearch_Request
        $r->allow404 = true;
        $res = $r->send();
        return $res->getStatus() == 200;
    }

    /**
     * Fetch the stored data of the document with the given URL.
     *
     * @param string $url Document URL
     *
     * @return object|null The "_source" part of the response, or null when
     *                     the status code is not 200 or the response body
     *                     is not a JSON object with a "_source" property
     */
    public function get($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/' . static::getDocId($url),
            \HTTP_Request2::METHOD_GET
        );
        //NOTE(review): presumably keeps send() from treating a 404 as an
        // error - confirm in Elasticsearch_Request
        $r->allow404 = true;
        $res = $r->send();
        if ($res->getStatus() != 200) {
            return null;
        }

        $d = json_decode($res->getBody());
        if (!is_object($d) || !isset($d->_source)) {
            //json_decode() gives null for malformed JSON; do not
            // dereference it
            return null;
        }
        return $d->_source;
    }

    /**
     * Store a placeholder document for a URL that has not been processed yet.
     *
     * The document only contains the URL and a status object with
     * "processed" set to null and "findable" set to false, which keeps it
     * out of search() results (those require status.findable to be true).
     *
     * @param string $url Document URL
     *
     * @return void
     */
    public function markQueued($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/' . static::getDocId($url),
            \HTTP_Request2::METHOD_PUT
        );
        $doc = (object) array(
            'url' => $url,
            'status' => (object) array(
                'processed' => null,
                'findable'  => false,
            )
        );
        $r->setBody(json_encode($doc));
        $r->send();
    }

    /**
     * Ask the index how many documents it contains.
     *
     * @return integer|null Value of the "count" property of the "_count"
     *                      response, or null when the response body is not
     *                      a JSON object with that property
     */
    public function countDocuments()
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_count',
            \HTTP_Request2::METHOD_GET
        );
        $res = $r->send();

        $d = json_decode($res->getBody());
        if (!is_object($d) || !isset($d->count)) {
            //json_decode() gives null for malformed JSON; do not
            // dereference it
            return null;
        }
        return $d->count;
    }

    /**
     * Run a full text search on the findable documents.
     *
     * The query string may contain these special tokens:
     * - "nick:NAME" is rewritten to "author.name:NAME"
     * - "after:DATE", "before:DATE" and "date:DATE" are removed from the
     *   query string and turned into range filters on the modification
     *   date. Only the first occurrence of each keyword is handled.
     *
     * @param string  $query   Query in Elasticsearch "query_string" syntax
     * @param array   $filters Field name => value pairs; each pair is added
     *                         as a "term" query that has to match
     * @param string  $site    Prefix the "schemalessUrl" field has to start
     *                         with; an empty string disables the restriction
     * @param integer $page    Zero-based result page number
     * @param integer $perPage Number of results on each page
     * @param string  $sort    "date" sorts by modification date, newest
     *                         first; any other value sends an empty sort
     *                         configuration
     *
     * @return mixed JSON-decoded body of the search response
     */
    public function search($query, $filters, $site, $page, $perPage, $sort)
    {
        if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) {
            foreach ($matches[1] as $authorName) {
                $query = str_replace(
                    'nick:' . $authorName,
                    'author.name:' . $authorName,
                    $query
                );
            }
        }

        //The range filters use the same full field path as the sort order
        // and the "_source" list below.
        $dateField = 'status.modate';

        $qMust = array();//query parts for the MUST section

        //modification date filters: keyword => range operators
        $dateKeywords = array(
            'after'  => array('gt'),
            'before' => array('lt'),
            'date'   => array('gte', 'lte'),
        );
        foreach ($dateKeywords as $keyword => $operators) {
            list($query, $date) = $this->cutDateKeyword($query, $keyword);
            if ($date === null) {
                continue;
            }
            $range = array();
            foreach ($operators as $operator) {
                //"||/d" is Elasticsearch date math: round to the day
                $range[$operator] = $date . '||/d';
            }
            $qMust[] = array('range' => array($dateField => $range));
        }

        $qMust[] = array(
            'query_string' => array(
                'default_field' => '_all',
                'default_operator' => 'AND',
                'query' => $query
            )
        );
        $qMust[] = array(
            'term' => array(
                'status.findable' => true
            )
        );
        foreach ($filters as $type => $value) {
            $qMust[] = array(
                'term' => array(
                    $type => $value
                )
            );
        }
        if ($site != '') {
            $qMust[] = array(
                'prefix' => array(
                    'schemalessUrl' => array(
                        'value' => $site
                    )
                )
            );
        }

        if ($sort == 'date') {
            $sortCfg = array($dateField => array('order' => 'desc'));
        } else {
            $sortCfg = array();
        }

        //length of the highlighted text fragment
        $contentMatchSize = 100;
        if (!empty($GLOBALS['phinde']['showFullContent'])) {
            $contentMatchSize = 999999;
        }

        $doc = array(
            '_source' => array(
                'url',
                'title',
                'author',
                'status.modate',
            ),
            'query' => array(
                'bool' => array(
                    'must' => $qMust
                )
            ),
            'highlight' => array(
                'pre_tags' => array('<em class="hl">'),
                'order' => 'score',
                'encoder' => 'html',
                'fields' => array(
                    'title' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'url' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'text' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 1,
                        'fragment_size' => $contentMatchSize,
                        'no_match_size' => $contentMatchSize,
                    ),
                )
            ),
            'aggregations' => array(
                'tags' => array(
                    'terms' => array(
                        'field' => 'tags'
                    )
                ),
                'language' => array(
                    'terms' => array(
                        'field' => 'language'
                    )
                ),
                'domain' => array(
                    'terms' => array(
                        'field' => 'domain'
                    )
                ),
                'type' => array(
                    'terms' => array(
                        'field' => 'type'
                    )
                )
            ),
            'from' => $page * $perPage,
            'size' => $perPage,
            'sort' => $sortCfg,
        );

        //the search definition is sent as body of a GET request
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_search',
            \HTTP_Request2::METHOD_GET
        );
        $r->setBody(json_encode($doc));
        $res = $r->send();
        return json_decode($res->getBody());
    }

    /**
     * Remove the first "KEYWORD:VALUE" token from a query string.
     *
     * The value is everything after the colon up to the next space. All
     * occurrences of the found token text are removed and the remaining
     * query is trimmed.
     *
     * @param string $query   Query string to search in
     * @param string $keyword Keyword without the colon, e.g. "after"
     *
     * @return array Remaining query string and the value; the query is
     *               unchanged and the value null if the keyword is missing
     */
    private function cutDateKeyword($query, $keyword)
    {
        if (!preg_match('#' . $keyword . ':([^ ]+)#', $query, $matches)) {
            return array($query, null);
        }
        return array(
            trim(str_replace($matches[0], '', $query)),
            $matches[1],
        );
    }
}
234 ?>