Respect <meta name="robots" content="noindex"/>
[phinde.git] / src / phinde / Elasticsearch.php
1 <?php
2 namespace phinde;
3
/**
 * Small client for the Elasticsearch "document" type used by the search index.
 *
 * Every request URL is built by appending a relative path directly to the
 * base URL given to the constructor.
 */
class Elasticsearch
{
    /**
     * URL that request paths get appended to. Paths are concatenated without
     * a separator, so the value has to end with a slash.
     *
     * @var string
     */
    protected $baseUrl;

    /**
     * @param string $baseUrl URL that request paths are appended to
     */
    public function __construct($baseUrl)
    {
        $this->baseUrl = $baseUrl;
    }

    /**
     * Ask the index whether a document with exactly this URL exists.
     *
     * NOTE(review): the "_search/exists" endpoint and the "filtered" query
     * appear to belong to older Elasticsearch releases - confirm the server
     * version before upgrading.
     *
     * @param string $url URL matched against the "url" field with a term filter
     *
     * @return boolean True when the response reports "exists", false otherwise
     *                 (including when the response body is not a JSON object)
     *
     * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
     */
    public function isKnown($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_search/exists',
            \HTTP_Request2::METHOD_GET
        );
        // A 404 answer is an expected outcome here, not an error
        $r->allow404 = true;
        $r->setBody(
            json_encode(
                array(
                    'query' => array(
                        'filtered' => array(
                            'filter' => array(
                                'term' => array(
                                    'url' => $url
                                )
                            )
                        )
                    )
                )
            )
        );

        $res = json_decode($r->send()->getBody());
        // json_decode() yields null for an undecodable body; never read a
        // property from that
        return is_object($res) && !empty($res->exists);
    }

    /**
     * Fetch the stored document for a URL.
     *
     * The URL-encoded URL is used as last path segment of the request.
     *
     * @param string $url URL of the document
     *
     * @return object|null The "_source" part of the response, or null when the
     *                     status is not 200 or the body carries no "_source"
     */
    public function get($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/' . rawurlencode($url),
            \HTTP_Request2::METHOD_GET
        );
        $r->allow404 = true;
        $res = $r->send();
        if ((int) $res->getStatus() !== 200) {
            return null;
        }

        $d = json_decode($res->getBody());
        if (!is_object($d) || !isset($d->_source)) {
            return null;
        }
        return $d->_source;
    }

    /**
     * PUT a document with status "queued" for the given URL.
     *
     * The URL-encoded URL is used as last path segment of the request.
     *
     * @param string $url URL to store
     *
     * @return void
     */
    public function markQueued($url)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/' . rawurlencode($url),
            \HTTP_Request2::METHOD_PUT
        );
        $doc = array(
            'status' => 'queued',
            'url' => $url
        );
        $r->setBody(json_encode($doc));
        $r->send();
    }

    /**
     * Run a search and return the decoded response.
     *
     * @param string  $query   Text handed to the "query_string" query
     * @param array   $filters Field name => value pairs; each one becomes an
     *                         additional required "term" clause
     * @param string  $site    Prefix matched against "schemalessUrl";
     *                         empty for no restriction
     * @param integer $page    Page index; the result offset is
     *                         $page * $perPage, so the first page is 0
     * @param integer $perPage Number of results to request
     * @param string  $sort    "date" sorts by "modate" descending; any other
     *                         value sends an empty sort configuration
     *
     * @return mixed Result of json_decode() on the response body
     */
    public function search($query, $filters, $site, $page, $perPage, $sort)
    {
        $r = new Elasticsearch_Request(
            $this->baseUrl . 'document/_search',
            \HTTP_Request2::METHOD_GET
        );
        $doc = $this->buildSearchDocument(
            $query, $filters, $site, $page, $perPage, $sort
        );
        $r->setBody(json_encode($doc));
        $res = $r->send();
        return json_decode($res->getBody());
    }

    /**
     * Assemble the request body for search(); parameters as documented there.
     *
     * @return array Search request ready to be JSON-encoded
     */
    protected function buildSearchDocument(
        $query, $filters, $site, $page, $perPage, $sort
    ) {
        // Strict comparison: a loose one treats 0 as "date" on PHP < 8
        if ($sort === 'date') {
            $sortCfg = array('modate' => array('order' => 'desc'));
        } else {
            $sortCfg = array();
        }

        // Only documents with status "indexed" may show up
        $must = array(
            array(
                'query_string' => array(
                    'default_field' => '_all',
                    'default_operator' => 'AND',
                    'query' => $query
                )
            ),
            array(
                'term' => array(
                    'status' => 'indexed'
                )
            ),
        );
        // Guard against null so that "no filters" does not raise a warning
        if (!empty($filters)) {
            foreach ($filters as $type => $value) {
                $must[] = array(
                    'term' => array(
                        $type => $value
                    )
                );
            }
        }
        if ((string) $site !== '') {
            $must[] = array(
                'prefix' => array(
                    'schemalessUrl' => array(
                        'value' => $site
                    )
                )
            );
        }

        // One "terms" aggregation per field, named like the field
        $aggregations = array();
        foreach (array('tags', 'language', 'domain', 'type') as $field) {
            $aggregations[$field] = array(
                'terms' => array(
                    'field' => $field
                )
            );
        }

        return array(
            '_source' => array(
                'url',
                'title',
                'author',
                'modate',
            ),
            'query' => array(
                'bool' => array(
                    'must' => $must
                )
            ),
            'highlight' => array(
                'pre_tags' => array('<em class="hl">'),
                'order' => 'score',
                'encoder' => 'html',
                'fields' => array(
                    'title' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'url' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 0,
                    ),
                    'text' => array(
                        'require_field_match' => false,
                        'number_of_fragments' => 1,
                    ),
                )
            ),
            'aggregations' => $aggregations,
            'from' => $page * $perPage,
            'size' => $perPage,
            'sort' => $sortCfg,
        );
    }
}
179 ?>