$this->baseUrl = $baseUrl;
}
+ /**
+  * Derive the document ID under which a URL is stored.
+  *
+  * The ID is the hex-encoded SHA-256 digest of the URL string.
+  *
+  * @param string $url URL of the document
+  *
+  * @return string Hexadecimal SHA-256 hash of $url
+  */
+ public static function getDocId($url)
+ {
+     $docId = hash('sha256', $url);
+     return $docId;
+ }
+
public function isKnown($url)
{
$r = new Elasticsearch_Request(
- $this->baseUrl . 'document/' . rawurlencode($url),
+ $this->baseUrl . 'document/' . static::getDocId($url),
\HTTP_Request2::METHOD_HEAD
);
$r->allow404 = true;
public function get($url)
{
$r = new Elasticsearch_Request(
- $this->baseUrl . 'document/' . rawurlencode($url),
+ $this->baseUrl . 'document/' . static::getDocId($url),
\HTTP_Request2::METHOD_GET
);
$r->allow404 = true;
/**
 * Store a placeholder document for the URL to mark it as queued.
 *
 * Sends a PUT request to "document/<doc id>" whose JSON body holds the URL
 * and a nested "status" object with processed=null and findable=false.
 * In this diff the nested object replaces the former flat
 * 'status' => 'queued' field, and the document path switches from the
 * raw-url-encoded URL to the ID returned by getDocId().
 *
 * @param string $url URL of the document to queue
 *
 * @return void
 */
public function markQueued($url)
{
$r = new Elasticsearch_Request(
- $this->baseUrl . 'document/' . rawurlencode($url),
+ $this->baseUrl . 'document/' . static::getDocId($url),
\HTTP_Request2::METHOD_PUT
);
- $doc = array(
- 'status' => 'queued',
- 'url' => $url
+ $doc = (object) array(
+ 'url' => $url,
+ 'status' => (object) array(
+ 'processed' => null,
+ 'findable' => false,
+ )
);
$r->setBody(json_encode($doc));
$r->send();
}
+ /**
+  * Fetch the document count and storage size of the index.
+  *
+  * Sends a GET request to "_stats/docs,store" below the base URL and reads
+  * the totals from the "_all.total" section of the JSON response.
+  *
+  * @return array Array with keys "documents" (docs.count) and "size"
+  *               (store.size_in_bytes); a value is null when the response
+  *               could not be decoded or lacks that field
+  */
+ public function getIndexStatus()
+ {
+     $r = new Elasticsearch_Request(
+         $this->baseUrl . '_stats/docs,store',
+         \HTTP_Request2::METHOD_GET
+     );
+     $res  = $r->send();
+     $data = json_decode($res->getBody());
+
+     // json_decode() returns null for an unparsable body, and the response
+     // may lack the expected sections. isset() walks the property chain
+     // without raising "property on null" notices/warnings.
+     $total = isset($data->_all->total) ? $data->_all->total : null;
+
+     return array(
+         'documents' => isset($total->docs->count)
+             ? $total->docs->count
+             : null,
+         'size' => isset($total->store->size_in_bytes)
+             ? $total->store->size_in_bytes
+             : null,
+     );
+ }
+
public function search($query, $filters, $site, $page, $perPage, $sort)
{
- if (preg_match('#nick:([^ ]*)#', $query, $matches)) {
- $authorName = $matches[1];
- $query = str_replace(
- 'nick:' . $authorName,
- 'author.name:' . $authorName,
- $query
- );
+ if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) {
+ foreach ($matches[1] as $authorName) {
+ $query = str_replace(
+ 'nick:' . $authorName,
+ 'author.name:' . $authorName,
+ $query
+ );
+ }
}
$qMust = array();//query parts for the MUST section
$query = trim(str_replace($matches[0], '', $query));
$qMust[] = array(
'range' => array(
- 'modate' => array(
+ 'status.modate' => array(
'gt' => $dateAfter . '||/d',
)
)
$query = trim(str_replace($matches[0], '', $query));
$qMust[] = array(
'range' => array(
- 'modate' => array(
+ 'status.modate' => array(
'lt' => $dateBefore . '||/d',
)
)
$query = trim(str_replace($matches[0], '', $query));
$qMust[] = array(
'range' => array(
- 'modate' => array(
+ 'status.modate' => array(
'gte' => $dateExact . '||/d',
'lte' => $dateExact . '||/d',
)
);
$qMust[] = array(
'term' => array(
- 'status' => 'indexed'
+ 'status.findable' => true
)
);
+ if ($sort == '' && $GLOBALS['phinde']['defaultSort'] == 'date') {
+ $sort = 'date';
+ }
if ($sort == 'date') {
- $sortCfg = array('modate' => array('order' => 'desc'));
+ $sortCfg = array('status.modate' => array('order' => 'desc'));
} else {
$sortCfg = array();
}
'url',
'title',
'author',
- 'modate',
+ 'status.modate',
),
'query' => array(
'bool' => array(
),
'highlight' => array(
'pre_tags' => array('<em class="hl">'),
+ 'post_tags' => array('</em>'),
'order' => 'score',
'encoder' => 'html',
'fields' => array(
//unset($doc['_source']);
- //ini_set('xdebug.var_display_max_depth', 10);
- //echo json_encode($doc);die();
+ if (false) {
+ ini_set('xdebug.var_display_max_depth', 10);
+ header('Content-type: application/json');
+ echo json_encode($doc, JSON_PRETTY_PRINT);die();
+ }
+
$r->setBody(json_encode($doc));
$res = $r->send();
return json_decode($res->getBody());