$this->baseUrl = $baseUrl;
}
- /**
- * @link https://www.elastic.co/guide/en/elasticsearch/guide/current/_finding_exact_values.html
- */
+ public static function getDocId($url) // Maps a URL to the id used in document request paths: the SHA-256 hash of the URL string.
+ {
+ return hash('sha256', $url); // hash() returns lowercase hex digits by default, so the id is a 64-character hex string
+ }
+
public function isKnown($url)
{
$r = new Elasticsearch_Request(
- $this->baseUrl . 'document/_search/exists',
- \HTTP_Request2::METHOD_GET
+ $this->baseUrl . 'document/' . static::getDocId($url), // isKnown() checks for the document directly under its hashed id instead of running a term query on "url"
+ \HTTP_Request2::METHOD_HEAD // HEAD request: only the response status is inspected below, no body is read
);
$r->allow404 = true;
- $r->setBody(
- json_encode(
- array(
- 'query' => array(
- 'filtered' => array(
- 'filter' => array(
- 'term' => array(
- 'url' => $url
- )
- )
- )
- )
- )
- )
- );
- $res = json_decode($r->send()->getBody());
- return $res->exists;
+ $res = $r->send();
+ return $res->getStatus() == 200; // true only for a 200 response; any other status gives false (a 404 is presumably tolerated via allow404 above — confirm)
}
public function get($url)
{
$r = new Elasticsearch_Request(
- $this->baseUrl . 'document/' . rawurlencode($url),
+ $this->baseUrl . 'document/' . static::getDocId($url),
\HTTP_Request2::METHOD_GET
);
$r->allow404 = true;
public function markQueued($url)
{
$r = new Elasticsearch_Request(
- $this->baseUrl . 'document/' . rawurlencode($url),
+ $this->baseUrl . 'document/' . static::getDocId($url), // the PUT target is keyed by the hashed URL rather than the raw-encoded URL
\HTTP_Request2::METHOD_PUT
);
- $doc = array(
- 'status' => 'queued',
- 'url' => $url
+ $doc = (object) array( // document body that markQueued() stores for the URL
+ 'url' => $url, // the original URL is kept in the body, since the id is only its hash
+ 'status' => (object) array( // status is a nested object here, replacing the former string 'queued'
+ 'processed' => null, // NOTE(review): presumably filled in once the URL has been processed — confirm
+ 'findable' => false, // search() adds a must-term on status.findable = true, so this document is not returned by searches
+ )
);
$r->setBody(json_encode($doc));
$r->send();
}
- public function search($query, $filters, $site, $page, $perPage)
+ public function countDocuments() // Returns the "count" value reported by the document/_count endpoint.
{
+ $r = new Elasticsearch_Request(
+ $this->baseUrl . 'document/_count', // no request body is set, so no query restricts what is counted
+ \HTTP_Request2::METHOD_GET
+ );
+ $res = $r->send();
+ return json_decode($res->getBody())->count; // reads "count" from the decoded JSON body; NOTE(review): no guard here if the body fails to decode
+ }
+
+ public function search($query, $filters, $site, $page, $perPage, $sort)
+ {
+ if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) {
+ foreach ($matches[1] as $authorName) {
+ $query = str_replace(
+ 'nick:' . $authorName,
+ 'author.name:' . $authorName,
+ $query
+ );
+ }
+ }
+
+ $qMust = array();//query parts for the MUST section
+
+ //modification date filters
+ if (preg_match('#after:([^ ]+)#', $query, $matches)) {
+ $dateAfter = $matches[1];
+ $query = trim(str_replace($matches[0], '', $query));
+ $qMust[] = array(
+ 'range' => array(
+ 'modate' => array(
+ 'gt' => $dateAfter . '||/d',
+ )
+ )
+ );
+ }
+ if (preg_match('#before:([^ ]+)#', $query, $matches)) {
+ $dateBefore = $matches[1];
+ $query = trim(str_replace($matches[0], '', $query));
+ $qMust[] = array(
+ 'range' => array(
+ 'modate' => array(
+ 'lt' => $dateBefore . '||/d',
+ )
+ )
+ );
+ }
+ if (preg_match('#date:([^ ]+)#', $query, $matches)) {
+ $dateExact = $matches[1];
+ $query = trim(str_replace($matches[0], '', $query));
+ $qMust[] = array(
+ 'range' => array(
+ 'modate' => array(
+ 'gte' => $dateExact . '||/d',
+ 'lte' => $dateExact . '||/d',
+ )
+ )
+ );
+ }
+
+ $qMust[] = array(
+ 'query_string' => array(
+ 'default_field' => '_all',
+ 'default_operator' => 'AND',
+ 'query' => $query
+ )
+ );
+ $qMust[] = array(
+ 'term' => array(
+ 'status.findable' => true
+ )
+ );
+
+ if ($sort == 'date') {
+ $sortCfg = array('status.modate' => array('order' => 'desc'));
+ } else {
+ $sortCfg = array();
+ }
+
+ $contentMatchSize = 100;
+ if ($GLOBALS['phinde']['showFullContent']) {
+ $contentMatchSize = 999999;
+ }
+
$r = new Elasticsearch_Request(
$this->baseUrl . 'document/_search',
\HTTP_Request2::METHOD_GET
'url',
'title',
'author',
- 'modate',
+ 'status.modate',
),
'query' => array(
'bool' => array(
- 'must' => array(
- array(
- 'query_string' => array(
- 'default_field' => '_all',
- 'default_operator' => 'AND',
- 'query' => $query
- )
- ),
- array(
- 'term' => array(
- 'status' => 'indexed'
- )
- ),
- )
+ 'must' => $qMust
)
),
'highlight' => array(
'text' => array(
'require_field_match' => false,
'number_of_fragments' => 1,
+ 'fragment_size' => $contentMatchSize,
+ 'no_match_size' => $contentMatchSize,
),
)
),
),
'from' => $page * $perPage,
'size' => $perPage,
- 'sort' => array(
- //array('modate' => array('order' => 'desc'))
- )
+ 'sort' => $sortCfg,
);
foreach ($filters as $type => $value) {
$doc['query']['bool']['must'][] = array(