about summary refs log tree commit diff
path: root/src/phinde
diff options
context:
space:
mode:
author Christian Weiske <cweiske@cweiske.de> 2020-02-29 21:16:44 +0100
committer Christian Weiske <cweiske@cweiske.de> 2020-02-29 21:16:44 +0100
commit 18d36cb052c42c20edda2814545c9bdf3fb1cbc5 (patch)
tree d6ebd28dd7c575ba4e1c8b6eb4ad5a503e7884c6 /src/phinde
parent e8e4778a7550636790d7b33d96a83bbbd3ac21ae (diff)
download phinde-elastic6.tar.gz
phinde-elastic6.zip
Diffstat (limited to 'src/phinde')
-rw-r--r-- src/phinde/Elasticsearch.php 9
-rw-r--r-- src/phinde/Elasticsearch/Request.php 20
-rw-r--r-- src/phinde/Fetcher.php 15
-rw-r--r-- src/phinde/Helper.php 8
-rw-r--r-- src/phinde/Indexer.php 3
5 files changed, 48 insertions, 7 deletions
diff --git a/src/phinde/Elasticsearch.php b/src/phinde/Elasticsearch.php
index 5ca2180..8a6d8c7 100644
--- a/src/phinde/Elasticsearch.php
+++ b/src/phinde/Elasticsearch.php
@@ -167,6 +167,7 @@ class Elasticsearch
),
'highlight' => array(
'pre_tags' => array('<em class="hl">'),
+ 'post_tags' => array('</em>'),
'order' => 'score',
'encoder' => 'html',
'fields' => array(
@@ -231,8 +232,12 @@ class Elasticsearch
//unset($doc['_source']);
- //ini_set('xdebug.var_display_max_depth', 10);
- //echo json_encode($doc);die();
+ if (false) {
+ ini_set('xdebug.var_display_max_depth', 10);
+ header('Content-type: application/json');
+ echo json_encode($doc, JSON_PRETTY_PRINT);die();
+ }
+
$r->setBody(json_encode($doc));
$res = $r->send();
return json_decode($res->getBody());
diff --git a/src/phinde/Elasticsearch/Request.php b/src/phinde/Elasticsearch/Request.php
index 7bb6add..1f9cd99 100644
--- a/src/phinde/Elasticsearch/Request.php
+++ b/src/phinde/Elasticsearch/Request.php
@@ -30,6 +30,24 @@ class Elasticsearch_Request extends \HTTP_Request2
. $error
);
}
-}
+ /**
+ * Sets the request body - inject content type
+ *
+ * @param mixed $body Either a string with the body or filename
+ * containing body or pointer to an open file or
+ * object with multipart body data
+ * @param bool $isFilename Whether first parameter is a filename
+ *
+ * @return HTTP_Request2
+ * @throws HTTP_Request2_LogicException
+ *
+ * @link https://www.elastic.co/blog/strict-content-type-checking-for-elasticsearch-rest-requests
+ */
+ public function setBody($body, $isFilename = false)
+ {
+ $this->setHeader('content-type', 'application/json');
+ return parent::setBody($body, $isFilename);
+ }
+}
?>
diff --git a/src/phinde/Fetcher.php b/src/phinde/Fetcher.php
index dccb118..63f5a43 100644
--- a/src/phinde/Fetcher.php
+++ b/src/phinde/Fetcher.php
@@ -60,6 +60,21 @@ class Fetcher
}
//FIXME: etag, hash on content
+ if ($esDoc === null) {
+ //not known yet
+ $esDoc = Helper::baseDoc($url);
+ }
+
+ $lm = $res->getHeader('last-modified');
+ if ($lm !== null) {
+ $esDoc->status->modate = gmdate('c', strtotime($lm));
+ } else {
+ $esDoc->status->modate = gmdate('c');
+ }
+ if ($esDoc->status->crdate == '') {
+ $esDoc->status->crdate = $esDoc->status->modate;
+ }
+
$retrieved = new Retrieved();
$retrieved->httpRes = $res;
$retrieved->esDoc = $esDoc;
diff --git a/src/phinde/Helper.php b/src/phinde/Helper.php
index aeb8ba5..55c8bbd 100644
--- a/src/phinde/Helper.php
+++ b/src/phinde/Helper.php
@@ -81,9 +81,15 @@ class Helper
public static function baseDoc($url)
{
$esDoc = new \stdClass();
- $esDoc->status = new \stdClass();
+ $esDoc->status = (object) array(
+ 'findable' => false,
+ 'modate' => '',
+ 'crdate' => '',
+ 'processed' => '',
+ );
$esDoc->url = $url;
$esDoc->schemalessUrl = Helper::noSchema($url);
+ $esDoc->domain = parse_url($url, PHP_URL_HOST);
return $esDoc;
}
}
diff --git a/src/phinde/Indexer.php b/src/phinde/Indexer.php
index bdd5236..4efef42 100644
--- a/src/phinde/Indexer.php
+++ b/src/phinde/Indexer.php
@@ -77,12 +77,9 @@ class Indexer
$xpContext = $doc->getElementById('content');
}
- $esDoc->url = $url;
- $esDoc->schemalessUrl = Helper::noSchema($url);
$esDoc->type = 'html';
$esDoc->subtype = '';
$esDoc->mimetype = $mimetype;
- $esDoc->domain = parse_url($url, PHP_URL_HOST);
//$esDoc->source = 'FIXME';
//$esDoc->sourcetitle = 'FIXME';