class Indexer
{
+ /**
+  * MIME types accepted by run(), stored as array keys (value is always
+  * true) so that membership is tested by key with isset().
+  */
static $supportedTypes = array(
- 'application/xhtml+xml',
- 'text/html',
+ 'application/xhtml+xml' => true,
+ 'text/html' => true,
);
+ /**
+  * Checks the response MIME type and applies robots "noindex" handling
+  * to the document that is being prepared for the index.
+  *
+  * NOTE(review): this excerpt omits parts of the method; where $res, $url,
+  * $esDoc and $meta are assigned is not visible here.
+  *
+  * @return bool As far as this excerpt shows: false when the MIME type is
+  *              not supported, true otherwise (including when the page
+  *              opts out of indexing via "noindex").
+  */
public function run(Retrieved $retrieved)
$mimetype = explode(';', $res->getHeader('content-type'))[0];
- if (!in_array($mimetype, static::$supportedTypes)) {
+ // $supportedTypes is keyed by MIME type. in_array() would search the
+ // values (all true) with loose comparison, so nearly any string would
+ // be reported as supported; test the key instead.
+ if (!isset(static::$supportedTypes[$mimetype])) {
- echo "MIME type not supported for indexing: $mimetype\n";
+ Log::info("MIME type not supported for indexing: $mimetype");
return false;
}
if ($esDoc === null) {
- $esDoc = new \stdClass();
- }
- if (!isset($esDoc->status)) {
- $esDoc->status = new \stdClass();
+ $esDoc = Helper::baseDoc($url);
+ $retrieved->esDoc = $esDoc;
}
//FIXME: update index only if changed since last index time
$robots = $meta->attributes->getNamedItem('content')->textContent;
foreach (explode(',', $robots) as $value) {
if (trim($value) == 'noindex') {
- echo "URL does not want to be indexed: $url\n";
- exit(0);
+ // The page opted out: flag the document as not findable and return
+ // to the caller instead of terminating the whole process.
+ $esDoc->status->findable = false;
+ return true;
}
}
}
//var_dump($esDoc);die();
- $retrieved->esDoc = $esDoc;
return true;
}