Fix notice when the existing document has no "modate" property
[phinde.git] / src / phinde / Crawler.php
index 43d9459328ce9cf8fa4bc998f7de4b96555a1a7f..a63815de69f93e008e5ceed04b4b0958f8f0e83c 100644 (file)
@@ -47,7 +47,7 @@ class Crawler
             'accept',
             implode(',', array_keys(static::$supportedIndexTypes))
         );
-        if ($existingDoc) {
+        if ($existingDoc && isset($existingDoc->modate)) {
             $nMoDate = strtotime($existingDoc->modate);
             $req->setHeader('If-Modified-Since: ' . date('r', $nMoDate));
         }
@@ -84,11 +84,19 @@ class Crawler
             if ($this->es->isKnown($linkInfo->url)) {
                 continue;
             }
-            $this->es->markQueued($linkInfo->url);
-            $this->queue->addToIndex(
-                $linkInfo->url, $linkInfo->title, $linkInfo->source
-            );
-            if (Helper::isUrlAllowed($linkInfo->url)) {
+            $allowed = Helper::isUrlAllowed($linkInfo->url);
+            $crawl   = $allowed;
+            $index   = $GLOBALS['phinde']['indexNonAllowed'] || $allowed;
+
+            if ($crawl || $index) {
+                $this->es->markQueued($linkInfo->url);
+            }
+            if ($index) {
+                $this->queue->addToIndex(
+                    $linkInfo->url, $linkInfo->title, $linkInfo->source
+                );
+            }
+            if ($allowed) {
                 $this->queue->addToCrawl($linkInfo->url);
             }
         }