aboutsummaryrefslogtreecommitdiff
path: root/bin
diff options
context:
space:
mode:
author Christian Weiske <cweiske@cweiske.de> 2016-02-03 06:21:30 +0100
committer Christian Weiske <cweiske@cweiske.de> 2016-02-03 06:21:30 +0100
commit 226508cd8d3e8c147ad314a0de483e08be71c254 (patch)
tree 4142696d28830efa13835be79fd3ee888a4ab0a4 /bin
parent 7b4425b096fa8c18d0db9fd9b1ae96d63ee8af55 (diff)
download phinde-226508cd8d3e8c147ad314a0de483e08be71c254.tar.gz
phinde-226508cd8d3e8c147ad314a0de483e08be71c254.zip
first frontend
Diffstat (limited to 'bin')
-rwxr-xr-x bin/crawl.php 11
-rwxr-xr-x bin/index.php 9
2 files changed, 4 insertions, 16 deletions
diff --git a/bin/crawl.php b/bin/crawl.php
index 26cf994..17b1fc3 100755
--- a/bin/crawl.php
+++ b/bin/crawl.php
@@ -1,14 +1,7 @@
#!/usr/bin/env php
<?php
namespace phinde;
-
-set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path());
-require_once __DIR__ . '/../data/config.php';
-require_once 'HTTP/Request2.php';
-require_once 'Elasticsearch.php';
-require_once 'Elasticsearch/Request.php';
-require_once 'Net/URL2.php';
-require_once 'functions.php';
+require_once __DIR__ . '/../src/init.php';
$supportedCrawlTypes = array(
'text/html', 'application/xhtml+xml'
@@ -23,7 +16,7 @@ if ($argc < 2) {
$es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
$url = $argv[1];
-if (!isUrlAllowed($url)) {
+if (!Helper::isUrlAllowed($url)) {
echo "Domain is not allowed; not crawling\n";
exit(2);
}
diff --git a/bin/index.php b/bin/index.php
index 6a13afd..eb82df2 100755
--- a/bin/index.php
+++ b/bin/index.php
@@ -2,12 +2,7 @@
<?php
namespace phinde;
// index a given URL
-set_include_path(__DIR__ . '/../src/' . PATH_SEPARATOR . get_include_path());
-require_once __DIR__ . '/../data/config.php';
-require_once 'HTTP/Request2.php';
-require_once 'Elasticsearch.php';
-require_once 'Elasticsearch/Request.php';
-require_once 'functions.php';
+require_once __DIR__ . '/../src/init.php';
$supportedIndexTypes = array(
'application/xhtml+xml',
@@ -143,7 +138,7 @@ if (count($arSxdates)) {
//language
//there may be "en-US" and "de-DE"
-$indexDoc->language = substr((string) $sx['lang'], 0, 2);
+$indexDoc->language = strtolower(substr((string) $sx['lang'], 0, 2));
//FIXME: fallback, autodetection
//FIXME: check noindex