4 require_once __DIR__ . '/../src/init.php';
// Build the command-line parser for this script with Console_CommandLine.
6 $cc = new \Console_CommandLine();
7 $cc->description = 'phinde URL processor';
8 $cc->version = '0.0.1';
// "--force" switch: StoreTrue makes it a boolean flag. Per its description it
// requests processing even when the URL did not change. The value is read
// further down as $res->options['force'].
13 'long_name' => '--force',
14 'description' => 'Always process URL, even when it did not change',
15 'action' => 'StoreTrue',
// "--show-links" switch: boolean flag; per its description only the URLs that
// were found are shown.
// NOTE(review): presumably registered under the name "showLinksOnly", which is
// the key read later - the registering line is not visible here; confirm.
23 'long_name' => '--show-links',
24 'description' => 'Only show which URLs were found',
25 'action' => 'StoreTrue',
// Positional argument holding the URL (read later as $res->args['url']).
32 'description' => 'URL to process',
// Positional argument listing the actions to run. Only "index" and "crawl"
// are accepted; both are used when the argument is omitted.
39 'description' => 'Actions to take',
42 'choices' => array('index', 'crawl'),
43 'default' => array('index', 'crawl'),
// Failure of the guarded statement (presumably the parse call - its line is
// not visible here) is reported through the parser's own error display.
48 } catch (\Exception $e) {
49 $cc->displayError($e->getMessage());
// Take the URL given on the command line and bring it into canonical form.
52 $url = $res->args['url'];
// NOTE(review): presumably prepends a scheme when the user omitted one -
// Helper::addSchema() is defined elsewhere; confirm.
53 $url = Helper::addSchema($url);
// Normalise through Net_URL2 so later steps work on one canonical string.
54 $urlObj = new \Net_URL2($url);
55 $url = $urlObj->getNormalizedURL();
// Translate each requested action name into the object that performs it.
// The handlers are collected in $actions, keyed by the action name.
// NOTE(review): the initialisation of $actions is not visible here.
59 foreach ($res->args['actions'] as $action) {
60 if ($action == 'crawl') {
// The crawler is told whether it should only show the links it finds
// (value of the "showLinksOnly" command-line option).
61 $crawler = new Crawler();
62 $crawler->setShowLinksOnly($res->options['showLinksOnly']);
63 $actions[$action] = $crawler;
64 } else if ($action == 'index') {
65 $actions[$action] = new Indexer();
// Fetch the URL. The action handlers and the "force" flag are handed to the
// fetcher; a strict false return is handled separately (branch body not
// visible here).
69 $fetcher = new Fetcher();
70 $retrieved = $fetcher->fetch($url, $actions, $res->options['force']);
71 if ($retrieved === false) {
// Run every selected action on the retrieved document, logging each step.
// $update accumulates the bitwise OR of all run() results.
// NOTE(review): the initial value of $update is set on a line not visible here.
76 foreach ($actions as $key => $action) {
77 Log::info("step: $key");
78 $update |= $action->run($retrieved);
// NOTE(review): presumably the document is stored only when $update is truthy
// and "Not updating" is logged otherwise - the condition is not visible here.
82 //FIXME: update index if it exists already
83 $fetcher->storeDoc($retrieved->url, $retrieved->esDoc);
85 Log::info("Not updating");
// Any exception raised in the guarded section is logged as an error.
87 } catch (\Exception $e) {
88 Log::error($e->getMessage());