about summary refs log tree commit diff
path: root/bin/index.php
diff options
context:
space:
mode:
author Christian Weiske <cweiske@cweiske.de> 2016-02-04 23:46:45 +0100
committer Christian Weiske <cweiske@cweiske.de> 2016-02-04 23:46:45 +0100
commit 9f0bdf5bf6d5a40c3673647c5861d91ccd2f9225 (patch)
tree 24524bb0582bb366a40dfbbec8357748f34da28a /bin/index.php
parent 1de186bb0fbe997a1595b0cdeac146f9a3647368 (diff)
download phinde-9f0bdf5bf6d5a40c3673647c5861d91ccd2f9225.tar.gz
phinde-9f0bdf5bf6d5a40c3673647c5861d91ccd2f9225.zip
remove multiple tags
Diffstat (limited to 'bin/index.php')
-rwxr-xr-x bin/index.php 22
1 files changed, 14 insertions, 8 deletions
diff --git a/bin/index.php b/bin/index.php
index 374923c..04cc9ac 100755
--- a/bin/index.php
+++ b/bin/index.php
@@ -14,6 +14,16 @@ if ($argc < 2) {
exit(1);
}
+function removeTags($doc, $tag) { // Removes every element named $tag from $doc by detaching it from its parent node; returns nothing.
+ $elems = array(); // snapshot of all matching elements, filled completely before anything is removed
+ foreach ($doc->getElementsbyTagName($tag) as $elem) {
+ $elems[] = $elem;
+ }
+ foreach ($elems as $elem) { // removal runs over the snapshot, not over the query result (presumably because that node list is live and would shift while iterating - confirm against the DOM docs)
+ $elem->parentNode->removeChild($elem);
+ }
+}
+
$es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
$url = $argv[1];
@@ -61,13 +71,9 @@ $doc = new \DOMDocument();
$dx = new \DOMXPath($doc);
//remove script tags
-$elems = array();
-foreach ($doc->getElementsbyTagName('script') as $elem) {
- $elems[] = $elem;
-}
-foreach ($elems as $elem) {
- $elem->parentNode->removeChild($elem);
-}
+removeTags($doc, 'script');
+removeTags($doc, 'style');
+removeTags($doc, 'nav');
//default content: <body>
$xpContext = $doc->getElementsByTagName('body')->item(0);
@@ -128,7 +134,7 @@ foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6') as $headlinetype) {
}
}
-//FIXME: limit to h-entry e-content
+//FIXME: split paragraphs
//FIXME: insert space after br
$indexDoc->text = array();
$indexDoc->text[] = trim(