git.cweiske.de
/
phinde.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
1de186b
)
remove multiple tags
author
Christian Weiske
<cweiske@cweiske.de>
Thu, 4 Feb 2016 22:46:45 +0000
(23:46 +0100)
committer
Christian Weiske
<cweiske@cweiske.de>
Thu, 4 Feb 2016 22:46:45 +0000
(23:46 +0100)
bin/index.php
patch
|
blob
|
history
diff --git a/bin/index.php b/bin/index.php
index 374923c393fba5d12faf4cf0478c5047e445de76..04cc9ac01c39b2244c465a1702ce37ae809694bc 100755
(executable)
--- a/bin/index.php
+++ b/bin/index.php
@@ -14,6 +14,16 @@
if ($argc < 2) {
exit(1);
}
exit(1);
}
+function removeTags($doc, $tag) {
+ $elems = array();
+ foreach ($doc->getElementsbyTagName($tag) as $elem) {
+ $elems[] = $elem;
+ }
+ foreach ($elems as $elem) {
+ $elem->parentNode->removeChild($elem);
+ }
+}
+
$es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
$url = $argv[1];
$es = new Elasticsearch($GLOBALS['phinde']['elasticsearch']);
$url = $argv[1];
@@ -61,13 +71,9 @@
$doc = new \DOMDocument();
$dx = new \DOMXPath($doc);
//remove script tags
$dx = new \DOMXPath($doc);
//remove script tags
-$elems = array();
-foreach ($doc->getElementsbyTagName('script') as $elem) {
- $elems[] = $elem;
-}
-foreach ($elems as $elem) {
- $elem->parentNode->removeChild($elem);
-}
+removeTags($doc, 'script');
+removeTags($doc, 'style');
+removeTags($doc, 'nav');
//default content: <body>
$xpContext = $doc->getElementsByTagName('body')->item(0);
//default content: <body>
$xpContext = $doc->getElementsByTagName('body')->item(0);
@@ -128,7 +134,7 @@
foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6') as $headlinetype) {
}
}
}
}
-//FIXME: limit to h-entry e-content
+//FIXME: split paragraphs
//FIXME: insert space after br
$indexDoc->text = array();
$indexDoc->text[] = trim(
//FIXME: insert space after br
$indexDoc->text = array();
$indexDoc->text[] = trim(