+//XPath helper bound to the parsed document
+$dx = new \DOMXPath($doc);
+
+//honor an explicit <base href="..."> in the document head:
+// only the first <base> element that carries a href attribute is used
+$xbase = $dx->evaluate('/html/head/base[@href]')->item(0);
+if ($xbase !== null) {
+    //the XPath predicate guarantees that the href attribute exists
+    $hrefAttr = $xbase->attributes->getNamedItem('href');
+    //NOTE(review): resolve() presumably returns the href resolved against
+    // the current base URL - confirm against the class of $base
+    $base = $base->resolve($hrefAttr->textContent);
+}
+
+
+//remove script, style and nav elements from the document
+// (removeTags() is defined elsewhere)
+foreach (array('script', 'style', 'nav') as $tagName) {
+    removeTags($doc, $tagName);
+}
+
+//find elements whose class list contains the microformats "e-content"
+// class; the class attribute is padded with spaces so that only whole
+// class names match
+$xpElems = $dx->query(
+    "//*[contains(concat(' ', normalize-space(@class), ' '), ' e-content ')]"
+);
+$contentById = $doc->getElementById('content');
+
+//pick the content root, in order of preference:
+// 1. the first e-content element
+// 2. the element with ID "content"
+// 3. the <body> element
+if ($xpElems->length > 0) {
+    $xpContext = $xpElems->item(0);
+} elseif ($contentById !== null) {
+    $xpContext = $contentById;
+} else {
+    //default content: <body>; this is null when the document has no body
+    $xpContext = $doc->getElementsByTagName('body')->item(0);
+    //FIXME: follow meta refresh, no body
+    // example: https://www.gnu.org/software/coreutils/
+}