first version of comment extraction
[stapibas.git] / tests / stapibas / Content / Extractor / data / aaron-parecki.html
diff --git a/tests/stapibas/Content/Extractor/data/aaron-parecki.html b/tests/stapibas/Content/Extractor/data/aaron-parecki.html
new file mode 100644 (file)
index 0000000..3d0b91b
--- /dev/null
@@ -0,0 +1,232 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>@eschnou It worked! Now here's a reply! #indieweb - Aaron Parecki</title>
+        <meta name="author" content="Aaron Parecki">
+    <!-- discovery links: OpenSearch description, webmention endpoint, and a pingback endpoint whose URL carries the webmention endpoint in its forward= parameter -->
+    <link href="/opensearch.xml" rel="search" title="Search aaronparecki.com" type="application/opensearchdescription+xml">
+    <link rel="http://webmention.org/" href="http://aaronparecki.com/webmention.php" />
+    <link rel="pingback" href="http://pingback.me/webmention?forward=http%3A%2F%2Faaronparecki.com%2Fwebmention.php" />
+
+    <!-- HTML5 shim, for IE6-8 support of HTML elements -->
+    <!--[if lt IE 9]>
+      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+    <![endif]-->
+    <!-- viewport: layout width follows the device width, initial scale 1 -->
+    <meta name="viewport" content="width=device-width,initial-scale=1">
+    <!-- stylesheets: Bootstrap 2.2.2 (base and responsive) first, then the site's own CSS files -->
+    <link href="/bootstrap-2.2.2/css/bootstrap.min.css" rel="stylesheet">
+    <link href="/bootstrap-2.2.2/css/bootstrap-responsive.min.css" rel="stylesheet">
+    <link href="/css/style.css?body=1" rel="stylesheet" type="text/css" media="all">
+    <link href="/css/aaronpk.css" rel="stylesheet" type="text/css" media="all">
+    <!-- favicon plus apple-touch icons in four sizes (54, 72, 114, 144 px) -->
+    <link rel="icon" href="/favicon.ico">
+    <link rel="apple-touch-icon" sizes="54x54" href="/images/aaronpk-54.png">
+    <link rel="apple-touch-icon" sizes="72x72" href="/images/aaronpk-72.png">
+    <link rel="apple-touch-icon" sizes="114x114" href="/images/aaronpk-114.png">
+    <link rel="apple-touch-icon" sizes="144x144" href="/images/aaronpk-144.png">
+
+    <meta property="og:title" content="Reply from Aaron Parecki on 4/19 11:35am"> <!-- Open Graph metadata for this reply follows -->
+    <meta property="og:type" content="article">
+    <meta property="og:url" content="http://aaronparecki.com/replies/2013/04/19/2/indieweb">
+    <meta property="og:image" content="http://aaronparecki.com/images/aaronpk-512.jpg">
+    <meta property="og:site_name" content="Aaron Parecki">
+    <meta property="og:description" content="@eschnou It worked! Now here's a reply! #indieweb">
+    <meta property="fb:admins" content="11500459">
+    <!-- Twitter card metadata; note that twitter:image below has an empty content value -->
+    <meta name="twitter:card" content="summary">
+<meta name="twitter:site" content="@aaronpk">
+<meta name="twitter:creator" content="@aaronpk">
+<meta name="twitter:url" content="http://aaronparecki.com/replies/2013/04/19/2/indieweb">
+<meta name="twitter:title" content="@eschnou It worked! Now here's a reply! #indieweb">
+<meta name="twitter:description" content="@eschnou It worked! Now here's a reply! #indieweb">
+<meta name="twitter:image" content="">
+<meta name="twitter:domain" content="aaronparecki.com">
+
+    <script type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-4617305-1']);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</script>
+    <!-- The script above reuses an existing _gaq array or creates one, queues _setAccount and _trackPageview on it, then inserts an async script element for ga.js before the first script on the page, using the ssl host on https pages and the www host otherwise. -->
+  </head>
+
+  <body>
+    <div id="fb-root"></div> <!-- empty placeholder div; presumably the mount point the Facebook SDK looks for (confirm) -->
+<script>(function(d, s, id) {
+  var js, fjs = d.getElementsByTagName(s)[0];
+  if (d.getElementById(id)) return; // an element with this id already exists: do not inject the SDK twice
+  js = d.createElement(s); js.id = id;
+  js.src = "https://connect.facebook.net/en_US/all.js#xfbml=1&appId=163993483698061"; // explicit https: a scheme-relative URL would inherit file: when this page is opened from disk
+  fjs.parentNode.insertBefore(js, fjs);
+}(document, 'script', 'facebook-jssdk'));</script>
+<script>
+// Relays Facebook like / unlike / send events to the _gaq queue as '_trackSocial' entries.
+window.fbAsyncInit = function() {
+  function relay(fbEvent, action) {
+    FB.Event.subscribe(fbEvent, function(url) {
+      _gaq.push(['_trackSocial', 'facebook', action, url]);
+    });
+  }
+  relay('edge.create', 'like');
+  relay('edge.remove', 'unlike');
+  relay('message.send', 'send');
+};
+</script>
+    <div id="site-title" class="search container-fluid"><div class="row-fluid"> <!-- site header bar: right-floated top navigation followed by the site name -->
+              <ul class="top-nav" style="float: right;">
+          <li><a href="/articles">Articles</a></li>
+          <li><a href="/notes">Notes</a></li>
+          <li><a href="/metrics">Metrics</a></li>
+          <li><a href="/contact">Contact</a></li>
+          <li>&bull;</li> <!-- bullet item placed between the section links and the time link -->
+          <li><a href="http://map.geoloqi.com/45.5,-122.7?z=11">9:52pm PDT</a></li> <!-- link text is a clock time; the target is a map.geoloqi.com URL -->
+        </ul>
+      <!-- site name, linking to the home page -->
+      <h1><a href="/">Aaron Parecki</a></h1>
+    </div></div>
+
+    <div id="page">
+<!-- the h-entry container below wraps the reply post and its sidebar -->
+<div class="container-fluid container-narrow h-entry">
+  <div class="row-fluid">
+    <div class="span8 note">
+<!-- reply context: h-card of the person being replied to, the quoted text, and a u-in-reply-to link (wrapping a timestamp) to the original post -->
+          <div class="span12 note-context external-note">
+        <ul class="single">
+        <li><div class="inner">    <div class="minicard h-card vcard">
+      <img class="photo logo u-photo" src="http://eschnou.com/thumbnail/83mY3tVWgoSt7QGfe6f4COJ1hziC4PWF" alt="Laurent Eschenauer" width="48" />
+      <a class="p-name fn value" href="http://eschnou.com">Laurent Eschenauer</a>
+      <a href="http://eschnou.com" class="u-url">eschnou.com</a>
+    </div>
+<div class="quote-text">Testing <a href="/tag/indieweb">#<span class="p-category">indieweb</span></a> federation with <a href="http://waterpigs.co.uk">@waterpigs.co.uk</a>, <a href="http://aaronparecki.com">@aaronparecki.com</a> and <a href="http://indiewebcamp.com">@indiewebcamp.com</a> !</div><a href="http://eschnou.com/entry/testing-indieweb-federation-with-waterpigscouk-aaronpareckicom-and--62-24908.html" class="u-in-reply-to"><time class="date" datetime="2013-04-19T20:26:16+02:00">April 19, 2013 8:26pm GMT+0200</time></a></div></li>        </ul>
+      </div>
+    
+      <div class="content reply">
+<!-- author h-card (p-author): photo, name, site URL and a Google+ profile link -->
+            <div class="minicard h-card vcard author p-author">
+      <img class="photo logo u-photo" src="http://aaronparecki.com/images/aaronpk.png" alt="Aaron Parecki" />
+      <a class="p-name fn value" href="http://aaronparecki.com/">Aaron Parecki</a>
+      <a href="http://aaronparecki.com/" rel="author" class="u-url url">aaronparecki.com</a>
+      <a href="https://plus.google.com/117847912875913905493" rel="author" class="google-profile">Aaron Parecki</a>
+    </div>
+    <div style="clear:both;"></div>
+<!-- reply text: the element below carries both e-content and p-name -->
+        <div class="note-text entry-content e-content p-name"><a href="http://eschnou.com/">@eschnou</a> It worked! Now here's a reply! <a href="/tag/indieweb">#<span class="p-category">indieweb</span></a>        </div> <!-- e-content -->
+
+        <!-- permalink (u-url) wrapping the dt-published timestamp -->
+        <div class="meta">
+          <a href="http://aaronparecki.com/replies/2013/04/19/2/indieweb" class="u-url"><time class="date dt-published" datetime="2013-04-19T11:35:50-07:00">April 19, 2013 11:35am</time></a> PDT        </div>
+
+      </div><!-- content -->
+
+      
+    </div>
+    <div class="span4">
+      <!-- sidebar: prev/next/up links, section menus, tag list, share box and site search -->
+      <div class="sidebar"><div>
+            <nav class="site-navigation">
+              <a class="prev" href="/replies/2013/04/19/1/" title="@maxticket Yes! Did you write it down? Also how's the game going? @caseorganic @clobbr" rel="prev"><abbr>&larr;</abbr></a>
+                    <a class="next" href="/replies/2013/04/19/3/" title="@hzlzh Thanks for noticing! It looks like there is a problem at the moment, it's getting stuck on some photos! Will look into it. Thanks!" rel="next"><abbr>&rarr;</abbr></a>
+            <a class="up" href="/replies?before=2013-04-19-2"><abbr>Replies</abbr></a>
+    </nav>
+            <div id="mainnav"><ul id="mainnav-1"><li><a href="/articles">Articles</a></li><li><a href="/notes">Notes</a></li><li><a href="/replies">Replies</a></li></ul><ul id="mainnav-2"><li><a href="/presentations">Presentations</a></li><li><a href="/pages">Pages</a></li><li><a href="/contact">Contact</a></li></ul></div>        <ul class="tag-list"><li><a href="/tag/indieweb" rel="tag" class="p-category">indieweb <span></span></a></li></ul>
+        <div class="share-and-respond">
+          <h3>Share &amp; Respond</h3>
+          <!-- each input carries the id named by its label's for= attribute, so label and field are associated -->
+                        <label for="permalink" class="permalink-label">Permalink</label> 
+    <input type="text" id="permalink" name="permalink" value="http://aaronparecki.com/replies/2013/04/19/2/indieweb" class="permalink" onclick="this.focus(); this.select();" />
+
+          <label for="shortlink" class="permalink-label">Shortlink</label> 
+      <input type="text" id="shortlink" name="shortlink" value="http://aaron.pk/r4P_2" class="shortlink" onclick="this.focus(); this.select();" />
+    
+    
+          <div class="web-actions">
+                        <div class="fb-like" data-send="false" data-width="265" data-show-faces="false" href="http://aaronparecki.com/replies/2013/04/19/2/indieweb"></div>
+          </div>
+          
+          <div style="clear: both;"></div>
+        </div>
+              <form action="http://www.google.com/search" method="get" class="search">
+        <div class="input-append">
+          <input type="text" name="q" placeholder="Search" class="span10" aria-label="Search" />
+          <button type="submit" class="btn" aria-label="Search"><i class="icon-search"></i></button>
+        </div>
+        <input type="hidden" name="as_sitesearch" value="aaronparecki.com">
+        <input type="hidden" name="tbs" value="sbd:1,cdr:1,cd_min:1/1/1999">
+      </form>
+    
+      </div></div>
+
+    </div>
+  </div><!-- row -->
+</div><!-- container -->
+
+
+      <footer class="page-footer">
+        <!-- sign-in form: sends the entered domain ("me") and a redirect_uri to indieauth.com via GET -->
+        <div class="container-fluid"><div class="row-fluid"><div class="span12">
+
+                      <form action="https://indieauth.com/auth" method="get" class="web-signin">
+              <div class="input-prepend input-append pull-right">
+                <span class="add-on"><i class="icon-globe"></i></span>
+                <input type="text" name="me" placeholder="yourdomain.com" class="" aria-label="Your domain" />
+                <input type="submit" class="btn" value="Sign In" />
+              </div>
+              <input type="hidden" name="redirect_uri" value="http://aaronparecki.com/signin.php?redirect=%2Freplies%2F2013%2F04%2F19%2F2%2Findieweb" />
+            </form>
+          
+        </div></div></div>
+        <!-- copyright, licence and credit notices -->
+        <div class="container-fluid"><div class="row-fluid">
+          <p>&copy; 1999-2013 by Aaron Parecki.</p>
+          <p class="license">
+            Except where otherwise noted, text content on this site is licensed under a <a href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution 3.0 License</a>. <a href="http://creativecommons.org/licenses/by/3.0/" rel="license"><img src="/images/cc-by.png" alt="Creative Commons Attribution 3.0" /></a>
+          </p>
+          <p class="credit">
+            This site is powered by <a href="http://indiewebcamp.com/p3k">p3k</a>.
+          </p>
+        </div></div> <!-- closes both row-fluid and container-fluid so no div is left open at the footer end tag -->
+      </footer>
+    </div> <!-- #page -->
+
+    <script type="text/javascript" src="/js/jquery-1.7.1.min.js"></script>
+<script type="text/javascript" src="/bootstrap-2.2.2/js/bootstrap.min.js"></script>
+<script type="text/javascript">
+// Keyboard paging: an unmodified Left/Right arrow key follows the rel="prev" / rel="next" link.
+$(document).keydown(function(evt){
+    // Presses with Cmd/Ctrl/Alt/Shift held are left untouched.
+    var hasModifier = evt.metaKey || evt.ctrlKey || evt.altKey || evt.shiftKey;
+    if(hasModifier) return;
+
+    var direction;
+    if(evt.keyCode == 37) direction = 'prev';       // Left
+    else if(evt.keyCode == 39) direction = 'next';  // Right
+    else return;                                     // any other key: nothing to do
+
+    var linkSelector = "a[rel='" + direction + "']";
+    if($(linkSelector).length > 0){
+      $(linkSelector).addClass('hover');
+      window.location.href = $(linkSelector).attr('href');
+    }
+    // The handler returns false for arrow keys whether or not a link was found.
+    return false;
+});
+// Once the DOM is ready, activate popovers on any element with rel=popover.
+$(document).ready(function(){
+  $('*[rel="popover"]').popover();
+});
+</script>
+
+<script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0];if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src="https://platform.twitter.com/widgets.js";fjs.parentNode.insertBefore(js,fjs);}}(document,"script","twitter-wjs");</script> <!-- injects widgets.js once, guarded by the twitter-wjs id; explicit https so the URL does not inherit file: when the page is opened from disk -->
+  </body>
+</html>