rework crawler; add atom link extraction
[phinde.git] / src / phinde / LinkExtractor / Atom.php
1 <?php
2 namespace phinde\LinkExtractor;
3
4 use phinde\LinkInfo;
5
/**
 * Extracts entry links from an Atom feed document.
 */
class Atom
{
    /**
     * Collect the "alternate" links of all entries in an Atom feed.
     *
     * The response body is parsed as XML. For every <entry>, each <link>
     * whose relation is "alternate" (or that has no "rel" attribute, which
     * RFC 4287 defines as equivalent to "alternate") is resolved against the
     * effective URL of the response and returned as a LinkInfo. Each
     * resolved URL is returned at most once.
     *
     * @param \HTTP_Request2_Response $res HTTP response containing the feed
     *
     * @return LinkInfo[] Links found in the feed; empty array when the body
     *                    cannot be parsed as XML or contains no usable links
     */
    public function extract(\HTTP_Request2_Response $res)
    {
        $url  = $res->getEffectiveUrl();
        $base = new \Net_URL2($url);

        $linkInfos   = array();
        $alreadySeen = array();

        $sx = simplexml_load_string($res->getBody());
        if ($sx === false) {
            // Empty or malformed XML: there is nothing to extract.
            return $linkInfos;
        }

        foreach ($sx->entry as $entry) {
            $linkTitle = (string) $entry->title;
            foreach ($entry->link as $xlink) {
                $href = trim((string) $xlink['href']);
                if ($href === '') {
                    // Without a href the link would resolve to the feed
                    // URL itself, which is not an entry.
                    continue;
                }

                // RFC 4287, section 4.2.7.2: a link without "rel" has to be
                // interpreted as rel="alternate".
                $rel = (string) $xlink['rel'];
                if ($rel !== '' && $rel !== 'alternate') {
                    continue;
                }

                $linkUrl = (string) $base->resolve($href);
                if (isset($alreadySeen[$linkUrl])) {
                    continue;
                }

                // Only URLs that were actually emitted count as seen, so
                // that a skipped non-alternate link cannot hide a later
                // alternate link pointing to the same URL.
                $alreadySeen[$linkUrl] = true;

                // The feed URL is handed over as third argument, as before.
                $linkInfos[] = new LinkInfo($linkUrl, $linkTitle, $url);
            }
        }

        return $linkInfos;
    }
}
35 ?>