5 * Fetches entries that need an update and extracts their links
7 class Feed_UpdateEntries
/**
 * Updates every feed entry that is flagged for an update or has never
 * been fetched.
 *
 * Selects all rows of "feedentries" with fe_needs_update = 1 or a zero
 * fe_updated timestamp and passes each one to updateEntry().
 * An HTTP/transport failure for one entry is logged and does not stop
 * the remaining entries from being processed.
 *
 * @return void
 */
public function updateAll()
{
    $this->log->info('Updating feed entries..');
    $res = $this->db->query(
        'SELECT * FROM feedentries'
        . ' WHERE fe_needs_update = 1 OR fe_updated = "0000-00-00 00:00:00"'
    );
    while ($entryRow = $res->fetch(\PDO::FETCH_OBJ)) {
        $this->log->info(
            sprintf(
                'Updating feed entry #%d: %s',
                $entryRow->fe_id, $entryRow->fe_url
            )
        );
        try {
            $this->updateEntry($entryRow);
        } catch (\HTTP_Request2_Exception $e) {
            //a request failure affects only this entry. Its row is left
            //untouched, so the SELECT above picks it up again on the next run
            $this->log->err(
                sprintf(
                    'Error updating feed entry #%d: %s',
                    $entryRow->fe_id, $e->getMessage()
                )
            );
        }
    }
    $this->log->info('Finished updating entries.');
}
/**
 * Fetches the URL of a single feed entry and refreshes its stored links.
 *
 * The request is made with HTTP_Request2 and the User-Agent "stapibas".
 * A 304 response hands the entry to setNoUpdate(); any status outside
 * 2xx is logged as an error; otherwise the URLs returned by extractUrls()
 * are passed to updateUrls() and the entry is passed to setUpdated().
 *
 * NOTE(review): several lines of this method are not visible in this
 * listing (the calls that consume the Accept string and the date below,
 * the assignment of $res, and whatever ends the 304/error branches).
 * Presumably $res is the result of sending $req and both branches return
 * early - confirm against the full file.
 *
 * @param object $entryRow Row from "feedentries"; fe_url and fe_updated
 *                         are read here
 */
31 protected function updateEntry($entryRow)
33 $req = new \HTTP_Request2($entryRow->fe_url);
34 $req->setHeader('User-Agent', 'stapibas');
//media type preference list: XHTML first, XML next, HTML last.
//NOTE(review): further list parts may exist on lines not shown here
37 'application/xhtml+xml; q=1'
38 . ', application/xml; q=0.9'
40 . ', text/html; q=0.5'
//only entries that were fetched before have a non-zero fe_updated
44 if ($entryRow->fe_updated != '0000-00-00 00:00:00') {
//NOTE(review): setUpdated() writes fe_updated with gmdate(), but
//strtotime() parses it in the default timezone here, and 'r' formats
//the zone as "+0000" rather than "GMT" - confirm this is intended
47 gmdate('r', strtotime($entryRow->fe_updated))
//304: content unchanged, only clear the update flag
52 if ($res->getStatus() == 304) {
54 $this->setNoUpdate($entryRow);
55 $this->log->info('Not modified');
//dividing by 100 gives the status class (2 for 200-299)
59 if (intval($res->getStatus() / 100) != 2) {
60 //no 2xx is an error for us
61 $this->log->err('Error fetching feed entry URL');
//extract the links, sync them with the database, mark the entry as done
65 $urls = $this->extractUrls($entryRow, $res);
66 $this->updateUrls($entryRow, $urls);
67 $this->setUpdated($entryRow, $res);
/**
 * Synchronises the "feedentryurls" rows of one feed entry with the URLs
 * found on its page.
 *
 * URLs without a row are inserted, rows with feu_active == 0 whose URL is
 * on the page again are updated, and rows whose URL is no longer on the
 * page are updated as well. A summary line is built from the counters at
 * the end.
 *
 * NOTE(review): some lines are not visible in this listing - the calls
 * that execute the INSERT/UPDATE strings, parts of their SET lists, the
 * initialisation of $urlRows and the counter increments. Presumably the
 * hidden SET parts toggle feu_active; confirm against the full file.
 *
 * @param object $entryRow Row from "feedentries"; fe_id is read here
 * @param array  $urls     URLs found in the entry; duplicates are removed
 */
70 protected function updateUrls($entryRow, $urls)
//load all rows already stored for this entry ...
72 $res = $this->db->query(
73 'SELECT * FROM feedentryurls'
74 . ' WHERE feu_fe_id = ' . $this->db->quote($entryRow->fe_id)
//... and index them by URL so they can be looked up with isset()
77 while ($urlRow = $res->fetch(\PDO::FETCH_OBJ)) {
78 $urlRows[$urlRow->feu_url] = $urlRow;
81 $urls = array_unique($urls);
//$items counts the URLs after duplicates were removed
83 $new = $updated = $deleted = 0;
84 $items = count($urls);
86 foreach ($urls as $url) {
87 if (!isset($urlRows[$url])) {
88 //URL is not known - insert it
//"INSERT ... SET" is MySQL syntax
90 'INSERT INTO feedentryurls SET'
91 . ' feu_fe_id = ' . $this->db->quote($entryRow->fe_id)
92 . ', feu_url = ' . $this->db->quote($url)
95 . ', feu_updated = NOW()'
98 } else if ($urlRows[$url]->feu_active == 0) {
99 //URL is known already, but was once deleted and is back now
101 'UPDATE feedentryurls SET'
103 . ', feu_updated = NOW()'
104 . ' WHERE feu_id = ' . $this->db->quote($urlRows[$url]->feu_id)
//drop the handled row so it is not treated as removed below
107 unset($urlRows[$url]);
109 //already known, all fine
110 unset($urlRows[$url]);
//only rows that matched no URL of the page are left in $urlRows now
114 //these URLs are in DB but not on the page anymore
115 foreach ($urlRows as $urlRow) {
118 'UPDATE feedentryurls SET'
120 . ', feu_updated = NOW()'
121 . ' WHERE feu_id = ' . $this->db->quote($urlRow->feu_id)
//summary of what was done for this entry
126 'Feed entry #%d: %d new, %d updated, %d deleted of %d URLs',
127 $entryRow->fe_id, $new, $updated, $deleted, $items
/**
 * Extracts the link URLs from the content part of the entry's h-entry.
 *
 * The response body is parsed as XML for XML content types and as HTML
 * otherwise (also when the XML turns out not to be well-formed).
 * Links are taken from "a" elements inside an element with class
 * "e-content" or "entry-content" which itself is inside an element with
 * class "h-entry" or "hentry". Links whose href starts with "#" are
 * skipped; all other hrefs are resolved against the entry URL.
 *
 * @param object                  $entryRow Row from "feedentries";
 *                                          fe_url is the base URL
 * @param \HTTP_Request2_Response $res      Response of fetching fe_url
 *
 * @return array Absolute URLs in document order; may contain duplicates
 */
protected function extractUrls($entryRow, \HTTP_Request2_Response $res)
{
    $body = (string) $res->getBody();
    if (trim($body) === '') {
        //nothing to parse, and DOMDocument rejects empty input
        $this->log->info(sprintf('%d links found', 0));
        return array();
    }

    //media types are case-insensitive, may be followed by parameters
    //and the header may be missing altogether
    $typeParts = explode(';', (string) $res->getHeader('content-type'));
    $type      = strtolower(trim($typeParts[0]));
    $isXml     = in_array(
        $type,
        array('application/xhtml+xml', 'application/xml', 'text/xml'),
        true
    );

    $doc = new \DOMDocument();
    //real-world markup is rarely valid: keep parser errors internal
    //instead of letting them surface as PHP warnings
    $previous = libxml_use_internal_errors(true);
    $loaded   = false;
    if ($isXml) {
        //LIBXML_NONET: never fetch anything from the network while
        //parsing a document that came from a remote server
        $loaded = $doc->loadXML($body, LIBXML_NONET);
    }
    if (!$loaded) {
        //HTML, or XML that is not well-formed: use the lenient parser
        $doc->loadHTML($body);
    }
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new \DOMXPath($doc);
    $xpath->registerNamespace('h', 'http://www.w3.org/1999/xhtml');
    $query = '//*[' . $this->xpc('h-entry') . ' or ' . $this->xpc('hentry') . ']'
        . '//*[' . $this->xpc('e-content') . ' or ' . $this->xpc('entry-content') . ']'
        . '//*[(self::a or self::h:a) and @href and not(starts-with(@href, "#"))]';
    $links = $xpath->query($query);
    $this->log->info(sprintf('%d links found', $links->length));

    $entryUrl = new \Net_URL2($entryRow->fe_url);
    //FIXME: base URL in html code

    $urls = array();
    foreach ($links as $link) {
        //the query guarantees that the href attribute exists
        $url = (string) $entryUrl->resolve($link->getAttribute('href'));
        $this->log->info('URL in entry: ' . $url);
        $urls[] = $url;
    }
    return $urls;
}
/**
 * Builds an XPath condition that is true for elements carrying the given
 * CSS class.
 *
 * The class attribute is whitespace-normalised and padded with spaces so
 * that only whole class tokens match ("entry" does not match "hentry").
 *
 * @param string $class Class name; inserted as-is into a double-quoted
 *                      XPath string, so it must not contain a double quote
 *
 * @return string XPath boolean expression for use inside a predicate
 */
protected function xpc($class)
{
    $paddedClass = ' ' . $class . ' ';

    return sprintf(
        'contains(concat(" ", normalize-space(@class), " "),"%s")',
        $paddedClass
    );
}
/**
 * Clears the "needs update" flag of a feed entry.
 *
 * Only fe_needs_update is written; the other columns of the row stay as
 * they are.
 *
 * @param object $entryRow Row from "feedentries"; fe_id selects the row
 *
 * @return void
 */
protected function setNoUpdate($entryRow)
{
    $quotedId = $this->db->quote($entryRow->fe_id);
    $sql = 'UPDATE feedentries SET fe_needs_update = 0'
        . ' WHERE fe_id = ' . $quotedId;

    $this->db->exec($sql);
}
/**
 * Marks a feed entry as updated.
 *
 * Clears fe_needs_update and stores the response's Last-Modified time
 * (as a GMT "Y-m-d H:i:s" string) in fe_updated. When the header is
 * missing or cannot be parsed, the current time is stored instead, so
 * the row never ends up with the Unix epoch as its update time.
 *
 * @param object                  $entryRow Row from "feedentries";
 *                                          fe_id selects the row
 * @param \HTTP_Request2_Response $res      Response of fetching the entry
 *
 * @return void
 */
protected function setUpdated($entryRow, \HTTP_Request2_Response $res)
{
    $modified = false;
    $header   = $res->getHeader('last-modified');
    if (is_string($header) && trim($header) !== '') {
        $modified = strtotime($header);
    }
    if ($modified === false) {
        //no usable Last-Modified header: fall back to the fetch time
        $modified = time();
    }

    $this->db->exec(
        'UPDATE feedentries'
        . ' SET fe_needs_update = 0'
        . ', fe_updated = ' . $this->db->quote(
            gmdate('Y-m-d H:i:s', $modified)
        )
        . ' WHERE fe_id = ' . $this->db->quote($entryRow->fe_id)
    );
}