/**
 * Walks over stored pingback contents, parses each one into a DOM document
 * and stores the comment data found in it in the "rcomments" table.
 *
 * Bookmark and link extraction are not implemented yet (see the FIXME notes).
 */
class Content_Extractor
{
    /**
     * Dependency container; its "log" and "options" members are read here.
     *
     * @var Dependencies
     */
    public $deps;

    /**
     * Database handle taken from the dependency container.
     * Used through query(), quote() and PDO-style fetch().
     *
     * @var object
     */
    public $db;

    /**
     * Logger taken from the dependency container; info() takes a
     * printf-style format string followed by its arguments.
     *
     * @var object
     */
    public $log;

    /**
     * @param Dependencies $deps Container providing "db", "log" and "options"
     */
    public function __construct(Dependencies $deps)
    {
        // Keep the container itself: extractContent() and sqlNeedsUpdate()
        // read $this->deps->log and $this->deps->options.
        $this->deps = $deps;
        $this->db   = $deps->db;
        $this->log  = $deps->log;
    }

    /**
     * Extracts content from all pingbackcontent entries and puts it
     * into rbookmarks/rcomments/rlinks.
     *
     * Rows are selected from pingbackcontent joined with pingbacks
     * (p_id = pc_p_id), narrowed by the condition from sqlNeedsUpdate().
     *
     * @return void
     */
    public function updateAll()
    {
        $this->log->info('Extracting pingback content..');
        $res = $this->db->query(
            'SELECT * FROM pingbackcontent, pingbacks'
            . ' WHERE p_id = pc_p_id' . $this->sqlNeedsUpdate()
        );

        // Count the processed rows for the summary log line below.
        $items = 0;
        while ($contentRow = $res->fetch(\PDO::FETCH_OBJ)) {
            ++$items;
            $this->extractContent($contentRow);
        }
        $this->log->info('Finished extracting %d pingback contents.', $items);
    }

    /**
     * Parses one pingback content row and stores the comment found in it.
     *
     * @param object $contentRow Joined pingbackcontent/pingbacks row; the
     *                           properties pc_id, pc_mime_type, pc_fulltext,
     *                           p_source and p_target are read
     *
     * @return void
     */
    protected function extractContent($contentRow)
    {
        $doc = $this->loadDocument($contentRow);
        if ($doc === null) {
            $this->log->info(
                'Could not parse pingback content %s, skipping.',
                $contentRow->pc_id
            );
            return;
        }

        //FIXME: delete old content

        $ce = new Content_Extractor_Comment($this->deps->log);
        $data = $ce->extract($doc, $contentRow->p_source, $contentRow->p_target);

        // NOTE(review): assumes extract() returns null/empty when the document
        // contains no comment - confirm against Content_Extractor_Comment.
        if (!empty($data)) {
            $this->log->info('Comment found');
            // NOTE(review): query() is used because it is the only statement
            // execution method this class is known to call on $this->db;
            // switch to exec()/prepare() if the handle supports them.
            $this->db->query(
                'INSERT INTO rcomments SET'
                . ' rc_pc_id = ' . $this->db->quote($contentRow->pc_id)
                . ', rc_source = ' . $this->db->quote($contentRow->p_source)
                . ', rc_target = ' . $this->db->quote($contentRow->p_target)
                . ', rc_title = ' . $this->db->quote($data['title'])
                . ', rc_author_name = ' . $this->db->quote($data['author_name'])
                . ', rc_author_url = ' . $this->db->quote($data['author_url'])
                . ', rc_author_image = ' . $this->db->quote($data['author_image'])
                . ', rc_content = ' . $this->db->quote($data['content'])
                . ', rc_updated = NOW()'
            );
        }

        //FIXME: bookmark, link
    }

    /**
     * Builds a DOM document from the row's full text, choosing the XML or
     * HTML parser by the row's MIME type.
     *
     * @param object $contentRow Row providing pc_mime_type and pc_fulltext
     *
     * @return \DOMDocument|null Parsed document, or null when the full text
     *                           is empty or the parser rejects it
     */
    protected function loadDocument($contentRow)
    {
        // DOMDocument::loadXML()/loadHTML() throw a ValueError on an empty
        // string in PHP 8, so bail out before calling them.
        $fulltext = (string) $contentRow->pc_fulltext;
        if ($fulltext === '') {
            return null;
        }

        // Drop parameters such as "; charset=utf-8". MIME types are
        // case-insensitive and may carry whitespace before the ";".
        $typeParts = explode(';', (string) $contentRow->pc_mime_type);
        $type = strtolower(trim($typeParts[0]));
        $isXml = $type === 'application/xhtml+xml'
            || $type === 'application/xml'
            || $type === 'text/xml';

        $doc = new \DOMDocument();

        // Collect parser complaints internally instead of emitting one PHP
        // warning per markup error; restore the previous setting afterwards.
        $previous = libxml_use_internal_errors(true);
        if ($isXml) {
            $loaded = $doc->loadXML($fulltext);
        } else {
            $loaded = $doc->loadHTML($fulltext);
        }
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $loaded ? $doc : null;
    }

    /**
     * Returns the extra SQL condition that limits updateAll() to rows that
     * still need processing.
     *
     * NOTE(review): the value returned when the "force" option is set was
     * not recoverable from the reviewed source; an empty string (no
     * restriction) is assumed - confirm.
     *
     * @return string SQL fragment starting with " AND", or an empty string
     */
    protected function sqlNeedsUpdate()
    {
        // empty() avoids a notice when the "force" option is not set at all.
        if (!empty($this->deps->options['force'])) {
            return '';
        }
        return ' AND pc_detected_type = 1';
    }
}