/**
 * Walks over fetched pingback content, tries to recognise it as a comment
 * or a link and stores the result in the matching result table.
 *
 * Result tables written by this class: rcomments and rlinks. Rows in
 * rbookmarks are cleared for re-processed content, but no bookmark
 * extractor is invoked here.
 */
class Content_Extractor
{
    /**
     * Database handle taken from the dependency container.
     * NOTE(review): presumably PDO or a PDO subclass, since query(), exec(),
     * quote() and fetch(\PDO::FETCH_OBJ) are used on it - confirm.
     */
    public $db;

    /**
     * Dependency container; its "log" and "options" members are read here.
     */
    public $deps;

    /**
     * Logger offering an info() method with sprintf-style arguments.
     */
    public $log;

    /**
     * @param Dependencies $deps Container providing "db", "log" and "options"
     */
    public function __construct(Dependencies $deps)
    {
        // extractContent() and sqlNeedsUpdate() read $this->deps, so the
        // container itself has to be kept, not only db and log.
        $this->deps = $deps;
        $this->db   = $deps->db;
        $this->log  = $deps->log;
    }

    /**
     * Extracts content from all pingbackcontent entries and puts it
     * into rcomments/rlinks.
     *
     * Only rows without a detected type are processed unless the "force"
     * option is set.
     */
    public function updateAll()
    {
        $this->log->info('Extracting pingback content..');
        $res = $this->db->query(
            'SELECT * FROM pingbackcontent, pingbacks'
            . ' WHERE p_id = pc_p_id' . $this->sqlNeedsUpdate()
        );

        // Count processed rows so that the summary line reports a real number.
        $items = 0;
        while ($contentRow = $res->fetch(\PDO::FETCH_OBJ)) {
            ++$items;
            $this->extractContent($contentRow);
        }
        $this->log->info('Finished extracting %d pingback contents.', $items);
    }

    /**
     * Detects the type of a single content row and stores the extracted data.
     *
     * Previously extracted rows for this content are removed first, then the
     * comment extractor and the link extractor are tried in that order. The
     * first one that yields data wins; otherwise the row is marked "nothing".
     *
     * @param object $contentRow Joined pingbackcontent/pingbacks row with the
     *                           properties pc_id, pc_mime_type, pc_fulltext,
     *                           p_id, p_source and p_target
     */
    protected function extractContent($contentRow)
    {
        $doc = $this->loadDocument($contentRow);
        $this->deleteExtracted($contentRow);

        $ce   = new Content_Extractor_Comment($this->deps->log);
        $data = $ce->extract($doc, $contentRow->p_source, $contentRow->p_target);
        // NOTE(review): assumes extract() returns an array on success and a
        // non-array value (e.g. null) otherwise - confirm against the
        // extractor classes.
        if (is_array($data)) {
            $this->log->info('Comment found');
            $this->insertExtracted(
                'rcomments', 'rc', $contentRow, $data,
                array(
                    'title', 'author_name', 'author_url',
                    'author_image', 'content',
                )
            );
            $this->setDetectedType($contentRow, 'comment');
            return;
        }

        $ce   = new Content_Extractor_Link($this->deps->log);
        $data = $ce->extract($doc, $contentRow->p_source, $contentRow->p_target);
        if (is_array($data)) {
            $this->log->info('Link found');
            $this->insertExtracted(
                'rlinks', 'rl', $contentRow, $data,
                array('title', 'author_name', 'author_url', 'author_image')
            );
            $this->setDetectedType($contentRow, 'link');
            return;
        }

        $this->setDetectedType($contentRow, 'nothing');
        $this->log->info('Nothing found');
    }

    /**
     * Parses the stored full text into a DOM document.
     *
     * XML-ish MIME types are parsed as XML, everything else as HTML. Parser
     * diagnostics are collected internally instead of being raised as PHP
     * warnings, because the markup comes from remote pages.
     *
     * @param object $contentRow Row with pc_mime_type and pc_fulltext
     *
     * @return \DOMDocument Parsed document; empty when there is no content
     *                      or parsing failed
     */
    protected function loadDocument($contentRow)
    {
        $doc     = new \DOMDocument();
        $content = (string) $contentRow->pc_fulltext;
        if ($content === '') {
            // loadHTML()/loadXML() reject empty input (ValueError on PHP 8),
            // so hand back the empty document instead.
            return $doc;
        }

        // Strip parameters such as "; charset=utf-8". MIME types are
        // case-insensitive, hence the normalisation.
        $typeParts = explode(';', (string) $contentRow->pc_mime_type);
        $type      = strtolower(trim($typeParts[0]));
        $xmlTypes  = array(
            'application/xhtml+xml',
            'application/xml',
            'text/xml',
        );

        $previous = libxml_use_internal_errors(true);
        if (in_array($type, $xmlTypes, true)) {
            $doc->loadXML($content);
        } else {
            $doc->loadHTML($content);
        }
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $doc;
    }

    /**
     * Removes everything that has been extracted from the given content
     * before, so that re-processing does not create duplicates.
     *
     * @param object $contentRow Row with pc_id
     */
    protected function deleteExtracted($contentRow)
    {
        $pcId = $this->db->quote($contentRow->pc_id);
        $this->db->exec('DELETE FROM rbookmarks WHERE rb_pc_id = ' . $pcId);
        $this->db->exec('DELETE FROM rcomments WHERE rc_pc_id = ' . $pcId);
        $this->db->exec('DELETE FROM rlinks WHERE rl_pc_id = ' . $pcId);
    }

    /**
     * Inserts one extracted item into a result table.
     *
     * The row always gets the pingback id, content id, source, target and
     * an update timestamp; $fields lists the additional columns that are
     * copied from $data. "INSERT ... SET" is MySQL syntax.
     *
     * @param string $table      Result table name (internal constant)
     * @param string $prefix     Column prefix of that table, e.g. "rc"
     * @param object $contentRow Row with p_id, pc_id, p_source, p_target
     * @param array  $data       Data returned by the extractor
     * @param array  $fields     Keys of $data to store
     */
    protected function insertExtracted(
        $table, $prefix, $contentRow, array $data, array $fields
    ) {
        $values = array(
            'p_id'   => $contentRow->p_id,
            'pc_id'  => $contentRow->pc_id,
            'source' => $contentRow->p_source,
            'target' => $contentRow->p_target,
        );
        foreach ($fields as $field) {
            $values[$field] = $data[$field];
        }

        $assignments = array();
        foreach ($values as $column => $value) {
            $assignments[] = $prefix . '_' . $column
                . ' = ' . $this->db->quote($value);
        }
        $assignments[] = $prefix . '_updated = NOW()';

        $this->db->exec(
            'INSERT INTO ' . $table . ' SET ' . implode(', ', $assignments)
        );
    }

    /**
     * Stores the detection result on the pingbackcontent row.
     *
     * @param object $contentRow Row with pc_id
     * @param string $type       Detected type, e.g. "comment", "link"
     *                           or "nothing"
     */
    protected function setDetectedType($contentRow, $type)
    {
        $this->db->exec(
            'UPDATE pingbackcontent'
            . ' SET pc_detected_type = ' . $this->db->quote($type)
            . ' WHERE pc_id = ' . $this->db->quote($contentRow->pc_id)
        );
    }

    /**
     * Builds the SQL condition that limits processing to content rows
     * that have no detected type yet.
     *
     * @return string SQL fragment starting with " AND", or an empty string
     *                when the "force" option is set
     */
    protected function sqlNeedsUpdate()
    {
        // !empty() also covers a missing "force" key without raising
        // an undefined-index notice.
        // NOTE(review): the forced branch is assumed to add no condition at
        // all - confirm.
        if (!empty($this->deps->options['force'])) {
            return '';
        }
        return ' AND pc_detected_type = ""';
    }
}