1 from twisted.web.client import getPage
2 from xml.etree.cElementTree import fromstring
5 """Simple XML parser that extracts pages from an Atom feed
# Atom namespace URI wrapped in braces, so it can be prepended to a tag name to
# build the "{uri}tag" qualified names passed to find()/findall() in parse().
7 ns = "{http://www.w3.org/2005/Atom}"
def __init__(self, url, callback):
    """Start fetching ``url`` and route the outcome to this object's handlers.

    The fetch is asynchronous: ``getPage`` hands back a Deferred, and the
    handlers registered below fire once the request finishes.

    url      -- address of the feed to download.
    callback -- forwarded unchanged to ``parse`` as its second argument.
    """
    # NOTE(review): getPage is deprecated in newer Twisted releases -- confirm
    # the Twisted version this project targets.
    request = getPage(url)
    # On success, parse is called as parse(<response body>, callback).
    request.addCallback(self.parse, callback)
    # Registered after the callback, so self.fail sees both download errors
    # and exceptions raised inside parse.
    request.addErrback(self.fail)
# Deferred callback for the downloaded feed: parses the XML text in `data` and
# collects one {"title": ..., "url": ...} dict per usable <entry> into `pages`.
# NOTE(review): this listing has gaps -- the line that creates `pages` and
# whatever follows the loop are not visible here, and `callback` is not used in
# any visible line; presumably `pages` is handed to it afterwards. Confirm
# against the complete file.
15 def parse(self, data, callback):
16 xml = fromstring(data)
# Visit each Atom <entry> element that is a direct child of the root element.
18 for entry in xml.findall("{0}entry".format(self.ns)):
19 titleE = entry.find("{0}title".format(self.ns))
# All <link> children of the entry are passed to bestLink(); its result is
# compared against None below.
20 url = self.bestLink(entry.findall("{0}link".format(self.ns)))
# Keep the entry only when it has a <title> element and a link was chosen.
# NOTE(review): for an empty <title/> element `text` is None rather than "",
# so such an entry passes this test and is stored with title None. Also,
# `is not None` is the idiomatic form of these comparisons.
21 if titleE != None and titleE.text != "" and url != None:
22 pages.append({"title": titleE.text, "url": url})
# Selects a link from `list` (the <link> elements of one feed entry, as passed
# by parse()) by comparing the value level() returns for each candidate.
# NOTE(review): several lines of this method are missing from this listing
# (the loop over `list`, the initial foundLevel/foundHref values, the body of
# the first `if`, and the return statement), so only the visible statements
# are described here.
# NOTE(review): the parameter name `list` shadows the builtin of that name.
26 def bestLink(self, list):
# True for links whose rel attribute is neither "alternate" nor "".
# NOTE(review): link.get("rel") returns None when the attribute is absent, and
# None satisfies this test too. Atom (RFC 4287) says a missing rel means
# "alternate" -- confirm such links are not being discarded by this branch.
30 if link.get("rel") != "alternate" and link.get("rel") != "":
32 level = self.level(link)
# True when this candidate's level is numerically lower than the best so far.
33 if foundLevel > level:
# Remember the href attribute of the current candidate.
36 foundHref = link.get("href")
39 def level(self, link):
40 type = link.get("type")
41 if type == "text/plain":