1 # -*- coding: utf-8 -*-
2 # CurlyTx Atom feed parser
3 # Copyright (C) 2011 Christian Weiske <cweiske@cweiske.de>
4 # License: GPLv3 or later
6 from twisted.web.client import getPage
7 from xml.etree.cElementTree import fromstring, ParseError
""" Simple XML parser that extracts pages from an Atom feed """
# ElementTree-style "{uri}" prefix used to look up the Atom elements
# (entry, title, link).
ns = "{http://www.w3.org/2005/Atom}"
# ElementTree-style "{uri}" prefix of the CurlyTx elements; direct children
# of the feed root in this namespace are collected as settings.
nsc = "{http://ns.cweiske.de/curlytx}"
def __init__(self, url, callback, errorCallback):
    """ Fetch the feed at url and hand it to parse()

    Parsed pages are sent back to callback by parse()

    url           -- feed location; "file://" URLs are read from the local
                     file system, everything else is fetched with getPage()
    callback      -- passed on to parse(), which calls it with the result
    errorCallback -- called with a message string when the feed cannot be
                     loaded or parsed
    """
    self.errorCallback = errorCallback
    if url.startswith('file://'):
        # NOTE(review): path derivation reconstructed from the 'file://'
        # check above - confirm against the canonical file.
        path = url[len('file://'):]
        if not os.path.exists(path):
            errorCallback('Settings atom feed file does not exist: ' + path)
            return

        with open(path, 'r') as f:
            self.parse(f.read(), callback)
    else:
        # Asynchronous fetch: parse() runs once the page arrived, onError()
        # reports download failures.
        getPage(url).addCallback(self.parse, callback).addErrback(self.onError)
def parse(self, data, callback):
    """ Parse atom feed data into pages list and run callback

    data     -- the serialized Atom feed document
    callback -- called as callback(pages, settings); pages is a list of
                {"title": ..., "url": ...} dicts, settings maps the local
                names of the CurlyTx-namespaced root children to their text

    Invalid XML is reported through self.errorCallback, whose result is
    returned; callback is not run in that case.
    """
    try:
        xml = fromstring(data)
    except ParseError:
        return self.errorCallback("Invalid XML")

    pages = []
    for entry in xml.findall("{0}entry".format(self.ns)):
        titleE = entry.find("{0}title".format(self.ns))
        url = self.bestLink(entry.findall("{0}link".format(self.ns)))
        # An empty <title/> element has text None rather than "", so test
        # for truthiness to skip both kinds of missing title.
        if titleE is not None and titleE.text and url is not None:
            pages.append({"title": titleE.text, "url": url})

    settings = {}
    for child in xml:
        if child.tag.startswith(self.nsc):
            # Strip the namespace prefix to get the bare setting name.
            settings[child.tag[len(self.nsc):]] = child.text

    callback(pages, settings)
def bestLink(self, list):
    """ Fetch the best matching link from an atom feed entry

    list -- the <link> elements of one entry

    Only "alternate" links are considered; a missing or empty rel
    attribute counts as "alternate". Returns the href of the link with the
    highest level(), the last one winning on a tie, or None when no link
    qualifies.
    """
    foundLevel = -1
    foundHref = None
    for link in list:
        rel = link.get("rel")
        # get() yields None for an absent attribute, and Atom defines an
        # absent rel as "alternate" - only skip explicit other relations.
        if rel and rel != "alternate":
            continue

        level = self.level(link)
        if foundLevel > level:
            continue

        foundLevel = level
        foundHref = link.get("href")

    return foundHref
def level(self, link):
    """ Determines the level of a link

    "text/plain" type links are best, links without type are second.
    All others have the lowest level 1.

    link -- a <link> element; only its "type" attribute is inspected
    """
    # NOTE(review): only the lowest value (1) is documented; 3 and 2 are
    # reconstructed and merely need to keep this ordering - confirm.
    linkType = link.get("type")
    if linkType == "text/plain":
        return 3
    # get() returns None for an absent attribute, so treat None and ""
    # alike as "without type".
    if not linkType:
        return 2
    return 1
def onError(self, error):
    """ Pass the error message only

    error -- failure object handed to this errback; only the text from its
             getErrorMessage() is forwarded to self.errorCallback
    """
    self.errorCallback(error.getErrorMessage())