# -*- coding: utf-8 -*-
# CurlyTx Atom feed parser
# Copyright (C) 2011 Christian Weiske <cweiske@cweiske.de>
+# License: GPLv3 or later
from twisted.web.client import getPage
from xml.etree.cElementTree import fromstring
+import os
+
class AtomFeed:
- """Simple XML parser that extracts pages from a atom feed
- """
+ """ Simple XML parser that extracts pages from a atom feed """
ns = "{http://www.w3.org/2005/Atom}"
- def __init__(self, url, callback):
- getPage(url).addCallback(self.parse, callback).addErrback(self.fail)
+ nsc = "{http://ns.cweiske.de/curlytx}"
+ errorCallback = None
+
+ def __init__(self, url, callback, errorCallback):
+ """ Fetches the URL
+ Parsed pages are sent back to callback by parse()
+ """
+ self.errorCallback = errorCallback
+ if (url.startswith('file://')):
+ file = url[7:]
+ if not os.path.exists(file):
+ errorCallback('Settings atom feed file does not exist: ' + file)
+ return
- def fail(self, msg):
- print("CurlyTx", msg)
+ with open(file, 'r') as f:
+ self.parse(f.read(), callback)
+ else:
+ getPage(url).addCallback(self.parse, callback).addErrback(self.onError)
def parse(self, data, callback):
- xml = fromstring(data)
+ """ Parse atom feed data into pages list and run callback """
+ try:
+ xml = fromstring(data)
+ except Exception:
+ return self.errorCallback("Invalid XML")
+
pages = []
for entry in xml.findall("{0}entry".format(self.ns)):
titleE = entry.find("{0}title".format(self.ns))
if titleE != None and titleE.text != "" and url != None:
pages.append({"title": titleE.text, "url": url})
- callback(pages)
+ settings = dict()
+ for entry in list(xml):
+ if (entry.tag.startswith(self.nsc)):
+ settings[entry.tag[len(self.nsc):]] = entry.text
+
+ callback(pages, settings)
def bestLink(self, list):
+ """ Fetch the best matching link from an atom feed entry """
foundLevel = -1
foundHref = None
for link in list:
return foundHref
def level(self, link):
+ """ Determines the level of a link
+
+ "text/plain" type links are best, links without type are second.
+ All others have the lowest level 1.
+ """
type = link.get("type")
if type == "text/plain":
return 3
elif type == "":
return 2
return 1
+
+ def onError(self, error):
+ """ Pass the error message only """
+ self.errorCallback(error.getErrorMessage())