Mercurial > hg > Feedworm
diff backend/AbstractFeedUpdater.py @ 141:6ea813cfac33
pull out common code for updating a feed into an abstract class, have the sqlalchemy backend use that class.
author | Dirk Olmes <dirk@xanthippe.ping.de> |
---|---|
date | Wed, 24 Aug 2011 10:53:46 +0200 |
parents | |
children | 74217db92993 |
line wrap: on
line diff
# Reconstructed from the diff hunk "--- /dev/null +++ b/backend/AbstractFeedUpdater.py
# @@ -0,0 +1,54 @@": the whole file is newly added by this changeset.

from datetime import datetime
import feedparser
import logging

# HTTP status codes at or above this value are treated as a failed retrieval.
STATUS_ERROR = 400
log = logging.getLogger("FeedUpdater")


class AbstractFeedUpdater(object):
    '''
    Abstract base class for FeedUpdater implementations - handles all the parsing of the feed.
    Subclasses need to implement creating and storing the new feed entries
    by overriding _processEntry().
    '''

    def __init__(self, feed):
        '''
        Remember the feed that update() will work on.

        The feed object must provide an rss_url attribute and an
        incrementNextUpdateDate() method; both are used by this class.
        '''
        self.feed = feed

    def update(self):
        '''
        Retrieve the feed, normalize every entry and hand it to _processEntry().

        The feed's next update date is only incremented after all entries were
        processed; any exception raised on the way leaves it untouched.

        Raises FeedUpdateException if the feed could not be retrieved.
        '''
        # let the logging module do the formatting so it is skipped when INFO is disabled
        log.info("updating %s", self.feed.rss_url)
        result = self._retrieveFeed()
        for entry in result.entries:
            self._normalize(entry)
            self._processEntry(entry)
        self.feed.incrementNextUpdateDate()

    def _retrieveFeed(self):
        '''
        Fetch and parse the feed at self.feed.rss_url and return feedparser's result.

        Raises FeedUpdateException if the server answered with an HTTP status of
        STATUS_ERROR or above, or if nothing could be retrieved at all.
        '''
        result = feedparser.parse(self.feed.rss_url)
        # The bozo flag alone is not treated as an error: the original check for
        # it was disabled, so feeds that still yield entries are processed.
        status = result.get("status")
        if status is None:
            # The result carries no "status" when the feed was not fetched over
            # HTTP or the request failed before a response arrived. Only give up
            # if the parser reported a problem and produced no entries.
            error = result.get("bozo_exception")
            if error is not None and not result.get("entries"):
                raise FeedUpdateException("feed could not be retrieved: " + str(error))
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def _normalize(self, entry):
        '''
        Fill in the attributes later processing relies on: id, updated_parsed
        (converted to a datetime) and summary. The entry is modified in place.
        '''
        if not hasattr(entry, "id"):
            # NOTE(review): assumes an entry without an id always has a link - confirm
            entry.id = entry.link

        # The attribute may be missing or None; fall back to "now" in both
        # cases instead of failing on None[:6].
        parsed = getattr(entry, "updated_parsed", None)
        if parsed is None:
            entry.updated_parsed = datetime.today()
        else:
            # first six fields of the time tuple: year, month, day, hour, minute, second
            entry.updated_parsed = datetime(*parsed[:6])

        if not hasattr(entry, "summary"):
            # use the first content element as summary; an absent or empty
            # content list results in an empty summary
            content = getattr(entry, "content", None)
            if content:
                entry.summary = content[0].value
            else:
                entry.summary = ""

    def _processEntry(self, entry):
        '''
        Create and store a new feed entry from the normalized entry.
        Abstract - subclasses must override.
        '''
        # NotImplementedError is still an Exception, so existing handlers keep working
        raise NotImplementedError("_processEntry is abstract, subclasses must override")


class FeedUpdateException(Exception):
    '''
    Raised when a feed could not be retrieved.
    '''