Mercurial > hg > Feedworm
view backend/AbstractFeedUpdater.py @ 160:86f828096aaf
Do not fetch and parse the feed twice when creating a new one. Pass the parsed info into the update method instead to reuse.
author | dirk |
---|---|
date | Mon, 29 Aug 2011 03:07:50 +0200 |
parents | 74217db92993 |
children | 04c3b9796b89 |
line wrap: on
line source
from datetime import datetime
import logging

import feedparser

# Lowest HTTP status code that is treated as a failed retrieval.
STATUS_ERROR = 400

log = logging.getLogger("FeedUpdater")


class AbstractFeedUpdater(object):
    '''
    Abstract base class for FeedUpdater implementations - handles all the
    parsing of the feed.

    Subclasses need to implement creating and storing the new feed entries
    by overriding _processEntry and _incrementFeedUpdateDate.
    '''

    def __init__(self, feed):
        '''
        feed -- the feed object to update; its rss_url attribute is read
                when the feed has to be fetched.
        '''
        self.feed = feed

    def update(self, feedDict=None):
        '''
        Process all entries of the feed.

        feedDict -- an already parsed feed (the result of feedparser.parse).
                    When None, the feed is fetched from self.feed.rss_url.

        Raises FeedUpdateException if the feed has to be fetched and the
        retrieval fails.
        '''
        # Lazy %-formatting: the message is only built if INFO is enabled.
        log.info("updating %s", self.feed.rss_url)
        if feedDict is None:
            feedDict = self._retrieveFeed()
        self._processEntries(feedDict)

    def _retrieveFeed(self):
        '''
        Fetch and parse self.feed.rss_url and return the parsed result.

        Raises FeedUpdateException if the server answered with a status of
        STATUS_ERROR or above, or if nothing could be retrieved at all.
        '''
        result = feedparser.parse(self.feed.rss_url)
        # The bozo flag alone (feed is not well-formed) is deliberately not
        # treated as an error: such a feed may still contain usable entries.
        status = result.get("status")
        if status is None:
            # feedparser only sets "status" when an HTTP response was
            # received. Without one, a set bozo flag combined with no
            # entries means that nothing usable was retrieved.
            if result.get("bozo") and not result.get("entries"):
                raise FeedUpdateException(
                    "could not retrieve feed: %r"
                    % (result.get("bozo_exception"),))
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def _processEntries(self, feedDict):
        '''
        Normalize and process every entry of the parsed feed, then call
        _incrementFeedUpdateDate once after all entries were handled.
        '''
        for entry in feedDict.entries:
            self._normalize(entry)
            self._processEntry(entry)
        self._incrementFeedUpdateDate()

    def _normalize(self, entry):
        '''
        Fill in the attributes of entry that the processing relies on:

        id             -- defaults to entry.link
        updated_parsed -- converted to a datetime; defaults to the current
                          local time when missing or None
        summary        -- defaults to the value of the first content
                          element, or "" when there is no content

        The entry is modified in place. Calling this again on an already
        normalized entry leaves it unchanged.
        '''
        if not hasattr(entry, "id"):
            entry.id = entry.link

        updated = getattr(entry, "updated_parsed", None)
        if updated is None:
            # Missing, or present but None (date could not be parsed).
            entry.updated_parsed = datetime.today()
        elif not isinstance(updated, datetime):
            # A time tuple: year, month, day, hour, minute, second come first.
            entry.updated_parsed = datetime(*updated[:6])

        if not hasattr(entry, "summary"):
            content = getattr(entry, "content", None)
            # Guard against a missing as well as an empty content list.
            entry.summary = content[0].value if content else ""

    def _processEntry(self, entry):
        '''
        Abstract: handle a single normalized entry. Subclasses must override.
        '''
        raise NotImplementedError(
            "_processEntry is abstract, subclasses must override")

    def _incrementFeedUpdateDate(self):
        '''
        Abstract: called once after all entries of a feed were processed
        (presumably to advance the feed's update date - see the subclasses).
        Subclasses must override.
        '''
        raise NotImplementedError(
            "_incrementFeedUpdateDate is abstract, subclasses must override")


class FeedUpdateException(Exception):
    '''
    Raised when a feed could not be retrieved.
    '''
    pass