view backend/AbstractFeedUpdater.py @ 160:86f828096aaf

Do not fetch and parse the feed twice when creating a new one. Instead, pass the already parsed feed into the update method so it can be reused.
author dirk
date Mon, 29 Aug 2011 03:07:50 +0200
parents 74217db92993
children 04c3b9796b89
line wrap: on
line source


from datetime import datetime
import feedparser
import logging

# Module-wide logger; registered under the fixed name "FeedUpdater".
log = logging.getLogger("FeedUpdater")
# Lowest HTTP status code that is treated as a failed feed retrieval.
STATUS_ERROR = 400

class AbstractFeedUpdater(object):
    '''
    Abstract base class for FeedUpdater implementations - handles all the parsing of the feed.
    Subclasses need to implement creating and storing the new feed entries
    (_processEntry) and recording that the feed was updated
    (_incrementFeedUpdateDate).
    '''

    def __init__(self, feed):
        '''
        feed -- the feed object to update; its rss_url attribute is read when
                logging and when retrieving the feed.
        '''
        self.feed = feed

    def update(self, feedDict=None):
        '''
        Normalize and process every entry of the feed.

        feedDict -- an already parsed feed result to reuse; when None (the
                    default) the feed is fetched and parsed from
                    self.feed.rss_url.

        Raises FeedUpdateException if the feed has to be retrieved and the
        retrieval fails.
        '''
        # Let the logging module do the formatting instead of concatenating strings.
        log.info("updating %s", self.feed.rss_url)
        if feedDict is None:
            feedDict = self._retrieveFeed()
        self._processEntries(feedDict)

    def _retrieveFeed(self):
        '''
        Fetch and parse the feed at self.feed.rss_url and return the parse result.

        Raises FeedUpdateException if the server answered with an HTTP error
        status, or if no HTTP response was received and nothing could be parsed.
        '''
        result = feedparser.parse(self.feed.rss_url)
        # The bozo flag (feed is not well-formed) is on its own not treated as
        # an error; ill-formed feeds that still yield entries are processed.
        # "status" is only set when an HTTP response was received, so it must
        # not be indexed unconditionally (connection failures, local sources).
        status = result.get("status")
        if status is None:
            error = result.get("bozo_exception")
            if error is not None and not result.get("entries"):
                raise FeedUpdateException("could not retrieve feed: " + str(error))
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def _processEntries(self, feedDict):
        '''
        Normalize each entry of feedDict.entries, hand it to _processEntry and
        finally call _incrementFeedUpdateDate once.
        '''
        for entry in feedDict.entries:
            self._normalize(entry)
            self._processEntry(entry)
        self._incrementFeedUpdateDate()

    def _normalize(self, entry):
        '''
        Fill in the attributes of entry (in place) that later processing relies on:

        id             -- falls back to entry.link
        updated_parsed -- converted to a datetime; current UTC time if missing
        summary        -- falls back to the first content value, else ""

        Calling this more than once on the same entry is harmless.
        '''
        if not hasattr(entry, "id"):
            entry.id = entry.link

        updated = getattr(entry, "updated_parsed", None)
        if updated is None:
            # Missing or unparseable date. Parsed feed dates are UTC time
            # tuples, so the fallback is taken in UTC as well to stay comparable.
            entry.updated_parsed = datetime.utcnow()
        elif not isinstance(updated, datetime):
            # Time tuple -> datetime (year, month, day, hour, minute, second).
            entry.updated_parsed = datetime(*updated[:6])
        # else: already a datetime from an earlier normalization, keep it.

        if not hasattr(entry, "summary"):
            if hasattr(entry, "content"):
                entry.summary = entry.content[0].value
            else:
                entry.summary = ""

    def _processEntry(self, entry):
        '''
        Create and store a single normalized entry. Must be overridden.
        '''
        raise NotImplementedError("_processEntry is abstract, subclasses must override")

    def _incrementFeedUpdateDate(self):
        '''
        Called once after all entries were processed. Must be overridden.
        '''
        raise NotImplementedError("_incrementFeedUpdateDate is abstract, subclasses must override")


class FeedUpdateException(Exception):
    '''
    Raised when a feed could not be retrieved, e.g. because of an HTTP error status.
    '''