view backend/sqlalchemy/FeedUpdater.py @ 128:32a173cb081c backend

move updating the feeds to the backend
author Dirk Olmes <dirk@xanthippe.ping.de>
date Mon, 22 Aug 2011 15:04:53 +0200
parents 862760b161b4
children 6ea813cfac33
line wrap: on
line source


from datetime import datetime
from Feed import Feed
from FeedEntry import FeedEntry
import feedparser
import logging

# HTTP status codes at or above this value are treated as a failed fetch.
STATUS_ERROR = 400
# Logger used by the functions and classes in this module.
log = logging.getLogger("FeedUpdater")

def updateAllFeeds(session):
    """Update every feed that is due for an update.

    A feed whose update raises FeedUpdateException is logged and skipped so
    that the remaining feeds are still processed. The session is committed
    once per feed, whether or not that feed's update succeeded.

    session -- database session used to query the feeds and to store entries
    """
    for feed in findFeedsToUpdate(session):
        try:
            FeedUpdater(session, feed).update()
        except FeedUpdateException as fue:
            # "except ... as" is valid on Python 2.6+ and Python 3, unlike the
            # comma form. Logging arguments are passed lazily.
            log.warning("problems while updating feed %s: %s", feed.rss_url, fue)
        session.commit()

def findFeedsToUpdate(session):
    """Return a query for all feeds whose next_update lies in the past.

    session -- database session used to build the query
    """
    feedQuery = session.query(Feed)
    now = datetime.now()
    return feedQuery.filter(Feed.next_update < now)

def normalize(entry):
    """Fill in the attributes of a parsed feed entry that later code relies on.

    After the call the entry has:
      id             -- falls back to entry.link if the feed supplied no id
      updated_parsed -- a datetime; the current time if the feed supplied no
                        (parsable) update date
      summary        -- falls back to the value of the first content element,
                        or to the empty string if there is none

    entry -- the entry object to normalize; it is modified in place

    Raises AttributeError if the entry has neither an id nor a link.
    """
    if not hasattr(entry, "id"):
        entry.id = entry.link

    # The attribute may be present but None (unparsable date), so check the
    # value rather than only the attribute's presence.
    parsedDate = getattr(entry, "updated_parsed", None)
    if parsedDate is None:
        entry.updated_parsed = datetime.today()
    elif not isinstance(parsedDate, datetime):
        # A time tuple: the first six fields are year .. second.
        entry.updated_parsed = datetime(*parsedDate[:6])
    # An existing datetime is left alone so that normalizing twice is harmless.

    if not hasattr(entry, "summary"):
        content = getattr(entry, "content", None)
        # An empty content list is treated like a missing one.
        entry.summary = content[0].value if content else ""

def createNewFeed(url, session):
    """Create a Feed for url, add it to the session and fetch its entries.

    The feed's title is taken from the parsed feed; if the feed supplies no
    (non-empty) title the url is used instead. The new feed is added to the
    session but not committed here.

    Note that the url is parsed here to find the title and then fetched again
    by FeedUpdater.update().

    url     -- address of the feed
    session -- database session the new feed and its entries are added to

    Raises FeedUpdateException if the initial update fails.
    """
    # when updating to python3 see http://code.google.com/p/feedparser/issues/detail?id=260
    result = feedparser.parse(url)
    # The title lives in the "feed" element of the result, not at its top
    # level, so that is where it has to be looked up.
    feedInfo = result.get("feed") or {}
    title = feedInfo.get("title") or url
    newFeed = Feed(title, url)
    session.add(newFeed)

    FeedUpdater(session, newFeed).update()


class FeedUpdater(object):
    """Fetches a single feed and stores the entries that are not yet known.

    session -- database session used to look up and add feed entries
    feed    -- the feed to update; its rss_url is the address that is fetched
    """

    def __init__(self, session, feed):
        self.session = session
        self.feed = feed

    def update(self):
        """Fetch the feed, store its new entries and advance its update date.

        Raises FeedUpdateException if the feed could not be retrieved. In that
        case the feed's next update date is not incremented.
        """
        log.info("updating %s", self.feed.rss_url)
        result = self.getFeed()
        for entry in result.entries:
            self.processEntry(entry)
        self.feed.incrementNextUpdateDate()

    def getFeed(self):
        """Parse the feed's rss_url and return the feedparser result.

        Raises FeedUpdateException if the server answered with an HTTP error
        status, or if there was no HTTP status at all and parsing produced an
        error and no entries.
        """
        result = feedparser.parse(self.feed.rss_url)
        # The bozo flag (feed is not well-formed) alone is deliberately not
        # treated as an error: such feeds can still carry usable entries.
        status = result.get("status")
        if status is None:
            # No HTTP response was recorded, e.g. after a network failure.
            # Report it as an update problem instead of failing with KeyError,
            # which would abort the update of all remaining feeds.
            if result.get("bozo") and not result.get("entries"):
                reason = result.get("bozo_exception")
                raise FeedUpdateException("could not retrieve feed: " + str(reason))
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def processEntry(self, entry):
        """Normalize entry and store it unless an entry with its id exists."""
        normalize(entry)
        feedEntry = FeedEntry.findById(entry.id, self.session)
        if feedEntry is None:
            self.createFeedEntry(entry)

    def createFeedEntry(self, entry):
        """Create a FeedEntry from entry, attach it to the feed and add it to the session."""
        new = FeedEntry.create(entry)
        new.feed = self.feed
        self.session.add(new)
        log.info("new feed entry: %s", entry.title)

class FeedUpdateException(Exception):
    """Raised when a feed cannot be updated, e.g. on an HTTP error status."""