# view backend/sqlalchemy/FeedUpdater.py @ 121:510a5d00e98a backend
#
# re-enabled AddFeed - does not work yet
# author Dirk Olmes <dirk@xanthippe.ping.de>
# date Sun, 21 Aug 2011 04:17:13 +0200
# parents FeedUpdater.py@e4038dd8cc0e
# children 862760b161b4
# line wrap: on
# line source


from datetime import datetime
from Feed import Feed
from FeedEntry import FeedEntry
import feedparser
import logging

# HTTP status codes at or above this value make FeedUpdater.getFeed raise
# a FeedUpdateException.
STATUS_ERROR = 400
# Logger shared by the module-level functions and the FeedUpdater class.
log = logging.getLogger("FeedUpdater")

def updateAllFeeds(session):
    """Update every feed that is due and commit the session after each one.

    A FeedUpdateException raised while updating a single feed is logged as a
    warning and does not stop the remaining feeds from being processed. The
    session is committed after every feed, whether its update succeeded or
    not. Any other exception propagates to the caller.

    session -- the database session used for querying and storing feeds.
    """
    for feed in findFeedsToUpdate(session):
        try:
            FeedUpdater(session, feed).update()
        # "except ... as ..." is valid from Python 2.6 on and is the only
        # spelling Python 3 accepts.
        except FeedUpdateException as fue:
            log.warning("problems while updating feed %s: %s", feed.rss_url, fue)
        session.commit()

def findFeedsToUpdate(session):
    """Return a query for all feeds whose next_update lies before the current local time."""
    now = datetime.now()
    isDue = Feed.next_update < now
    return session.query(Feed).filter(isDue)

def normalize(entry):
    """Fill in the attributes of a parsed feed entry that later code relies on.

    The entry is modified in place:

    * ``id`` falls back to the entry's ``link`` when it is missing.
    * ``updated_parsed`` is converted from a time tuple into a ``datetime``.
      When it is missing or ``None`` the current local time is used instead.
      A value that already is a ``datetime`` is left alone, so normalizing
      the same entry twice is harmless.
    * ``summary`` falls back to the value of the first ``content`` element,
      or to the empty string when there is no (or only empty) content.

    entry -- the entry object to normalize; nothing is returned.
    """
    if not hasattr(entry, "id"):
        entry.id = entry.link

    updated = getattr(entry, "updated_parsed", None)
    if updated is None:
        # attribute absent, or present but without a usable date
        entry.updated_parsed = datetime.today()
    elif not isinstance(updated, datetime):
        # only the first six fields (year .. second) map onto datetime()
        entry.updated_parsed = datetime(*updated[:6])

    if not hasattr(entry, "summary"):
        content = getattr(entry, "content", None)
        if content:
            entry.summary = content[0].value
        else:
            entry.summary = ""

class FeedUpdater(object):
    """Fetches one feed and stores entries that are not yet in the database.

    session -- the database session new objects are added to.
    feed    -- the Feed whose rss_url is fetched by update().
    """

    def __init__(self, session, feed):
        self.session = session
        self.feed = feed

    # TODO this is a HACK! creating new instances from itself is bad but required due to the storage of the session.
    def createNewFeed(self, url):
        """Create a Feed for url, add it to the session and update it once.

        The feed's own title is used as the title of the new Feed; when the
        parsed document carries no title the url is used instead.
        """
        # when updating to python3 see http://code.google.com/p/feedparser/issues/detail?id=260
        result = feedparser.parse(url)
        # The title is stored on the "feed" element of the parse result, not
        # on the result itself, so it has to be looked up there.
        feedInfo = result.get("feed") or {}
        title = feedInfo.get("title") or url
        newFeed = Feed(title, url)
        self.session.add(newFeed)

        FeedUpdater(self.session, newFeed).update()

    def update(self):
        """Fetch the feed, store its unknown entries and schedule the next update.

        Raises FeedUpdateException (from getFeed) when the feed could not be
        retrieved; in that case the next update date is left unchanged.
        """
        log.info("updating %s", self.feed.rss_url)
        result = self.getFeed()
        for entry in result.entries:
            self.processEntry(entry)
        self.feed.incrementNextUpdateDate()

    def getFeed(self):
        """Parse the feed's rss_url and return the feedparser result.

        Raises FeedUpdateException when the server answered with a status of
        STATUS_ERROR or above, or when no status is available at all and the
        parser reported an error without delivering any entries.
        """
        result = feedparser.parse(self.feed.rss_url)
        # bozo flags if a feed is well-formed.
        # if result["bozo"] > 0:
        #     raise FeedUpdateException()

        # The status is not present when the feed was not retrieved over
        # HTTP (e.g. the connection failed), so it must not be indexed blindly.
        status = result.get("status")
        if status is None:
            error = result.get("bozo_exception")
            if error is not None and not result.get("entries"):
                raise FeedUpdateException("could not retrieve feed: " + str(error))
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def processEntry(self, entry):
        """Normalize entry and store it unless an entry with its id already exists."""
        normalize(entry)
        feedEntry = FeedEntry.findById(entry.id, self.session)
        if feedEntry is None:
            self.createFeedEntry(entry)

    def createFeedEntry(self, entry):
        """Create a FeedEntry from entry, attach it to this feed and add it to the session."""
        new = FeedEntry.create(entry)
        new.feed = self.feed
        self.session.add(new)
        log.info("new feed entry: %s", entry.title)

class FeedUpdateException(Exception):
    """Raised when a feed could not be retrieved while updating it."""