Mercurial > hg > Feedworm
diff backend/sqlalchemy/FeedUpdater.py @ 121:510a5d00e98a backend
re-enabled AddFeed - does not work yet
author | Dirk Olmes <dirk@xanthippe.ping.de> |
---|---|
date | Sun, 21 Aug 2011 04:17:13 +0200 |
parents | FeedUpdater.py@e4038dd8cc0e |
children | 862760b161b4 |
line wrap: on
line diff
# backend/sqlalchemy/FeedUpdater.py
# Reconstructed from the flattened changeset diff
# (/dev/null -> b/backend/sqlalchemy/FeedUpdater.py, @@ -0,0 +1,84 @@).

from datetime import datetime
from Feed import Feed
from FeedEntry import FeedEntry
import feedparser
import logging

# HTTP status codes at or above this value are treated as a failed fetch.
STATUS_ERROR = 400
log = logging.getLogger("FeedUpdater")


def updateAllFeeds(session):
    """Update every feed that is due and commit the session.

    A FeedUpdateException raised for one feed is logged and does not stop
    the remaining feeds from being processed.
    """
    for feed in findFeedsToUpdate(session):
        try:
            FeedUpdater(session, feed).update()
        except FeedUpdateException as fue:
            log.warning("problems while updating feed %s: %s", feed.rss_url, fue)
    # NOTE(review): the original indentation was lost in the flattened diff;
    # the commit is placed after the loop (one commit per run) -- confirm.
    session.commit()


def findFeedsToUpdate(session):
    """Return a query for all feeds whose next_update lies in the past."""
    return session.query(Feed).filter(Feed.next_update < datetime.now())


def normalize(entry):
    """Fill in the attributes the rest of this module relies on.

    After the call the entry has:
      * ``id``             -- falls back to ``entry.link``
      * ``updated_parsed`` -- always a ``datetime``; the current time is used
                              when the feed supplied no usable date
      * ``summary``        -- falls back to the first content element's value,
                              or the empty string
    """
    if not hasattr(entry, "id"):
        entry.id = entry.link

    # The date may be absent, or present but None when it could not be parsed.
    parsed = getattr(entry, "updated_parsed", None)
    if parsed is None:
        entry.updated_parsed = datetime.today()
    elif not isinstance(parsed, datetime):
        # A time tuple: the first six fields are year .. second.
        entry.updated_parsed = datetime(*parsed[:6])
    # else: already a datetime (normalize was called before) -- keep it.

    if not hasattr(entry, "summary"):
        if hasattr(entry, "content"):
            entry.summary = entry.content[0].value
        else:
            entry.summary = ""


class FeedUpdater(object):
    """Fetches one feed and stores its new entries through the session."""

    def __init__(self, session, feed):
        self.session = session
        self.feed = feed

    # TODO this is a HACK! creating new instances from itself is bad but
    # required due to the storage of the session.
    def createNewFeed(self, url):
        """Create a Feed for ``url``, add it to the session and update it.

        The feed's own title is used when it has one, otherwise the URL.
        """
        # when updating to python3 see
        # http://code.google.com/p/feedparser/issues/detail?id=260
        result = feedparser.parse(url)
        # The title is stored on the "feed" element of the parse result, not
        # on the result itself; fall back to the URL when it is missing/empty.
        title = result["feed"].get("title") or url
        newFeed = Feed(title, url)
        self.session.add(newFeed)

        # NOTE: update() fetches and parses the URL a second time.
        FeedUpdater(self.session, newFeed).update()

    def update(self):
        """Fetch the feed, process each entry and schedule the next update.

        Raises FeedUpdateException when the feed could not be retrieved.
        """
        log.info("updating %s", self.feed.rss_url)
        result = self.getFeed()
        for entry in result.entries:
            self.processEntry(entry)
        self.feed.incrementNextUpdateDate()

    def getFeed(self):
        """Parse the feed's URL and return the feedparser result.

        Raises FeedUpdateException for HTTP status >= STATUS_ERROR, or when
        no HTTP status is available and parsing yielded an error and no
        entries.
        """
        result = feedparser.parse(self.feed.rss_url)
        # The bozo flag (feed not well-formed) alone is deliberately not
        # treated as an error: many malformed feeds still parse usefully.
        status = result.get("status")
        if status is None:
            # No HTTP status is reported e.g. for local files or when the
            # connection failed; only give up if nothing usable was parsed.
            if result.get("bozo") and not result.get("entries"):
                raise FeedUpdateException(
                    "could not retrieve feed: " + str(result.get("bozo_exception")))
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def processEntry(self, entry):
        """Normalize ``entry`` and store it unless its id is already known."""
        normalize(entry)
        feedEntry = FeedEntry.findById(entry.id, self.session)
        if feedEntry is None:
            self.createFeedEntry(entry)

    def createFeedEntry(self, entry):
        """Create a FeedEntry for ``entry``, attach it to this feed and add
        it to the session."""
        new = FeedEntry.create(entry)
        new.feed = self.feed
        self.session.add(new)
        # Entries are not required to carry a title.
        log.info("new feed entry: %s", getattr(entry, "title", "(untitled)"))


class FeedUpdateException(Exception):
    """Raised when a feed could not be fetched."""
    pass