Mercurial > hg > Feedworm
view backend/sqlalchemy/FeedUpdater.py @ 136:7217b060b39c
implement a Feed class that can be used to query feed and that wraps view results
author | Dirk Olmes <dirk@xanthippe.ping.de> |
---|---|
date | Tue, 23 Aug 2011 04:15:46 +0200 |
parents | 862760b161b4 |
children | 6ea813cfac33 |
line wrap: on
line source
"""Fetch RSS/Atom feeds with feedparser and store new entries via a session."""

from datetime import datetime
import logging

import feedparser

from Feed import Feed
from FeedEntry import FeedEntry

# HTTP status codes at or above this value are treated as fetch failures.
STATUS_ERROR = 400

log = logging.getLogger("FeedUpdater")


def updateAllFeeds(session):
    """Update every feed that is due and commit the session afterwards.

    A feed that fails with FeedUpdateException is logged and skipped so that
    one broken feed does not prevent the remaining feeds from being updated.
    """
    for feed in findFeedsToUpdate(session):
        try:
            FeedUpdater(session, feed).update()
        except FeedUpdateException as fue:
            log.warning("problems while updating feed " + feed.rss_url + ": " + str(fue))
    # single commit once all due feeds have been processed
    session.commit()


def findFeedsToUpdate(session):
    """Return a query for all feeds whose next_update lies in the past."""
    return session.query(Feed).filter(Feed.next_update < datetime.now())


def normalize(entry):
    """Fill in the attributes of a parsed feed entry that later code relies on.

    After this call ``entry`` has an ``id`` (falling back to ``entry.link``),
    an ``updated_parsed`` that is a ``datetime`` and a ``summary`` string.
    """
    if not hasattr(entry, "id"):
        entry.id = entry.link

    # updated_parsed may be missing entirely or present but None when the
    # date could not be parsed; both cases fall back to the current time.
    parsed = getattr(entry, "updated_parsed", None)
    if parsed is None:
        entry.updated_parsed = datetime.today()
    elif not isinstance(parsed, datetime):
        # a time tuple: the first six fields are year .. second
        entry.updated_parsed = datetime(*parsed[:6])
    # else: already converted by an earlier normalize() call, keep as is

    if not hasattr(entry, "summary"):
        content = getattr(entry, "content", None)
        # guard against an empty content list
        entry.summary = content[0].value if content else ""


def createNewFeed(url, session):
    """Create a Feed for ``url``, add it to ``session`` and fetch its entries.

    The feed's title is taken from the parsed feed data; if the feed does not
    provide one the URL is used as title instead.
    """
    # when updating to python3 see http://code.google.com/p/feedparser/issues/detail?id=260
    result = feedparser.parse(url)
    # the title is stored in the "feed" part of the parse result, not at its
    # top level
    feedInfo = result.get("feed") or {}
    title = feedInfo.get("title") or url
    newFeed = Feed(title, url)
    session.add(newFeed)
    FeedUpdater(session, newFeed).update()


class FeedUpdater(object):
    """Fetches a single feed and stores entries that are not yet known."""

    def __init__(self, session, feed):
        self.session = session
        self.feed = feed

    def update(self):
        """Fetch the feed, store its new entries and schedule the next update.

        Raises FeedUpdateException if the feed could not be retrieved.
        """
        log.info("updating " + self.feed.rss_url)
        result = self.getFeed()
        for entry in result.entries:
            self.processEntry(entry)
        self.feed.incrementNextUpdateDate()

    def getFeed(self):
        """Parse the feed's URL and return the feedparser result.

        Raises FeedUpdateException on an HTTP error status, or when no HTTP
        status is available and parsing failed without yielding any entries.
        """
        result = feedparser.parse(self.feed.rss_url)
        # bozo flags if a feed is well-formed. It is deliberately not treated
        # as an error on its own so that ill-formed feeds are still processed.
        status = result.get("status")
        if status is None:
            # No HTTP status: the feed did not come from a web server or the
            # request failed before a response arrived. Only give up if
            # nothing usable was parsed.
            if result.get("bozo") and not result.get("entries"):
                raise FeedUpdateException(
                    "could not retrieve feed: " + str(result.get("bozo_exception")))
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def processEntry(self, entry):
        """Normalize ``entry`` and store it unless its id is already known."""
        normalize(entry)
        # a None result from the lookup means the entry is not stored yet
        feedEntry = FeedEntry.findById(entry.id, self.session)
        if feedEntry is None:
            self.createFeedEntry(entry)

    def createFeedEntry(self, entry):
        """Create a FeedEntry for ``entry``, attach it to the feed and add it to the session."""
        new = FeedEntry.create(entry)
        new.feed = self.feed
        self.session.add(new)
        log.info("new feed entry: " + entry.title)


class FeedUpdateException(Exception):
    """Raised when a feed cannot be retrieved."""
    pass